Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 48 additions & 52 deletions src/haddock/clis/re/clustfcc.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
"""haddock3-re clustfcc subcommand."""

from pathlib import Path
import shutil
from fcc.scripts import cluster_fcc

from pathlib import Path

from haddock import log
from haddock.core.defaults import INTERACTIVE_RE_SUFFIX
Expand All @@ -17,8 +15,9 @@
rank_clusters,
write_structure_list,
)
from haddock.libs.libontology import ModuleIO
from haddock.libs.libfcc import read_matrix
from haddock.libs.libinteractive import look_for_capri, rewrite_capri_tables
from haddock.libs.libontology import ModuleIO
from haddock.modules.analysis.clustfcc.clustfcc import (
get_cluster_centers,
iterate_clustering,
Expand All @@ -32,72 +31,72 @@ def add_clustfcc_arguments(clustfcc_subcommand):
clustfcc_subcommand.add_argument(
"clustfcc_dir",
help="The clustfcc directory to recluster.",
)
)

clustfcc_subcommand.add_argument(
"-f",
"--clust_cutoff",
help="Minimum fraction of common contacts to be considered in a cluster.", # noqa: E501
required=False,
type=float,
)
)

clustfcc_subcommand.add_argument(
"-s",
"--strictness",
help="Strictness factor.",
required=False,
type=float,
)
)

clustfcc_subcommand.add_argument(
"-t",
"--min_population",
help="Clustering population threshold.",
required=False,
type=int,
)
)

clustfcc_subcommand.add_argument(
"-p",
"--plot_matrix",
help="Generate the matrix plot with the clusters.",
required=False,
default=False,
action='store_true',
)
action="store_true",
)

return clustfcc_subcommand


def reclustfcc(
clustfcc_dir: str,
clust_cutoff: Union[bool, float] = None,
strictness: Union[bool, float] = None,
min_population: Union[bool, int] = None,
plot_matrix : bool = True,
) -> Path:
clustfcc_dir: str,
clust_cutoff: Union[bool, float] = None,
strictness: Union[bool, float] = None,
min_population: Union[bool, int] = None,
plot_matrix: bool = True,
) -> Path:
"""
Recluster the models in the clustfcc directory.

Parameters
----------
clustfcc_dir : str
Path to the clustfcc directory.

clust_cutoff : Union[bool, float]
Fraction of common contacts to not be considered a singleton model.

strictness : Union[bool, float]
Fraction of common contacts to be considered to be part of the same
cluster.

min_population : Union[bool, int]
Minimum cluster population.

plot_matrix : bool
Should the corresponding matrix plot be generated.

Returns
-------
outdir : Path
Expand All @@ -121,8 +120,8 @@ def reclustfcc(

# load the original clustering parameters via json
clustfcc_params = read_config(Path(clustfcc_dir, "params.cfg"))
key = list(clustfcc_params['final_cfg'].keys())[0]
clustfcc_params = clustfcc_params['final_cfg'][key]
key = list(clustfcc_params["final_cfg"].keys())[0]
clustfcc_params = clustfcc_params["final_cfg"][key]
log.info(f"Previous clustering parameters: {clustfcc_params}")

# adjust the parameters
Expand All @@ -135,18 +134,17 @@ def reclustfcc(
clustfcc_params["plot_matrix"] = plot_matrix

# load the fcc matrix
pool = cluster_fcc.read_matrix(
pool = read_matrix(
Path(clustfcc_dir, "fcc.matrix"),
clustfcc_params['clust_cutoff'],
clustfcc_params['strictness'],
)
clustfcc_params["clust_cutoff"],
clustfcc_params["strictness"],
)

# iterate clustering until at least one cluster is found
clusters, min_population = iterate_clustering(
pool,
clustfcc_params['min_population']
)
clustfcc_params['min_population'] = min_population
pool, clustfcc_params["min_population"]
)
clustfcc_params["min_population"] = min_population
log.info(f"Updated clustering parameters: {clustfcc_params}")

# Prepare output and read the elements
Expand All @@ -160,20 +158,20 @@ def reclustfcc(
_score_dic, sorted_score_dic = rank_clusters(clt_dic, min_population)

output_models = add_cluster_info(sorted_score_dic, clt_dic)

# Write unclustered structures
write_structure_list(models,
output_models,
out_fname=Path(outdir, "clustfcc.tsv"))
write_structure_list(
models, output_models, out_fname=Path(outdir, "clustfcc.tsv")
)

write_clustfcc_file(
clusters,
clt_centers,
clt_dic,
clustfcc_params,
sorted_score_dic,
output_fname=Path(outdir, 'clustfcc.txt')
)
output_fname=Path(outdir, "clustfcc.txt"),
)

save_config(clustfcc_params, Path(outdir, "params.cfg"))

Expand All @@ -183,7 +181,7 @@ def reclustfcc(
if caprieval_folder:
log.info("Rewriting capri tables")
rewrite_capri_tables(caprieval_folder, clt_dic, outdir)

else:
output_models = models

Expand All @@ -194,25 +192,23 @@ def reclustfcc(
final_order_idx, labels, cluster_ids = [], [], []
for pdb in output_models:
final_order_idx.append(models.index(pdb))
labels.append(pdb.file_name.replace('.pdb', ''))
labels.append(pdb.file_name.replace(".pdb", ""))
cluster_ids.append(pdb.clt_id)
# Get custom cluster data
matrix_cluster_dt, cluster_limits = get_cluster_matrix_plot_clt_dt(
cluster_ids
)
matrix_cluster_dt, cluster_limits = get_cluster_matrix_plot_clt_dt(cluster_ids)
# Define output filename
html_matrix_basepath = Path(outdir, 'fcc_matrix')
html_matrix_basepath = Path(outdir, "fcc_matrix")
# Plot matrix
html_matrixpath = plot_cluster_matrix(
Path(clustfcc_dir, "fcc.matrix"),
final_order_idx,
labels,
dttype='FCC',
dttype="FCC",
diag_fill=1,
output_fname=html_matrix_basepath,
matrix_cluster_dt=matrix_cluster_dt,
cluster_limits=cluster_limits,
)
)
log.info(f"Plotting matrix in {html_matrixpath}")

return outdir
Loading