From 5b27a26a29e29094a816d7fa6d989f3f1d138100 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 2 Dec 2025 10:14:05 +0000 Subject: [PATCH 1/4] Initial plan From 256b52ea0655c242cd5a2a63038286ad2b57244e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 2 Dec 2025 10:23:34 +0000 Subject: [PATCH 2/4] Refactor summary_lines to use SummaryResult dataclass Co-authored-by: y1zhou <17245097+y1zhou@users.noreply.github.com> --- ipsae.py | 230 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 169 insertions(+), 61 deletions(-) diff --git a/ipsae.py b/ipsae.py index 9329870..0bec5ca 100644 --- a/ipsae.py +++ b/ipsae.py @@ -269,6 +269,100 @@ def csv_header_line() -> str: return "i,AlignChn,ScoredChain,AlignResNum,AlignResType,AlignRespLDDT,n0chn,n0dom,n0res,d0chn,d0dom,d0res,ipTM_pae,ipSAE_d0chn,ipSAE_d0dom,ipSAE\n" +@dataclass +class SummaryResult: + """Container for chain-pair summary score results. + + Attributes: + Chn1: first chain identifier + Chn2: second chain identifier + PAE: PAE cutoff value + Dist: Distance cutoff for CA-CA contacts + Type: "asym" or "max"; asym means asymmetric ipTM/ipSAE values; max is maximum of asym values + ipSAE: ipSAE value for given PAE cutoff and d0 determined by number of residues in 2nd chain with PAE str: + """Format the summary result as a fixed-width string.""" + pae_str = str(self.PAE).zfill(2) + dist_str = str(self.Dist).zfill(2) + return ( + f"{self.Chn1} {self.Chn2} {pae_str:3} {dist_str:3} {self.Type:5} " + f"{self.ipSAE:8.6f} " + f"{self.ipSAE_d0chn:8.6f} " + f"{self.ipSAE_d0dom:8.6f} " + f"{self.ipTM_af:5.3f} " + f"{self.ipTM_d0chn:8.6f} " + f"{self.pDockQ:8.4f} " + f"{self.pDockQ2:8.4f} " + f"{self.LIS:8.4f} " + f"{self.n0res:5d} " + f"{self.n0chn:5d} " + f"{self.n0dom:5d} " + f"{self.d0res:6.2f} " + f"{self.d0chn:6.2f} " + f"{self.d0dom:6.2f} " + f"{self.nres1:5d} " + f"{self.nres2:5d} " + f"{self.dist1:5d} " + f"{self.dist2:5d} " + f"{self.Model}\n" + ) + + @staticmethod + def header_line() -> str: + """Return the header line for the summary output.""" + return "Chn1 Chn2 PAE Dist Type ipSAE ipSAE_d0chn ipSAE_d0dom ipTM_af ipTM_d0chn pDockQ pDockQ2 LIS n0res n0chn n0dom d0res d0chn d0dom nres1 nres2 dist1 dist2 Model\n" + + @staticmethod + def csv_header_line() -> str: + """Return the CSV header line for the summary output.""" + return "Chn1,Chn2,PAE,Dist,Type,ipSAE,ipSAE_d0chn,ipSAE_d0dom,ipTM_af,ipTM_d0chn,pDockQ,pDockQ2,LIS,n0res,n0chn,n0dom,d0res,d0chn,d0dom,nres1,nres2,dist1,dist2,Model\n" + + @dataclass class ScoreResults: """Container for calculated scores and output data. @@ -282,7 +376,7 @@ class ScoreResults: lis_scores: Dictionary of LIS scores (by chain pair). metrics: Dictionary of pDockQ, pDockQ2, and LIS scores for each chain pair. by_res_data: Lists of per-residue scores. - summary_lines: List of summarized chain-pair scores. + summary_data: List of chain-pair summary score results. pymol_script: List of formatted strings for PyMOL script. """ @@ -294,7 +388,7 @@ class ScoreResults: metrics: dict[str, dict[str, float]] # {`_`: {metric_name: value}} by_res_data: list[PerResScoreResults] - summary_lines: list[str] # Storing the formatted lines for the summary output file + summary_data: list[SummaryResult] # List of chain-pair summary score results pymol_script: list[str] @@ -938,15 +1032,12 @@ def aggregate_byres_scores( d0chn: dict[str, dict[str, float]], d0dom: dict[str, dict[str, float]], pdb_stem: str, -) -> tuple[list, list, dict[str, dict[str, float]]]: +) -> tuple[list[SummaryResult], list[str], dict[str, dict[str, float]]]: """Aggregate per-residue scores into chain-pair-specific scores.""" # Store results in a structured way results_metrics: dict[str, dict[str, float]] = {} - summary_lines = [] - summary_lines.append( - "\nChn1 Chn2 PAE Dist Type ipSAE ipSAE_d0chn ipSAE_d0dom ipTM_af ipTM_d0chn pDockQ pDockQ2 LIS n0res n0chn n0dom d0res d0chn d0dom nres1 nres2 dist1 dist2 Model\n" - ) + summary_data: list[SummaryResult] = [] pymol_lines = [] pymol_lines.append( @@ -962,8 +1053,8 @@ def get_max_info(values_array, c1, c2): idx = np.argmax(vals) return vals[idx], residues[idx].residue_str, idx - pae_str = str(int(pae_cutoff)).zfill(2) - dist_str = str(int(dist_cutoff)).zfill(2) + pae_int = int(pae_cutoff) + dist_int = int(dist_cutoff) chainpairs = set() for c1 in unique_chains: for c2 in unique_chains: @@ -1000,30 +1091,34 @@ def get_max_info(values_array, c1, c2): if iptm_af == 0.0 and pae_data.iptm != -1.0: iptm_af = pae_data.iptm # Fallback to global if per-pair not found - outstring = ( - f"{c1} {c2} {pae_str:3} {dist_str:3} {'asym':5} " - f"{ipsae_res_val:8.6f} " - f"{ipsae_chn_val:8.6f} " - f"{ipsae_dom_val:8.6f} " - f"{iptm_af:5.3f} " - f"{iptm_chn_val:8.6f} " - f"{pDockQ[c1][c2]:8.4f} " - f"{pDockQ2[c1][c2]:8.4f} " - f"{LIS[c1][c2]:8.4f} " - f"{int(n0res_val):5d} " - f"{int(n0chn[c1][c2]):5d} " - f"{int(n0dom[c1][c2]):5d} " - f"{d0res_val:6.2f} " - f"{d0chn[c1][c2]:6.2f} " - f"{d0dom[c1][c2]:6.2f} " - f"{res1_cnt:5d} " - f"{res2_cnt:5d} " - f"{dist1_cnt:5d} " - f"{dist2_cnt:5d} " - f"{pdb_stem}\n" + summary_result = SummaryResult( + Chn1=c1, + Chn2=c2, + PAE=pae_int, + Dist=dist_int, + Type="asym", + ipSAE=float(ipsae_res_val), + ipSAE_d0chn=float(ipsae_chn_val), + ipSAE_d0dom=float(ipsae_dom_val), + ipTM_af=float(iptm_af), + ipTM_d0chn=float(iptm_chn_val), + pDockQ=float(pDockQ[c1][c2]), + pDockQ2=float(pDockQ2[c1][c2]), + LIS=float(LIS[c1][c2]), + n0res=int(n0res_val), + n0chn=int(n0chn[c1][c2]), + n0dom=int(n0dom[c1][c2]), + d0res=float(d0res_val), + d0chn=float(d0chn[c1][c2]), + d0dom=float(d0dom[c1][c2]), + nres1=res1_cnt, + nres2=res2_cnt, + dist1=dist1_cnt, + dist2=dist2_cnt, + Model=pdb_stem, ) - summary_lines.append(outstring) - pymol_lines.append("# " + outstring) + summary_data.append(summary_result) + pymol_lines.append("# " + summary_result.to_formatted_line()) # Store in results dict results_metrics[f"{c1}_{c2}"] = { @@ -1117,33 +1212,36 @@ def get_max_of_pair(arr, k1, k2): len(dist_unique_residues_chain2[c2][c1]), ) - outstring = ( - f"{c2} {c1} {pae_str:3} {dist_str:3} {'max':5} " - f"{ipsae_res_max:8.6f} " - f"{ipsae_chn_max:8.6f} " - f"{ipsae_dom_max:8.6f} " - f"{iptm_af_max:5.3f} " - f"{iptm_chn_max:8.6f} " - f"{pDockQ[c1][c2]:8.4f} " - f"{pdockq2_max:8.4f} " - f"{lis_avg:8.4f} " - f"{int(n0res_max):5d} " - f"{int(n0chn[c1][c2]):5d} " - f"{int(n0dom_max):5d} " - f"{d0res_max:6.2f} " - f"{d0chn[c1][c2]:6.2f} " - f"{d0dom_max:6.2f} " - f"{res1_max:5d} " - f"{res2_max:5d} " - f"{dist1_max:5d} " - f"{dist2_max:5d} " - f"{pdb_stem}\n" + summary_result = SummaryResult( + Chn1=c2, + Chn2=c1, + PAE=pae_int, + Dist=dist_int, + Type="max", + ipSAE=float(ipsae_res_max), + ipSAE_d0chn=float(ipsae_chn_max), + ipSAE_d0dom=float(ipsae_dom_max), + ipTM_af=float(iptm_af_max), + ipTM_d0chn=float(iptm_chn_max), + pDockQ=float(pDockQ[c1][c2]), + pDockQ2=float(pdockq2_max), + LIS=float(lis_avg), + n0res=int(n0res_max), + n0chn=int(n0chn[c1][c2]), + n0dom=int(n0dom_max), + d0res=float(d0res_max), + d0chn=float(d0chn[c1][c2]), + d0dom=float(d0dom_max), + nres1=res1_max, + nres2=res2_max, + dist1=dist1_max, + dist2=dist2_max, + Model=pdb_stem, ) - summary_lines.append(outstring) - summary_lines.append("\n") - pymol_lines.append("# " + outstring) + summary_data.append(summary_result) + pymol_lines.append("# " + summary_result.to_formatted_line()) - return summary_lines, pymol_lines, results_metrics + return summary_data, pymol_lines, results_metrics def calculate_scores( @@ -1354,7 +1452,7 @@ def calculate_scores( # We need to store these to generate the summary table # Store results in a structured way - summary_lines, pymol_lines, results_metrics = aggregate_byres_scores( + summary_data, pymol_lines, results_metrics = aggregate_byres_scores( residues, pae_cutoff, dist_cutoff, @@ -1388,7 +1486,7 @@ def calculate_scores( lis_scores=LIS, metrics=results_metrics, by_res_data=by_res_lines, - summary_lines=summary_lines, + summary_data=summary_data, pymol_script=pymol_lines, ) @@ -1406,7 +1504,13 @@ def write_outputs(results: ScoreResults, output_prefix: str | Path) -> None: output_prefix: The prefix for the output filenames (including path). """ with Path(f"{output_prefix}.txt").open("w") as f: - f.writelines(results.summary_lines) + f.write("\n") # Leading newline + f.write(SummaryResult.header_line()) + for summary in results.summary_data: + f.write(summary.to_formatted_line()) + # Add newline after "max" line (end of each chain pair group) + if summary.Type == "max": + f.write("\n") with Path(f"{output_prefix}_byres.txt").open("w") as f: f.write(results.by_res_data[0].header_line()) @@ -1536,7 +1640,11 @@ def main( else: # Print summary to stdout print("#" * 90 + "\n# Summary\n" + "#" * 90) - print("".join(results.summary_lines)) + print("\n" + SummaryResult.header_line(), end="") + for summary in results.summary_data: + print(summary.to_formatted_line(), end="") + if summary.Type == "max": + print() print("#" * 90 + "\n# Per-residue scores\n" + "#" * 90) print(results.by_res_data[0].header_line()) print("".join(x.to_formatted_line() for x in results.by_res_data)) From 3b72ef12df2ee1e079a913b2a9d61efcc5d43f2d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 2 Dec 2025 10:25:41 +0000 Subject: [PATCH 3/4] Update aggregate_byres_scores docstring with return type details Co-authored-by: y1zhou <17245097+y1zhou@users.noreply.github.com> --- ipsae.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ipsae.py b/ipsae.py index 0bec5ca..86226b1 100644 --- a/ipsae.py +++ b/ipsae.py @@ -1033,7 +1033,14 @@ def aggregate_byres_scores( d0dom: dict[str, dict[str, float]], pdb_stem: str, ) -> tuple[list[SummaryResult], list[str], dict[str, dict[str, float]]]: - """Aggregate per-residue scores into chain-pair-specific scores.""" + """Aggregate per-residue scores into chain-pair-specific scores. + + Returns: + A tuple containing: + - List of SummaryResult objects with chain-pair scores. + - List of PyMOL script lines. + - Dictionary of metrics for each chain pair. + """ # Store results in a structured way results_metrics: dict[str, dict[str, float]] = {} From 1e11f5a3e48826c189e5166a2c0f98196b15eba7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 2 Dec 2025 14:38:57 +0000 Subject: [PATCH 4/4] Rename SummaryResult to ChainPairScoreResults and update field names Co-authored-by: y1zhou <17245097+y1zhou@users.noreply.github.com> --- ipsae.py | 50 ++++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/ipsae.py b/ipsae.py index 86226b1..33fd0f7 100644 --- a/ipsae.py +++ b/ipsae.py @@ -270,7 +270,7 @@ def csv_header_line() -> str: @dataclass -class SummaryResult: +class ChainPairScoreResults: """Container for chain-pair summary score results. Attributes: @@ -375,8 +375,8 @@ class ScoreResults: pdockq2_scores: Dictionary of pDockQ2 scores (by chain pair). lis_scores: Dictionary of LIS scores (by chain pair). metrics: Dictionary of pDockQ, pDockQ2, and LIS scores for each chain pair. - by_res_data: Lists of per-residue scores. - summary_data: List of chain-pair summary score results. + by_res_scores: Lists of per-residue scores. + chain_pair_scores: List of chain-pair summary score results. pymol_script: List of formatted strings for PyMOL script. """ @@ -387,8 +387,10 @@ class ScoreResults: lis_scores: dict[str, dict[str, float]] # {c1: {c2: score}} metrics: dict[str, dict[str, float]] # {`_`: {metric_name: value}} - by_res_data: list[PerResScoreResults] - summary_data: list[SummaryResult] # List of chain-pair summary score results + by_res_scores: list[PerResScoreResults] + chain_pair_scores: list[ + ChainPairScoreResults + ] # List of chain-pair summary score results pymol_script: list[str] @@ -1032,19 +1034,19 @@ def aggregate_byres_scores( d0chn: dict[str, dict[str, float]], d0dom: dict[str, dict[str, float]], pdb_stem: str, -) -> tuple[list[SummaryResult], list[str], dict[str, dict[str, float]]]: +) -> tuple[list[ChainPairScoreResults], list[str], dict[str, dict[str, float]]]: """Aggregate per-residue scores into chain-pair-specific scores. Returns: A tuple containing: - - List of SummaryResult objects with chain-pair scores. + - List of ChainPairScoreResults objects with chain-pair scores. - List of PyMOL script lines. - Dictionary of metrics for each chain pair. """ # Store results in a structured way results_metrics: dict[str, dict[str, float]] = {} - summary_data: list[SummaryResult] = [] + chain_pair_scores: list[ChainPairScoreResults] = [] pymol_lines = [] pymol_lines.append( @@ -1098,7 +1100,7 @@ def get_max_info(values_array, c1, c2): if iptm_af == 0.0 and pae_data.iptm != -1.0: iptm_af = pae_data.iptm # Fallback to global if per-pair not found - summary_result = SummaryResult( + summary_result = ChainPairScoreResults( Chn1=c1, Chn2=c2, PAE=pae_int, @@ -1124,7 +1126,7 @@ def get_max_info(values_array, c1, c2): dist2=dist2_cnt, Model=pdb_stem, ) - summary_data.append(summary_result) + chain_pair_scores.append(summary_result) pymol_lines.append("# " + summary_result.to_formatted_line()) # Store in results dict @@ -1219,7 +1221,7 @@ def get_max_of_pair(arr, k1, k2): len(dist_unique_residues_chain2[c2][c1]), ) - summary_result = SummaryResult( + summary_result = ChainPairScoreResults( Chn1=c2, Chn2=c1, PAE=pae_int, @@ -1245,10 +1247,10 @@ def get_max_of_pair(arr, k1, k2): dist2=dist2_max, Model=pdb_stem, ) - summary_data.append(summary_result) + chain_pair_scores.append(summary_result) pymol_lines.append("# " + summary_result.to_formatted_line()) - return summary_data, pymol_lines, results_metrics + return chain_pair_scores, pymol_lines, results_metrics def calculate_scores( @@ -1459,7 +1461,7 @@ def calculate_scores( # We need to store these to generate the summary table # Store results in a structured way - summary_data, pymol_lines, results_metrics = aggregate_byres_scores( + chain_pair_scores, pymol_lines, results_metrics = aggregate_byres_scores( residues, pae_cutoff, dist_cutoff, @@ -1492,8 +1494,8 @@ def calculate_scores( pdockq2_scores=pDockQ2, lis_scores=LIS, metrics=results_metrics, - by_res_data=by_res_lines, - summary_data=summary_data, + by_res_scores=by_res_lines, + chain_pair_scores=chain_pair_scores, pymol_script=pymol_lines, ) @@ -1512,16 +1514,16 @@ def write_outputs(results: ScoreResults, output_prefix: str | Path) -> None: """ with Path(f"{output_prefix}.txt").open("w") as f: f.write("\n") # Leading newline - f.write(SummaryResult.header_line()) - for summary in results.summary_data: + f.write(ChainPairScoreResults.header_line()) + for summary in results.chain_pair_scores: f.write(summary.to_formatted_line()) # Add newline after "max" line (end of each chain pair group) if summary.Type == "max": f.write("\n") with Path(f"{output_prefix}_byres.txt").open("w") as f: - f.write(results.by_res_data[0].header_line()) - for res_line in results.by_res_data: + f.write(results.by_res_scores[0].header_line()) + for res_line in results.by_res_scores: f.write(res_line.to_formatted_line()) with Path(f"{output_prefix}.pml").open("w") as f: @@ -1647,14 +1649,14 @@ def main( else: # Print summary to stdout print("#" * 90 + "\n# Summary\n" + "#" * 90) - print("\n" + SummaryResult.header_line(), end="") - for summary in results.summary_data: + print("\n" + ChainPairScoreResults.header_line(), end="") + for summary in results.chain_pair_scores: print(summary.to_formatted_line(), end="") if summary.Type == "max": print() print("#" * 90 + "\n# Per-residue scores\n" + "#" * 90) - print(results.by_res_data[0].header_line()) - print("".join(x.to_formatted_line() for x in results.by_res_data)) + print(results.by_res_scores[0].header_line()) + print("".join(x.to_formatted_line() for x in results.by_res_scores)) print("#" * 90 + "\n# PyMOL script\n" + "#" * 90) print("".join(results.pymol_script))