[stability-pipeline] bugfix postprocessing with multiple chains + remove prints
KULL-Centre · Sep 5, 2022 · a6523b7 · a6523b7
1 parent ce3e1cc
commit a6523b7
Show file tree

Hide file tree

Showing 5 changed files with 17 additions and 9 deletions.
diff --git a/software/rosetta_ddG_pipeline/helper.py b/software/rosetta_ddG_pipeline/helper.py
@@ -601,7 +601,12 @@ def generate_output(folder, output_name='ddG.out', sys_name='', version=1, prism
     seqdicfile = os.path.join(folder.prepare_checking, 'structure_input.json')
     with open(seqdicfile, 'r') as fp:
         sec_all = json.load(fp)
-        rosetta_seq = sec_all['strucdata'][chain_id][0]
+        seqss = ''
+        for ind, cha in enumerate(sec_all['strucdata'].keys()):
+            seqss = seqss + sec_all['strucdata'][cha][0]
+            if cha == chain_id:
+                break
+        rosetta_seq = seqss#sec_all['strucdata'][chain_id][0]
         sequence_pdbnbr = sec_all['strucdata'][chain_id][2]
         seqdic = sec_all['resdata']
         minkey = min(sec_all['resdata_reverse'], key=sec_all['resdata_reverse'].get)

diff --git a/software/rosetta_ddG_pipeline/mp_prepare.py b/software/rosetta_ddG_pipeline/mp_prepare.py
@@ -444,20 +444,16 @@ def calc_deepTMHMM(fasta_file, tmp_output_path):
                 fp.write('manual-generated spanfile from DeepTMHMM\n')
                 if does_repeat == True:
                     fp.write(f'{num_span*num_repeats} {int(total_length)*(num_repeats-1)}\n')
-                    print(f'{num_span*num_repeats} {int(total_length)*(num_repeats-1)}\n')
                 else:
                     fp.write(f'{num_span} {int(total_length)}\n')
-                    print(f'{num_span} {int(total_length)}\n')
                 fp.write(f'{order}\n')
                 fp.write('n2c\n')
                 for index, row in TM_df.iterrows():
                     fp.write(f"\t\t{row['start']+res_dic[0]-1}\t{row['end']+res_dic[0]-1}\n")
-                    print(f"{row['start']}+{res_dic[0]}-1\t{row['end']}+{res_dic[0]}-1\n")
                 if does_repeat == True:
                     for reps in range(1, num_repeats-1):
                         for index, row in TM_df.iterrows():
                             fp.write(f"\t\t{row['start']+res_dic[0]-1+(len_repeats*reps)}\t{row['end']+res_dic[0]-1+(len_repeats*reps)}\n")
-                            print(f"\t\t{row['start']+res_dic[0]-1+(len_repeats*reps)}\t{row['end']+res_dic[0]-1+(len_repeats*reps)}\n")
 
             spanfiles.append(span_file)
     print("Span process done")

diff --git a/software/rosetta_ddG_pipeline/parse_cartesian_functions.py b/software/rosetta_ddG_pipeline/parse_cartesian_functions.py
@@ -3,13 +3,17 @@
 import os
 import json
 
-def rosetta_cartesian_read(pathtofile, protein_seq='abcd'):
+def rosetta_cartesian_read(pathtofile, protein_seq='abcd', struc_dat=''):
     """This script takes the individual score files in the run folder and outputs a dictionary of dGs"""
 
     score_file = open(pathtofile, "r")
     score_data = score_file.readlines()
     score_file.close()
 
+    if struc_dat!='':
+        with open(struc_dat) as json_file:
+            strucdata = json.load(json_file)
+
     aminocodes = {
         "ALA": "A",
         "CYS": "C",
@@ -43,7 +47,10 @@ def rosetta_cartesian_read(pathtofile, protein_seq='abcd'):
             one_letter = aminocodes[three_letter_code]
             res_number = description[:-3]
             dg = float(score_fields[3])
-            key.append(protein_seq[int(res_number) - 1] + res_number + one_letter)
+            if struc_dat=='':
+                key.append(protein_seq[int(res_number) - 1] + res_number + one_letter)
+            else:
+                key.append(strucdata['resdata'][str(res_number)][0] + res_number + one_letter)
         key = ":".join(key)
         if key in cartesian_scores:
             cartesian_scores[key].append(dg)

diff --git a/software/rosetta_ddG_pipeline/parse_rosetta_ddgs.py b/software/rosetta_ddG_pipeline/parse_rosetta_ddgs.py
@@ -18,7 +18,7 @@ def parse_rosetta_ddgs(sys_name, chain_id, fasta_seq, ddG_input, ddG_output):
     subprocess.call(shell_command, cwd=path_to_run_folder, shell=True)
 
     rosetta_cartesian_ddgs_dict = ddgs_from_dg(rosetta_cartesian_read(
-        join(path_to_run_folder, rosetta_summary_file), fasta_seq))
+        join(path_to_run_folder, rosetta_summary_file), fasta_seq, struc_dat = ''))
     line = []
     list_keys = list(rosetta_cartesian_ddgs_dict.keys())
     uniprot_numbering_ddgs_dict = {}

diff --git a/software/rosetta_ddG_pipeline/parser_ddg_v2.py b/software/rosetta_ddG_pipeline/parser_ddg_v2.py
@@ -89,7 +89,7 @@ def parse_rosetta_ddgs(sys_name, chain_id, fasta_seq, ddG_run, ddG_output, struc
     subprocess.call(shell_command, cwd=path_to_run_folder, shell=True)
 
     rosetta_cartesian_ddgs_dict, rosetta_cartesian_ddgs_array = ddgs_from_dg(rosetta_cartesian_read(
-        join(path_to_run_folder, rosetta_summary_file), fasta_seq), scale_factor=scale_factor)
+        join(path_to_run_folder, rosetta_summary_file), fasta_seq, struc_dat=structure_input), scale_factor=scale_factor)
 
 
     protein_sequence=fasta_seq