Skip to content

Commit

Permalink
[stability-pipeline] bugfix postprocessing with multiple chains + remove prints
Browse files Browse the repository at this point in the history
  • Loading branch information
j0kaso committed Sep 5, 2022
1 parent ce3e1cc commit a6523b7
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 9 deletions.
7 changes: 6 additions & 1 deletion software/rosetta_ddG_pipeline/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,12 @@ def generate_output(folder, output_name='ddG.out', sys_name='', version=1, prism
seqdicfile = os.path.join(folder.prepare_checking, 'structure_input.json')
with open(seqdicfile, 'r') as fp:
sec_all = json.load(fp)
rosetta_seq = sec_all['strucdata'][chain_id][0]
seqss = ''
for ind, cha in enumerate(sec_all['strucdata'].keys()):
seqss = seqss + sec_all['strucdata'][cha][0]
if cha == chain_id:
break
rosetta_seq = seqss#sec_all['strucdata'][chain_id][0]
sequence_pdbnbr = sec_all['strucdata'][chain_id][2]
seqdic = sec_all['resdata']
minkey = min(sec_all['resdata_reverse'], key=sec_all['resdata_reverse'].get)
Expand Down
4 changes: 0 additions & 4 deletions software/rosetta_ddG_pipeline/mp_prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,20 +444,16 @@ def calc_deepTMHMM(fasta_file, tmp_output_path):
fp.write('manual-generated spanfile from DeepTMHMM\n')
if does_repeat == True:
fp.write(f'{num_span*num_repeats} {int(total_length)*(num_repeats-1)}\n')
print(f'{num_span*num_repeats} {int(total_length)*(num_repeats-1)}\n')
else:
fp.write(f'{num_span} {int(total_length)}\n')
print(f'{num_span} {int(total_length)}\n')
fp.write(f'{order}\n')
fp.write('n2c\n')
for index, row in TM_df.iterrows():
fp.write(f"\t\t{row['start']+res_dic[0]-1}\t{row['end']+res_dic[0]-1}\n")
print(f"{row['start']}+{res_dic[0]}-1\t{row['end']}+{res_dic[0]}-1\n")
if does_repeat == True:
for reps in range(1, num_repeats-1):
for index, row in TM_df.iterrows():
fp.write(f"\t\t{row['start']+res_dic[0]-1+(len_repeats*reps)}\t{row['end']+res_dic[0]-1+(len_repeats*reps)}\n")
print(f"\t\t{row['start']+res_dic[0]-1+(len_repeats*reps)}\t{row['end']+res_dic[0]-1+(len_repeats*reps)}\n")

spanfiles.append(span_file)
print("Span process done")
Expand Down
11 changes: 9 additions & 2 deletions software/rosetta_ddG_pipeline/parse_cartesian_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@
import os
import json

def rosetta_cartesian_read(pathtofile, protein_seq='abcd'):
def rosetta_cartesian_read(pathtofile, protein_seq='abcd', struc_dat=''):
"""This script takes the individual score files in the run folder and outputs a dictionary of dGs"""

score_file = open(pathtofile, "r")
score_data = score_file.readlines()
score_file.close()

if struc_dat!='':
with open(struc_dat) as json_file:
strucdata = json.load(json_file)

aminocodes = {
"ALA": "A",
"CYS": "C",
Expand Down Expand Up @@ -43,7 +47,10 @@ def rosetta_cartesian_read(pathtofile, protein_seq='abcd'):
one_letter = aminocodes[three_letter_code]
res_number = description[:-3]
dg = float(score_fields[3])
key.append(protein_seq[int(res_number) - 1] + res_number + one_letter)
if struc_dat=='':
key.append(protein_seq[int(res_number) - 1] + res_number + one_letter)
else:
key.append(strucdata['resdata'][str(res_number)][0] + res_number + one_letter)
key = ":".join(key)
if key in cartesian_scores:
cartesian_scores[key].append(dg)
Expand Down
2 changes: 1 addition & 1 deletion software/rosetta_ddG_pipeline/parse_rosetta_ddgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def parse_rosetta_ddgs(sys_name, chain_id, fasta_seq, ddG_input, ddG_output):
subprocess.call(shell_command, cwd=path_to_run_folder, shell=True)

rosetta_cartesian_ddgs_dict = ddgs_from_dg(rosetta_cartesian_read(
join(path_to_run_folder, rosetta_summary_file), fasta_seq))
join(path_to_run_folder, rosetta_summary_file), fasta_seq, struc_dat = ''))
line = []
list_keys = list(rosetta_cartesian_ddgs_dict.keys())
uniprot_numbering_ddgs_dict = {}
Expand Down
2 changes: 1 addition & 1 deletion software/rosetta_ddG_pipeline/parser_ddg_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def parse_rosetta_ddgs(sys_name, chain_id, fasta_seq, ddG_run, ddG_output, struc
subprocess.call(shell_command, cwd=path_to_run_folder, shell=True)

rosetta_cartesian_ddgs_dict, rosetta_cartesian_ddgs_array = ddgs_from_dg(rosetta_cartesian_read(
join(path_to_run_folder, rosetta_summary_file), fasta_seq), scale_factor=scale_factor)
join(path_to_run_folder, rosetta_summary_file), fasta_seq, struc_dat=structure_input), scale_factor=scale_factor)


protein_sequence=fasta_seq
Expand Down

0 comments on commit a6523b7

Please sign in to comment.