diff --git a/software/rosetta_ddG_pipeline/helper.py b/software/rosetta_ddG_pipeline/helper.py index ed50b18..515f47d 100644 --- a/software/rosetta_ddG_pipeline/helper.py +++ b/software/rosetta_ddG_pipeline/helper.py @@ -601,7 +601,12 @@ def generate_output(folder, output_name='ddG.out', sys_name='', version=1, prism seqdicfile = os.path.join(folder.prepare_checking, 'structure_input.json') with open(seqdicfile, 'r') as fp: sec_all = json.load(fp) - rosetta_seq = sec_all['strucdata'][chain_id][0] + seqss = '' + for ind, cha in enumerate(sec_all['strucdata'].keys()): + seqss = seqss + sec_all['strucdata'][cha][0] + if cha == chain_id: + break + rosetta_seq = seqss#sec_all['strucdata'][chain_id][0] sequence_pdbnbr = sec_all['strucdata'][chain_id][2] seqdic = sec_all['resdata'] minkey = min(sec_all['resdata_reverse'], key=sec_all['resdata_reverse'].get) diff --git a/software/rosetta_ddG_pipeline/mp_prepare.py b/software/rosetta_ddG_pipeline/mp_prepare.py index b3106fa..739983e 100644 --- a/software/rosetta_ddG_pipeline/mp_prepare.py +++ b/software/rosetta_ddG_pipeline/mp_prepare.py @@ -444,20 +444,16 @@ def calc_deepTMHMM(fasta_file, tmp_output_path): fp.write('manual-generated spanfile from DeepTMHMM\n') if does_repeat == True: fp.write(f'{num_span*num_repeats} {int(total_length)*(num_repeats-1)}\n') - print(f'{num_span*num_repeats} {int(total_length)*(num_repeats-1)}\n') else: fp.write(f'{num_span} {int(total_length)}\n') - print(f'{num_span} {int(total_length)}\n') fp.write(f'{order}\n') fp.write('n2c\n') for index, row in TM_df.iterrows(): fp.write(f"\t\t{row['start']+res_dic[0]-1}\t{row['end']+res_dic[0]-1}\n") - print(f"{row['start']}+{res_dic[0]}-1\t{row['end']}+{res_dic[0]}-1\n") if does_repeat == True: for reps in range(1, num_repeats-1): for index, row in TM_df.iterrows(): fp.write(f"\t\t{row['start']+res_dic[0]-1+(len_repeats*reps)}\t{row['end']+res_dic[0]-1+(len_repeats*reps)}\n") - print(f"\t\t{row['start']+res_dic[0]-1+(len_repeats*reps)}\t{row['end']+res_dic[0]-1+(len_repeats*reps)}\n") spanfiles.append(span_file) print("Span process done") diff --git a/software/rosetta_ddG_pipeline/parse_cartesian_functions.py b/software/rosetta_ddG_pipeline/parse_cartesian_functions.py index 1b4b63f..196a2f0 100755 --- a/software/rosetta_ddG_pipeline/parse_cartesian_functions.py +++ b/software/rosetta_ddG_pipeline/parse_cartesian_functions.py @@ -3,13 +3,17 @@ import os import json -def rosetta_cartesian_read(pathtofile, protein_seq='abcd'): +def rosetta_cartesian_read(pathtofile, protein_seq='abcd', struc_dat=''): """This script takes the individual score files in the run folder and outputs a dictionary of dGs""" score_file = open(pathtofile, "r") score_data = score_file.readlines() score_file.close() + if struc_dat!='': + with open(struc_dat) as json_file: + strucdata = json.load(json_file) + aminocodes = { "ALA": "A", "CYS": "C", @@ -43,7 +47,10 @@ def rosetta_cartesian_read(pathtofile, protein_seq='abcd'): one_letter = aminocodes[three_letter_code] res_number = description[:-3] dg = float(score_fields[3]) - key.append(protein_seq[int(res_number) - 1] + res_number + one_letter) + if struc_dat=='': + key.append(protein_seq[int(res_number) - 1] + res_number + one_letter) + else: + key.append(strucdata['resdata'][str(res_number)][0] + res_number + one_letter) key = ":".join(key) if key in cartesian_scores: cartesian_scores[key].append(dg) diff --git a/software/rosetta_ddG_pipeline/parse_rosetta_ddgs.py b/software/rosetta_ddG_pipeline/parse_rosetta_ddgs.py index d306e7e..2de1c6a 100755 --- a/software/rosetta_ddG_pipeline/parse_rosetta_ddgs.py +++ b/software/rosetta_ddG_pipeline/parse_rosetta_ddgs.py @@ -18,7 +18,7 @@ def parse_rosetta_ddgs(sys_name, chain_id, fasta_seq, ddG_input, ddG_output): subprocess.call(shell_command, cwd=path_to_run_folder, shell=True) rosetta_cartesian_ddgs_dict = ddgs_from_dg(rosetta_cartesian_read( - join(path_to_run_folder, rosetta_summary_file), fasta_seq)) + join(path_to_run_folder, rosetta_summary_file), fasta_seq, struc_dat = '')) line = [] list_keys = list(rosetta_cartesian_ddgs_dict.keys()) uniprot_numbering_ddgs_dict = {} diff --git a/software/rosetta_ddG_pipeline/parser_ddg_v2.py b/software/rosetta_ddG_pipeline/parser_ddg_v2.py index deafaf9..65f66ee 100644 --- a/software/rosetta_ddG_pipeline/parser_ddg_v2.py +++ b/software/rosetta_ddG_pipeline/parser_ddg_v2.py @@ -89,7 +89,7 @@ def parse_rosetta_ddgs(sys_name, chain_id, fasta_seq, ddG_run, ddG_output, struc subprocess.call(shell_command, cwd=path_to_run_folder, shell=True) rosetta_cartesian_ddgs_dict, rosetta_cartesian_ddgs_array = ddgs_from_dg(rosetta_cartesian_read( - join(path_to_run_folder, rosetta_summary_file), fasta_seq), scale_factor=scale_factor) + join(path_to_run_folder, rosetta_summary_file), fasta_seq, struc_dat=structure_input), scale_factor=scale_factor) protein_sequence=fasta_seq