Skip to content

Commit

Permalink
[mp-stability-pipeline] add LAR and TMspan to prism files
Browse files Browse the repository at this point in the history
  • Loading branch information
j0kaso committed Jun 7, 2021
1 parent 6439ea4 commit 38e7626
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 4 deletions.
15 changes: 12 additions & 3 deletions software/rosetta_ddG_pipeline/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,12 @@ def generate_emission_stats(test_dir):
def generate_output(folder, output_name='ddG.out', sys_name='', version=1, prism_nr='XXX', chain_id='A', output_gaps=False, bfac=True, zip_files=True, sha_tag='', MP=False):
# generate emission stats
generate_emission_stats(folder.output[:-7])
if MP:
span_file = glob.glob(os.path.join(folder.prepare_checking[:-9], 'mp_files', 'membrane_span', '*.span'))[0]
lipid_file = glob.glob(os.path.join(folder.prepare_checking[:-9], 'mp_files', 'mp_lipid_acc', '*.json'))[0]
else:
span_file = ''
lipid_file = ''

ddg_file = os.path.join(folder.ddG_run, output_name)
pdb_file_raw = os.path.join(folder.ddG_input, 'input.pdb')
Expand Down Expand Up @@ -565,7 +571,8 @@ def generate_output(folder, output_name='ddG.out', sys_name='', version=1, prism
ddG_postprocessing(ddg_file, ddg_sorted_file, sec_all=None, startnr=1, chain_id=chain_id)
prism_file = os.path.join(folder.ddG_output, f'prism_rosetta_{prism_nr}_{sys_name}.txt')
rosetta_to_prism(ddg_sorted_file, prism_file, rosetta_seq, rosetta_info=None,
version=version, sys_name=sys_name, first_residue_number=1, sha_tag=sha_tag, MP=MP)
version=version, sys_name=sys_name, first_residue_number=1, sha_tag=sha_tag, MP=MP,
span_file=span_file, lipid_file=lipid_file)
create_copy(prism_file, folder.output)
create_copy(pdb_file, folder.output, name=f'{sys_name}_final.pdb')

Expand All @@ -583,7 +590,8 @@ def generate_output(folder, output_name='ddG.out', sys_name='', version=1, prism
prism_gap_shifted_file = os.path.join(folder.ddG_output, f'prism_rosetta_{prism_nr}_{sys_name}_gap-shifted.txt')
ddG_postprocessing(ddg_file, ddg_shifted_gap_file, sec_all=sec_all, startnr=first_residue_number, chain_id=chain_id)
rosetta_to_prism(ddg_shifted_gap_file, prism_gap_shifted_file, sequence, rosetta_info=None,
version=version, sys_name=sys_name, first_residue_number=first_residue_number, sha_tag=sha_tag, MP=MP)
version=version, sys_name=sys_name, first_residue_number=first_residue_number, sha_tag=sha_tag, MP=MP,
span_file=span_file, lipid_file=lipid_file)
create_copy(prism_gap_shifted_file, folder.output)

pdb_gap_shifted_file = os.path.join(folder.ddG_output, 'relaxed_gap_shifted.pdb')
Expand All @@ -595,7 +603,8 @@ def generate_output(folder, output_name='ddG.out', sys_name='', version=1, prism
ddG_postprocessing(ddg_file, ddg_gap_file, sec_all=sec_all, startnr=1, chain_id=chain_id)
prism_gap_file = os.path.join(folder.ddG_output, f'prism_rosetta_{prism_nr}_{sys_name}-gap.txt')
rosetta_to_prism(ddg_gap_file, prism_gap_file, sequence, rosetta_info=None,
version=version, sys_name=sys_name, first_residue_number=1, sha_tag=sha_tag, MP=MP)
version=version, sys_name=sys_name, first_residue_number=1, sha_tag=sha_tag, MP=MP,
span_file=span_file, lipid_file=lipid_file)
create_copy(prism_gap_file, folder.output)

pdb_gap_file = os.path.join(folder.ddG_output, 'relaxed_gap.pdb')
Expand Down
38 changes: 37 additions & 1 deletion software/rosetta_ddG_pipeline/prism_rosetta_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

# Standard library imports
from datetime import datetime
import json
import logging as logger
import re
import subprocess
Expand All @@ -24,7 +25,17 @@
from PrismData import PrismParser, VariantData


def rosetta_to_prism(ddg_file, prism_file, sequence, rosetta_info=None, version=1, sys_name='', first_residue_number=1, sha_tag='', MP=False):
def span_multi(x, region):
    """Flag each residue in ``x`` by its membership in ``region``.

    Every entry of ``x`` is converted with ``int()`` and tested with ``in``
    against ``region``. The per-residue results are rendered as the strings
    'True' / 'False' and joined with ':' in input order; an empty ``x``
    yields an empty string.
    """
    flags = ('True' if int(residue) in region else 'False' for residue in x)
    return ":".join(flags)

def rosetta_to_prism(ddg_file, prism_file, sequence, rosetta_info=None, version=1, sys_name='',
first_residue_number=1, sha_tag='', MP=False, span_file='', lipid_file=''):

sequence = sequence.replace('-', 'X')
# create prism file with rosetta values
Expand Down Expand Up @@ -53,6 +64,26 @@ def rosetta_to_prism(ddg_file, prism_file, sequence, rosetta_info=None, version=
}
dataframeset = pd.DataFrame(data)

if span_file!='':
TM_regions = []
with open(span_file) as fp:
next(fp)
next(fp)
next(fp)
next(fp)
for line in fp:
line = line.strip().split()
TM_regions += range(int(line[0]), int(line[1])+1)
dataframeset['TMspan'] = dataframeset['resi'].apply(lambda x: span_multi(x, TM_regions))

if lipid_file!='':
with open(lipid_file, 'r') as fp:
data = json.load(fp)
lipid_df = pd.DataFrame.from_dict( data, orient='index', columns=['LAR']).reset_index(drop=False)#.T.set_index('index')
lipid_df = lipid_df.loc[lipid_df['LAR']=='true'].reset_index(drop=True)
lipid_df = lipid_df['index'].astype(int).unique()
dataframeset['LAR'] = dataframeset['resi'].apply(lambda x: span_multi(x, lipid_df))

sha = sha_tag.split('tag')[0]
tag = sha_tag.split('tag')[1]

Expand Down Expand Up @@ -85,6 +116,11 @@ def rosetta_to_prism(ddg_file, prism_file, sequence, rosetta_info=None, version=
"std_ddG": f"std Rosetta ddG values (std((MUT-mean(WT)){units})",
},
}
if span_file!='':
metadata['columns']['TMspan'] = 'Residue within the TM region defined by the Rosetta span file'
if lipid_file!='':
metadata['columns']['LAR'] = 'Lipid accessible residue defined by Rosetta'

if first_residue_number != 1:
metadata['protein']['first_residue_number'] = first_residue_number

Expand Down

0 comments on commit 38e7626

Please sign in to comment.