diff --git a/tools/peptide_statistics_hpp/peptide_protein_cosine.py b/tools/peptide_statistics_hpp/peptide_protein_cosine.py index 28aee26..e1e95a3 100755 --- a/tools/peptide_statistics_hpp/peptide_protein_cosine.py +++ b/tools/peptide_statistics_hpp/peptide_protein_cosine.py @@ -8,7 +8,7 @@ from itertools import chain import explorer_export import read_mappings -from python_ms_utilities import mapping, resources, fdr, proteosafe +from python_ms_utilities import processing, mapping, resources, fdr, proteosafe import pandas as pd from datetime import datetime import requests @@ -96,21 +96,6 @@ def integer_mod_mass(sequence): mods = [int(round(float(m))) if i == 0 else -int(round(float(m))) for ms in mods.split('+') for i,m in enumerate(ms.split('-')) if m != '' ] return sum(mods) -def add_brackets(pep): - aa_breakpoints = [] - pep = pep.replace('+','[+').replace('-','[-') - for i,aa in enumerate(pep[1:]): - if not pep[i-1].isalpha() and pep[i].isalpha(): - aa_breakpoints.append(i) - elif not pep[i].isalpha() and i == (len(pep)-2): - aa_breakpoints.append(i+2) - for i,breakpoint in enumerate(reversed(aa_breakpoints)): - end_bracket = ']' - if i == len(aa_breakpoints)-1 and pep[0] == '[': - end_bracket = ']-' - pep = pep[:breakpoint] + end_bracket + pep[breakpoint:] - return pep - protein_type = lambda protein, proteome: 'Contaminant' if proteome.proteins[protein].db == 'con' else ('TrEMBL' if proteome.proteins[protein].db == 'tr' else ('Canonical' if proteome.proteins[protein].iso == None else 'Isoform')) def msv_to_pxd(msv, msv_mapping): @@ -126,7 +111,7 @@ def correct_usi(usi_input, msv_mapping): else: split_usi = usi_input.split(':') split_usi[1] = msv_to_pxd(split_usi[1], msv_mapping) - split_usi[5] = '/'.join([add_brackets(split_usi[5].split('/')[0]),split_usi[5].split('/')[1]]) + split_usi[5] = '/'.join([processing.inspect_to_proforma(split_usi[5].split('/')[0]),split_usi[5].split('/')[1]]) return ':'.join(split_usi) except: return ''