From f1260605b7bcebe3989958c0c7a9cef4f08b2117 Mon Sep 17 00:00:00 2001 From: benpullman Date: Mon, 22 Nov 2021 15:48:34 -0800 Subject: [PATCH 1/2] initial read mgf --- .../cosine_to_synthetics.py | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tools/peptide_statistics_hpp/cosine_to_synthetics.py b/tools/peptide_statistics_hpp/cosine_to_synthetics.py index 51f2128..7bdf387 100644 --- a/tools/peptide_statistics_hpp/cosine_to_synthetics.py +++ b/tools/peptide_statistics_hpp/cosine_to_synthetics.py @@ -82,6 +82,17 @@ def read_mzxml_spectrum(spectrum): None ) +def read_mgf_spectrum(spectrum): + peaks = [processing.Peak(float(p[0]),float(p[1])) for p in zip(spectrum['m/z array'],spectrum['intensity array'])] + precursor = float(spectrum['params']['pepmass'][0]) + return processing.Spectrum( + peaks, + precursor, + None, + spectrum['params']['title'], + None + ) + def get_mzxml_spectrum(mzxml_object, scan): #thermo only for now spectrum = mzxml_object.get_by_id(scan) @@ -271,14 +282,19 @@ def main(): get_spectrum_func = None read_scan = None - if len(psms_to_consider[filename]) >= min_spectra_to_load_file: + read_full_file = len(psms_to_consider[filename]) >= min_spectra_to_load_file or exts[0] == '.mgf' + + if read_full_file: if exts[0] == '.mzML': file = open(filepath, 'rb') file_object = mzml.MzML(file) get_spectrum_func = read_mzml_spectrum read_scan = lambda s: str(s['id'].split('scan=')[1]) if exts[0] == '.mgf': - pass + file = open(filepath, 'r') + file_object = mgf.MGF(file) + get_spectrum_func = read_mgf_spectrum + read_scan = lambda s: s['params']['scans'] if exts[0] == '.mzXML': file = open(filepath, 'rb') file_object = mzxml.MzXML(file) @@ -289,8 +305,6 @@ def main(): file = open(filepath, 'rb') file_object = mzml.PreIndexedMzML(file) get_spectrum_func = get_mzml_spectrum - if exts[0] == '.mgf': - pass if exts[0] == '.mzXML': file = open(filepath, 'rb') file_object = mzxml.MzXML(file,use_index = True) @@ -299,7 +313,7 @@ def main(): if file: if file_object and get_spectrum_func: print("{}: Opened file {}, ready to load".format(datetime.now().strftime("%H:%M:%S"),filename)) - if len(psms_to_consider[filename]) >= min_spectra_to_load_file: + if read_full_file: file_cosine_to_synthetic, file_explained_intensity_per_spectrum = process_spectrum_read_file(psms_to_consider,filename,synthetic_scans,tol,args.low_mass_filter,args.min_snr,threshold,file_object,get_spectrum_func,read_scan) else: file_cosine_to_synthetic, file_explained_intensity_per_spectrum = process_spectrum(psms_to_consider,filename,synthetic_scans,tol,args.low_mass_filter,args.min_snr,threshold,file_object,get_spectrum_func) From 4d10cf6a23d3a96735b921459b008e485e132b94 Mon Sep 17 00:00:00 2001 From: benpullman Date: Tue, 23 Nov 2021 12:57:47 -0800 Subject: [PATCH 2/2] fix reading of task files --- tools/peptide_statistics_hpp/cosine_to_synthetics.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/peptide_statistics_hpp/cosine_to_synthetics.py b/tools/peptide_statistics_hpp/cosine_to_synthetics.py index 7bdf387..4256bf2 100644 --- a/tools/peptide_statistics_hpp/cosine_to_synthetics.py +++ b/tools/peptide_statistics_hpp/cosine_to_synthetics.py @@ -89,7 +89,7 @@ def read_mgf_spectrum(spectrum): peaks, precursor, None, - spectrum['params']['title'], + "scan={}".format(spectrum['params']['scans']), None ) @@ -242,6 +242,8 @@ def main(): elif 'f.' in filename: # checking uploads directory filepath = filename.replace('f.','/data/ccms-data/uploads/') + if not Path(filepath).exists(): + filepath = filename.replace('f.','/data/ccms-data/tasks/') else: filepath = filename