From ff07df031ae5b57b20c683b79bab2a807418f0cd Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 9 Sep 2025 12:17:00 -0700 Subject: [PATCH 1/3] updating --- .gitmodules | 3 ++ .../mass-spec-package | 1 + metabolomics_spectrum_resolver/parsing.py | 32 ++++++++++++++++++- test/test_unit.py | 4 +++ 4 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 .gitmodules create mode 160000 metabolomics_spectrum_resolver/mass-spec-package diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..e8a8b6e --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "metabolomics_spectrum_resolver/mass-spec-package"] + path = metabolomics_spectrum_resolver/mass-spec-package + url = https://github.com/AkJay1722/mass-spec-package.git diff --git a/metabolomics_spectrum_resolver/mass-spec-package b/metabolomics_spectrum_resolver/mass-spec-package new file mode 160000 index 0000000..ac5f476 --- /dev/null +++ b/metabolomics_spectrum_resolver/mass-spec-package @@ -0,0 +1 @@ +Subproject commit ac5f4766e6debaa206e210cf9f62ea29e4d89c05 diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 44b8b98..2a85e4c 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -12,6 +12,9 @@ from metabolomics_spectrum_resolver.error import UsiError +from metabolomics_spectrum_resolver.zenodo_mzml_repo import mzml_repo + + timeout = 45 # seconds MS2LDA_SERVER = "http://ms2lda.org/basicviz/" @@ -46,7 +49,7 @@ # collection identifier # Unofficial proteomics spectral library identifier: MASSIVEKB # Metabolomics collection identifiers: GNPS, MASSBANK, MS2LDA, MOTIFDB, MTBLS, ST - r":(MASSIVEKB|GNPS|GNPS2|MASSBANK|MS2LDA|MOTIFDB|TINYMASS|MTBLS\d+|ST\d{6}|)" + r":(MASSIVEKB|GNPS|GNPS2|MASSBANK|MS2LDA|MOTIFDB|TINYMASS|MTBLS\d+|ST\d{6}|ZENODO-\d+|)" # msRun identifier r":(.*)" # index flag @@ -136,6 +139,8 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: spectrum, source_link = _parse_metabolomics_workbench(usi) elif collection.startswith("tinymass"): spectrum, source_link = _parse_tinymass(usi) + elif collection.startswith("zenodo"): + spectrum, source_link = _parse_zenodo(usi) else: raise UsiError(f"Unknown USI collection: {match.group(1)}", 400) splash_key = splash_builder.splash( @@ -473,6 +478,31 @@ def _parse_gnps2_dataset(usi: str) -> Tuple[sus.MsmsSpectrum, str]: except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError): raise UsiError("Unknown GNPS2 Dataset USI", 404) +# parsing from Zenodo +def _parse_zenodo(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + zenodo_id = match.group(1).split("-")[-1] + filename = match.group(2) + index_flag = match.group(3) + if index_flag.lower() == "scan": + scan = match.group(4) + + zenodo_obj = mzml_repo(zenodo_id) + zenodo_obj.partial_indexing = False + scan_obj = zenodo_obj.get_scan(filename, int(scan)) + + # get peaks + intensity_list = scan_obj["intensities"] + mz_list = scan_obj["mz"] + charge = scan_obj["charge"] + precursor_mz = 0 + + source_link = f"https://zenodo.org/record/{zenodo_id}" + + spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz_list, intensity_list) + + return spectrum, source_link + # Parse TINYMASS task spectra def _parse_tinymass(usi: str) -> Tuple[sus.MsmsSpectrum, str]: match = _match_usi(usi) diff --git a/test/test_unit.py b/test/test_unit.py index 98782bf..1d303b9 100644 --- a/test/test_unit.py +++ b/test/test_unit.py @@ -219,6 +219,10 @@ def test_parse_motifdb(): assert exc_info.value.error_code == 404 +# def test_zenodo(): +# usi = "" +# spectrum, _, splash_key = parsing.parse_usi(usi) + def test_parse_timeout(): with unittest.mock.patch( "metabolomics_spectrum_resolver.parsing.requests.get", From c274e9c6c82d1126b8455a07ef527dafc1ee39a1 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 9 Sep 2025 12:20:38 -0700 Subject: [PATCH 2/3] updating --- test/test_unit.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/test/test_unit.py b/test/test_unit.py index 1d303b9..91b402b 100644 --- a/test/test_unit.py +++ b/test/test_unit.py @@ -218,11 +218,6 @@ def test_parse_motifdb(): parsing.parse_usi(usi.replace(":171163", ":this_index_does_not_exist")) assert exc_info.value.error_code == 404 - -# def test_zenodo(): -# usi = "" -# spectrum, _, splash_key = parsing.parse_usi(usi) - def test_parse_timeout(): with unittest.mock.patch( "metabolomics_spectrum_resolver.parsing.requests.get", From a7c794650218c75348d74cec50b0124aa487b590 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 9 Sep 2025 15:17:29 -0700 Subject: [PATCH 3/3] bug fix for charge --- metabolomics_spectrum_resolver/mass-spec-package | 2 +- metabolomics_spectrum_resolver/parsing.py | 12 +++++++++++- .../templates/homepage.html | 6 ++++++ .../templates/minimal.html | 2 +- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/metabolomics_spectrum_resolver/mass-spec-package b/metabolomics_spectrum_resolver/mass-spec-package index ac5f476..c1cc52a 160000 --- a/metabolomics_spectrum_resolver/mass-spec-package +++ b/metabolomics_spectrum_resolver/mass-spec-package @@ -1 +1 @@ -Subproject commit ac5f4766e6debaa206e210cf9f62ea29e4d89c05 +Subproject commit c1cc52a73645122d2eaac3ba52b1eeb346c36dc6 diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 2a85e4c..0ae35ca 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -495,7 +495,17 @@ def _parse_zenodo(usi: str) -> Tuple[sus.MsmsSpectrum, str]: intensity_list = scan_obj["intensities"] mz_list = scan_obj["mz"] charge = scan_obj["charge"] - precursor_mz = 0 + precursor_mz = scan_obj["precursor_mz"] + + try: + charge = int(charge) + except: + charge = 0 + + try: + precursor_mz = float(precursor_mz) + except: + precursor_mz = 0 source_link = f"https://zenodo.org/record/{zenodo_id}" diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html index 4efe0e0..6abe738 100644 --- a/metabolomics_spectrum_resolver/templates/homepage.html +++ b/metabolomics_spectrum_resolver/templates/homepage.html @@ -141,6 +141,12 @@

+ +
diff --git a/metabolomics_spectrum_resolver/templates/minimal.html b/metabolomics_spectrum_resolver/templates/minimal.html index e9bc625..a6be389 100644 --- a/metabolomics_spectrum_resolver/templates/minimal.html +++ b/metabolomics_spectrum_resolver/templates/minimal.html @@ -50,7 +50,7 @@