From e227d421bc45093f045425155d71eab5f4cb0aef Mon Sep 17 00:00:00 2001 From: florian-huber <36473328+florian-huber@users.noreply.github.com> Date: Fri, 12 Feb 2021 15:52:00 +0100 Subject: [PATCH 1/4] add mz range attributes --- spec2vec/SpectrumDocument.py | 14 ++++++++++++-- tests/test_spectrum_document.py | 16 ++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/spec2vec/SpectrumDocument.py b/spec2vec/SpectrumDocument.py index 70c8925..1219d4d 100644 --- a/spec2vec/SpectrumDocument.py +++ b/spec2vec/SpectrumDocument.py @@ -38,7 +38,8 @@ class SpectrumDocument(Document): [100. 150. 200.51] substance1 """ - def __init__(self, spectrum, n_decimals: int = 2): + def __init__(self, spectrum, n_decimals: int = 2, + mz_from: float = 0.0, mz_to: float = 1000.0): """ Parameters @@ -49,16 +50,25 @@ def __init__(self, spectrum, n_decimals: int = 2): Peak positions are converted to strings with n_decimal decimals. The default is 2, which would convert a peak at 100.387 into the word "peak@100.39". + mz_from: + Set lower threshold for m/z values to take into account. + Default is 0.0. + mz_to: + Set upper threshold for m/z values to take into account. + Default is 1000.0. """ self.n_decimals = n_decimals + self.mz_from = mz_from + self.mz_to = mz_to self.weights = None super().__init__(obj=spectrum) self._add_weights() def _make_words(self): """Create word from peaks (and losses).""" + mz_array_selected = self._obj.peaks.mz[(self._obj.peaks.mz >= mz_from) & (self._obj.peaks.mz <= mz_to)] format_string = "{}@{:." + "{}".format(self.n_decimals) + "f}" - peak_words = [format_string.format("peak", mz) for mz in self._obj.peaks.mz] + peak_words = [format_string.format("peak", mz) for mz in mz_array_selected] if self._obj.losses is not None: loss_words = [format_string.format("loss", mz) for mz in self._obj.losses.mz] else: diff --git a/tests/test_spectrum_document.py b/tests/test_spectrum_document.py index 67624cd..4fd732f 100644 --- a/tests/test_spectrum_document.py +++ b/tests/test_spectrum_document.py @@ -54,6 +54,22 @@ def test_spectrum_document_init_default_with_losses(): assert next(spectrum_document) == "peak@10.00" +def test_spectrum_document_init_default_peaks_outside_mz_range(): + """Use default n_decimal and test if peaks outside mz_range are excluded.""" + mz = numpy.array([310, 320, 330, 540], dtype="float") + intensities = numpy.array([1, 0.01, 0.1, 1], dtype="float") + metadata = dict(precursor_mz=100.0) + spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata) + spectrum_document = SpectrumDocument(spectrum, mz_to=500.0) + + assert spectrum_document.n_decimals == 2, "Expected different default for n_decimals" + assert len(spectrum_document) == 3 + assert spectrum_document.words == [ + "peak@310.00", "peak@320.00", "peak@330.00" + ] + assert next(spectrum_document) == "peak@10.00" + + def test_spectrum_document_init_n_decimals_1(): """Use n_decimal=1 and add losses.""" mz = numpy.array([10, 20, 30, 40], dtype="float") From 29047e6bc77d10dfc1054cbd0ad90c9f1b6660af Mon Sep 17 00:00:00 2001 From: florian-huber <36473328+florian-huber@users.noreply.github.com> Date: Fri, 12 Feb 2021 16:09:53 +0100 Subject: [PATCH 2/4] fix self. --- spec2vec/SpectrumDocument.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec2vec/SpectrumDocument.py b/spec2vec/SpectrumDocument.py index 1219d4d..6669137 100644 --- a/spec2vec/SpectrumDocument.py +++ b/spec2vec/SpectrumDocument.py @@ -66,7 +66,7 @@ def __init__(self, spectrum, n_decimals: int = 2, def _make_words(self): """Create word from peaks (and losses).""" - mz_array_selected = self._obj.peaks.mz[(self._obj.peaks.mz >= mz_from) & (self._obj.peaks.mz <= mz_to)] + mz_array_selected = self._obj.peaks.mz[(self._obj.peaks.mz >= self.mz_from) & (self._obj.peaks.mz <= self.mz_to)] format_string = "{}@{:." + "{}".format(self.n_decimals) + "f}" peak_words = [format_string.format("peak", mz) for mz in mz_array_selected] if self._obj.losses is not None: From 9fda32ca8200fb255a070c261feab4a81e501c25 Mon Sep 17 00:00:00 2001 From: florian-huber <36473328+florian-huber@users.noreply.github.com> Date: Fri, 12 Feb 2021 16:14:18 +0100 Subject: [PATCH 3/4] fix test --- tests/test_spectrum_document.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_spectrum_document.py b/tests/test_spectrum_document.py index 4fd732f..ab2b4b4 100644 --- a/tests/test_spectrum_document.py +++ b/tests/test_spectrum_document.py @@ -60,7 +60,7 @@ def test_spectrum_document_init_default_peaks_outside_mz_range(): intensities = numpy.array([1, 0.01, 0.1, 1], dtype="float") metadata = dict(precursor_mz=100.0) spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata) - spectrum_document = SpectrumDocument(spectrum, mz_to=500.0) + spectrum_document = SpectrumDocument(spectrum_in, mz_to=500.0) assert spectrum_document.n_decimals == 2, "Expected different default for n_decimals" assert len(spectrum_document) == 3 From 30512007e475e8c2f24686574e5584004739d222 Mon Sep 17 00:00:00 2001 From: florian-huber <36473328+florian-huber@users.noreply.github.com> Date: Fri, 12 Feb 2021 16:20:54 +0100 Subject: [PATCH 4/4] fix test --- tests/test_spectrum_document.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_spectrum_document.py b/tests/test_spectrum_document.py index ab2b4b4..80eaed5 100644 --- a/tests/test_spectrum_document.py +++ b/tests/test_spectrum_document.py @@ -67,7 +67,7 @@ def test_spectrum_document_init_default_peaks_outside_mz_range(): assert spectrum_document.words == [ "peak@310.00", "peak@320.00", "peak@330.00" ] - assert next(spectrum_document) == "peak@10.00" + assert next(spectrum_document) == "peak@310.00" def test_spectrum_document_init_n_decimals_1():