diff --git a/spec2vec/SpectrumDocument.py b/spec2vec/SpectrumDocument.py
index 70c8925..6669137 100644
--- a/spec2vec/SpectrumDocument.py
+++ b/spec2vec/SpectrumDocument.py
@@ -38,7 +38,8 @@ class SpectrumDocument(Document):
         [100. 150. 200.51]
         substance1
     """
-    def __init__(self, spectrum, n_decimals: int = 2):
+    def __init__(self, spectrum, n_decimals: int = 2,
+                 mz_from: float = 0.0, mz_to: float = 1000.0):
         """
 
         Parameters
@@ -49,16 +50,25 @@ def __init__(self, spectrum, n_decimals: int = 2):
             Peak positions are converted to strings with n_decimal decimals.
             The default is 2, which would convert a peak at 100.387 into the
             word "peak@100.39".
+        mz_from:
+            Set lower threshold for m/z values to take into account.
+            Default is 0.0.
+        mz_to:
+            Set upper threshold for m/z values to take into account.
+            Default is 1000.0.
         """
         self.n_decimals = n_decimals
+        self.mz_from = mz_from
+        self.mz_to = mz_to
         self.weights = None
         super().__init__(obj=spectrum)
         self._add_weights()
 
     def _make_words(self):
         """Create word from peaks (and losses)."""
+        mz_array_selected = self._obj.peaks.mz[(self._obj.peaks.mz >= self.mz_from) & (self._obj.peaks.mz <= self.mz_to)]
         format_string = "{}@{:." + "{}".format(self.n_decimals) + "f}"
-        peak_words = [format_string.format("peak", mz) for mz in self._obj.peaks.mz]
+        peak_words = [format_string.format("peak", mz) for mz in mz_array_selected]
         if self._obj.losses is not None:
             loss_words = [format_string.format("loss", mz) for mz in self._obj.losses.mz]
         else:
diff --git a/tests/test_spectrum_document.py b/tests/test_spectrum_document.py
index 67624cd..80eaed5 100644
--- a/tests/test_spectrum_document.py
+++ b/tests/test_spectrum_document.py
@@ -54,6 +54,22 @@ def test_spectrum_document_init_default_with_losses():
     assert next(spectrum_document) == "peak@10.00"
 
 
+def test_spectrum_document_init_default_peaks_outside_mz_range():
+    """Use default n_decimal and test if peaks outside mz_range are excluded."""
+    mz = numpy.array([310, 320, 330, 540], dtype="float")
+    intensities = numpy.array([1, 0.01, 0.1, 1], dtype="float")
+    metadata = dict(precursor_mz=100.0)
+    spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata)
+    spectrum_document = SpectrumDocument(spectrum_in, mz_to=500.0)
+
+    assert spectrum_document.n_decimals == 2, "Expected different default for n_decimals"
+    assert len(spectrum_document) == 3
+    assert spectrum_document.words == [
+        "peak@310.00", "peak@320.00", "peak@330.00"
+    ]
+    assert next(spectrum_document) == "peak@310.00"
+
+
 def test_spectrum_document_init_n_decimals_1():
     """Use n_decimal=1 and add losses."""
     mz = numpy.array([10, 20, 30, 40], dtype="float")