diff --git a/README.md b/README.md index bba87b0..257e97a 100644 --- a/README.md +++ b/README.md @@ -16,3 +16,8 @@ When using this code, please cite: [![DOI:10.1016/j.aca.2023.340789](http://img. [cc-by-nc-sa]: http://creativecommons.org/licenses/by-nc-sa/4.0/ [cc-by-nc-sa-image]: https://licensebuttons.net/l/by-nc-sa/4.0/88x31.png [cc-by-nc-sa-shield]: https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg + +## Project handover package + +A standalone refactor of the retention-model code is available in `chromatogram_handover/`. +It is structured as an installable mini-repository focused on computing chromatograms from retention parameters and gradient programs. diff --git a/chromatogram_handover/.gitignore b/chromatogram_handover/.gitignore new file mode 100644 index 0000000..41c33c8 --- /dev/null +++ b/chromatogram_handover/.gitignore @@ -0,0 +1,3 @@ + +__pycache__/ +*.pyc diff --git a/chromatogram_handover/README.md b/chromatogram_handover/README.md new file mode 100644 index 0000000..b112c3c --- /dev/null +++ b/chromatogram_handover/README.md @@ -0,0 +1,52 @@ +# chromatogram-handover + +This folder is a standalone, handover-friendly Python package extracted from +`rm_code` in `Jimbo994/AutoLC-BO`. + +## Goal + +Make it straightforward to compute chromatograms from: +- retention parameters (`k0`, `S`), and +- gradient programs (`phi`, `time`, `t_init`), + +without navigating the full research repository. + +## Install (editable) + +```bash +pip install -e . +``` + +## Minimal usage + +```python +from chromatogram_handover import ( + GradientProgram, + compute_chromatogram, + load_retention_parameters, +) + +params = load_retention_parameters("../data/RetentionParams_Section4.5.txt") +gradient = GradientProgram( + phi=(0.1, 0.4, 0.8), + time=(0.0, 15.0, 30.0), + t_init=0.5, +) + +chrom = compute_chromatogram( + parameters=params, + gradient=gradient, + t0=0.8, + dwell_time=0.12, + n_theoretical=25000, +) + +print(chrom.retention_times) +print(chrom.peak_widths) +``` + +## Notes for handover + +- `chromatogram_handover.model` contains the refactored multisegment retention model. +- Data classes (`GradientProgram`, `RetentionParameters`, `Chromatogram`) provide an explicit API. +- Input validation is built into the gradient and retention parameter constructors. diff --git a/chromatogram_handover/pyproject.toml b/chromatogram_handover/pyproject.toml new file mode 100644 index 0000000..5d95b4d --- /dev/null +++ b/chromatogram_handover/pyproject.toml @@ -0,0 +1,15 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "chromatogram-handover" +version = "0.1.0" +description = "Refactored chromatogram simulator extracted from AutoLC-BO rm_code" +readme = "README.md" +requires-python = ">=3.10" +dependencies = ["numpy>=1.24"] + +[tool.pytest.ini_options] +pythonpath = ["src"] +addopts = "-q" diff --git a/chromatogram_handover/src/chromatogram_handover/__init__.py b/chromatogram_handover/src/chromatogram_handover/__init__.py new file mode 100644 index 0000000..0b14fcd --- /dev/null +++ b/chromatogram_handover/src/chromatogram_handover/__init__.py @@ -0,0 +1,17 @@ +from .io import load_retention_parameters +from .model import ( + Chromatogram, + GradientProgram, + RetentionParameters, + compute_chromatogram, + retention_time_and_width, +) + +__all__ = [ + "Chromatogram", + "GradientProgram", + "RetentionParameters", + "compute_chromatogram", + "retention_time_and_width", + "load_retention_parameters", +] diff --git a/chromatogram_handover/src/chromatogram_handover/io.py b/chromatogram_handover/src/chromatogram_handover/io.py new file mode 100644 index 0000000..2c14bf6 --- /dev/null +++ b/chromatogram_handover/src/chromatogram_handover/io.py @@ -0,0 +1,17 @@ +"""I/O helpers for handover workflows.""" + +from __future__ import annotations + +import numpy as np + +from .model import RetentionParameters + + +def load_retention_parameters(path: str) -> RetentionParameters: + """Load two-column text file with k0 and S values.""" + data = np.loadtxt(path, dtype=float) + if data.ndim == 1: + data = np.expand_dims(data, axis=0) + if data.shape[1] != 2: + raise ValueError("expected exactly two columns: k0 and S") + return RetentionParameters(k0=data[:, 0], s=data[:, 1]) diff --git a/chromatogram_handover/src/chromatogram_handover/model.py b/chromatogram_handover/src/chromatogram_handover/model.py new file mode 100644 index 0000000..282a991 --- /dev/null +++ b/chromatogram_handover/src/chromatogram_handover/model.py @@ -0,0 +1,215 @@ +"""Retention-model utilities for predicting chromatograms from gradient programs.""" + +from __future__ import annotations + +from dataclasses import dataclass +from math import exp, log, sqrt +from typing import Iterable + +import numpy as np + + +@dataclass(frozen=True) +class GradientProgram: + """Gradient definition used by the multisegment retention model. + + Times are in minutes and represent gradient turning points relative to the + program start. The first point is the initial composition. + """ + + phi: tuple[float, ...] + time: tuple[float, ...] + t_init: float = 0.0 + + def __post_init__(self) -> None: + if len(self.phi) != len(self.time): + raise ValueError("phi and time must have equal length") + if len(self.phi) < 2: + raise ValueError("gradient requires at least two turning points") + if any(t2 <= t1 for t1, t2 in zip(self.time, self.time[1:])): + raise ValueError("time points must be strictly increasing") + + @property + def slopes(self) -> tuple[float, ...]: + return tuple( + (self.phi[i + 1] - self.phi[i]) / (self.time[i + 1] - self.time[i]) + for i in range(len(self.phi) - 1) + ) + + +@dataclass(frozen=True) +class RetentionParameters: + """k0 and S values per analyte.""" + + k0: np.ndarray + s: np.ndarray + + @classmethod + def from_iterables( + cls, k0_values: Iterable[float], s_values: Iterable[float] + ) -> "RetentionParameters": + k0 = np.asarray(list(k0_values), dtype=float) + s = np.asarray(list(s_values), dtype=float) + if k0.shape != s.shape: + raise ValueError("k0 and s must have identical shape") + return cls(k0=k0, s=s) + + +@dataclass(frozen=True) +class Chromatogram: + """Computed chromatogram descriptors.""" + + retention_times: np.ndarray + peak_widths: np.ndarray + + +def _retention_factor(k0: float, s_value: float, phi: float) -> float: + return k0 * exp(-s_value * phi) + + +def _isocratic_peak_width(n_theoretical: float, t0: float, k: float) -> float: + return 4.0 * (1.0 / sqrt(n_theoretical)) * t0 * (1.0 + k) + + +def _gradient_compression_factor( + cumulative_width_term: float, + k_at_elution: float, + k_initial: float, + dwell_time: float, + t0: float, + t_init: float, +) -> float: + term = ((dwell_time + t_init) * ((1.0 + k_initial) ** 2)) / (k_initial**3) + g_sq = (k_at_elution**2 / (t0 * (1.0 + k_at_elution) ** 2)) * ( + term + cumulative_width_term + ) + return sqrt(g_sq) + + +def _gradient_peak_width(n_theoretical: float, t0: float, k: float, g: float) -> float: + return 4.0 * g * ((t0 * (1.0 + k)) / sqrt(n_theoretical)) + + +def retention_time_and_width( + k0: float, + s_value: float, + t0: float, + dwell_time: float, + gradient: GradientProgram, + n_theoretical: float, +) -> tuple[float, float]: + """Compute retention time and peak width for one analyte.""" + phi_init = gradient.phi[0] + k_init = _retention_factor(k0, s_value, phi_init) + frac_before_gradient = (dwell_time + gradient.t_init) / (t0 * k_init) + + if frac_before_gradient >= 1: + retention_time = t0 * (1 + k_init) + return retention_time, _isocratic_peak_width(n_theoretical, t0, k_init) + + cumulative = 0.0 + cumulative_width = 0.0 + + for segment, slope in enumerate(gradient.slopes): + phi_i = gradient.phi[segment] + phi_next = gradient.phi[segment + 1] + t_i = gradient.time[segment] + t_next = gradient.time[segment + 1] + + k_i = _retention_factor(k0, s_value, phi_i) + k_next = _retention_factor(k0, s_value, phi_next) + + if slope == 0: + add = (t_next - t_i) / (t0 * k_i) + add_w = ((t_next - t_i) * (1 + k_i) ** 2) / (k_i**3) + else: + add = (1 / (t0 * s_value * slope)) * ((1 / k_next) - (1 / k_i)) + add_w = (1 / (s_value * slope)) * ( + (1 / 3) * ((1 / k_next**3) - (1 / k_i**3)) + + (1 / k_next**2 - 1 / k_i**2) + + (1 / k_next - 1 / k_i) + ) + + if cumulative + add + frac_before_gradient < 1: + cumulative += add + cumulative_width += add_w + continue + + if slope == 0: + tr = ( + dwell_time + + gradient.t_init + + t0 + + t_i + + t0 * k_i * (1 - frac_before_gradient - cumulative) + ) + cumulative_width += ( + (tr - t0 - dwell_time - gradient.t_init - t_i) * (1 + k_i) ** 2 + ) / (k_i**3) + g = _gradient_compression_factor( + cumulative_width, k_i, k_init, dwell_time, t0, gradient.t_init + ) + return tr, _gradient_peak_width(n_theoretical, t0, k_i, g) + + tr = t0 + dwell_time + gradient.t_init + t_i + (1 / (s_value * slope)) * log( + 1 + t0 * s_value * slope * k_i * (1 - frac_before_gradient - cumulative) + ) + phi_elute = phi_i + slope * (tr - t0 - dwell_time - gradient.t_init - t_i) + k_elute = _retention_factor(k0, s_value, phi_elute) + cumulative_width += (1 / (s_value * slope)) * ( + (1 / 3) * ((1 / k_elute**3) - (1 / k_i**3)) + + (1 / k_elute**2 - 1 / k_i**2) + + (1 / k_elute - 1 / k_i) + ) + g = _gradient_compression_factor( + cumulative_width, k_elute, k_init, dwell_time, t0, gradient.t_init + ) + return tr, _gradient_peak_width(n_theoretical, t0, k_elute, g) + + phi_last = gradient.phi[-1] + k_last = _retention_factor(k0, s_value, phi_last) + t_last = gradient.time[-1] + tr = ( + dwell_time + + gradient.t_init + + t0 + + t_last + + t0 * k_last * (1 - frac_before_gradient - cumulative) + ) + cumulative_width += ((tr - t0 - dwell_time - gradient.t_init - t_last) * (1 + k_last) ** 2) / ( + k_last**3 + ) + g = _gradient_compression_factor( + cumulative_width, k_last, k_init, dwell_time, t0, gradient.t_init + ) + return tr, _gradient_peak_width(n_theoretical, t0, k_last, g) + + +def compute_chromatogram( + parameters: RetentionParameters, + gradient: GradientProgram, + t0: float, + dwell_time: float, + n_theoretical: float, +) -> Chromatogram: + """Compute and sort retention times and peak widths for all analytes.""" + retention_times = np.zeros_like(parameters.k0) + peak_widths = np.zeros_like(parameters.k0) + + for i, (k0, s_val) in enumerate(zip(parameters.k0, parameters.s, strict=True)): + tr, width = retention_time_and_width( + k0=k0, + s_value=s_val, + t0=t0, + dwell_time=dwell_time, + gradient=gradient, + n_theoretical=n_theoretical, + ) + retention_times[i] = tr + peak_widths[i] = width + + ordering = np.argsort(retention_times) + return Chromatogram( + retention_times=retention_times[ordering], + peak_widths=peak_widths[ordering], + ) diff --git a/chromatogram_handover/tests/test_model.py b/chromatogram_handover/tests/test_model.py new file mode 100644 index 0000000..670a0b9 --- /dev/null +++ b/chromatogram_handover/tests/test_model.py @@ -0,0 +1,41 @@ +import numpy as np + +from chromatogram_handover import GradientProgram, RetentionParameters, compute_chromatogram +from rm_code.retention_model import compute_chromatogram as legacy_compute_chromatogram + + +def test_refactor_matches_legacy_implementation(): + params = RetentionParameters.from_iterables( + [5.80, 5.85, 6.25, 6.29], [16.2, 24.0, 18.9, 19.7] + ) + gradient = GradientProgram(phi=(0.1, 0.2, 0.5, 0.8), time=(0.0, 5.0, 20.0, 35.0), t_init=0.5) + + new = compute_chromatogram( + parameters=params, + gradient=gradient, + t0=0.8, + dwell_time=0.12, + n_theoretical=25000, + ) + old_tr, old_w = legacy_compute_chromatogram( + k0_list=params.k0, + S_list=params.s, + t_0=0.8, + t_D=0.12, + t_init=0.5, + phi_list=list(gradient.phi), + t_list=list(gradient.time), + N=25000, + ) + + np.testing.assert_allclose(new.retention_times, old_tr) + np.testing.assert_allclose(new.peak_widths, old_w) + + +def test_invalid_gradient_raises(): + try: + GradientProgram(phi=(0.1, 0.2), time=(0.0, 0.0)) + except ValueError as exc: + assert "strictly increasing" in str(exc) + else: + raise AssertionError("Expected ValueError for non-increasing gradient time")