diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 83e7651..0000000 --- a/.flake8 +++ /dev/null @@ -1,7 +0,0 @@ -[flake8] -max-line-length = 119 -# default is 79, 119 is max for GitHub code review. -# No reason to limit ourselves to 79 if the code looks uglier or less readable. -exclude = .git,__pycache__,build,dist -per-file-ignores = - */__init__.py: F401 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e66e0d8..6a04fa4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -38,7 +38,7 @@ jobs: run: ruff format --check . - name: mypy - run: mypy . + run: mypy ./src # ---- Tests ---- - name: Run tests diff --git a/README.md b/README.md index fd25ad6..9a310fc 100644 --- a/README.md +++ b/README.md @@ -1,158 +1 @@ -# template-python-lib - -A minimal, modern **Python library template** designed to jumpstart your project with best practices and tooling. - ---- - -## ๐Ÿš€ Quick start: using this template - -### 1. Create a new repository -- Via GitHub UI: - Click **Use this template** โ†’ create `my-awesome-lib` -- Or via CLI: - ```bash - gh repo create your-org/my-awesome-lib --template JaLuka98/template-python-lib --public - ``` - -### 2. Clone your new repo and enter it -```bash -git clone https://github.com/your-org/my-awesome-lib.git -cd my-awesome-lib -``` - -### 3. Apply required customisations -Before you start developing, update the following items in your new project: - -| Item | Where to change | Notes | -|-----------------------------|-----------------------------------------------------|-------------------------------------------------| -| Package name | `src/template_python_lib/` โ†’ `src/your_package/` | Rename the directory and update all references | -| Import paths | All source and test files | Replace `template_python_lib` with your package | -| Package metadata | `pyproject.toml`, `setup.cfg` | Update name, author, description, etc. | -| Version | `pyproject.toml`, `setup.cfg` | Optionally set initial version | -| License | `pyproject.toml`, `setup.cfg`, LICENSE | Choose and update license as needed | -| README | `README.md` | Update project name and description | -| Documentation | `docs/` | Update Sphinx config and docstrings | - -### 4. Set up your environment -Set up the necessary packages in an environment of your choice: -```bash - -pip install -e .[dev] -``` - -### 5. Run tests to verify setup -```bash -pytest -``` - ---- - -## ๐Ÿ“‚ Project structure - -```plaintext -. -โ”œโ”€โ”€ src/ -โ”‚ โ””โ”€โ”€ template_python_lib/ # Your package source code -โ”œโ”€โ”€ tests/ # Test suite -โ”œโ”€โ”€ docs/ # Sphinx documentation source -โ”œโ”€โ”€ .github/workflows/ # GitHub Actions CI workflows -โ”œโ”€โ”€ .pre-commit-config.yaml # Pre-commit hooks configuration -โ”œโ”€โ”€ pyproject.toml # Build system and tool configurations -โ”œโ”€โ”€ setup.cfg # Package metadata and settings -โ”œโ”€โ”€ requirements-dev.txt # Development dependencies -โ””โ”€โ”€ README.md # This file -``` - ---- - -## ๐Ÿ› ๏ธ Local development - -- Use the `src` layout to avoid import conflicts. -- Install dev dependencies with `pip install -e .[dev]`. -- Run tests with `pytest`. -- Type-check your code with `mypy`. -- Lint your code with `ruff`. - ---- - -## ๐ŸŽจ Linting & formatting - -- **Ruff** handles linting and formatting checks. -- Run lint checks manually with: - ```bash - ruff check src tests - ``` - ---- - -## ๐Ÿ”ง Pre-commit hooks - -- Pre-commit hooks run automatically before each commit to catch issues early. 
-- Set up pre-commit by running:
-  ```bash
-  pre-commit install
-  ```
-- Hooks include:
-  - Ruff linting
-  - Adding trailing whitespaces
-
----
-
-## 📦 Versioning
-
-- Uses **CalVer** (Calendar Versioning) format: `YYYY.MM.PATCH` (e.g., `2025.09.0`).
-- Version is managed with [bump-my-version](https://github.com/callowayproject/bump-my-version).
-- Bump versions with:
-  ```bash
-  bump-my-version patch
-  ```
-  or with the dedicated GitHub action.
-
----
-
-## 📚 Documentation
-
-- Documentation is built with **Sphinx**.
-- Source files are in the `docs/` directory.
-- Ready for hosting on Read the Docs.
-- Build docs locally with:
-  ```bash
-  cd docs
-  make html
-  ```
-
----
-
-## ⚙️ Continuous Integration (CI)
-
-- GitHub Actions workflows automate:
-  - Linting and type-checking.
-  - Running tests on multiple Python versions.
-- Ensures code quality and consistency on every push and pull request.
-
----
-
-## 🤔 Rationale
-
-This template aims to provide a minimal yet modern starting point for Python libraries, incorporating:
-
-- Best practices like the `src` layout to avoid import pitfalls.
-- Automated tooling for testing, linting, formatting, and type checking.
-- Pre-commit hooks to maintain code quality from the start.
-- Clear versioning strategy with CalVer.
-- Documentation setup ready for expansion and publishing.
-
----
-
-## 🤝 Contributing
-
-Contributions are welcome! Please:
-
-- Follow the existing code style and conventions.
-- Run tests and linting before submitting PRs.
-- Use the pre-commit hooks to catch issues early.
-- Update this README as needed.
-
----
-
-Thank you for using this template — happy coding!
+# NPKit
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 92545db..9de13af 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -10,14 +10,13 @@
 import sys
 import importlib
 
-project = "template-python-lib"
+project = "NPKit"
 try:
-    __version__ = importlib.import_module("template_python_lib").__version__
+    __version__ = importlib.import_module("npkit").__version__
 except Exception:
     __version__ = "0.0.0"
 
 sys.path.insert(0, os.path.abspath("../../src"))
 
-project = "template-python-lib"
 copyright = "2025, Jan Lukas Späh"
 author = "Jan Lukas Späh"
 release = __version__
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 6563699..8039c83 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,9 +1,9 @@
-.. CPax documentation master file, created by
+.. NPKit documentation master file, created by
    sphinx-quickstart on Mon Mar 31 23:52:35 2025.
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
 
-CPax documentation
-==================
+NPKit documentation
+===================
 
 Add your content using ``reStructuredText`` syntax. See the
diff --git a/examples/minimal_example.py b/examples/minimal_example.py
new file mode 100644
index 0000000..7f85a36
--- /dev/null
+++ b/examples/minimal_example.py
@@ -0,0 +1,60 @@
+import numpy as np
+from npkit import (
+    Observable,
+    ObservableSet,
+    GaussianModel,
+    GaussianLikelihood,
+)
+from npkit import build_belt, invert_belt
+import matplotlib.pyplot as plt
+from npkit.plot import plot_belt
+
+# Model: one observable, quadratic in C
+obs = ObservableSet(
+    [
+        Observable("xsec1", lambda p: 100 + 10 * p["C"] ** 2),
+    ]
+)
+
+V = np.array([100])
+
+model = GaussianModel(obs=obs, covariance=V)
+
+# Observed data (example)
+y_obs = 100
+# data = Combination(names=obs.names, values=y_obs, covariance=V)
+
+
+def like_builder():
+    return GaussianLikelihood(obs, y_obs, V)
+
+
+start = {"C": 0.0}
+# bounds = {"C": (-1e6, 1e6)}
+bounds = None
+rng = np.random.default_rng(123)
+
+# Build a 68% belt
+grid = np.linspace(-2, 2, 51)
+belt = build_belt(
+    param="C",
+    model=model,
+    like_builder=like_builder,
+    grid=grid,
+    n_toys=3000,
+    alpha=1 - 0.6827,
+    rng=rng,
+    start=start,
+    bounds=bounds,
+)
+
+print(belt)
+
+# Invert for observed interval
+interval = invert_belt(belt, like_builder=like_builder, start=start, bounds=bounds)
+print("Neyman CI:", interval)
+
+# Visualize the Neyman belt
+ax = plot_belt(belt)
+ax.set_title("Neyman Belt Visualization")
+plt.show()
diff --git a/examples/multiple_observable_example.py b/examples/multiple_observable_example.py
new file mode 100644
index 0000000..3cb2945
--- /dev/null
+++ b/examples/multiple_observable_example.py
@@ -0,0 +1,52 @@
+import numpy as np
+from npkit import (
+    Observable,
+    ObservableSet,
+    Combination,
+    GaussianModel,
+    GaussianLikelihood,
+)
+from npkit import build_belt, invert_belt
+
+# Model: two observables
+obs = ObservableSet(
+    [
+        Observable("xsec1", lambda p: 1.0 + 0.4 * p["C"]),
+        Observable("xsec2", lambda p: 0.8 - 0.2 * p["C"]),
+    ]
+)
+
+V = np.array([[0.04**2, 0.5 * 0.04 * 0.05], [0.5 * 0.04 * 0.05, 0.05**2]])
+
+model = GaussianModel(obs=obs, covariance=V)
+
+# Observed data (example)
+y_obs = np.array([1.05, 0.78])
+# Alternative entry point: model.likelihood(data) builds the same likelihood
+data = Combination(names=obs.names, values=y_obs, covariance=V)
+
+
+def like_builder():
+    return GaussianLikelihood(obs, y_obs, V)
+
+
+start = {"C": 0.0}
+bounds = {"C": (-2.0, 2.0)}
+rng = np.random.default_rng(123)
+
+# Build a 68% belt
+grid = np.linspace(-2, 2, 161)
+belt = build_belt(
+    param="C",
+    model=model,
+    like_builder=like_builder,
+    grid=grid,
+    n_toys=1000,
+    alpha=1 - 0.6827,
+    rng=rng,
+    start=start,
+    bounds=bounds,
+)
+
+# Invert for observed interval
+interval = invert_belt(belt, like_builder=like_builder, start=start, bounds=bounds)
+print("Neyman CI:", interval)
diff --git a/examples/root_finding_example.py b/examples/root_finding_example.py
deleted file mode 100644
index 6ad258d..0000000
--- a/examples/root_finding_example.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from template_python_lib.roots.simple import newtons_method
-
-
-# Define the function and its derivative
-def f(x):
-    return x**2 - 2
-
-
-def f_prime(x):
-    return 2 * x
-
-
-# Initial guess
-x0 = 1.0
-
-# Find the root
-root = newtons_method(f, f_prime, x0)
-print(f"The root is approximately: {root}")
diff --git a/pyproject.toml b/pyproject.toml
index fdf4397..7c7d346 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,9 +3,9 @@
 requires = ["setuptools>=65.5", "wheel"]
 build-backend = "setuptools.build_meta"
 
 [project]
-name = "template-python-lib"
+name = "NPKit"
 version = "2025.09.0"
-description = "template-python-lib: A template repository for modern python packages."
+description = "NPKit: Toolkit for frequentist interpretations and inference."
 authors = [
   { name = "Jan Lukas Späh", email = "your.email@example.com" }
 ]
@@ -19,7 +19,7 @@ classifiers = [
 ]
 
 dependencies = [
-    "numpy>=2.0",
+    "numpy>=2.0", "scipy>=1.16"
 ]
 
 [project.optional-dependencies]
@@ -28,12 +28,28 @@
 dev = ["bump-my-version>=1.2", "ruff>=0.6", "pre-commit>=3.8", "mypy>=1.11"]
 docs = ["sphinx>=7", "sphinx-rtd-theme", "sphinx-autodoc-typehints", "sphinx-autoapi", "myst-parser"]
 
 [project.urls]
-Homepage = "https://github.com/JaLuka98/template-python-lib"
-Issues = "https://github.com/JaLuka98/template-python-lib/issues"
-Source = "https://github.com/JaLuka98/template-python-lib"
+Homepage = "https://github.com/JaLuka98/NPKit"
+Issues = "https://github.com/JaLuka98/NPKit/issues"
+Source = "https://github.com/JaLuka98/NPKit"
 
 [tool.setuptools]
 package-dir = {"" = "src"}
 [tool.setuptools.packages.find]
 where = ["src"]
+
+[tool.mypy]
+python_version = "3.12"
+strict = true
+exclude = ["^docs/", "^examples/"]
+
+warn_unused_ignores = true
+warn_redundant_casts = true
+warn_unreachable = true
+
+[[tool.mypy.overrides]]
+module = [
+    "scipy.*",
+    "matplotlib.*",
+]
+ignore_missing_imports = true
diff --git a/src/npkit/__init__.py b/src/npkit/__init__.py
new file mode 100644
index 0000000..03974e7
--- /dev/null
+++ b/src/npkit/__init__.py
@@ -0,0 +1,30 @@
+"""
+NPKit — Frequentist inference utilities:
+- Observables and measurements
+- Gaussian model (mean = prediction(params), covariance = V)
+- Likelihood & profile-likelihood ratio test statistic
+- Neyman belt construction and inversion
+"""
+
+from .observables import Observable, ObservableSet, Params
+from .measurements import Combination
+from .likelihood import GaussianModel, GaussianLikelihood
+from .stats import fit_mle, q_profile
+from .neyman import Belt, build_belt, invert_belt, check_coverage
+
+__all__ = [
+    "Observable",
+    "ObservableSet",
+    "Params",
+    "Combination",
+    "GaussianModel",
+    "GaussianLikelihood",
+    "fit_mle",
+    "q_profile",
+    "Belt",
+    "build_belt",
+    "invert_belt",
+    "check_coverage",
+]
+
+__version__ = "2025.09.0"
diff --git a/src/npkit/likelihood.py b/src/npkit/likelihood.py
new file mode 100644
index 0000000..f61e56e
--- /dev/null
+++ b/src/npkit/likelihood.py
@@ -0,0 +1,126 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Sequence, Union, cast
+import numpy as np
+from numpy.typing import NDArray
+
+from .observables import ObservableSet, Params
+from .measurements import Combination
+
+# Flexible input types the user may pass for a covariance
+CovInput = Union[
+    None, float, int, np.ndarray, Sequence[float], Sequence[Sequence[float]]
+]
+
+
+def _coerce_cov(cov: object, n: int) -> NDArray[np.float64]:
+    """
+    Accept covariance in several convenient forms and produce a (n,n) float64 matrix:
+
+    - None           -> identity(n)
+    - scalar         -> scalar * identity(n)
+    - 1D shape (n,)  -> diag(vector)
+    - 2D shape (n,n) -> as-is
+
+    Raises if shape is incompatible or not positive-definite.
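+
+    Illustrative calls (example values only, assuming n = 2):
+
+        _coerce_cov(None, 2)        # -> identity(2)
+        _coerce_cov(4.0, 2)         # -> [[4., 0.], [0., 4.]]
+        _coerce_cov([1.0, 9.0], 2)  # -> [[1., 0.], [0., 9.]]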
+ """ + if cov is None: + M = np.eye(n, dtype=float) + else: + arr = np.asarray(cov, dtype=float) + if arr.ndim == 0: # scalar + M = float(arr) * np.eye(n, dtype=float) + elif arr.ndim == 1: + if arr.size != n: + raise ValueError(f"1D covariance length {arr.size} != n={n}") + M = np.diag(arr) + elif arr.ndim == 2: + if arr.shape != (n, n): + raise ValueError(f"2D covariance shape {arr.shape} != (n,n)={(n, n)}") + M = arr + else: + raise ValueError("covariance must be None, scalar, (n,), or (n,n)") + + # Basic PD check (and symmetrise tiny asymmetries) + M = 0.5 * (M + M.T) + try: + np.linalg.cholesky(M) + except np.linalg.LinAlgError as e: # pragma: no cover + raise ValueError("covariance must be positive-definite") from e + return cast(NDArray[np.float64], M.astype(float, copy=False)) + + +@dataclass +class GaussianModel: + """ + Gaussian data model: + y ~ N(ฮผ(params), V), with ฮผ(params) = obs.predict_vector(params) + + Use this class to: + - simulate pseudo-data (toys) at given params + - construct a GaussianLikelihood for observed data + """ + + obs: ObservableSet + covariance: CovInput # flexible on input; coerced to ndarray in __post_init__ + _cov: NDArray[np.float64] | None = None # internal, set in __post_init__ + + def __post_init__(self) -> None: + n = len(self.obs.observables) + self._cov = _coerce_cov(self.covariance, n) + # Precompute inverse and logdet (kept for potential future use) + self._cov_inv: NDArray[np.float64] = cast( + NDArray[np.float64], np.linalg.inv(self._cov) + ) + sign, logdet = np.linalg.slogdet(self._cov) + if sign <= 0: + raise ValueError("covariance must be positive-definite") + self._logdet = float(logdet) + + def simulate(self, params: Params, rng: np.random.Generator) -> NDArray[np.float64]: + """ + Draw one pseudo-experiment vector y ~ N(ฮผ(params), V). + """ + mean = self.obs.predict_vector(params) + assert self._cov is not None # for type-checkers + y = rng.multivariate_normal(mean=mean, cov=self._cov) + return cast(NDArray[np.float64], np.asarray(y, dtype=float)) + + def likelihood(self, data: Combination) -> "GaussianLikelihood": + """ + Bind observed data (values must match obs.names order) to a Likelihood. + """ + if list(data.names) != self.obs.names: + raise ValueError("data.names must match ObservableSet.names") + if data.covariance is not None: + # Allow overriding covariance via data, else use model's V. + return GaussianLikelihood(self.obs, data.values, data.covariance) + assert self._cov is not None + return GaussianLikelihood(self.obs, data.values, self._cov) + + +class GaussianLikelihood: + """ + -2 log L for Gaussian model with known covariance: + nll(params) = (y - mu(params))^T V^{-1} (y - mu(params)) + const + + The additive constant is irrelevant for likelihood ratios and is omitted. 
+ """ + + def __init__( + self, obs: ObservableSet, values: np.ndarray, covariance: CovInput + ) -> None: + self.obs = obs + self.y: NDArray[np.float64] = cast( + NDArray[np.float64], np.asarray(values, dtype=float) + ) + n = int(self.y.size) + self.V: NDArray[np.float64] = _coerce_cov(covariance, n) + self._Vinv: NDArray[np.float64] = cast( + NDArray[np.float64], np.linalg.inv(self.V) + ) + + def nll(self, params: Params) -> float: + r = self.y - self.obs.predict_vector(params) + return float(r.T @ self._Vinv @ r) diff --git a/src/npkit/measurements.py b/src/npkit/measurements.py new file mode 100644 index 0000000..b4b2f24 --- /dev/null +++ b/src/npkit/measurements.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Sequence, Optional +import numpy as np + + +@dataclass +class Combination: + """ + A set of measured values with (optional) covariance. + + Attributes + ---------- + names : list[str] + Order of observables (must match ObservableSet order for likelihood). + values : np.ndarray, shape (n,) + Measured values. + covariance : np.ndarray | None, shape (n, n) + Total covariance matrix. If None, treat as diagonal with zeros (no errors). + """ + + names: Sequence[str] + values: np.ndarray + covariance: Optional[np.ndarray] = None + + def __post_init__(self) -> None: + self.values = np.asarray(self.values, dtype=float) + if self.covariance is not None: + self.covariance = np.asarray(self.covariance, dtype=float) + if self.covariance.shape != (self.values.size, self.values.size): + raise ValueError("covariance must be (n,n) matching len(values)") diff --git a/src/npkit/neyman.py b/src/npkit/neyman.py new file mode 100644 index 0000000..183cfa0 --- /dev/null +++ b/src/npkit/neyman.py @@ -0,0 +1,124 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Callable + +import numpy as np + +from .observables import Params +from .likelihood import GaussianModel, GaussianLikelihood +from .stats import q_profile + + +@dataclass +class Belt: + """Critical quantiles of the test statistic across a parameter grid.""" + + param: str + grid: np.ndarray # shape (m,) + qcrit: np.ndarray # shape (m,) + alpha: float # size (e.g. 0.3173 for 68.27% CI) + + +def build_belt( + param: str, + model: GaussianModel, + like_builder: Callable[[], GaussianLikelihood], + grid: np.ndarray, + n_toys: int, + alpha: float, + rng: np.random.Generator, + start: Params, + bounds: dict[str, tuple[float, float]] | None = None, +) -> Belt: + """ + Build a Neyman belt by generating toys at each true parameter value. 
+
+    For each grid point C:
+      1) Generate y_toy ~ N(μ(C), V)
+      2) Compute q(value=C) on y_toy via profile-likelihood
+      3) Take the (1 - alpha)-quantile over toys as qcrit[C]
+    """
+    m = grid.size
+    qcrit = np.empty(m, dtype=float)
+
+    for i, c in enumerate(grid):
+        q_vals = np.empty(n_toys, dtype=float)
+        for t in range(n_toys):
+            y = model.simulate({**start, param: float(c)}, rng=rng)
+
+            def fresh_like() -> GaussianLikelihood:
+                # Bind a fresh likelihood to this toy data
+                return GaussianLikelihood(model.obs, y, model.covariance)
+
+            q_vals[t] = q_profile(
+                param=param,
+                value=float(c),
+                like_builder=fresh_like,
+                start=start,
+                bounds=bounds,
+            )
+        qcrit[i] = float(np.quantile(q_vals, 1.0 - alpha))
+
+    return Belt(
+        param=param, grid=np.asarray(grid, dtype=float), qcrit=qcrit, alpha=alpha
+    )
+
+
+def invert_belt(
+    belt: Belt,
+    like_builder: Callable[[], GaussianLikelihood],
+    start: Params,
+    bounds: dict[str, tuple[float, float]] | None = None,
+) -> tuple[float, float]:
+    """
+    Given the observed data (implicit in like_builder), compute q_obs(C) across
+    the grid and return the interval spanning all accepted grid points, i.e.
+    those with q_obs(C) <= qcrit(C). If the accepted set has gaps, they are
+    bridged: the covering interval is returned, not a union of segments.
+    """
+    q_obs = np.array(
+        [
+            q_profile(
+                param=belt.param,
+                value=float(c),
+                like_builder=like_builder,
+                start=start,
+                bounds=bounds,
+            )
+            for c in belt.grid
+        ],
+        dtype=float,
+    )
+
+    mask = q_obs <= belt.qcrit
+    if not mask.any():
+        return (np.nan, np.nan)
+
+    # Covering interval of all accepted grid points (bridges any gaps)
+    idx = np.where(mask)[0]
+    return (float(belt.grid[idx[0]]), float(belt.grid[idx[-1]]))
+
+
+def check_coverage(
+    true_value: float,
+    belt: Belt,
+    model: GaussianModel,
+    start: Params,
+    bounds: dict[str, tuple[float, float]] | None,
+    n_experiments: int,
+    rng: np.random.Generator,
+) -> float:
+    """
+    Empirical coverage at `true_value`:
+    fraction of experiments whose CI (from belt inversion) contains true_value.
+    """
+    hits = 0
+    for _ in range(n_experiments):
+        y = model.simulate({**start, belt.param: float(true_value)}, rng=rng)
+
+        def fresh_like() -> GaussianLikelihood:
+            return GaussianLikelihood(model.obs, y, model.covariance)
+
+        lo, hi = invert_belt(belt, like_builder=fresh_like, start=start, bounds=bounds)
+        if lo <= true_value <= hi:
+            hits += 1
+    return hits / n_experiments
diff --git a/src/npkit/observables.py b/src/npkit/observables.py
new file mode 100644
index 0000000..7d95185
--- /dev/null
+++ b/src/npkit/observables.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Callable, Mapping, Sequence, Protocol
+import numpy as np
+
+Params = Mapping[str, float]
+PredictionFunction = Callable[[Params], float]
+
+
+class VectorPredictor(Protocol):
+    """Protocol for objects that can produce a vector prediction for given params."""
+
+    def predict_vector(self, params: Params) -> np.ndarray: ...
+
+
+@dataclass(frozen=True)
+class Observable:
+    """
+    A single observable defined by a prediction function.
+
+    The function takes a parameter dict (name -> value) and returns a scalar prediction.
+    """
+
+    name: str
+    predict: PredictionFunction
+
+
+@dataclass
+class ObservableSet:
+    """
+    A collection of Observables with a stable order.
+
+    Use `predict_vector(params)` to obtain the prediction vector aligned to `names`.
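+
+    Minimal sketch (example values only):
+
+        s = ObservableSet([Observable("a", lambda p: p["C"]),
+                           Observable("b", lambda p: 2.0 * p["C"])])
+        s.names                       # ["a", "b"]
+        s.predict_vector({"C": 1.5})  # array([1.5, 3.0])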
+ """ + + observables: Sequence[Observable] + + @property + def names(self) -> list[str]: + return [o.name for o in self.observables] + + def predict_vector(self, params: Params) -> np.ndarray: + """Return prediction vector y_th(params) with shape (n,).""" + return np.asarray([o.predict(params) for o in self.observables], dtype=float) diff --git a/src/npkit/plot.py b/src/npkit/plot.py new file mode 100644 index 0000000..67f7cc3 --- /dev/null +++ b/src/npkit/plot.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from typing import Any +import numpy as np +import matplotlib.pyplot as plt + +from .neyman import Belt + + +def plot_belt( + belt: Belt, q_obs: np.ndarray | None = None, ax: Any | None = None +) -> Any: + """ + Plot q_crit(C) across the grid; optionally overlay q_obs(C). + """ + if ax is None: + fig, ax = plt.subplots() + _ = fig # silence linters if unused + ax.step( + belt.grid, belt.qcrit, where="mid", label=f"q_crit (alpha={belt.alpha:.3f})" + ) + if q_obs is not None: + ax.step(belt.grid, q_obs, where="mid", linestyle="--", label="q_obs") + ax.set_xlabel(belt.param) + ax.set_ylabel("q") + ax.legend() + return ax diff --git a/src/npkit/stats.py b/src/npkit/stats.py new file mode 100644 index 0000000..5ad9d0e --- /dev/null +++ b/src/npkit/stats.py @@ -0,0 +1,140 @@ +from __future__ import annotations + +from typing import Tuple, Optional, Callable + +import numpy as np + +from scipy.optimize import minimize, minimize_scalar + +from .likelihood import GaussianLikelihood +from .observables import Params + + +def _require_scipy() -> None: + if minimize is None: + raise RuntimeError( + "SciPy is required for optimisation (scipy.optimize.minimize). " + "Install with `pip install scipy`." + ) + + +def _params_to_vector(param_names: list[str], params: Params) -> np.ndarray: + return np.asarray([params[name] for name in param_names], dtype=float) + + +def _vector_to_params(param_names: list[str], x: np.ndarray) -> dict[str, float]: + return {name: float(val) for name, val in zip(param_names, x)} + + +def fit_mle( + like: GaussianLikelihood, + start: Params, + bounds: Optional[dict[str, Tuple[float, float]]] = None, +) -> tuple[dict[str, float], float]: + """ + Minimise nll(params) to obtain MLE and nll_min. + + For 1 parameter, use a scalar line search (robust if the gradient is zero at start). + For >=2 parameters, use L-BFGS-B with a Powell fallback. 
+ """ + names = list(start.keys()) + + # --- 1D robust path ----------------------------------------------------- + if len(names) == 1: + pname = names[0] + + def f1(x: float) -> float: + return like.nll({pname: float(x)}) + + if bounds and pname in bounds: + lo, hi = bounds[pname] + res = minimize_scalar(f1, bounds=(lo, hi), method="bounded") + else: + # Auto-bracket: expand until downhill is found + a, b = 0.0, 1.0 + fa, fb = f1(a), f1(b) + k = 0 + while fb >= fa and k < 12: + b *= 2.0 + fb = f1(b) + k += 1 + # If we never found downhill (pathological), still proceed + res = minimize_scalar(f1, bracket=(a, b)) + + if not res.success: + raise RuntimeError(f"MLE 1D optimisation failed: {res.message}") + return {pname: float(res.x)}, float(res.fun) + + # --- >=2D path (as before) --------------------------------------------- + names = list(start.keys()) + x0 = np.asarray([start[n] for n in names], dtype=float) + + opt_bounds = None + if bounds: + opt_bounds = [bounds.get(n, (-np.inf, np.inf)) for n in names] + + def fun(x: np.ndarray) -> float: + return like.nll({n: float(v) for n, v in zip(names, x)}) + + res = minimize(fun, x0, bounds=opt_bounds, method="L-BFGS-B") + # Fallback if stuck near start (can happen on flat/ill-conditioned surfaces) + if (not res.success) or np.allclose(res.x, x0): + res = minimize(fun, x0, bounds=opt_bounds, method="Powell") + + if not res.success: + raise RuntimeError(f"MLE optimisation failed: {res.message}") + best = {n: float(v) for n, v in zip(names, res.x)} + return best, float(res.fun) + + +def q_profile( + param: str, + value: float, + like_builder: Callable[[], GaussianLikelihood], + start: Params, + bounds: Optional[dict[str, Tuple[float, float]]] = None, +) -> float: + """ + Profile-likelihood ratio test statistic for one parameter: + + q(value) = nll(params_hat_hat(value)) - nll(params_hat) + + where params_hat_hat(value) fixes `param` = value and minimises over the others. 
+ """ + # Unconstrained fit + like = like_builder() + _, nll_min = fit_mle(like, start=start, bounds=bounds) + + # Names of nuisance/free parameters (everything except `param`) + fixed_names = [n for n in start.keys() if n != param] + + # If there are no nuisance parameters, just evaluate NLL at the fixed value + if not fixed_names: + like = like_builder() + nll_fixed = like.nll({**start, param: float(value)}) + q = float(nll_fixed - nll_min) + return max(0.0, q) + + # Otherwise, do the constrained optimisation over nuisance parameters + like = like_builder() + fixed_start = {n: start[n] for n in fixed_names} + + def constrained_nll(x: np.ndarray) -> float: + p = {**{param: float(value)}, **{n: float(v) for n, v in zip(fixed_names, x)}} + return like.nll(p) + + _require_scipy() + x0 = np.asarray([fixed_start[n] for n in fixed_names], dtype=float) + opt_bounds = None + if bounds: + opts = [bounds.get(n, (-np.inf, np.inf)) for n in fixed_names] + # SciPy doesn't like [] for 0-dim problems; keep None in that case + if len(opts) > 0: + opt_bounds = opts + + res = minimize(constrained_nll, x0, bounds=opt_bounds, method="L-BFGS-B") + if not res.success: + raise RuntimeError(f"Constrained optimisation failed: {res.message}") + + q = float(res.fun - nll_min) + return max(0.0, q) diff --git a/src/npkit/utils.py b/src/npkit/utils.py new file mode 100644 index 0000000..8ac8cbe --- /dev/null +++ b/src/npkit/utils.py @@ -0,0 +1,7 @@ +from __future__ import annotations +import numpy as np + + +def make_rng(seed: int | None) -> np.random.Generator: + """Create a PCG64-based Generator, or numpy default if seed is None.""" + return np.random.default_rng(None if seed is None else np.random.PCG64(seed)) diff --git a/src/template_python_lib/__init__.py b/src/template_python_lib/__init__.py deleted file mode 100644 index b1e37bb..0000000 --- a/src/template_python_lib/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# template_python_lib/__init__.py - -__version__ = "2025.09.0" diff --git a/src/template_python_lib/roots/__init__.py b/src/template_python_lib/roots/__init__.py deleted file mode 100644 index c8c4eaf..0000000 --- a/src/template_python_lib/roots/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -""" -Roots module: A simple example for a python library. -""" - -# Explicitly import the `newtons_method` function from `simple.py` -from .simple import newtons_method - -# Expose the `newtons_method` function as part of the module's public API (nicer import) -__all__ = ["newtons_method"] diff --git a/src/template_python_lib/roots/simple.py b/src/template_python_lib/roots/simple.py deleted file mode 100644 index c413941..0000000 --- a/src/template_python_lib/roots/simple.py +++ /dev/null @@ -1,28 +0,0 @@ -def newtons_method(func, func_derivative, x0, tolerance=1e-7, max_iterations=100): - """ - Find the root of a function using Newton's method. - - Parameters: - func (callable): The function for which the root is to be found. - func_derivative (callable): The derivative of the function. - x0 (float): Initial guess for the root. - tolerance (float): Convergence tolerance. - max_iterations (int): Maximum number of iterations. - - Returns: - float: The estimated root. - """ - x = x0 - for i in range(max_iterations): - f_x = func(x) - f_prime_x = func_derivative(x) - - if abs(f_x) < tolerance: - return x - - if f_prime_x == 0: - raise ValueError("Derivative is zero. No solution found.") - - x = x - f_x / f_prime_x - - raise ValueError("Maximum iterations reached. 
diff --git a/tests/test_chernoff_boundary.py b/tests/test_chernoff_boundary.py
new file mode 100644
index 0000000..3575c16
--- /dev/null
+++ b/tests/test_chernoff_boundary.py
@@ -0,0 +1,65 @@
+# tests/test_chernoff_boundary.py
+import numpy as np
+from scipy.stats import norm
+import pytest
+
+from npkit import Observable, ObservableSet, GaussianModel, GaussianLikelihood
+from npkit.neyman import build_belt
+
+
+def _q_chernoff_quantile(conf: float) -> float:
+    """Quantile of the 0.5*delta0 + 0.5*chi2_1 mixture at confidence `conf` (e.g. 0.6827, 0.9545)."""
+    # For the mixture: F(q) = Phi(sqrt(q)) -> sqrt(q) = Phi^{-1}(conf)
+    z = float(norm.ppf(conf))
+    return z * z
+
+
+@pytest.mark.parametrize(
+    "conf, expected, n_toys, tol",
+    [
+        (0.6827, _q_chernoff_quantile(0.6827), 20000, 0.08),  # ~0.22
+        (0.9545, _q_chernoff_quantile(0.9545), 25000, 0.15),  # ~2.86
+    ],
+)
+def test_boundary_chernoff_quantiles_at_zero(conf, expected, n_toys, tol):
+    """
+    Model: y ~ N(100 + 10*C^2, sigma^2) with C >= 0, sigma^2 = 100.
+    At true C=0, the LR statistic follows the Chernoff mixture
+    0.5*delta0 + 0.5*chi2_1, so the Neyman critical value at C=0 must
+    match the mixture quantile.
+    """
+    rng = np.random.default_rng(12345)
+
+    # Observable with even dependence on C (sign not identifiable without the boundary)
+    obs = ObservableSet([Observable("x", lambda p: 100.0 + 10.0 * (p["C"] ** 2))])
+
+    V = 100.0  # variance sigma^2
+    model = GaussianModel(obs=obs, covariance=V)
+
+    # Dummy like_builder; build_belt binds toy data internally
+    y_obs = np.array([100.0])
+
+    def like_builder():
+        return GaussianLikelihood(obs, y_obs, V)
+
+    alpha = 1.0 - conf
+    grid = np.array([0.0])  # only test C = 0 (the boundary point)
+    start = {"C": 0.0}
+    bounds = {"C": (0.0, 1e9)}  # enforce the physical boundary C >= 0
+
+    belt = build_belt(
+        param="C",
+        model=model,
+        like_builder=like_builder,
+        grid=grid,
+        n_toys=n_toys,
+        alpha=alpha,
+        rng=rng,
+        start=start,
+        bounds=bounds,
+    )
+
+    q0 = float(belt.qcrit[0])
+    assert abs(q0 - expected) <= tol, (
+        f"C=0: q_crit={q0:.3f}, expected {expected:.3f} ± {tol}"
+    )
diff --git a/tests/test_roots.py b/tests/test_roots.py
deleted file mode 100644
index f223d91..0000000
--- a/tests/test_roots.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from template_python_lib.roots.simple import newtons_method
-
-
-def test_newtons_method():
-    # Define the function and its derivative
-    def f(x):
-        return x**2 - 4  # Root at x = ±2
-
-    def f_prime(x):
-        return 2 * x
-
-    # Initial guess
-    x0 = 1.0
-
-    # Call the Newton's method
-    root = newtons_method(f, f_prime, x0)
-
-    # Assert that the root is approximately 2
-    assert abs(root - 2) < 1e-7, f"Expected root near 2, but got {root}"
diff --git a/tests/test_wilks_linear.py b/tests/test_wilks_linear.py
new file mode 100644
index 0000000..7bd74d9
--- /dev/null
+++ b/tests/test_wilks_linear.py
@@ -0,0 +1,67 @@
+# tests/test_wilks_linear.py
+import numpy as np
+
+from npkit import (
+    Observable,
+    ObservableSet,
+    GaussianModel,
+    GaussianLikelihood,
+)
+from npkit.neyman import build_belt, invert_belt
+
+
+def test_linear_model_wilks_68pct():
+    """
+    y ~ N(100 + 10*C, sigma^2), sigma=10, identifiable & linear.
+    For toys generated at true C over the grid, q_crit(C) must equal the
+    chi^2_1 68.27% quantile (=1) for *all* C (up to MC error).
+    Also, with y_obs=100 (C_hat=0), the 68% CI should be approx [-1, 1].
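+
+    Why q_crit = 1 (sketch): the MLE is C_hat = (y - 100) / 10 with variance
+    sigma_C^2 = sigma^2 / 10^2 = 1, and the profile statistic is exactly
+    q(C) = (C - C_hat)^2 / sigma_C^2, whose 68.27% quantile is the chi2_1
+    quantile 1.0; inverting at y_obs = 100 then gives C in [-1, 1].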
+ """ + rng = np.random.default_rng(123) + + # One observable, linear in C + obs = ObservableSet([Observable("x", lambda p: 100.0 + 10.0 * p["C"])]) + + # Variance sigma^2 = 100 (sigma = 10) + V = 100.0 + model = GaussianModel(obs=obs, covariance=V) + + # Observed data exactly at C=0 mean + y_obs = np.array([100.0]) + + def like_builder(): + return GaussianLikelihood(obs, y_obs, V) + + alpha = 1.0 - 0.6827 # 68.27% CL + grid = np.linspace(-2.0, 2.0, 41) # step 0.1 + start = {"C": 0.0} + bounds = None + n_toys = 2500 # modest but stable for CI + + belt = build_belt( + param="C", + model=model, + like_builder=like_builder, + grid=grid, + n_toys=n_toys, + alpha=alpha, + rng=rng, + start=start, + bounds=bounds, + ) + + # --- q_crit should be roughly 1 everywhere (within MC noise) --------------------- + # mean close to 1 + q_mean = float(np.mean(belt.qcrit)) + assert 0.95 <= q_mean <= 1.05, f"mean q_crit {q_mean:.3f} not ~ 1" + + # every point within a reasonable band (occasionally a few % jitter) + assert np.all((belt.qcrit >= 0.8) & (belt.qcrit <= 1.2)), ( + f"some q_crit outside [0.8,1.2]: {belt.qcrit.min():.3f}..{belt.qcrit.max():.3f}" + ) + + # --- Invert for observed interval: expect ~[-1, 1] ------------------------ + lo, hi = invert_belt(belt, like_builder=like_builder, start=start, bounds=bounds) + assert np.isfinite(lo) and np.isfinite(hi) + assert -1.05 <= lo <= -0.95, f"lo {lo:.3f} not near -1" + assert 0.95 <= hi <= 1.05, f"hi {hi:.3f} not near +1"