From 71460c274a927188710d1a76b059aa9bbc1c07ca Mon Sep 17 00:00:00 2001
From: sikai zhang <matthew.szhang91@gmail.com>
Date: Tue, 14 Oct 2025 15:16:14 +0800
Subject: [PATCH 1/2] FEAT add max_candidates and random_state to make_narx

---
 .github/workflows/emscripten.yml |  2 +-
 .github/workflows/wheel.yml      |  2 +-
 fastcan/narx/_feature.py         | 60 ++++++++++++++++++++++--------
 fastcan/narx/_utils.py           | 21 ++++++++++-
 fastcan/narx/tests/test_narx.py  | 63 +++++++++++++++++++++++++++++++-
 pixi.lock                        |  2 +-
 pyproject.toml                   | 10 ++++-
 7 files changed, 137 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/emscripten.yml b/.github/workflows/emscripten.yml
index dfa4b70..a0aa9ff 100644
--- a/.github/workflows/emscripten.yml
+++ b/.github/workflows/emscripten.yml
@@ -9,7 +9,7 @@ jobs:
     steps:
       - uses: actions/checkout@v5
       - name: Build WASM wheel
-        uses: pypa/cibuildwheel@v3.2.0
+        uses: pypa/cibuildwheel@v3.2.1
         env:
           CIBW_PLATFORM: pyodide
       - name: Upload package
diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml
index 14d80b5..dc5f965 100644
--- a/.github/workflows/wheel.yml
+++ b/.github/workflows/wheel.yml
@@ -33,7 +33,7 @@ jobs:
     steps:
       - uses: actions/checkout@v5
       - name: Build wheels
-        uses: pypa/cibuildwheel@v3.2.0
+        uses: pypa/cibuildwheel@v3.2.1
         env:
           CIBW_SKIP: "*_i686 *_ppc64le *_s390x *_universal2 *-musllinux_* cp314t*"
           CIBW_PROJECT_REQUIRES_PYTHON: ">=3.10"
diff --git a/fastcan/narx/_feature.py b/fastcan/narx/_feature.py
index a97889e..5bd7028 100644
--- a/fastcan/narx/_feature.py
+++ b/fastcan/narx/_feature.py
@@ -6,6 +6,7 @@
 # SPDX-License-Identifier: MIT
 
 import math
+import warnings
 from itertools import combinations_with_replacement
 from numbers import Integral
 
@@ -198,12 +199,16 @@ def make_poly_features(X, ids):
             None,
             Interval(Integral, 1, None, closed="left"),
         ],
+        "max_poly": [None, Interval(Integral, 1, None, closed="left")],
+        "random_state": ["random_state"],
     },
     prefer_skip_nested_validation=True,
 )
 def make_poly_ids(
     n_features=1,
     degree=1,
+    max_poly=None,
+    random_state=None,
 ):
     """Generate ids for polynomial features.
     (variable_index, variable_index, ...)
@@ -217,6 +222,15 @@ def make_poly_ids(
     degree : int, default=1
         The maximum degree of polynomial features.
 
+    max_poly : int or None, default=None
+        Maximum number of ids of polynomial features to generate.
+        If smaller than the number of possible ids, a random subset is drawn
+        by reservoir sampling. If None, all possible ids are returned.
+
+    random_state : int, RandomState instance or None, default=None
+        Used when `max_poly` is not None to subsample ids of polynomial features.
+        See :term:`Glossary <random_state>` for details.
+
     Returns
     -------
     ids : array-like of shape (n_outputs, degree)
@@ -236,29 +250,45 @@ def make_poly_ids(
            [1, 2, 2],
            [2, 2, 2]])
     """
-    n_outputs = math.comb(n_features + degree, degree) - 1
-    if n_outputs > np.iinfo(np.intp).max:
+    n_total = math.comb(n_features + degree, degree) - 1
+    if n_total > np.iinfo(np.intp).max:
         msg = (
-            "The output that would result from the current configuration would"
-            f" have {n_outputs} features which is too large to be"
-            f" indexed by {np.intp().dtype.name}."
+            "The current configuration would "
+            f"result in {n_total} features which is too large to be "
+            f"indexed by {np.intp().dtype.name}."
         )
         raise ValueError(msg)
-
-    ids = np.array(
-        list(
-            combinations_with_replacement(
-                range(n_features + 1),
-                degree,
-            )
+    if n_total > 10_000_000:
+        warnings.warn(
+            "Total number of polynomial features is larger than 10,000,000! "
+            f"The current configuration would result in {n_total} features. "
+            "This may take a while.",
+            UserWarning,
+        )
+    if max_poly is not None and max_poly < n_total:
+        # Reservoir sampling (Algorithm R) over the non-constant ids only.
+        rng = np.random.default_rng(random_state)
+        reservoir = []
+        combs = combinations_with_replacement(range(n_features + 1), degree)
+        next(combs)  # skip the all-zero constant id, which is always first
+        for i, comb in enumerate(combs):
+            if i < max_poly:
+                reservoir.append(comb)
+            else:
+                j = rng.integers(0, i + 1)
+                if j < max_poly:
+                    reservoir[j] = comb
+        ids = np.array(reservoir)
+    else:
+        ids = np.array(
+            list(combinations_with_replacement(range(n_features + 1), degree))
         )
-    )
 
     const_id = np.where((ids == 0).all(axis=1))
     return np.delete(ids, const_id, 0)  # remove the constant feature
 
 
-def _valiate_time_shift_poly_ids(
+def _validate_time_shift_poly_ids(
     time_shift_ids, poly_ids, n_samples=None, n_features=None, n_outputs=None
 ):
     if n_samples is None:
@@ -496,7 +526,7 @@ def tp2fd(time_shift_ids, poly_ids):
     [[-1  1]
      [ 2  3]]
     """
-    _time_shift_ids, _poly_ids = _valiate_time_shift_poly_ids(
+    _time_shift_ids, _poly_ids = _validate_time_shift_poly_ids(
         time_shift_ids,
         poly_ids,
     )
diff --git a/fastcan/narx/_utils.py b/fastcan/narx/_utils.py
index 70b88ca..fd4667a 100644
--- a/fastcan/narx/_utils.py
+++ b/fastcan/narx/_utils.py
@@ -7,7 +7,11 @@
 
 import numpy as np
 from scipy.stats import rankdata
-from sklearn.utils import check_array, check_consistent_length, column_or_1d
+from sklearn.utils import (
+    check_array,
+    check_consistent_length,
+    column_or_1d,
+)
 from sklearn.utils._param_validation import Interval, StrOptions, validate_params
 from sklearn.utils.validation import check_is_fitted
 
@@ -132,6 +136,8 @@ def _get_term_str(term_feat_ids, term_delay_ids):
             Interval(Integral, 1, None, closed="left"),
         ],
         "fit_intercept": ["boolean"],
+        "max_candidates": [None, Interval(Integral, 1, None, closed="left")],
+        "random_state": ["random_state"],
         "include_zero_delay": [None, "array-like"],
         "static_indices": [None, "array-like"],
         "refine_verbose": ["verbose"],
@@ -155,6 +161,8 @@ def make_narx(
     poly_degree=1,
     *,
     fit_intercept=True,
+    max_candidates=None,
+    random_state=None,
     include_zero_delay=None,
     static_indices=None,
     refine_verbose=1,
@@ -186,6 +194,15 @@ def make_narx(
     fit_intercept : bool, default=True
         Whether to fit the intercept. If set to False, intercept will be zeros.
 
+    max_candidates : int or None, default=None
+        Maximum number of candidate polynomial terms retained before selection.
+        If smaller than the number of possible terms, a random subset is drawn
+        by reservoir sampling. If None, all candidates are considered.
+
+    random_state : int, RandomState instance or None, default=None
+        Used when `max_candidates` is not None to subsample candidate terms.
+        See :term:`Glossary <random_state>` for details.
+
     include_zero_delay : {None, array-like} of shape (n_features,) default=None
         Whether to include the original (zero-delay) features.
 
@@ -306,6 +323,8 @@ def make_narx(
     poly_ids_all = make_poly_ids(
         time_shift_ids_all.shape[0],
         poly_degree,
+        max_poly=max_candidates,
+        random_state=random_state,
     )
     poly_terms = make_poly_features(time_shift_vars, poly_ids_all)
 
diff --git a/fastcan/narx/tests/test_narx.py b/fastcan/narx/tests/test_narx.py
index 9eb2ef0..177f5c2 100644
--- a/fastcan/narx/tests/test_narx.py
+++ b/fastcan/narx/tests/test_narx.py
@@ -29,10 +29,18 @@ def test_narx_is_sklearn_estimator():
         check_estimator(NARX(), expected_failed_checks=expected_failures)
 
 
-def test_poly_ids():
-    with pytest.raises(ValueError, match=r"The output that would result from the .*"):
+def test_poly_ids(monkeypatch):
+    with pytest.raises(ValueError, match=r"The current configuration would .*"):
         make_poly_ids(10, 1000)
 
+    # Mock combinations_with_replacement to avoid heavy computation
+    monkeypatch.setattr(
+        "fastcan.narx._feature.combinations_with_replacement",
+        lambda *args, **kwargs: iter([[0, 0]]),
+    )
+    with pytest.warns(UserWarning, match=r"Total number of polynomial features .*"):
+        make_poly_ids(18, 10)
+
 
 def test_time_ids():
     with pytest.raises(ValueError, match=r"The length of `include_zero_delay`.*"):
@@ -553,6 +561,57 @@ def test_make_narx_refine_print(capsys):
     assert "No. of iterations: " in captured.out
 
 
+def test_make_narx_max_candidates():
+    """Test max_candidates and random_state in make_narx."""
+    rng = np.random.default_rng(12345)
+    X = rng.random((100, 2))
+    y = rng.random((100, 1))
+    max_delay = 3
+    poly_degree = 10
+    n_terms_to_select = 5
+    max_candidates = 20
+
+    # With the same random_state, the results should be identical
+    narx1 = make_narx(
+        X,
+        y,
+        n_terms_to_select=n_terms_to_select,
+        max_delay=max_delay,
+        poly_degree=poly_degree,
+        max_candidates=max_candidates,
+        random_state=123,
+        verbose=0,
+    )
+    narx2 = make_narx(
+        X,
+        y,
+        n_terms_to_select=n_terms_to_select,
+        max_delay=max_delay,
+        poly_degree=poly_degree,
+        max_candidates=max_candidates,
+        random_state=123,
+        verbose=0,
+    )
+    assert_array_equal(narx1.feat_ids, narx2.feat_ids)
+    assert_array_equal(narx1.delay_ids, narx2.delay_ids)
+
+    # With different random_state, the results should be different
+    narx3 = make_narx(
+        X,
+        y,
+        n_terms_to_select=n_terms_to_select,
+        max_delay=max_delay,
+        poly_degree=poly_degree,
+        max_candidates=max_candidates,
+        random_state=456,
+        verbose=0,
+    )
+    assert not np.array_equal(narx1.feat_ids, narx3.feat_ids)
+
+    # Check if number of selected terms is correct
+    assert narx1.feat_ids.shape[0] == n_terms_to_select
+
+
 @pytest.mark.parametrize("max_delay", [1, 3, 7, 10])
 def test_nan_split(max_delay):
     n_sessions = 10
diff --git a/pixi.lock b/pixi.lock
index d56ed82..c837ddc 100644
--- a/pixi.lock
+++ b/pixi.lock
@@ -8351,7 +8351,7 @@ packages:
 - pypi: ./
   name: fastcan
   version: 0.4.1
-  sha256: 4cba5e10ba2470a292c43ba40897b2aa523bcd0d4d7ed2979ac6bd5dd81f4ce8
+  sha256: 0c3bd4756f12a17bb430db6172b64124f1833e15085fa57c9d7e53801d2f30b5
   requires_dist:
   - scikit-learn>=1.7.0,!=1.7.1
   requires_python: '>=3.10'
diff --git a/pyproject.toml b/pyproject.toml
index c7091c5..36026ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -143,8 +143,8 @@ asv-publish = { cmd = "python -m asv publish", cwd = "asv_benchmarks" }
 asv-preview = { cmd = "python -m asv preview", cwd = "asv_benchmarks", depends-on = ["asv-publish"] }
 
 [tool.pixi.feature.test.tasks]
-test = "pytest ./tests ./fastcan/narx/tests"
-test-coverage = { cmd = "rm -rf .coverage && pytest --cov-report {{ FMT }} --cov={{ PACKAGE }} .", args = [{ arg = "PACKAGE", default = "fastcan" }, { arg = "FMT", default = "html" }] }
+test = "pytest"
+test-coverage = { cmd = "rm -rf .coverage && pytest --cov-report {{ FMT }} --cov={{ PACKAGE }}", args = [{ arg = "PACKAGE", default = "fastcan" }, { arg = "FMT", default = "html" }] }
 
 [tool.pixi.feature.build.tasks]
 build-wheel = "rm -rf dist && python -m build -wnx -Cinstall-args=--tags=runtime,python-runtime,devel"
@@ -192,6 +192,12 @@ static = { features = ["static"], no-default-feature = true }
 nogil = { features = ["nogil"], no-default-feature = true }
 wasm = { features = ["wasm"], no-default-feature = true }
 
+[tool.pytest.ini_options]
+testpaths = [ 
+    "./tests",
+    "./fastcan/narx/tests",
+]
+
 [tool.coverage.run]
 omit = ["**/tests/*"]
 

From 421b775577138c6e91fe110169fb12373841b586 Mon Sep 17 00:00:00 2001
From: sikai zhang <matthew.szhang91@gmail.com>
Date: Tue, 14 Oct 2025 15:32:27 +0800
Subject: [PATCH 2/2] fix codecov

---
 pixi.lock      | 2 +-
 pyproject.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pixi.lock b/pixi.lock
index c837ddc..41ec6cb 100644
--- a/pixi.lock
+++ b/pixi.lock
@@ -8351,7 +8351,7 @@ packages:
 - pypi: ./
   name: fastcan
   version: 0.4.1
-  sha256: 0c3bd4756f12a17bb430db6172b64124f1833e15085fa57c9d7e53801d2f30b5
+  sha256: 07bc539901f32163cadb6d96549d0b169fcc4577e48a210c96fc82b04e953309
   requires_dist:
   - scikit-learn>=1.7.0,!=1.7.1
   requires_python: '>=3.10'
diff --git a/pyproject.toml b/pyproject.toml
index 36026ae..068cf12 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -144,7 +144,7 @@ asv-preview = { cmd = "python -m asv preview", cwd = "asv_benchmarks", depends-o
 
 [tool.pixi.feature.test.tasks]
 test = "pytest"
-test-coverage = { cmd = "rm -rf .coverage && pytest --cov-report {{ FMT }} --cov={{ PACKAGE }}", args = [{ arg = "PACKAGE", default = "fastcan" }, { arg = "FMT", default = "html" }] }
+test-coverage = { cmd = "rm -rf .coverage && pytest --cov-report {{ FMT }} --cov={{ PACKAGE }}", args = [{ arg = "FMT", default = "html" }, { arg = "PACKAGE", default = "fastcan" }] }
 
 [tool.pixi.feature.build.tasks]
 build-wheel = "rm -rf dist && python -m build -wnx -Cinstall-args=--tags=runtime,python-runtime,devel"