Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/emscripten.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
steps:
- uses: actions/checkout@v5
- name: Build WASM wheel
uses: pypa/[email protected].0
uses: pypa/[email protected].1
env:
CIBW_PLATFORM: pyodide
- name: Upload package
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
steps:
- uses: actions/checkout@v5
- name: Build wheels
uses: pypa/[email protected].0
uses: pypa/[email protected].1
env:
CIBW_SKIP: "*_i686 *_ppc64le *_s390x *_universal2 *-musllinux_* cp314t*"
CIBW_PROJECT_REQUIRES_PYTHON: ">=3.10"
Expand Down
60 changes: 45 additions & 15 deletions fastcan/narx/_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# SPDX-License-Identifier: MIT

import math
import warnings
from itertools import combinations_with_replacement
from numbers import Integral

Expand Down Expand Up @@ -198,12 +199,16 @@ def make_poly_features(X, ids):
None,
Interval(Integral, 1, None, closed="left"),
],
"max_poly": [None, Interval(Integral, 1, None, closed="left")],
"random_state": ["random_state"],
},
prefer_skip_nested_validation=True,
)
def make_poly_ids(
n_features=1,
degree=1,
max_poly=None,
random_state=None,
):
"""Generate ids for polynomial features.
(variable_index, variable_index, ...)
Expand All @@ -217,6 +222,15 @@ def make_poly_ids(
degree : int, default=1
The maximum degree of polynomial features.

max_poly : int, default=None
Maximum number of ids of polynomial features to generate.
Randomly selected by reservoir sampling.
If None, all possible ids are returned.

random_state : int or RandomState instance, default=None
Used when `max_poly` is not None to subsample ids of polynomial features.
See :term:`Glossary <random_state>` for details.

Returns
-------
ids : array-like of shape (n_outputs, degree)
Expand All @@ -236,29 +250,45 @@ def make_poly_ids(
[1, 2, 2],
[2, 2, 2]])
"""
n_outputs = math.comb(n_features + degree, degree) - 1
if n_outputs > np.iinfo(np.intp).max:
n_total = math.comb(n_features + degree, degree) - 1
if n_total > np.iinfo(np.intp).max:
msg = (
"The output that would result from the current configuration would"
f" have {n_outputs} features which is too large to be"
f" indexed by {np.intp().dtype.name}."
"The current configuration would "
f"result in {n_total} features which is too large to be "
f"indexed by {np.intp().dtype.name}."
)
raise ValueError(msg)

ids = np.array(
list(
combinations_with_replacement(
range(n_features + 1),
degree,
)
if n_total > 10_000_000:
warnings.warn(
"Total number of polynomial features is larger than 10,000,000! "
f"The current configuration would result in {n_total} features. "
"This may take a while.",
UserWarning,
)
if max_poly is not None and max_poly < n_total:
# reservoir sampling
rng = np.random.default_rng(random_state)
reservoir = []
for i, comb in enumerate(
combinations_with_replacement(range(n_features + 1), degree)
):
if i < max_poly:
reservoir.append(comb)
else:
j = rng.integers(0, i + 1)
if j < max_poly:
reservoir[j] = comb
ids = np.array(reservoir)
else:
ids = np.array(
list(combinations_with_replacement(range(n_features + 1), degree))
)
)

const_id = np.where((ids == 0).all(axis=1))
return np.delete(ids, const_id, 0) # remove the constant feature


def _valiate_time_shift_poly_ids(
def _validate_time_shift_poly_ids(
time_shift_ids, poly_ids, n_samples=None, n_features=None, n_outputs=None
):
if n_samples is None:
Expand Down Expand Up @@ -496,7 +526,7 @@ def tp2fd(time_shift_ids, poly_ids):
[[-1 1]
[ 2 3]]
"""
_time_shift_ids, _poly_ids = _valiate_time_shift_poly_ids(
_time_shift_ids, _poly_ids = _validate_time_shift_poly_ids(
time_shift_ids,
poly_ids,
)
Expand Down
21 changes: 20 additions & 1 deletion fastcan/narx/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@

import numpy as np
from scipy.stats import rankdata
from sklearn.utils import check_array, check_consistent_length, column_or_1d
from sklearn.utils import (
check_array,
check_consistent_length,
column_or_1d,
)
from sklearn.utils._param_validation import Interval, StrOptions, validate_params
from sklearn.utils.validation import check_is_fitted

Expand Down Expand Up @@ -132,6 +136,8 @@ def _get_term_str(term_feat_ids, term_delay_ids):
Interval(Integral, 1, None, closed="left"),
],
"fit_intercept": ["boolean"],
"max_candidates": [None, Interval(Integral, 1, None, closed="left")],
"random_state": ["random_state"],
"include_zero_delay": [None, "array-like"],
"static_indices": [None, "array-like"],
"refine_verbose": ["verbose"],
Expand All @@ -155,6 +161,8 @@ def make_narx(
poly_degree=1,
*,
fit_intercept=True,
max_candidates=None,
random_state=None,
include_zero_delay=None,
static_indices=None,
refine_verbose=1,
Expand Down Expand Up @@ -186,6 +194,15 @@ def make_narx(
fit_intercept : bool, default=True
Whether to fit the intercept. If set to False, intercept will be zeros.

max_candidates : int, default=None
Maximum number of candidate polynomial terms retained before selection.
Randomly selected by reservoir sampling.
If None, all candidates are considered.

random_state : int or RandomState instance, default=None
Used when `max_candidates` is not None to subsample candidate terms.
See :term:`Glossary <random_state>` for details.

include_zero_delay : {None, array-like} of shape (n_features,), default=None
Whether to include the original (zero-delay) features.

Expand Down Expand Up @@ -306,6 +323,8 @@ def make_narx(
poly_ids_all = make_poly_ids(
time_shift_ids_all.shape[0],
poly_degree,
max_poly=max_candidates,
random_state=random_state,
)
poly_terms = make_poly_features(time_shift_vars, poly_ids_all)

Expand Down
63 changes: 61 additions & 2 deletions fastcan/narx/tests/test_narx.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,18 @@ def test_narx_is_sklearn_estimator():
check_estimator(NARX(), expected_failed_checks=expected_failures)


def test_poly_ids():
with pytest.raises(ValueError, match=r"The output that would result from the .*"):
def test_poly_ids(monkeypatch):
with pytest.raises(ValueError, match=r"The current configuration would .*"):
make_poly_ids(10, 1000)

# Mock combinations_with_replacement to avoid heavy computation
monkeypatch.setattr(
"fastcan.narx._feature.combinations_with_replacement",
lambda *args, **kwargs: iter([[0, 0]]),
)
with pytest.warns(UserWarning, match=r"Total number of polynomial features .*"):
make_poly_ids(18, 10)


def test_time_ids():
with pytest.raises(ValueError, match=r"The length of `include_zero_delay`.*"):
Expand Down Expand Up @@ -553,6 +561,57 @@ def test_make_narx_refine_print(capsys):
assert "No. of iterations: " in captured.out


def test_make_narx_max_candidates():
    """Check that candidate subsampling in make_narx follows `random_state`."""
    data_rng = np.random.default_rng(12345)
    X = data_rng.random((100, 2))
    y = data_rng.random((100, 1))
    n_selected = 5

    def build(seed):
        # All models share one configuration; only the sampling seed varies.
        return make_narx(
            X,
            y,
            n_terms_to_select=n_selected,
            max_delay=3,
            poly_degree=10,
            max_candidates=20,
            random_state=seed,
            verbose=0,
        )

    # Same seed twice -> identical selected terms.
    first = build(123)
    repeat = build(123)
    assert_array_equal(first.feat_ids, repeat.feat_ids)
    assert_array_equal(first.delay_ids, repeat.delay_ids)

    # A different seed is expected to give a different set of terms.
    other = build(456)
    assert not np.array_equal(first.feat_ids, other.feat_ids)

    # The number of selected terms matches the request.
    assert first.feat_ids.shape[0] == n_selected


@pytest.mark.parametrize("max_delay", [1, 3, 7, 10])
def test_nan_split(max_delay):
n_sessions = 10
Expand Down
2 changes: 1 addition & 1 deletion pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 8 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ asv-publish = { cmd = "python -m asv publish", cwd = "asv_benchmarks" }
asv-preview = { cmd = "python -m asv preview", cwd = "asv_benchmarks", depends-on = ["asv-publish"] }

[tool.pixi.feature.test.tasks]
test = "pytest ./tests ./fastcan/narx/tests"
test-coverage = { cmd = "rm -rf .coverage && pytest --cov-report {{ FMT }} --cov={{ PACKAGE }} .", args = [{ arg = "PACKAGE", default = "fastcan" }, { arg = "FMT", default = "html" }] }
test = "pytest"
test-coverage = { cmd = "rm -rf .coverage && pytest --cov-report {{ FMT }} --cov={{ PACKAGE }}", args = [{ arg = "FMT", default = "html" }, { arg = "PACKAGE", default = "fastcan" }] }

[tool.pixi.feature.build.tasks]
build-wheel = "rm -rf dist && python -m build -wnx -Cinstall-args=--tags=runtime,python-runtime,devel"
Expand Down Expand Up @@ -192,6 +192,12 @@ static = { features = ["static"], no-default-feature = true }
nogil = { features = ["nogil"], no-default-feature = true }
wasm = { features = ["wasm"], no-default-feature = true }

[tool.pytest.ini_options]
testpaths = [
"./tests",
"./fastcan/narx/tests",
]

[tool.coverage.run]
omit = ["**/tests/*"]

Expand Down