Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fit parameters passing #391

Merged
merged 22 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c6bd80a
ADD: fit parameters in fit_estimator function signature
sami-ka Dec 28, 2023
a4bc069
ADD: fit parameters to the fit method in EnsembleEstimator abstract c…
sami-ka Dec 28, 2023
4ffd18d
ADD: fit parameters to the _fit_oof_estimator and fit method of the E…
sami-ka Dec 28, 2023
6ef5fbd
ADD: fit parameters to the fit method of the MapieRegressor class
sami-ka Dec 28, 2023
77c73e0
ADD: fit parameters to the fit method of the MapieQuantileRegressor c…
sami-ka Dec 28, 2023
31a0515
ADD: fit parameters to the fit method of the MapieTimeSeriesRegressor…
sami-ka Dec 28, 2023
66f44b2
ADD: fit parameters to the _fit_and_predict_oof_model and fit method …
sami-ka Dec 28, 2023
0533878
ADD: fit parameters to the fit method of the MapieCalibrator class
sami-ka Dec 28, 2023
377767d
UPD: authors list and changelog
sami-ka Dec 28, 2023
4f24c65
FORMAT: Flake8 linting
sami-ka Dec 28, 2023
c5066a7
BUILD(DEV): specified mypy version not compatible with python 3.10.7
sami-ka Dec 28, 2023
5b6cb1d
ADD: test of passing fit parameters
sami-ka Dec 29, 2023
13b6f3e
FIX: Test number of boosting iterations for all estimators
sami-ka Dec 29, 2023
2b2c7d6
ADD: tests for calibration, classification and regression
sami-ka Jan 3, 2024
53cc84c
STYLE: formatting
sami-ka Jan 3, 2024
2aa5ebc
DOCS: Add description for fit parameters passing test
sami-ka Jan 3, 2024
8073ad4
REFACTOR: reorganize elements used in fit parameters test
sami-ka Jan 3, 2024
bf51c92
Merge branch 'master' into 212-add-fit-params
sami-ka Jan 4, 2024
a415ce2
FIX: Merge commit mistake
sami-ka Jan 4, 2024
47eb40a
FIX: History merge
sami-ka Jan 4, 2024
1df8633
STYLE: function header one liner
sami-ka Jan 9, 2024
a4adcb6
Merge branch 'master' into 212-add-fit-params
sami-ka Jan 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@ Contributors
* Sofiane Ziane <[email protected]>
* Rafael Saraiva <[email protected]>
* Mehdi Elion <[email protected]>
* Sami Kaddani <[email protected]>
To be continued ...
1 change: 1 addition & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ History
* Add (extend) the optimal estimation strategy for the bounds of the prediction intervals for regression via ConformityScore.
* Add new checks for metrics calculations.
* Fix reference for residual normalised score in documentation.
* Add possibility of passing fit parameters used by estimators.


0.7.0 (2023-09-14)
Expand Down
5 changes: 4 additions & 1 deletion mapie/calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ def fit(
random_state: Optional[Union[int, np.random.RandomState, None]] = None,
shuffle: Optional[bool] = True,
stratify: Optional[ArrayLike] = None,
**fit_params,
) -> MapieCalibrator:
"""
Calibrate the estimator on given datasets, according to the chosen
Expand Down Expand Up @@ -466,6 +467,8 @@ def fit(
See ``sklearn.model_selection.train_test_split`` documentation.
If not ``None``, data is split in a stratified fashion, using this
as the class label.
**fit_params : dict
Additional fit parameters.

Returns
-------
Expand Down Expand Up @@ -509,7 +512,7 @@ def fit(
y_train
)
estimator = fit_estimator(
clone(estimator), X_train, y_train, sw_train,
clone(estimator), X_train, y_train, sw_train, **fit_params,
)
self.single_estimator_ = estimator
self.classes_ = self.single_estimator_.classes_
Expand Down
17 changes: 14 additions & 3 deletions mapie/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,7 @@ def _fit_and_predict_oof_model(
val_index: ArrayLike,
k: int,
sample_weight: Optional[ArrayLike] = None,
**fit_params,
) -> Tuple[ClassifierMixin, NDArray, NDArray, ArrayLike]:
"""
Fit a single out-of-fold model on a given training set and
Expand Down Expand Up @@ -615,6 +616,9 @@ def _fit_and_predict_oof_model(
Sample weights. If None, then samples are equally weighted.
By default None.

**fit_params : dict
Additional fit parameters.

Returns
-------
Tuple[ClassifierMixin, NDArray, NDArray, ArrayLike]
Expand All @@ -633,11 +637,13 @@ def _fit_and_predict_oof_model(
y_val = _safe_indexing(y, val_index)

if sample_weight is None:
estimator = fit_estimator(estimator, X_train, y_train)
estimator = fit_estimator(
estimator, X_train, y_train, **fit_params
)
else:
sample_weight_train = _safe_indexing(sample_weight, train_index)
estimator = fit_estimator(
estimator, X_train, y_train, sample_weight_train
estimator, X_train, y_train, sample_weight_train, **fit_params
)
if _num_samples(X_val) > 0:
y_pred_proba = self._predict_oof_model(estimator, X_val)
Expand Down Expand Up @@ -1047,6 +1053,7 @@ def fit(
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
size_raps: Optional[float] = .2,
**fit_params,
) -> MapieClassifier:
"""
Fit the base estimator or use the fitted base estimator.
Expand Down Expand Up @@ -1074,6 +1081,9 @@ def fit(

By default ``.2``.

**fit_params : dict
Additional fit parameters.


Returns
-------
Expand Down Expand Up @@ -1147,7 +1157,7 @@ def fit(
else:
cv = cast(BaseCrossValidator, cv)
self.single_estimator_ = fit_estimator(
clone(estimator), X, y, sample_weight
clone(estimator), X, y, sample_weight, **fit_params
)
y_pred_proba = np.empty(
(n_samples, self.n_classes_),
Expand All @@ -1162,6 +1172,7 @@ def fit(
val_index,
k,
sample_weight,
**fit_params,
)
for k, (train_index, val_index) in enumerate(cv.split(X))
)
Expand Down
28 changes: 25 additions & 3 deletions mapie/estimator/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def _fit_oof_estimator(
y: ArrayLike,
train_index: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
**fit_params,
) -> RegressorMixin:
"""
Fit a single out-of-fold model on a given training set.
Expand All @@ -204,6 +205,9 @@ def _fit_oof_estimator(
Sample weights. If None, then samples are equally weighted.
By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
RegressorMixin
Expand All @@ -216,7 +220,11 @@ def _fit_oof_estimator(
sample_weight = cast(NDArray, sample_weight)

estimator = fit_estimator(
estimator, X_train, y_train, sample_weight=sample_weight
estimator,
X_train,
y_train,
sample_weight=sample_weight,
**fit_params
)
return estimator

Expand Down Expand Up @@ -377,6 +385,7 @@ def fit(
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
**fit_params,
) -> EnsembleRegressor:
"""
Fit the base estimator under the ``single_estimator_`` attribute.
Expand All @@ -397,6 +406,9 @@ def fit(
Sample weights. If None, then samples are equally weighted.
By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
EnsembleRegressor
Expand All @@ -419,7 +431,12 @@ def fit(
)
else:
single_estimator_ = self._fit_oof_estimator(
clone(estimator), X, y, full_indexes, sample_weight
clone(estimator),
X,
y,
full_indexes,
sample_weight,
**fit_params
)
cv = cast(BaseCrossValidator, cv)
self.k_ = np.full(
Expand All @@ -432,7 +449,12 @@ def fit(
else:
estimators_ = Parallel(self.n_jobs, verbose=self.verbose)(
delayed(self._fit_oof_estimator)(
clone(estimator), X, y, train_index, sample_weight
clone(estimator),
X,
y,
train_index,
sample_weight,
**fit_params
)
for train_index, _ in cv.split(X)
)
Expand Down
4 changes: 4 additions & 0 deletions mapie/estimator/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def fit(
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
**fit_params
) -> EnsembleEstimator:
"""
Fit the base estimator under the ``single_estimator_`` attribute.
Expand All @@ -41,6 +42,9 @@ def fit(
Sample weights. If None, then samples are equally weighted.
By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
EnsembleRegressor
Expand Down
12 changes: 10 additions & 2 deletions mapie/regression/quantile_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ def fit(
random_state: Optional[Union[int, np.random.RandomState]] = None,
shuffle: Optional[bool] = True,
stratify: Optional[ArrayLike] = None,
**fit_params,
) -> MapieQuantileRegressor:
"""
Fit estimator and compute residuals used for prediction intervals.
Expand Down Expand Up @@ -533,6 +534,9 @@ def fit(

By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
MapieQuantileRegressor
Expand Down Expand Up @@ -609,8 +613,12 @@ def fit(
else:
cloned_estimator_.set_params(**params)
self.estimators_.append(fit_estimator(
cloned_estimator_, X_train, y_train, sample_weight_train
))
cloned_estimator_,
X_train,
y_train,
sample_weight_train,
**fit_params,
))
y_calib_preds[i] = self.estimators_[-1].predict(X_calib)
self.single_estimator_ = self.estimators_[2]

Expand Down
8 changes: 7 additions & 1 deletion mapie/regression/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ def fit(
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
**fit_params,
) -> MapieRegressor:
"""
Fit estimator and compute conformity scores used for
Expand Down Expand Up @@ -482,6 +483,9 @@ def fit(

By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
MapieRegressor
Expand All @@ -507,7 +511,9 @@ def fit(
self.verbose
)
# Fit the prediction function
self.estimator_ = self.estimator_.fit(X, y, sample_weight)
self.estimator_ = self.estimator_.fit(
X, y, sample_weight, **fit_params
)

# Predict on calibration data
y_pred = self.estimator_.predict_calib(X)
Expand Down
6 changes: 5 additions & 1 deletion mapie/regression/time_series_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def fit(
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
ensemble: bool = False,
**fit_params,
) -> MapieTimeSeriesRegressor:
"""
Compared to the method ``fit`` of ``MapieRegressor``, the ``fit``
Expand Down Expand Up @@ -139,12 +140,15 @@ def fit(

By default ``False``.

**fit_params : dict
Additional fit parameters.

Returns
-------
MapieTimeSeriesRegressor
The model itself.
"""
self = super().fit(X=X, y=y, sample_weight=sample_weight)
self = super().fit(X=X, y=y, sample_weight=sample_weight, **fit_params)
self.conformity_scores_ = self._relative_conformity_scores(
X, y, ensemble=ensemble
)
Expand Down
34 changes: 34 additions & 0 deletions mapie/tests/test_quantile_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@
random_state=random_state
)


def early_stopping_monitor(i, est, locals):
    """Return ``True`` on the 3rd boosting iteration (``i == 2``).

    Passed as the ``monitor`` fit parameter of scikit-learn gradient
    boosting estimators so that training stops after exactly three
    iterations, which the tests then assert on.

    Parameters
    ----------
    i : int
        Zero-based index of the current boosting iteration.
    est : object
        The estimator being fitted (unused, required by the callback
        signature).
    locals : dict
        Local variables of the fit loop (unused, required by the
        callback signature).
        NOTE: the name shadows the ``locals`` builtin; kept as-is to
        match the established callback signature.

    Returns
    -------
    bool
        ``True`` to stop training (3rd iteration), ``False`` otherwise.
    """
    # Direct boolean expression instead of if/else returning True/False.
    return i == 2


X, y = make_regression(
n_samples=500,
n_features=10,
Expand Down Expand Up @@ -762,3 +771,28 @@ def test_consistent_class() -> None:
np.testing.assert_allclose(y_pis_1[:, 0, 0], y_pis_2[:, 0, 0])
np.testing.assert_allclose(y_pis_1[:, 1, 0], y_pis_2[:, 1, 0])
np.testing.assert_allclose(y_pred_1, y_pred_2)


@pytest.mark.parametrize("strategy", [*STRATEGIES])
def test_fit_parameters_passing(strategy: str) -> None:
    """
    Check that extra fit parameters reach the underlying estimators.

    The ``monitor`` callback stops gradient boosting after the 3rd
    iteration, so every fitted sub-estimator must hold exactly 3
    boosting stages.
    """
    mapie_reg = MapieQuantileRegressor(estimator=gb, **STRATEGIES[strategy])
    mapie_reg.fit(
        X_train,
        y_train,
        X_calib=X_calib,
        y_calib=y_calib,
        sample_weight=None,
        monitor=early_stopping_monitor,
    )
    # Each of the three quantile estimators must have stopped at 3 stages.
    for sub_estimator in mapie_reg.estimators_:
        assert sub_estimator.estimators_.shape[0] == 3
8 changes: 6 additions & 2 deletions mapie/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def fit_estimator(
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[NDArray] = None,
**fit_params,
) -> Union[RegressorMixin, ClassifierMixin]:
"""
Fit an estimator on training data by distinguishing two cases:
Expand All @@ -103,6 +104,9 @@ def fit_estimator(
Sample weights. If None, then samples are equally weighted.
By default None.

**fit_params : dict
Additional fit parameters.

Returns
-------
RegressorMixin
Expand All @@ -122,9 +126,9 @@ def fit_estimator(
fit_parameters = signature(estimator.fit).parameters
supports_sw = "sample_weight" in fit_parameters
if supports_sw and sample_weight is not None:
estimator.fit(X, y, sample_weight=sample_weight)
estimator.fit(X, y, sample_weight=sample_weight, **fit_params)
else:
estimator.fit(X, y)
estimator.fit(X, y, **fit_params)
return estimator


Expand Down
2 changes: 1 addition & 1 deletion requirements.dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ bump2version==1.0.1
flake8==4.0.1
ipykernel==6.9.0
jupyter==1.0.0
mypy==0.941
mypy==0.981
numpy==1.22.3
numpydoc==1.1.0
pandas==1.3.5
Expand Down