Add fit parameters passing #391

Merged: 22 commits (Jan 12, 2024)
Commits (22)
c6bd80a
ADD: fit parameters in fit_estimator function signature
sami-ka Dec 28, 2023
a4bc069
ADD: fit parameters to the fit method in EnsembleEstimator abstract c…
sami-ka Dec 28, 2023
4ffd18d
ADD: fit parameters to the _fit_oof_estimator and fit method of the E…
sami-ka Dec 28, 2023
6ef5fbd
ADD: fit parameters to the fit method of the MapieRegressor class
sami-ka Dec 28, 2023
77c73e0
ADD: fit parameters to the fit method of the MapieQuantileRegressor c…
sami-ka Dec 28, 2023
31a0515
ADD: fit parameters to the fit method of the MapieTimeSeriesRegressor…
sami-ka Dec 28, 2023
66f44b2
ADD: fit parameters to the _fit_and_predict_oof_model and fit method …
sami-ka Dec 28, 2023
0533878
ADD: fit parameters to the fit method of the MapieCalibrator class
sami-ka Dec 28, 2023
377767d
UPD: authors list and changelog
sami-ka Dec 28, 2023
4f24c65
FORMAT: Flake8 linting
sami-ka Dec 28, 2023
c5066a7
BUILD(DEV): specified mypy version not compatible with python 3.10.7
sami-ka Dec 28, 2023
5b6cb1d
ADD: test of passing fit parameters
sami-ka Dec 29, 2023
13b6f3e
FIX: Test number of boosting iterations for all estimators
sami-ka Dec 29, 2023
2b2c7d6
ADD: tests for calibration, classification and regression
sami-ka Jan 3, 2024
53cc84c
STYLE: formatting
sami-ka Jan 3, 2024
2aa5ebc
DOCS: Add description for fit parameters passing test
sami-ka Jan 3, 2024
8073ad4
REFACTOR: reorganize elements used in fit parameters test
sami-ka Jan 3, 2024
bf51c92
Merge branch 'master' into 212-add-fit-params
sami-ka Jan 4, 2024
a415ce2
FIX: Merge commit mistake
sami-ka Jan 4, 2024
47eb40a
FIX: History merge
sami-ka Jan 4, 2024
1df8633
STYLE: function header one liner
sami-ka Jan 9, 2024
a4adcb6
Merge branch 'master' into 212-add-fit-params
sami-ka Jan 11, 2024
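
Taken together, these commits thread a ``**fit_params`` keyword dictionary from every public ``fit`` method down to the ``fit_estimator`` utility. Below is a minimal sketch of the shape of that utility after the change, assuming it simply forwards keyword arguments to the underlying estimator's own ``fit``; the real ``mapie.utils.fit_estimator`` may differ in its handling details.

from typing import Optional

from numpy.typing import ArrayLike
from sklearn.base import BaseEstimator


def fit_estimator(
    estimator: BaseEstimator,
    X: ArrayLike,
    y: ArrayLike,
    sample_weight: Optional[ArrayLike] = None,
    **fit_params,
) -> BaseEstimator:
    """Sketch: forward ``**fit_params`` verbatim to ``estimator.fit``."""
    if sample_weight is None:
        estimator.fit(X, y, **fit_params)
    else:
        estimator.fit(X, y, sample_weight=sample_weight, **fit_params)
    return estimator

Anything the underlying estimator's ``fit`` accepts, such as a GradientBoosting ``monitor`` callback, therefore flows through untouched.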
2 changes: 1 addition & 1 deletion AUTHORS.rst
@@ -36,5 +36,5 @@ Contributors
* Arthur Phan <[email protected]>
* Rafael Saraiva <[email protected]>
* Mehdi Elion <[email protected]>

* Sami Kaddani <[email protected]>
To be continued ...
1 change: 1 addition & 0 deletions HISTORY.rst
@@ -4,6 +4,7 @@ History

##### (##########)
------------------
* Add possibility of passing fit parameters used by estimators.

0.8.0 (2024-01-03)
------------------
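
The changelog entry summarizes the user-facing effect. A usage sketch mirroring the tests added later in this PR (the dataset construction here is illustrative): any keyword that MAPIE's ``fit`` does not consume itself is forwarded to the underlying estimator.

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor

from mapie.regression import MapieRegressor

X, y = make_regression(n_samples=200, n_features=4, random_state=1)


def early_stopping_monitor(i, est, locals):
    """Stop boosting after the third iteration (i is 0-indexed)."""
    return i == 2


mapie = MapieRegressor(estimator=GradientBoostingRegressor(random_state=1))
# ``monitor`` is not a MAPIE argument: it is forwarded to
# GradientBoostingRegressor.fit via the new ``**fit_params`` mechanism.
mapie.fit(X, y, monitor=early_stopping_monitor)

assert mapie.estimator_.single_estimator_.estimators_.shape[0] == 3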
5 changes: 4 additions & 1 deletion mapie/calibration.py
@@ -431,6 +431,7 @@ def fit(
random_state: Optional[Union[int, np.random.RandomState, None]] = None,
shuffle: Optional[bool] = True,
stratify: Optional[ArrayLike] = None,
**fit_params,
) -> MapieCalibrator:
"""
Calibrate the estimator on given datasets, according to the chosen
@@ -466,6 +467,8 @@ def fit(
See ``sklearn.model_selection.train_test_split`` documentation.
If not ``None``, data is split in a stratified fashion, using this
as the class label.
**fit_params : dict
Additional fit parameters.

Returns
-------
@@ -509,7 +512,7 @@ def fit(
y_train
)
estimator = fit_estimator(
clone(estimator), X_train, y_train, sw_train,
clone(estimator), X_train, y_train, sw_train, **fit_params,
)
self.single_estimator_ = estimator
self.classes_ = self.single_estimator_.classes_
17 changes: 14 additions & 3 deletions mapie/classification.py
@@ -586,6 +586,7 @@ def _fit_and_predict_oof_model(
val_index: ArrayLike,
k: int,
sample_weight: Optional[ArrayLike] = None,
**fit_params,
) -> Tuple[ClassifierMixin, NDArray, NDArray, ArrayLike]:
"""
Fit a single out-of-fold model on a given training set and
@@ -615,6 +616,9 @@
Sample weights. If None, then samples are equally weighted.
By default None.

**fit_params : dict
Additional fit parameters.

Returns
-------
Tuple[ClassifierMixin, NDArray, NDArray, ArrayLike]
@@ -633,11 +637,13 @@
y_val = _safe_indexing(y, val_index)

if sample_weight is None:
estimator = fit_estimator(estimator, X_train, y_train)
estimator = fit_estimator(
estimator, X_train, y_train, **fit_params
)
else:
sample_weight_train = _safe_indexing(sample_weight, train_index)
estimator = fit_estimator(
estimator, X_train, y_train, sample_weight_train
estimator, X_train, y_train, sample_weight_train, **fit_params
)
if _num_samples(X_val) > 0:
y_pred_proba = self._predict_oof_model(estimator, X_val)
@@ -1047,6 +1053,7 @@ def fit(
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
size_raps: Optional[float] = .2,
**fit_params,
) -> MapieClassifier:
"""
Fit the base estimator or use the fitted base estimator.
@@ -1074,6 +1081,9 @@ def fit(

By default ``.2``.

**fit_params : dict
Additional fit parameters.


Returns
-------
@@ -1147,7 +1157,7 @@ def fit(
else:
cv = cast(BaseCrossValidator, cv)
self.single_estimator_ = fit_estimator(
clone(estimator), X, y, sample_weight
clone(estimator), X, y, sample_weight, **fit_params
)
y_pred_proba = np.empty(
(n_samples, self.n_classes_),
@@ -1162,6 +1172,7 @@
val_index,
k,
sample_weight,
**fit_params,
)
for k, (train_index, val_index) in enumerate(cv.split(X))
)
28 changes: 25 additions & 3 deletions mapie/estimator/estimator.py
@@ -182,6 +182,7 @@ def _fit_oof_estimator(
y: ArrayLike,
train_index: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
**fit_params,
) -> RegressorMixin:
"""
Fit a single out-of-fold model on a given training set.
@@ -204,6 +205,9 @@
Sample weights. If None, then samples are equally weighted.
By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
RegressorMixin
@@ -216,7 +220,11 @@
sample_weight = cast(NDArray, sample_weight)

estimator = fit_estimator(
estimator, X_train, y_train, sample_weight=sample_weight
estimator,
X_train,
y_train,
sample_weight=sample_weight,
**fit_params
)
return estimator

@@ -377,6 +385,7 @@ def fit(
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
**fit_params,
) -> EnsembleRegressor:
"""
Fit the base estimator under the ``single_estimator_`` attribute.
@@ -397,6 +406,9 @@
Sample weights. If None, then samples are equally weighted.
By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
EnsembleRegressor
@@ -419,7 +431,12 @@
)
else:
single_estimator_ = self._fit_oof_estimator(
clone(estimator), X, y, full_indexes, sample_weight
clone(estimator),
X,
y,
full_indexes,
sample_weight,
**fit_params
)
cv = cast(BaseCrossValidator, cv)
self.k_ = np.full(
@@ -432,7 +449,12 @@
else:
estimators_ = Parallel(self.n_jobs, verbose=self.verbose)(
delayed(self._fit_oof_estimator)(
clone(estimator), X, y, train_index, sample_weight
clone(estimator),
X,
y,
train_index,
sample_weight,
**fit_params
)
for train_index, _ in cv.split(X)
)
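
For cross-validation strategies, this file is the heart of the change: every delayed out-of-fold fit receives the same keyword dictionary. Reduced to its essentials, the propagation pattern looks like the sketch below (generic code, not MAPIE's; ``fit_one_fold`` is a hypothetical stand-in for ``_fit_oof_estimator``):

from joblib import Parallel, delayed
from sklearn.base import clone
from sklearn.datasets import make_regression
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import KFold


def fit_one_fold(estimator, X, y, train_index, **fit_params):
    # Each fold gets a fresh clone and the same forwarded kwargs.
    return estimator.fit(X[train_index], y[train_index], **fit_params)


X, y = make_regression(n_samples=100, n_features=3, random_state=0)
cv = KFold(n_splits=5)
# ``coef_init=None`` is a no-op accepted by SGDRegressor.fit; it is here
# purely to show a keyword travelling through Parallel/delayed.
fit_params = {"coef_init": None}

estimators = Parallel(n_jobs=1)(
    delayed(fit_one_fold)(clone(SGDRegressor()), X, y, train_idx, **fit_params)
    for train_idx, _ in cv.split(X)
)
assert len(estimators) == 5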
4 changes: 4 additions & 0 deletions mapie/estimator/interface.py
@@ -21,6 +21,7 @@ def fit(
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
**fit_params
) -> EnsembleEstimator:
"""
Fit the base estimator under the ``single_estimator_`` attribute.
@@ -41,6 +42,9 @@
Sample weights. If None, then samples are equally weighted.
By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
EnsembleRegressor
13 changes: 11 additions & 2 deletions mapie/regression/quantile_regression.py
@@ -469,6 +469,7 @@ def fit(
random_state: Optional[Union[int, np.random.RandomState]] = None,
shuffle: Optional[bool] = True,
stratify: Optional[ArrayLike] = None,
**fit_params,
) -> MapieQuantileRegressor:
"""
Fit estimator and compute residuals used for prediction intervals.
@@ -533,6 +534,9 @@

By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
MapieQuantileRegressor
@@ -609,8 +613,13 @@
else:
cloned_estimator_.set_params(**params)
self.estimators_.append(fit_estimator(
cloned_estimator_, X_train, y_train, sample_weight_train
))
cloned_estimator_,
X_train,
y_train,
sample_weight_train,
**fit_params,
)
)
y_calib_preds[i] = self.estimators_[-1].predict(X_calib)
self.single_estimator_ = self.estimators_[2]

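
In the loop above, one estimator is fit per target quantile and each now receives the same ``**fit_params`` (the third fitted estimator, ``self.estimators_[2]``, doubles as the point predictor). A usage sketch mirroring the test added at the end of this PR; the data setup here is illustrative:

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

from mapie.regression import MapieQuantileRegressor

X, y = make_regression(n_samples=500, n_features=4, noise=1.0, random_state=1)
X_train, X_calib, y_train, y_calib = train_test_split(X, y, random_state=1)

gb = GradientBoostingRegressor(loss="quantile", random_state=1)
mapie = MapieQuantileRegressor(estimator=gb)


def early_stopping_monitor(i, est, locals):
    """Stop boosting after the third iteration."""
    return i == 2


mapie.fit(
    X_train,
    y_train,
    X_calib=X_calib,
    y_calib=y_calib,
    monitor=early_stopping_monitor,
)

# Each of the three quantile estimators honoured the forwarded monitor.
for est in mapie.estimators_:
    assert est.estimators_.shape[0] == 3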
8 changes: 7 additions & 1 deletion mapie/regression/regression.py
@@ -457,6 +457,7 @@ def fit(
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
**fit_params,
) -> MapieRegressor:
"""
Fit estimator and compute conformity scores used for
@@ -484,6 +485,9 @@

By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
MapieRegressor
@@ -509,7 +513,9 @@
self.verbose
)
# Fit the prediction function
self.estimator_ = self.estimator_.fit(X, y, sample_weight)
self.estimator_ = self.estimator_.fit(
X, y, sample_weight, **fit_params
)

# Predict on calibration data
y_pred = self.estimator_.predict_calib(X)
24 changes: 23 additions & 1 deletion mapie/tests/test_calibration.py
@@ -8,7 +8,7 @@
from sklearn.calibration import _SigmoidCalibration
from sklearn.compose import ColumnTransformer
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
@@ -461,3 +461,25 @@ def test_pipeline_compatibility() -> None:
mapie = MapieCalibrator(estimator=pipe)
mapie.fit(X, y)
mapie.predict(X)


def test_fit_parameters_passing() -> None:
"""
Test passing fit parameters, here early stopping at iteration 3.
Checks that underlying GradientBoosting estimators have used 3 iterations
only during boosting, instead of default value for n_estimators (=100).
"""
gb = GradientBoostingClassifier(random_state=random_state)

mapie = MapieCalibrator(estimator=gb)

def early_stopping_monitor(i, est, locals):
"""Returns True on the 3rd iteration."""
if i == 2:
return True
else:
return False

mapie.fit(X, y, monitor=early_stopping_monitor)

assert mapie.single_estimator_.estimators_.shape[0] == 3
28 changes: 28 additions & 0 deletions mapie/tests/test_classification.py
@@ -10,6 +10,7 @@
from sklearn.compose import ColumnTransformer
from sklearn.datasets import make_classification
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, LeaveOneOut, ShuffleSplit
@@ -1957,3 +1958,30 @@ def test_deprecated_method_warning(method: str) -> None:
DeprecationWarning, match=r".*WARNING: Deprecated method.*"
):
mapie_clf.fit(X_toy, y_toy)


def test_fit_parameters_passing() -> None:
"""
Test passing fit parameters, here early stopping at iteration 3.
Checks that underlying GradientBoosting estimators have used 3 iterations
only during boosting, instead of default value for n_estimators (=100).
"""
gb = GradientBoostingClassifier(random_state=random_state)

mapie = MapieClassifier(
estimator=gb, method="aps", random_state=random_state
)

def early_stopping_monitor(i, est, locals):
"""Returns True on the 3rd iteration."""
if i == 2:
return True
else:
return False

mapie.fit(X, y, monitor=early_stopping_monitor)

assert mapie.single_estimator_.estimators_.shape[0] == 3

for estimator in mapie.estimators_:
assert estimator.estimators_.shape[0] == 3
29 changes: 29 additions & 0 deletions mapie/tests/test_quantile_regression.py
@@ -762,3 +762,32 @@ def test_consistent_class() -> None:
np.testing.assert_allclose(y_pis_1[:, 0, 0], y_pis_2[:, 0, 0])
np.testing.assert_allclose(y_pis_1[:, 1, 0], y_pis_2[:, 1, 0])
np.testing.assert_allclose(y_pred_1, y_pred_2)


@pytest.mark.parametrize("strategy", [*STRATEGIES])
def test_fit_parameters_passing(strategy: str) -> None:
"""
Test passing fit parameters, here early stopping at iteration 3.
Checks that underlying GradientBoosting estimators have used 3 iterations
only during boosting, instead of default value for n_estimators (=100).
"""
mapie = MapieQuantileRegressor(estimator=gb, **STRATEGIES[strategy])

def early_stopping_monitor(i, est, locals):
"""Returns True on the 3rd iteration."""
if i == 2:
return True
else:
return False

mapie.fit(
X_train,
y_train,
X_calib=X_calib,
y_calib=y_calib,
sample_weight=None,
monitor=early_stopping_monitor
)

for estimator in mapie.estimators_:
assert estimator.estimators_.shape[0] == 3