Add fit parameters passing #391

Merged: 22 commits (Jan 12, 2024)
Commits (22)
c6bd80a
ADD: fit parameters in fit_estimator function signature
sami-ka Dec 28, 2023
a4bc069
ADD: fit parameters to the fit method in EnsembleEstimator abstract c…
sami-ka Dec 28, 2023
4ffd18d
ADD: fit parameters to the _fit_oof_estimator and fit method of the E…
sami-ka Dec 28, 2023
6ef5fbd
ADD: fit parameters to the fit method of the MapieRegressor class
sami-ka Dec 28, 2023
77c73e0
ADD: fit parameters to the fit method of the MapieQuantileRegressor c…
sami-ka Dec 28, 2023
31a0515
ADD: fit parameters to the fit method of the MapieTimeSeriesRegressor…
sami-ka Dec 28, 2023
66f44b2
ADD: fit parameters to the _fit_and_predict_oof_model and fit method …
sami-ka Dec 28, 2023
0533878
ADD: fit parameters to the fit method of the MapieCalibrator class
sami-ka Dec 28, 2023
377767d
UPD: authors list and changelog
sami-ka Dec 28, 2023
4f24c65
FORMAT: Flake8 linting
sami-ka Dec 28, 2023
c5066a7
BUILD(DEV): specified mypy version not compatible with python 3.10.7
sami-ka Dec 28, 2023
5b6cb1d
ADD: test of passing fit parameters
sami-ka Dec 29, 2023
13b6f3e
FIX: Test number of boosting iterations for all estimators
sami-ka Dec 29, 2023
2b2c7d6
ADD: tests for calibration, classification and regression
sami-ka Jan 3, 2024
53cc84c
STYLE: formatting
sami-ka Jan 3, 2024
2aa5ebc
DOCS: Add description for fit parameters passing test
sami-ka Jan 3, 2024
8073ad4
REFACTOR: reorganize elements used in fit parameters test
sami-ka Jan 3, 2024
bf51c92
Merge branch 'master' into 212-add-fit-params
sami-ka Jan 4, 2024
a415ce2
FIX: Merge commit mistake
sami-ka Jan 4, 2024
47eb40a
FIX: History merge
sami-ka Jan 4, 2024
1df8633
STYLE: function header one liner
sami-ka Jan 9, 2024
a4adcb6
Merge branch 'master' into 212-add-fit-params
sami-ka Jan 11, 2024
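
Taken together, these commits thread a ``**fit_params`` keyword dictionary from every public ``fit`` method down to the ``fit_estimator`` utility. Below is a minimal sketch of the shape of that utility after the change, assuming it simply forwards keyword arguments to the underlying estimator's own ``fit``; the real ``mapie.utils.fit_estimator`` may differ in its handling details.

from typing import Optional

from numpy.typing import ArrayLike
from sklearn.base import BaseEstimator


def fit_estimator(
    estimator: BaseEstimator,
    X: ArrayLike,
    y: ArrayLike,
    sample_weight: Optional[ArrayLike] = None,
    **fit_params,
) -> BaseEstimator:
    """Sketch: forward ``**fit_params`` verbatim to ``estimator.fit``."""
    if sample_weight is None:
        estimator.fit(X, y, **fit_params)
    else:
        estimator.fit(X, y, sample_weight=sample_weight, **fit_params)
    return estimator

Anything the underlying estimator's ``fit`` accepts, such as a GradientBoosting ``monitor`` callback, therefore flows through untouched.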
2 changes: 1 addition & 1 deletion AUTHORS.rst
@@ -36,5 +36,5 @@ Contributors
* Arthur Phan <[email protected]>
* Rafael Saraiva <[email protected]>
* Mehdi Elion <[email protected]>

* Sami Kaddani <[email protected]>
To be continued ...
1 change: 1 addition & 0 deletions HISTORY.rst
@@ -4,6 +4,7 @@ History

##### (##########)
------------------
* Add possibility of passing fit parameters used by estimators.

0.8.0 (2024-01-03)
------------------
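
The changelog entry summarizes the user-facing effect. A usage sketch mirroring the tests added later in this PR (the dataset construction here is illustrative): any keyword that MAPIE's ``fit`` does not consume itself is forwarded to the underlying estimator.

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor

from mapie.regression import MapieRegressor

X, y = make_regression(n_samples=200, n_features=4, random_state=1)


def early_stopping_monitor(i, est, locals):
    """Stop boosting after the third iteration (i is 0-indexed)."""
    return i == 2


mapie = MapieRegressor(estimator=GradientBoostingRegressor(random_state=1))
# ``monitor`` is not a MAPIE argument: it is forwarded to
# GradientBoostingRegressor.fit via the new ``**fit_params`` mechanism.
mapie.fit(X, y, monitor=early_stopping_monitor)

assert mapie.estimator_.single_estimator_.estimators_.shape[0] == 3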
5 changes: 4 additions & 1 deletion mapie/calibration.py
@@ -431,6 +431,7 @@ def fit(
random_state: Optional[Union[int, np.random.RandomState, None]] = None,
shuffle: Optional[bool] = True,
stratify: Optional[ArrayLike] = None,
**fit_params,
) -> MapieCalibrator:
"""
Calibrate the estimator on given datasets, according to the chosen
@@ -466,6 +467,8 @@ def fit(
See ``sklearn.model_selection.train_test_split`` documentation.
If not ``None``, data is split in a stratified fashion, using this
as the class label.
**fit_params : dict
Additional fit parameters.

Returns
-------
@@ -509,7 +512,7 @@ def fit(
y_train
)
estimator = fit_estimator(
clone(estimator), X_train, y_train, sw_train,
clone(estimator), X_train, y_train, sw_train, **fit_params,
)
self.single_estimator_ = estimator
self.classes_ = self.single_estimator_.classes_
17 changes: 14 additions & 3 deletions mapie/classification.py
@@ -586,6 +586,7 @@ def _fit_and_predict_oof_model(
val_index: ArrayLike,
k: int,
sample_weight: Optional[ArrayLike] = None,
**fit_params,
) -> Tuple[ClassifierMixin, NDArray, NDArray, ArrayLike]:
"""
Fit a single out-of-fold model on a given training set and
@@ -615,6 +616,9 @@
Sample weights. If None, then samples are equally weighted.
By default None.

**fit_params : dict
Additional fit parameters.

Returns
-------
Tuple[ClassifierMixin, NDArray, NDArray, ArrayLike]
@@ -633,11 +637,13 @@
y_val = _safe_indexing(y, val_index)

if sample_weight is None:
estimator = fit_estimator(estimator, X_train, y_train)
estimator = fit_estimator(
estimator, X_train, y_train, **fit_params
)
else:
sample_weight_train = _safe_indexing(sample_weight, train_index)
estimator = fit_estimator(
estimator, X_train, y_train, sample_weight_train
estimator, X_train, y_train, sample_weight_train, **fit_params
)
if _num_samples(X_val) > 0:
y_pred_proba = self._predict_oof_model(estimator, X_val)
@@ -1047,6 +1053,7 @@ def fit(
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
size_raps: Optional[float] = .2,
**fit_params,
) -> MapieClassifier:
"""
Fit the base estimator or use the fitted base estimator.
@@ -1074,6 +1081,9 @@ def fit(

By default ``.2``.

**fit_params : dict
Additional fit parameters.


Returns
-------
@@ -1147,7 +1157,7 @@ def fit(
else:
cv = cast(BaseCrossValidator, cv)
self.single_estimator_ = fit_estimator(
clone(estimator), X, y, sample_weight
clone(estimator), X, y, sample_weight, **fit_params
)
y_pred_proba = np.empty(
(n_samples, self.n_classes_),
@@ -1162,6 +1172,7 @@
val_index,
k,
sample_weight,
**fit_params,
)
for k, (train_index, val_index) in enumerate(cv.split(X))
)
28 changes: 25 additions & 3 deletions mapie/estimator/estimator.py
@@ -182,6 +182,7 @@ def _fit_oof_estimator(
y: ArrayLike,
train_index: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
**fit_params,
) -> RegressorMixin:
"""
Fit a single out-of-fold model on a given training set.
@@ -204,6 +205,9 @@
Sample weights. If None, then samples are equally weighted.
By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
RegressorMixin
@@ -216,7 +220,11 @@
sample_weight = cast(NDArray, sample_weight)

estimator = fit_estimator(
estimator, X_train, y_train, sample_weight=sample_weight
estimator,
X_train,
y_train,
sample_weight=sample_weight,
**fit_params
)
return estimator

@@ -377,6 +385,7 @@ def fit(
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
**fit_params,
) -> EnsembleRegressor:
"""
Fit the base estimator under the ``single_estimator_`` attribute.
@@ -397,6 +406,9 @@
Sample weights. If None, then samples are equally weighted.
By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
EnsembleRegressor
@@ -419,7 +431,12 @@
)
else:
single_estimator_ = self._fit_oof_estimator(
clone(estimator), X, y, full_indexes, sample_weight
clone(estimator),
X,
y,
full_indexes,
sample_weight,
**fit_params
)
cv = cast(BaseCrossValidator, cv)
self.k_ = np.full(
@@ -432,7 +449,12 @@
else:
estimators_ = Parallel(self.n_jobs, verbose=self.verbose)(
delayed(self._fit_oof_estimator)(
clone(estimator), X, y, train_index, sample_weight
clone(estimator),
X,
y,
train_index,
sample_weight,
**fit_params
)
for train_index, _ in cv.split(X)
)
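
For cross-validation strategies, this file is the heart of the change: every delayed out-of-fold fit receives the same keyword dictionary. Reduced to its essentials, the propagation pattern looks like the sketch below (generic code, not MAPIE's; ``fit_one_fold`` is a hypothetical stand-in for ``_fit_oof_estimator``):

from joblib import Parallel, delayed
from sklearn.base import clone
from sklearn.datasets import make_regression
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import KFold


def fit_one_fold(estimator, X, y, train_index, **fit_params):
    # Each fold gets a fresh clone and the same forwarded kwargs.
    return estimator.fit(X[train_index], y[train_index], **fit_params)


X, y = make_regression(n_samples=100, n_features=3, random_state=0)
cv = KFold(n_splits=5)
# ``coef_init=None`` is a no-op accepted by SGDRegressor.fit; it is here
# purely to show a keyword travelling through Parallel/delayed.
fit_params = {"coef_init": None}

estimators = Parallel(n_jobs=1)(
    delayed(fit_one_fold)(clone(SGDRegressor()), X, y, train_idx, **fit_params)
    for train_idx, _ in cv.split(X)
)
assert len(estimators) == 5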
4 changes: 4 additions & 0 deletions mapie/estimator/interface.py
@@ -21,6 +21,7 @@ def fit(
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
**fit_params
) -> EnsembleEstimator:
"""
Fit the base estimator under the ``single_estimator_`` attribute.
@@ -41,6 +42,9 @@
Sample weights. If None, then samples are equally weighted.
By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
EnsembleRegressor
13 changes: 11 additions & 2 deletions mapie/regression/quantile_regression.py
@@ -469,6 +469,7 @@ def fit(
random_state: Optional[Union[int, np.random.RandomState]] = None,
shuffle: Optional[bool] = True,
stratify: Optional[ArrayLike] = None,
**fit_params,
) -> MapieQuantileRegressor:
"""
Fit estimator and compute residuals used for prediction intervals.
@@ -533,6 +534,9 @@

By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
MapieQuantileRegressor
@@ -609,8 +613,13 @@
else:
cloned_estimator_.set_params(**params)
self.estimators_.append(fit_estimator(
cloned_estimator_, X_train, y_train, sample_weight_train
))
cloned_estimator_,
X_train,
y_train,
sample_weight_train,
**fit_params,
)
)
y_calib_preds[i] = self.estimators_[-1].predict(X_calib)
self.single_estimator_ = self.estimators_[2]

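
In the loop above, one estimator is fit per target quantile and each now receives the same ``**fit_params`` (the third fitted estimator, ``self.estimators_[2]``, doubles as the point predictor). A usage sketch mirroring the test added at the end of this PR; the data setup here is illustrative:

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

from mapie.regression import MapieQuantileRegressor

X, y = make_regression(n_samples=500, n_features=4, noise=1.0, random_state=1)
X_train, X_calib, y_train, y_calib = train_test_split(X, y, random_state=1)

gb = GradientBoostingRegressor(loss="quantile", random_state=1)
mapie = MapieQuantileRegressor(estimator=gb)


def early_stopping_monitor(i, est, locals):
    """Stop boosting after the third iteration."""
    return i == 2


mapie.fit(
    X_train,
    y_train,
    X_calib=X_calib,
    y_calib=y_calib,
    monitor=early_stopping_monitor,
)

# Each of the three quantile estimators honoured the forwarded monitor.
for est in mapie.estimators_:
    assert est.estimators_.shape[0] == 3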
8 changes: 7 additions & 1 deletion mapie/regression/regression.py
@@ -457,6 +457,7 @@ def fit(
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
**fit_params,
) -> MapieRegressor:
"""
Fit estimator and compute conformity scores used for
@@ -484,6 +485,9 @@

By default ``None``.

**fit_params : dict
Additional fit parameters.

Returns
-------
MapieRegressor
@@ -509,7 +513,9 @@
self.verbose
)
# Fit the prediction function
self.estimator_ = self.estimator_.fit(X, y, sample_weight)
self.estimator_ = self.estimator_.fit(
X, y, sample_weight, **fit_params
)

# Predict on calibration data
y_pred = self.estimator_.predict_calib(X)
24 changes: 23 additions & 1 deletion mapie/tests/test_calibration.py
@@ -8,7 +8,7 @@
from sklearn.calibration import _SigmoidCalibration
from sklearn.compose import ColumnTransformer
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
@@ -461,3 +461,25 @@ def test_pipeline_compatibility() -> None:
mapie = MapieCalibrator(estimator=pipe)
mapie.fit(X, y)
mapie.predict(X)


def test_fit_parameters_passing() -> None:
"""
Test passing fit parameters, here early stopping at iteration 3.
Checks that underlying GradientBoosting estimators have used 3 iterations
only during boosting, instead of default value for n_estimators (=100).
"""
gb = GradientBoostingClassifier(random_state=random_state)

mapie = MapieCalibrator(estimator=gb)

def early_stopping_monitor(i, est, locals):
"""Returns True on the 3rd iteration."""
if i == 2:
return True
else:
return False

mapie.fit(X, y, monitor=early_stopping_monitor)

assert mapie.single_estimator_.estimators_.shape[0] == 3
28 changes: 28 additions & 0 deletions mapie/tests/test_classification.py
@@ -10,6 +10,7 @@
from sklearn.compose import ColumnTransformer
from sklearn.datasets import make_classification
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, LeaveOneOut, ShuffleSplit
@@ -1957,3 +1958,30 @@ def test_deprecated_method_warning(method: str) -> None:
DeprecationWarning, match=r".*WARNING: Deprecated method.*"
):
mapie_clf.fit(X_toy, y_toy)


def test_fit_parameters_passing() -> None:
"""
Test passing fit parameters, here early stopping at iteration 3.
Checks that underlying GradientBoosting estimators have used 3 iterations
only during boosting, instead of default value for n_estimators (=100).
"""
gb = GradientBoostingClassifier(random_state=random_state)

mapie = MapieClassifier(
estimator=gb, method="aps", random_state=random_state
)

def early_stopping_monitor(i, est, locals):
"""Returns True on the 3rd iteration."""
if i == 2:
return True
else:
return False

mapie.fit(X, y, monitor=early_stopping_monitor)

assert mapie.single_estimator_.estimators_.shape[0] == 3

for estimator in mapie.estimators_:
assert estimator.estimators_.shape[0] == 3
29 changes: 29 additions & 0 deletions mapie/tests/test_quantile_regression.py
@@ -762,3 +762,32 @@ def test_consistent_class() -> None:
np.testing.assert_allclose(y_pis_1[:, 0, 0], y_pis_2[:, 0, 0])
np.testing.assert_allclose(y_pis_1[:, 1, 0], y_pis_2[:, 1, 0])
np.testing.assert_allclose(y_pred_1, y_pred_2)


@pytest.mark.parametrize("strategy", [*STRATEGIES])
def test_fit_parameters_passing(strategy: str) -> None:
"""
Test passing fit parameters, here early stopping at iteration 3.
Checks that underlying GradientBoosting estimators have used 3 iterations
only during boosting, instead of default value for n_estimators (=100).
"""
mapie = MapieQuantileRegressor(estimator=gb, **STRATEGIES[strategy])

def early_stopping_monitor(i, est, locals):
"""Returns True on the 3rd iteration."""
if i == 2:
return True
else:
return False

mapie.fit(
X_train,
y_train,
X_calib=X_calib,
y_calib=y_calib,
sample_weight=None,
monitor=early_stopping_monitor
)

for estimator in mapie.estimators_:
assert estimator.estimators_.shape[0] == 3