DOC: update plot_toy_model.py to v1 API. Update and improve corresponding code in quick_start.rst
scikit-learn-contrib · Jan 28, 2025 · 196ff04 · 196ff04
1 parent 99c543f
commit 196ff04
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 37 deletions.
diff --git a/doc/images/quickstart_1.png b/doc/images/quickstart_1.png
diff --git a/doc/quick_start.rst b/doc/quick_start.rst
@@ -36,18 +36,24 @@ To install directly from the github repository :
 Let us start with a basic regression problem.
 Here, we generate one-dimensional noisy data that we fit with a linear model.
 
+..
+    Comment to developers: the following piece of code is duplicated in `examples/regression/1-quickstart/plot_toy_model.py`.
+    When updating it, please replicate the changes to this other file.
+
 .. testcode::
 
     import numpy as np
-    from sklearn.linear_model import LinearRegression
     from sklearn.datasets import make_regression
     from sklearn.model_selection import train_test_split
 
-    regressor = LinearRegression()
-    X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)
-    X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(X, y, test_size=0.5)
-    X_train, X_conformalize, y_train, y_conformalize = train_test_split(X_train_conformalize, y_train_conformalize,
-                                                        test_size=0.5)
+    RANDOM_STATE = 42
+    X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=RANDOM_STATE)
+    X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(
+        X, y, test_size=0.5, random_state=RANDOM_STATE
+    )
+    X_train, X_conformalize, y_train, y_conformalize = train_test_split(
+        X_train_conformalize, y_train_conformalize, test_size=0.5, random_state=RANDOM_STATE
+    )
 
     #  We follow a sequential ``fit``, ``conformalize``, and ``predict`` process.
     #  We set the confidence level to estimate prediction intervals at approximately one and two
@@ -56,7 +62,6 @@ Here, we generate one-dimensional noisy data that we fit with a linear model.
     from mapie_v1.regression import SplitConformalRegressor
 
     mapie_regressor = SplitConformalRegressor(
-        regressor,
         confidence_level=[0.95, 0.68],
         prefit=False,
     )
@@ -95,9 +100,9 @@ Here, we generate one-dimensional noisy data that we fit with a linear model.
         alpha=0.2
     )
     plt.title(
-        f"Target and effective coverages for "
+        f"Effective coverage for "
         f"confidence_level={confidence_level[0]:.2f}: {coverage_scores[0]:.3f}\n"
-        f"Target and effective coverages for "
+        f"Effective coverage for "
         f"confidence_level={confidence_level[1]:.2f}: {coverage_scores[1]:.3f}"
     )
     plt.show()

diff --git a/examples/regression/1-quickstart/plot_toy_model.py b/examples/regression/1-quickstart/plot_toy_model.py
@@ -1,54 +1,58 @@
 """
 =====================================================================================
-[Pre-v1] Plotting MAPIE prediction intervals with a toy dataset
+Plotting MAPIE prediction intervals with a toy dataset
 =====================================================================================
-**Note: we recently released MAPIE v1.0.0, which introduces breaking API changes.**
-**This notebook hasn't been updated to the new API yet.**
-
-An example plot of :class:`~mapie.regression.MapieRegressor` used
+An example plot of :class:`~mapie_v1.regression.SplitConformalRegressor` used
 in the Quickstart.
 """
 import numpy as np
 from matplotlib import pyplot as plt
 from sklearn.datasets import make_regression
-from sklearn.linear_model import LinearRegression
+from sklearn.model_selection import train_test_split
 
 from mapie.metrics import regression_coverage_score
-from mapie.regression import MapieRegressor
+from mapie_v1.regression import SplitConformalRegressor
 
 RANDOM_STATE = 42
-regressor = LinearRegression()
-X, y = make_regression(
-    n_samples=500, n_features=1, noise=20, random_state=RANDOM_STATE
+X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=RANDOM_STATE)
+X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(
+    X, y, test_size=0.5, random_state=RANDOM_STATE
+)
+X_train, X_conformalize, y_train, y_conformalize = train_test_split(
+    X_train_conformalize, y_train_conformalize, test_size=0.5, random_state=RANDOM_STATE
 )
 
-alpha = [0.05, 0.32]
-mapie = MapieRegressor(regressor, method="plus", random_state=RANDOM_STATE)
-mapie.fit(X, y)
-y_pred, y_pis = mapie.predict(X, alpha=alpha)
+confidence_level = [0.95, 0.68]
+mapie_regressor = SplitConformalRegressor(
+    confidence_level=confidence_level, prefit=False
+)
+mapie_regressor.fit(X_train, y_train)
+mapie_regressor.conformalize(X_conformalize, y_conformalize)
+y_pred, y_pred_interval = mapie_regressor.predict_interval(X_test)
 
 coverage_scores = [
-    regression_coverage_score(y, y_pis[:, 0, i], y_pis[:, 1, i])
-    for i, _ in enumerate(alpha)
+    regression_coverage_score(
+        y_test, y_pred_interval[:, 0, i], y_pred_interval[:, 1, i]
+    ) for i, _ in enumerate(confidence_level)
 ]
 
 plt.xlabel("x")
 plt.ylabel("y")
-plt.scatter(X, y, alpha=0.3)
-plt.plot(X, y_pred, color="C1")
-order = np.argsort(X[:, 0])
-plt.plot(X[order], y_pis[order][:, 0, 1], color="C1", ls="--")
-plt.plot(X[order], y_pis[order][:, 1, 1], color="C1", ls="--")
+plt.scatter(X_test, y_test, alpha=0.3)
+plt.plot(X_test, y_pred, color="C1")
+order = np.argsort(X_test[:, 0])
+plt.plot(X_test[order], y_pred_interval[order][:, 0, 1], color="C1", ls="--")
+plt.plot(X_test[order], y_pred_interval[order][:, 1, 1], color="C1", ls="--")
 plt.fill_between(
-    X[order].ravel(),
-    y_pis[order][:, 0, 0].ravel(),
-    y_pis[order][:, 1, 0].ravel(),
+    X_test[order].ravel(),
+    y_pred_interval[order][:, 0, 0].ravel(),
+    y_pred_interval[order][:, 1, 0].ravel(),
     alpha=0.2,
 )
 plt.title(
-    f"Target and effective coverages for "
-    f"alpha={alpha[0]:.2f}: ({1-alpha[0]:.3f}, {coverage_scores[0]:.3f})\n"
-    f"Target and effective coverages for "
-    f"alpha={alpha[1]:.2f}: ({1-alpha[1]:.3f}, {coverage_scores[1]:.3f})"
+    f"Effective coverage for "
+    f"confidence_level={confidence_level[0]:.2f}: {coverage_scores[0]:.3f}\n"
+    f"Effective coverage for "
+    f"confidence_level={confidence_level[1]:.2f}: {coverage_scores[1]:.3f}"
 )
 plt.show()