Skip to content

Commit

Permalink
DOC: update plot_toy_model.py to v1 API. Update and improve correspon…
Browse files Browse the repository at this point in the history
…ding code in quick_start.rst
  • Loading branch information
Valentin-Laurent committed Jan 28, 2025
1 parent 99c543f commit 196ff04
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 37 deletions.
Binary file modified doc/images/quickstart_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
23 changes: 14 additions & 9 deletions doc/quick_start.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,24 @@ To install directly from the github repository :
Let us start with a basic regression problem.
Here, we generate one-dimensional noisy data that we fit with a linear model.

..
Comment to developers: the following piece of code is duplicated in `examples/regression/1-quickstart/plot_toy_model.py`.
When updating it, please replicate the changes to this other file.
.. testcode::

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

regressor = LinearRegression()
X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)
X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(X, y, test_size=0.5)
X_train, X_conformalize, y_train, y_conformalize = train_test_split(X_train_conformalize, y_train_conformalize,
test_size=0.5)
RANDOM_STATE = 42
X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=RANDOM_STATE)
X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(
X, y, test_size=0.5, random_state=RANDOM_STATE
)
X_train, X_conformalize, y_train, y_conformalize = train_test_split(
X_train_conformalize, y_train_conformalize, test_size=0.5, random_state=RANDOM_STATE
)

# We follow a sequential ``fit``, ``conformalize``, and ``predict`` process.
# We set the confidence level to estimate prediction intervals at approximately one and two
Expand All @@ -56,7 +62,6 @@ Here, we generate one-dimensional noisy data that we fit with a linear model.
from mapie_v1.regression import SplitConformalRegressor

mapie_regressor = SplitConformalRegressor(
regressor,
confidence_level=[0.95, 0.68],
prefit=False,
)
Expand Down Expand Up @@ -95,9 +100,9 @@ Here, we generate one-dimensional noisy data that we fit with a linear model.
alpha=0.2
)
plt.title(
f"Target and effective coverages for "
f"Effective coverage for "
f"confidence_level={confidence_level[0]:.2f}: {coverage_scores[0]:.3f}\n"
f"Target and effective coverages for "
f"Effective coverage for "
f"confidence_level={confidence_level[1]:.2f}: {coverage_scores[1]:.3f}"
)
plt.show()
Expand Down
60 changes: 32 additions & 28 deletions examples/regression/1-quickstart/plot_toy_model.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,58 @@
"""
=====================================================================================
[Pre-v1] Plotting MAPIE prediction intervals with a toy dataset
Plotting MAPIE prediction intervals with a toy dataset
=====================================================================================
**Note: we recently released MAPIE v1.0.0, which introduces breaking API changes.**
**This notebook hasn't been updated to the new API yet.**
An example plot of :class:`~mapie.regression.MapieRegressor` used
An example plot of :class:`~mapie_v1.regression.SplitConformalRegressor` used
in the Quickstart.
"""
import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

from mapie.metrics import regression_coverage_score
from mapie.regression import MapieRegressor
from mapie_v1.regression import SplitConformalRegressor

RANDOM_STATE = 42
regressor = LinearRegression()
X, y = make_regression(
n_samples=500, n_features=1, noise=20, random_state=RANDOM_STATE
X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=RANDOM_STATE)
X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(
X, y, test_size=0.5, random_state=RANDOM_STATE
)
X_train, X_conformalize, y_train, y_conformalize = train_test_split(
X_train_conformalize, y_train_conformalize, test_size=0.5, random_state=RANDOM_STATE
)

alpha = [0.05, 0.32]
mapie = MapieRegressor(regressor, method="plus", random_state=RANDOM_STATE)
mapie.fit(X, y)
y_pred, y_pis = mapie.predict(X, alpha=alpha)
confidence_level = [0.95, 0.68]
mapie_regressor = SplitConformalRegressor(
confidence_level=confidence_level, prefit=False
)
mapie_regressor.fit(X_train, y_train)
mapie_regressor.conformalize(X_conformalize, y_conformalize)
y_pred, y_pred_interval = mapie_regressor.predict_interval(X_test)

coverage_scores = [
regression_coverage_score(y, y_pis[:, 0, i], y_pis[:, 1, i])
for i, _ in enumerate(alpha)
regression_coverage_score(
y_test, y_pred_interval[:, 0, i], y_pred_interval[:, 1, i]
) for i, _ in enumerate(confidence_level)
]

plt.xlabel("x")
plt.ylabel("y")
plt.scatter(X, y, alpha=0.3)
plt.plot(X, y_pred, color="C1")
order = np.argsort(X[:, 0])
plt.plot(X[order], y_pis[order][:, 0, 1], color="C1", ls="--")
plt.plot(X[order], y_pis[order][:, 1, 1], color="C1", ls="--")
plt.scatter(X_test, y_test, alpha=0.3)
plt.plot(X_test, y_pred, color="C1")
order = np.argsort(X_test[:, 0])
plt.plot(X_test[order], y_pred_interval[order][:, 0, 1], color="C1", ls="--")
plt.plot(X_test[order], y_pred_interval[order][:, 1, 1], color="C1", ls="--")
plt.fill_between(
X[order].ravel(),
y_pis[order][:, 0, 0].ravel(),
y_pis[order][:, 1, 0].ravel(),
X_test[order].ravel(),
y_pred_interval[order][:, 0, 0].ravel(),
y_pred_interval[order][:, 1, 0].ravel(),
alpha=0.2,
)
plt.title(
f"Target and effective coverages for "
f"alpha={alpha[0]:.2f}: ({1-alpha[0]:.3f}, {coverage_scores[0]:.3f})\n"
f"Target and effective coverages for "
f"alpha={alpha[1]:.2f}: ({1-alpha[1]:.3f}, {coverage_scores[1]:.3f})"
f"Effective coverage for "
f"confidence_level={confidence_level[0]:.2f}: {coverage_scores[0]:.3f}\n"
f"Effective coverage for "
f"confidence_level={confidence_level[1]:.2f}: {coverage_scores[1]:.3f}"
)
plt.show()

0 comments on commit 196ff04

Please sign in to comment.