|
| 1 | +""" |
| 2 | +============================= |
| 3 | +Quickstart |
| 4 | +============================= |
| 5 | +
|
| 6 | +This example shows a simple comparison of the expected calibration error of a |
| 7 | +non-calibrated method against a calibrated method. |
| 8 | +""" |
| 9 | +# Author: Miquel Perello Nieto <[email protected]> |
| 10 | +# License: new BSD |
| 11 | + |
| 12 | +print(__doc__) |
| 13 | + |
| 14 | +############################################################################## |
| 15 | +# First choose a classifier |
| 16 | + |
| 17 | +from sklearn.naive_bayes import GaussianNB |
| 18 | + |
| 19 | +clf = GaussianNB() |
| 20 | + |
| 21 | +############################################################################## |
| 22 | +# And a dataset |
| 23 | + |
| 24 | +from sklearn.datasets import make_classification |
| 25 | +from sklearn.model_selection import train_test_split |
| 26 | + |
| 27 | +X, y = make_classification( |
| 28 | + n_samples=100000, n_features=20, n_informative=4, n_redundant=4, |
| 29 | + random_state=42 |
| 30 | +) |
| 31 | + |
| 32 | +from sklearn.model_selection import train_test_split |
| 33 | + |
| 34 | +X_train, X_test, Y_train, Y_test = train_test_split(X, y) |
| 35 | + |
| 36 | +############################################################################## |
| 37 | +# We can train the classifier and count its correct test predictions |
| 38 | + |
| 39 | +clf.fit(X_train, Y_train) |
| 40 | + |
| 41 | +n_correct = sum(clf.predict(X_test) == Y_test) |
| 42 | +n_test = Y_test.shape[0] |
| 43 | + |
| 44 | +print(f"The classifier gets {n_correct} correct " |
| 45 | + f"predictions out of {n_test}") |
| 46 | + |
| 47 | +############################################################################## |
| 48 | +# We can assess the confidence expected calibration error (conf-ECE) |
| 49 | + |
| 50 | +from pycalib.metrics import conf_ECE |
| 51 | + |
| 52 | +scores = clf.predict_proba(X_test) |
| 53 | +cece = conf_ECE(Y_test, scores, bins=15) |
| 54 | + |
| 55 | +print(f"The classifier gets a confidence expected " |
| 56 | + f"calibration error of {cece:0.2f}") |
| 57 | + |
| 58 | +############################################################################## |
| 59 | +# Let's look at its reliability diagram |
| 60 | + |
| 61 | +from pycalib.visualisations import plot_reliability_diagram |
| 62 | + |
| 63 | +plot_reliability_diagram(labels=Y_test, scores=scores, show_histogram=True, |
| 64 | + show_bars=True, show_gaps=True) |
| 65 | + |
| 66 | +############################################################################## |
| 67 | +# We can see how a calibration method can improve the conf-ECE |
| 68 | + |
| 69 | +from pycalib.models import IsotonicCalibration |
| 70 | +cal = IsotonicCalibration() |
| 71 | + |
| 72 | +############################################################################## |
| 73 | +# Now we can put together a probabilistic classifier with the chosen calibration |
| 74 | +# method |
| 75 | + |
| 76 | +from pycalib.models import CalibratedModel |
| 77 | + |
| 78 | +cal_clf = CalibratedModel(base_estimator=clf, calibrator=cal, |
| 79 | + fit_estimator=False) |
| 80 | + |
| 81 | +############################################################################## |
| 82 | +# Now you can train both classifier and calibrator all together. |
| 83 | + |
| 84 | +cal_clf.fit(X_train, Y_train) |
| 85 | +n_correct = sum(cal_clf.predict(X_test) == Y_test) |
| 86 | + |
| 87 | +print(f"The calibrated classifier gets {n_correct} " |
| 88 | + f"correct predictions out of {n_test}") |
| 89 | + |
| 90 | +scores_cal = cal_clf.predict_proba(X_test) |
| 91 | +cece = conf_ECE(Y_test, scores_cal, bins=15) |
| 92 | + |
| 93 | +print(f"The calibrated classifier gets a confidence " |
| 94 | + f"expected calibration error of {cece:0.2f}") |
| 95 | + |
| 96 | +############################################################################## |
| 97 | +# Finally, let's look at the reliability diagram of the calibrated classifier |
| 98 | + |
| 99 | +from pycalib.visualisations import plot_reliability_diagram |
| 100 | + |
| 101 | +plot_reliability_diagram(labels=Y_test, scores=scores_cal, show_histogram=True, |
| 102 | + show_bars=True, show_gaps=True) |
0 commit comments