
Commit cd048c6

Extended documentation
1 parent 851ad1d commit cd048c6

File tree

8 files changed: +198 −484 lines


examples/xmpl_quickstart.py

+102
@@ -0,0 +1,102 @@
"""
=============================
Quickstart
=============================

This example shows a simple comparison of the expected calibration error of a
non-calibrated method against a calibrated method.
"""
# Author: Miquel Perello Nieto <[email protected]>
# License: new BSD

print(__doc__)

##############################################################################
# First choose a classifier

from sklearn.naive_bayes import GaussianNB

clf = GaussianNB()

##############################################################################
# And a dataset

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(
    n_samples=100000, n_features=20, n_informative=4, n_redundant=4,
    random_state=42
)

X_train, X_test, Y_train, Y_test = train_test_split(X, y)

##############################################################################
# First let's see how accurate the classifier is after training

clf.fit(X_train, Y_train)

n_correct = sum(clf.predict(X_test) == Y_test)
n_test = Y_test.shape[0]

print(f"The classifier gets {n_correct} correct "
      f"predictions out of {n_test}")

##############################################################################
# We can assess the confidence expected calibration error

from pycalib.metrics import conf_ECE

scores = clf.predict_proba(X_test)
cece = conf_ECE(Y_test, scores, bins=15)

print(f"The classifier gets a confidence expected "
      f"calibration error of {cece:0.2f}")

##############################################################################
# Let's look at its reliability diagram

from pycalib.visualisations import plot_reliability_diagram

plot_reliability_diagram(labels=Y_test, scores=scores, show_histogram=True,
                         show_bars=True, show_gaps=True)

##############################################################################
# We can see how calibration can improve the conf-ECE

from pycalib.models import IsotonicCalibration

cal = IsotonicCalibration()

##############################################################################
# Now we can put together a probabilistic classifier with the chosen
# calibration method

from pycalib.models import CalibratedModel

cal_clf = CalibratedModel(base_estimator=clf, calibrator=cal,
                          fit_estimator=False)

##############################################################################
# Now we can fit the calibration step (with fit_estimator=False the
# classifier trained above is kept as-is)

cal_clf.fit(X_train, Y_train)
n_correct = sum(cal_clf.predict(X_test) == Y_test)

print(f"The calibrated classifier gets {n_correct} "
      f"correct predictions out of {n_test}")

scores_cal = cal_clf.predict_proba(X_test)
cece = conf_ECE(Y_test, scores_cal, bins=15)

print(f"The calibrated classifier gets a confidence "
      f"expected calibration error of {cece:0.2f}")

##############################################################################
# Finally, let's look at the reliability diagram of the calibrated classifier

plot_reliability_diagram(labels=Y_test, scores=scores_cal, show_histogram=True,
                         show_bars=True, show_gaps=True)
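
For reference, the conf-ECE reported above is the standard binned estimate: predictions are grouped into equal-width confidence bins, and the absolute gap between each bin's mean confidence and its accuracy is averaged, weighted by bin population. Below is a minimal NumPy sketch of that computation; the function is illustrative only, and conf_ECE's exact binning details may differ.

import numpy as np

def confidence_ece_sketch(y_true, scores, bins=15):
    # Confidence and correctness of the predicted class for each sample.
    conf = scores.max(axis=1)
    correct = (scores.argmax(axis=1) == y_true).astype(float)
    edges = np.linspace(0.0, 1.0, bins + 1)
    ece = 0.0
    for lo, hi in zip(edges[:-1], edges[1:]):
        mask = (conf > lo) & (conf <= hi)
        if mask.any():
            # |mean confidence - accuracy| in the bin, weighted by its share.
            ece += mask.mean() * abs(conf[mask].mean() - correct[mask].mean())
    return ece

# e.g. confidence_ece_sketch(Y_test, scores, bins=15) should land close to
# the conf_ECE value printed above.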

pycalib/metrics.py

+5 −2
@@ -723,6 +723,7 @@ def full_ECE(y_true, probs, bins=15, power=1):
 
     return s
 
+
 # TODO: Speed up computation.
 def _label_resampling(probs):
     c = probs.cumsum(axis=1)
@@ -732,11 +733,13 @@ def _label_resampling(probs):
     y[range(len(probs)), choices] = 1
     return y
 
+
 # Speed up of the previous label_resampling function
 def get_one_hot(targets, nb_classes):
     res = np.eye(nb_classes)[np.array(targets).reshape(-1)]
     return res.reshape(list(targets.shape)+[nb_classes])
 
+
 def _label_resampling_v2(probs):
     c = probs.cumsum(axis=1)
     u = np.random.rand(len(c), 1)
@@ -745,7 +748,6 @@ def _label_resampling_v2(probs):
     return y
 
 
-
 # TODO: Speed up computation.
 def _score_sampling(probs, samples=10000, ece_function=None):
 
@@ -760,7 +762,8 @@ def _score_sampling(probs, samples=10000, ece_function=None):
 
 
 # This uses all available CPUS reducing the time by this factor
-def _score_sampling_v2(probs, samples=10000, ece_function=None, processes=None):
+def _score_sampling_v2(probs, samples=10000, ece_function=None,
+                       processes=None):
 
     probs = np.array(probs)
 
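The wrapped signature above belongs to the parallel variant of the score-sampling routine; its comment notes that it uses all available CPUs. As a rough illustration of that pattern only (not pycalib's actual code; the names and the per-sample statistic below are placeholders), the Monte Carlo samples can be farmed out to a process pool:

import numpy as np
from multiprocessing import Pool

def _sample_labels(probs, rng):
    # Draw one label per row from its categorical distribution
    # (the same cumsum trick as _label_resampling_v2 above).
    c = probs.cumsum(axis=1)
    u = rng.random((len(c), 1))
    return (u < c).argmax(axis=1)

def _score_one(args):
    probs, seed = args
    rng = np.random.default_rng(seed)
    y = _sample_labels(probs, rng)
    # Placeholder statistic; the real code would compute an ECE here.
    return (y == probs.argmax(axis=1)).mean()

def score_sampling_sketch(probs, samples=10000, processes=None):
    # processes=None lets Pool use every available CPU. On platforms that
    # spawn workers (Windows, macOS), run this under
    # `if __name__ == "__main__":`.
    args = [(np.asarray(probs), s) for s in range(samples)]
    with Pool(processes) as pool:
        return np.array(pool.map(_score_one, args))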
pycalib/models/__init__.py

+1 −2

@@ -3,5 +3,4 @@
     LogisticCalibration,
     SigmoidCalibration,
     BinningCalibration,
-    CalibratedModel,
-    CalibratedClassifierCV)
+    CalibratedModel)
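
Note that CalibratedClassifierCV is no longer re-exported from pycalib.models, so downstream code that imported it from there needs to switch to the class's original location. Assuming the re-export was a pass-through of scikit-learn's class, the fix would look like:

# Before (no longer works after this commit):
# from pycalib.models import CalibratedClassifierCV

# After -- import the class directly (assumption: the removed re-export
# pointed at scikit-learn's CalibratedClassifierCV):
from sklearn.calibration import CalibratedClassifierCV
from pycalib.models import CalibratedModel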
