From 1987d0d405c40665e888e8e45eb2a23ee10dc7dc Mon Sep 17 00:00:00 2001 From: OmG Date: Fri, 17 Oct 2025 16:06:08 +0200 Subject: [PATCH 01/23] feat: Implement VannAbers calibration method for both binary and multi-class classification (based on ip200/venn-abers github repository implementation) --- AUTHORS.rst | 1 + HISTORY.rst | 1 + mapie/_venn_abers.py | 935 ++++++ mapie/calibration.py | 570 +++- mapie/tests/test_venn_abers_calibration.py | 2968 ++++++++++++++++++++ mapie/utils.py | 1 - 6 files changed, 4474 insertions(+), 2 deletions(-) create mode 100644 mapie/_venn_abers.py create mode 100644 mapie/tests/test_venn_abers_calibration.py diff --git a/AUTHORS.rst b/AUTHORS.rst index 5ba2de610..5e8aecec7 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -52,4 +52,5 @@ Contributors * Faustin Pulvéric * Chaoqi Zhang * Leena Kamran Qidwai +* Omid Gheibi To be continued ... diff --git a/HISTORY.rst b/HISTORY.rst index 1d7033025..ae9df74bb 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -4,6 +4,7 @@ History 1.x.x (2025-xx-xx) ------------------ +* Introduce VennAbers calibrator both for binary and multiclass classification 1.1.0 (2025-09-22) ------------------ diff --git a/mapie/_venn_abers.py b/mapie/_venn_abers.py new file mode 100644 index 000000000..df78196b6 --- /dev/null +++ b/mapie/_venn_abers.py @@ -0,0 +1,935 @@ +import numpy as np +import sklearn +from sklearn.model_selection import StratifiedKFold, train_test_split +from sklearn.multiclass import OneVsOneClassifier +from sklearn.utils.validation import check_is_fitted +from sklearn.exceptions import NotFittedError + +sklearn.set_config(enable_metadata_routing=True) +np.seterr(divide='ignore', invalid='ignore') + +""" +Private module containing core Venn-ABERS implementation classes. + +This module contains the internal implementation details for Venn-ABERS +calibration. Users should use VennAbersCalibrator from mapie.calibration instead. +""" + + +def _geo_mean(a): + """Geometric mean calculation for Venn-ABERS.""" + return a.prod(axis=1)**(1.0/a.shape[1]) + + +def calc_p0p1(p_cal, y_cal, precision=None): + """ + Function that calculates isotonic calibration vectors + required for Venn-ABERS calibration + + This function relies on the geometric representation of isotonic + regression as the slope of the GCM (greatest convex minorant) of the CSD + (cumulative sum diagram) as decribed in [1] pages 9–13 (especially Theorem 1.1). + In particular, the function implements + algorithms 1-4 as described in Chapter 2 in [2] + + + References + ---------- + [1] Richard E. Barlow, D. J. Bartholomew, J. M. Bremner, and H. Daniel + Brunk. Statistical Inference under Order Restrictions: The Theory and + Application of Isotonic Regression. Wiley, London, 1972. + + [2] Vovk, Vladimir, Ivan Petej, and Valentina Fedorova. + "Large-scale probabilistic predictors with and without guarantees of validity." + Advances in Neural Information Processing Systems 28 (2015). + (arxiv version https://arxiv.org/pdf/1511.00213.pdf) + + + Parameters + ---------- + p_cal : {array-like}, shape (n_samples,) + Input data for calibration consisting of calibration set probabilities + + y_cal : {array-like}, shape (n_samples,) + Associated binary class labels. + + precision: int, default = None + Optional number of decimal points to which + Venn-Abers calibration probabilities p_cal are rounded to. + Yields significantly faster computation time for larger calibration datasets. + If None no rounding is applied. + + + Returns + ---------- + p_0 : {array-like}, shape (n_samples, ) + Precomputed vector storing values of the isotonic regression + fitted to a sequence that contains binary class label 0 + + p_1 : {array-like}, shape (n_samples, ) + Precomputed vector storing values of the isotonic regression + fitted to a sequence + that contains binary class label 1 + + c : {array-like}, shape (n_samples, ) + Ordered set of unique calibration probabilities + """ + if precision is not None: + cal = np.hstack((np.round(p_cal[:, 1], precision). + reshape(-1, 1), y_cal.reshape(-1, 1))) + else: + cal = np.hstack((p_cal[:, 1].reshape(-1, 1), y_cal.reshape(-1, 1))) + ix = np.argsort(cal[:, 0]) + k_sort = cal[ix, 0] + k_label_sort = cal[ix, 1] + + c = np.unique(k_sort) + ia = np.searchsorted(k_sort, c) + + w = np.zeros(len(c)) + + w[:-1] = np.diff(ia) + w[-1] = len(k_sort) - ia[-1] + + k_dash = len(c) + P = np.zeros((k_dash + 2, 2)) + + P[0, :] = -1 + + P[2:, 0] = np.cumsum(w) + P[2:-1, 1] = np.cumsum(k_label_sort)[(ia - 1)[1:]] + P[-1, 1] = np.cumsum(k_label_sort)[-1] + + p1 = np.zeros((len(c) + 1, 2)) + p1[1:, 0] = c + + P1 = P[1:] + 1 + + for i in range(len(p1)): + + P1[i, :] = P1[i, :] - 1 + + if i == 0: + grads = np.divide(P1[:, 1], P1[:, 0]) + grad = np.nanmin(grads) + p1[i, 1] = grad + c_point = 0 + else: + imp_point = P1[c_point, 1] + (P1[i, 0] - P1[c_point, 0]) * grad + + if P1[i, 1] < imp_point: + grads = np.divide((P1[i:, 1] - P1[i, 1]), (P1[i:, 0] - P1[i, 0])) + if np.sum(np.isnan(np.nanmin(grads))) == 0: + grad = np.nanmin(grads) + c_point = i + p1[i, 1] = grad + else: + p1[i, 1] = grad + + p0 = np.zeros((len(c) + 1, 2)) + p0[1:, 0] = c + + P0 = P[1:] + + for i in range(len(p1) - 1, -1, -1): + P0[i, 0] = P0[i, 0] + 1 + + if i == len(p1) - 1: + grads = np.divide((P0[:, 1] - P0[i, 1]), (P0[:, 0] - P0[i, 0])) + grad = np.nanmax(grads) + p0[i, 1] = grad + c_point = i + else: + imp_point = P0[c_point, 1] + (P0[i, 0] - P0[c_point, 0]) * grad + + if P0[i, 1] < imp_point: + grads = np.divide((P0[:, 1] - P0[i, 1]), (P0[:, 0] - P0[i, 0])) + grads[i:] = 0 + grad = np.nanmax(grads) + c_point = i + p0[i, 1] = grad + else: + p0[i, 1] = grad + return p0, p1, c + + +def calc_probs(p0, p1, c, p_test): + """ + Function that calculates Venn-Abers multiprobability outputs and + associated calibrated probabilities + + In particular, the function implements algorithms 5-6 + as described in Chapter 2 in [1] + + References + ---------- + [1] Vovk, Vladimir, Ivan Petej, and Valentina Fedorova. + "Large-scale probabilistic predictors with and without guarantees of validity." + Advances in Neural Information Processing Systems 28 (2015). + (arxiv version https://arxiv.org/pdf/1511.00213.pdf) + + + Parameters + ---------- + p0 : {array-like}, shape (n_samples, ) + Precomputed vector storing values of the isotonic regression + fitted to a sequence that contains binary class label 0 + + p1 : {array-like}, shape (n_samples, ) + Precomputed vector storing values of the isotonic regression + fitted to a sequence that contains binary class label 1 + + c : {array-like}, shape (n_samples, ) + Ordered set of unique calibration probabilities + + p_test : {array-like}, shape (n_samples, 2) + An array of probability outputs which are to be calibrated + + + Returns + ---------- + p_prime : {array-like}, shape (n_samples, 2) + Calibrated probability outputs + + p0_p1 : {array-like}, shape (n_samples, 2) + Associated multiprobability outputs + (as described in Section 4 in https://arxiv.org/pdf/1511.00213.pdf) + """ + out = p_test[:, 1] + p0_p1 = np.hstack( + (p0[np.searchsorted(c, out, 'right'), 1]. + reshape(-1, 1), p1[np.searchsorted(c, out, 'left'), 1].reshape(-1, 1))) + + p_prime = np.zeros((len(out), 2)) + p_prime[:, 1] = p0_p1[:, 1] / (1 - p0_p1[:, 0] + p0_p1[:, 1]) + p_prime[:, 0] = 1 - p_prime[:, 1] + + return p_prime, p0_p1 + + +def predict_proba_prefitted_va(p_cal, y_cal, p_test, + precision=None, va_tpe='one_vs_one'): + """ + Generate Venn-ABERS calibrated probabilities + for multiclass problems using pre-fitted calibration data. + + This function performs Venn-ABERS calibration on multiclass problems + by decomposing them into binary classification problems + using either one-vs-one or one-vs-all strategies. + It uses pre-computed calibration probabilities and + labels to calibrate test probabilities. + + Parameters + ---------- + p_cal : array-like of shape (n_cal_samples, n_classes) + Calibration set probabilities for each class. + These are the predicted probabilities + from the base classifier on the calibration set. + + y_cal : array-like of shape (n_cal_samples,) + True class labels for the calibration set. + Should contain integer class labels. + + p_test : array-like of shape (n_test_samples, n_classes) + Test set probabilities for each class that need to be calibrated. + These are the predicted probabilities from the base classifier + on the test set. + + precision : int, optional, default=None + Number of decimal places to round calibration probabilities + to for faster computation. + If None, no rounding is applied. + Useful for large calibration datasets to improve + computational efficiency. + + va_tpe : {'one_vs_one', 'one_vs_all'}, default='one_vs_one' + Strategy for decomposing multiclass problem into binary problems: + - 'one_vs_one': Creates binary classifiers for each pair of classes + - 'one_vs_all': Creates binary classifiers for each class vs all others + + Returns + ------- + p_prime : ndarray of shape (n_test_samples, n_classes) + Venn-ABERS calibrated probabilities for each class. + Probabilities are normalized + to sum to 1 across classes for each sample. + + multiclass_p0p1 : list of ndarray + List containing the multiprobability outputs (p0, p1) + for each binary problem. + The structure depends on the decomposition strategy: + - For 'one_vs_one': List of length C(n_classes, 2) + containing arrays of shape (n_test_samples, 2) for each class pair + - For 'one_vs_all': List of length n_classes containing arrays of shape + (n_test_samples, 2) for each class vs rest problem + + Notes + ----- + This function implements the Venn-ABERS calibration method as described in [1]_. + The multiclass extension uses the approach described in [2]_ for combining + binary calibrators. + + For 'one_vs_one' strategy, the final probabilities are computed using the + pairwise coupling method where each class probability is the harmonic mean + of its pairwise probabilities. + + References + ---------- + .. [1] Vovk, Vladimir, Ivan Petej, and Valentina Fedorova. "Large-scale + probabilistic predictors with and without guarantees of validity." + Advances in Neural Information Processing Systems 28 (2015). + + .. [2] Hastie, Trevor, and Robert Tibshirani. "Classification by pairwise + coupling." Advances in Neural Information Processing Systems 10 (1997). + + Examples + -------- + >>> import numpy as np + >>> # Calibration data + >>> p_cal = np.array([[0.7, 0.2, 0.1], [0.3, 0.6, 0.1], [0.1, 0.1, 0.8]]) + >>> y_cal = np.array([0, 1, 2]) + >>> # Test data + >>> p_test = np.array([[0.6, 0.3, 0.1], [0.2, 0.7, 0.1]]) + >>> + >>> p_calibrated, p0p1 = predict_proba_prefitted_va(p_cal, y_cal, p_test) + >>> print(p_calibrated.shape) + (2, 3) + """ + # Validate va_tpe parameter + if va_tpe not in ['one_vs_one', 'one_vs_all']: + raise ValueError( + f"Invalid va_tpe '{va_tpe}'. " + f"Allowed values are ['one_vs_one', 'one_vs_all']." + ) + + p_prime = None + multiclass_p0p1 = None + + if va_tpe == 'one_vs_one': + classes = np.unique(y_cal) + class_pairs = [] + for i in range(len(classes) - 1): + for j in range(i + 1, len(classes)): + class_pairs.append([classes[i], classes[j]]) + + multiclass_probs = [] + multiclass_p0p1 = [] + for i, class_pair in enumerate(class_pairs): + pairwise_indices = (y_cal == class_pair[0]) + (y_cal == class_pair[1]) + binary_cal_probs = p_cal[:, class_pair][pairwise_indices] / \ + np.sum(p_cal[:, class_pair][pairwise_indices], axis=1).reshape(-1, 1) + binary_test_probs = p_test[:, class_pair] / \ + np.sum(p_test[:, class_pair], axis=1).reshape(-1, 1) + binary_classes = y_cal[pairwise_indices] == class_pair[1] + + va = VennAbers() + va.fit(binary_cal_probs, binary_classes, precision=precision) + p_pr, p0_p1 = va.predict_proba(binary_test_probs) + multiclass_probs.append(p_pr) + multiclass_p0p1.append(p0_p1) + + p_prime = np.zeros((len(p_test), len(classes))) + + for i, cl_id, in enumerate(classes): + stack_i = [ + p[:, 0].reshape(-1, 1) + for i, p in enumerate(multiclass_probs) + if class_pairs[i][0] == cl_id] + stack_j = [ + p[:, 1].reshape(-1, 1) + for i, p in enumerate(multiclass_probs) + if class_pairs[i][1] == cl_id] + p_stack = stack_i + stack_j + + p_prime[:, i] = 1 / \ + (np.sum(np.hstack([(1 / p) for p in p_stack]), axis=1) - + (len(classes) - 2)) + + else: + classes = np.unique(y_cal) + + multiclass_probs = [] + multiclass_p0p1 = [] + for _, class_id in enumerate(classes): + class_indices = (y_cal == class_id) + binary_cal_probs = np.zeros((len(p_cal), 2)) + binary_test_probs = np.zeros((len(p_test), 2)) + binary_cal_probs[:, 1] = p_cal[:, class_id] + binary_cal_probs[:, 0] = 1 - binary_cal_probs[:, 1] + binary_test_probs[:, 1] = p_test[:, class_id] + binary_test_probs[:, 0] = 1 - binary_test_probs[:, 1] + binary_classes = class_indices + + va = VennAbers() + va.fit(binary_cal_probs, binary_classes, precision=precision) + p_pr, p0_p1 = va.predict_proba(binary_test_probs) + multiclass_probs.append(p_pr) + multiclass_p0p1.append(p0_p1) + + p_prime = np.zeros((len(p_test), len(classes))) + + for i, _ in enumerate(classes): + p_prime[:, i] = multiclass_probs[i][:, 1] + + p_prime = p_prime / np.sum(p_prime, axis=1).reshape(-1, 1) + + return p_prime, multiclass_p0p1 + + +class VennAbers: + """ + Implementation of the Venn-ABERS calibration for binary classification problems. + Venn-ABERS calibration is a method of turning machine learning + classification algorithms into probabilistic predictors that + automatically enjoys a property of validity (perfect calibration) and + is computationally efficient. + The algorithm is described in [1]. + + + References + ---------- + [1] Vovk, Vladimir, Ivan Petej, and Valentina Fedorova. + "Large-scale probabilistic predictors with and without guarantees of validity." + Advances in Neural Information Processing Systems 28 (2015). + (arxiv version https://arxiv.org/pdf/1511.00213.pdf) + + .. versionadded:: 1.0 + + + Examples + -------- + >>> import numpy as np + >>> from sklearn.datasets import make_classification + >>> from sklearn.model_selection import train_test_split + >>> from sklearn.naive_bayes import GaussianNB + >>> from mapie._venn_abers import VennAbers + >>> + >>> # Generate data and split into train/test + >>> X, y = make_classification(n_samples=1000, n_classes=2, n_informative=10) + >>> X_train, X_test, y_train, y_test = train_test_split(X, y) + >>> + >>> # Further split training data into proper training and calibration sets + >>> X_train_proper, X_cal, y_train_proper, y_cal = train_test_split( + ... X_train, y_train, test_size=0.2, shuffle=False + ... ) + >>> + >>> # Train classifier on proper training set + >>> clf = GaussianNB() + >>> _ = clf.fit(X_train_proper, y_train_proper) + >>> + >>> # Get probability predictions for calibration and test sets + >>> p_cal = clf.predict_proba(X_cal) + >>> p_test = clf.predict_proba(X_test) + >>> + >>> # Apply Venn-ABERS calibration + >>> va = VennAbers() + >>> va.fit(p_cal, y_cal) + >>> p_prime, p0_p1 = va.predict_proba(p_test) + >>> + >>> # p_prime contains calibrated probabilities + >>> print(p_prime.shape) + (250, 2) + """ + + def __init__(self): + self.p0 = None + self.p1 = None + self.c = None + + def fit(self, p_cal, y_cal, precision=None): + """Fits the VennAbers calibrator to the calibration dataset + + Parameters + ---------- + p_cal : {array-like}, shape (n_samples,) + Input data for calibration consisting of calibration set probabilities + + y_cal : {array-like}, shape (n_samples,) + Associated binary class labels. + + precision: int, default = None + Optional number of decimal points to which Venn-Abers calibration + probabilities p_cal are rounded to. + Yields significantly faster computation time for larger calibration datasets + """ + self.p0, self.p1, self.c = calc_p0p1(p_cal, y_cal, precision) + + def predict_proba(self, p_test): + """Generates Venn-Abers probability estimates + + + Parameters + ---------- + p_test : {array-like}, shape (n_samples, 2) + An array of probability outputs which are to be calibrated + + + Returns + ---------- + p_prime : {array-like}, shape (n_samples, 2) + Calibrated probability outputs + + p0_p1 : {array-like}, shape (n_samples, 2) + Associated multiprobability outputs + (as described in Section 4 in https://arxiv.org/pdf/1511.00213.pdf) + """ + p_prime, p0_p1 = calc_probs(self.p0, self.p1, self.c, p_test) + return p_prime, p0_p1 + + +class VennAbersCV: + """ + Inductive (IVAP) or Cross (CVAP) Venn-ABERS prediction method + for binary classification problems + + Implements the Inductive or Cross Venn-Abers calibration method + as described in Sections 2-4 in [1] + + References + ---------- + [1] Vovk, Vladimir, Ivan Petej, and Valentina Fedorova. + "Large-scale probabilistic predictors with and without guarantees of validity." + Advances in Neural Information Processing Systems 28 (2015). + (arxiv version https://arxiv.org/pdf/1511.00213.pdf) + + Parameters + ---------- + + estimator : sci-kit learn estimator instance, default=None + The classifier whose output need to be calibrated to provide more + accurate `predict_proba` outputs. + + inductive : bool + True to run the Inductive (IVAP) or False for Cross (CVAP) + Venn-ABERS calibtration + + n_splits: int, default=5 + For CVAP only, number of folds. Must be at least 2. + Uses sklearn.model_selection.StratifiedKFold functionality + (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html). + + cal_size : float or int, default=None + For IVAP only, uses sklearn.model_selection.train_test_split functionality + (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html). + If float, should be between 0.0 and 1.0 and represent the proportion + of the dataset to include in the proper training / calibration split. + If int, represents the absolute number of test samples. If None, the + value is set to the complement of the train size. If ``train_size`` + is also None, it will be set to 0.25. + + train_proper_size : float or int, default=None + For IVAP only, if float, should be between 0.0 and 1.0 and represent the + proportion of the dataset to include in the poroper training set split. If + int, represents the absolute number of train samples. If None, + the value is automatically set to the complement of the test size. + + random_state : int, RandomState instance or None, default=None + Controls the shuffling applied to the data before applying the split. + Pass an int for reproducible output across multiple function calls. + + shuffle : bool, default=True + Whether to shuffle the data before splitting. For IVAP if shuffle=False + then stratify must be None. For CVAP whether to shuffle each class's samples + before splitting into batches + + stratify : array-like, default=None + For IVAP only. If not None, data is split in a stratified fashion, using this as + the class labels. + + precision: int, default = None + Optional number of decimal points to which Venn-Abers calibration + probabilities p_cal are rounded to. + Yields significantly faster computation time for larger calibration datasets + """ + def __init__(self, + estimator, + inductive, + n_splits=None, + cal_size=None, + train_proper_size=None, + random_state=None, + shuffle=True, + stratify=None, + precision=None): + self.estimator = estimator + self.n_splits = n_splits + self.clf_p_cal = [] + self.clf_y_cal = [] + self.inductive = inductive + self.cal_size = cal_size + self.train_proper_size = train_proper_size + self.random_state = random_state + self.shuffle = shuffle + self.stratify = stratify + self.precision = precision + + def fit(self, _x_train, _y_train, sample_weight=None): + """ Fits the IVAP or CVAP calibrator to the training set. + + Parameters + ---------- + _x_train : {array-like}, shape (n_samples,) + Input data for calibration consisting of training set numerical features + + _y_train : {array-like}, shape (n_samples,) + Associated binary class labels. + + sample_weight : {array-like}, shape (n_samples,), optional + Sample weights for fitting the estimators. + If None, then samples are equally weighted. + """ + if self.inductive: + self.n_splits = 1 + + # Split sample_weight along with data if provided + if sample_weight is not None: + x_train_proper, x_cal, y_train_proper, \ + y_cal, sw_train, sw_cal = train_test_split( + _x_train, + _y_train, + sample_weight, + test_size=self.cal_size, + train_size=self.train_proper_size, + random_state=self.random_state, + shuffle=self.shuffle, + stratify=self.stratify) + else: + x_train_proper, x_cal, y_train_proper, y_cal = train_test_split( + _x_train, + _y_train, + test_size=self.cal_size, + train_size=self.train_proper_size, + random_state=self.random_state, + shuffle=self.shuffle, + stratify=self.stratify + ) + sw_train = None + + # Fit estimator with sample weights if provided + if sw_train is not None: + self.estimator.fit(x_train_proper, y_train_proper.flatten(), + sample_weight=sw_train) + else: + self.estimator.fit(x_train_proper, y_train_proper.flatten()) + + clf_prob = self.estimator.predict_proba(x_cal) + self.clf_p_cal.append(clf_prob) + self.clf_y_cal.append(y_cal) + else: + kf = StratifiedKFold(n_splits=self.n_splits, + shuffle=self.shuffle, + random_state=self.random_state) + for train_index, test_index in kf.split(_x_train, _y_train): + # Extract sample weights for this fold if provided + fold_sample_weight = None + if sample_weight is not None: + fold_sample_weight = sample_weight[train_index] + + # Fit estimator with sample weights if provided + if fold_sample_weight is not None: + self.estimator.fit(_x_train[train_index], + _y_train[train_index].flatten(), + sample_weight=fold_sample_weight) + else: + self.estimator.fit(_x_train[train_index], + _y_train[train_index].flatten()) + + clf_prob = self.estimator.predict_proba(_x_train[test_index]) + self.clf_p_cal.append(clf_prob) + self.clf_y_cal.append(_y_train[test_index]) + + def predict_proba(self, _x_test, loss='log', p0_p1_output=False): + """ Generates Venn-ABERS calibrated probabilities. + + + Parameters + ---------- + _x_test : {array-like}, shape (n_samples,) + Training set numerical features + + loss : str, default='log' + Log or Brier loss. For further details of calculation + see Section 4 in https://arxiv.org/pdf/1511.00213.pdf + + p0_p1_output: bool, default = False + If True, function also returns p0_p1 binary probabilistic outputs + + Returns + ---------- + p_prime: {array-like}, shape (n_samples,n_classses) + Venn-ABERS calibrated probabilities + + p0_p1: {array-like}, default = None + Venn-ABERS calibrated p0 and p1 outputs (if p0_p1_output = True) + """ + + p0p1_test = [] + clf_prob_test = self.estimator.predict_proba(_x_test) + for i in range(self.n_splits): + va = VennAbers() + va.fit(p_cal=self.clf_p_cal[i], + y_cal=self.clf_y_cal[i], + precision=self.precision) + _, probs = va.predict_proba(p_test=clf_prob_test) + p0p1_test.append(probs) + p0_stack = np.hstack([prob[:, 0].reshape(-1, 1) for prob in p0p1_test]) + p1_stack = np.hstack([prob[:, 1].reshape(-1, 1) for prob in p0p1_test]) + + p_prime = np.zeros((len(_x_test), 2)) + + if loss == 'log': + p_prime[:, 1] = _geo_mean(p1_stack) / \ + (_geo_mean(1-p0_stack) + _geo_mean(p1_stack)) + p_prime[:, 0] = 1 - p_prime[:, 1] + else: + p_prime[:, 1] = 1 / self.n_splits * ( + np.sum(p1_stack, axis=1) + + 0.5 * np.sum(p0_stack**2, axis=1) - + 0.5 * np.sum(p1_stack**2, axis=1)) + p_prime[:, 0] = 1 - p_prime[:, 1] + + if p0_p1_output: + p0_p1 = np.hstack((p0_stack, p1_stack)) + return p_prime, p0_p1 + else: + return p_prime + + +class VennAbersMultiClass: + """ + Inductive (IVAP) or Cross (CVAP) Venn-ABERS prediction method + for multi-class classification problems + + Implements the Inductive or Cross Venn-Abers calibration method + as described in [1] + + References + ---------- + [1] Manokhin, Valery. "Multi-class probabilistic classification using + inductive and cross Venn–Abers predictors." In Conformal and Probabilistic + Prediction and Applications, pp. 228-240. PMLR, 2017. + + Parameters + __________ + + estimator : sci-kit learn estimator instance + The classifier whose output need to be calibrated to provide more + accurate `predict_proba` outputs. + + inductive : bool + True to run the Inductive (IVAP) or False for Cross (CVAP) + Venn-ABERS calibtration + + n_splits: int, default=5 + For CVAP only, number of folds. Must be at least 2. + Uses sklearn.model_selection.StratifiedKFold functionality + (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html). + + cal_size : float or int, default=None + For IVAP only, uses sklearn.model_selection.train_test_split functionality + (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html). + If float, should be between 0.0 and 1.0 and represent the proportion + of the dataset to include in the proper training / calibration split. + If int, represents the absolute number of test samples. If None, the + value is set to the complement of the train size. If ``train_size`` + is also None, it will be set to 0.25. + + train_size : float or int, default=None + For IVAP only, if float, should be between 0.0 and 1.0 and represent the + proportion of the dataset to include in the poroper training set split. If + int, represents the absolute number of train samples. If None, + the value is automatically set to the complement of the test size. + + random_state : int, RandomState instance or None, default=None + Controls the shuffling applied to the data before applying the split. + Pass an int for reproducible output across multiple function calls. + + shuffle : bool, default=True + Whether to shuffle the data before splitting. For IVAP if shuffle=False + then stratify must be None. + For CVAP whether to shuffle each class's samples + before splitting into batches + + stratify : array-like, default=None + For IVAP only. If not None, data is split in a stratified fashion, + using this as the class labels. + + precision: int, default = None + Optional number of decimal points to which Venn-Abers calibration + probabilities p_cal are rounded to. + Yields significantly faster computation time for larger calibration datasets + """ + def __init__(self, + estimator, + inductive, + n_splits=None, + cal_size=None, + train_proper_size=None, + random_state=None, + shuffle=True, + stratify=None, + precision=None + ): + self.estimator = estimator + self.inductive = inductive + self.n_splits = n_splits + self.cal_size = cal_size + self.train_proper_size = train_proper_size + self.random_state = random_state + self.shuffle = shuffle + self.stratify = stratify + self.multi_class_model = [] + self.n_classes = None + self.classes = None + self.pairwise_id = [] + self.clf_ovo = None + self.multiclass_cal = [] + self.multiclass_va_estimators = [] + self.multiclass_probs = [] + self.multiclass_p0p1 = [] + self.precision = precision + + def fit(self, _x_train, _y_train, sample_weight=None): + """ + Fits the Venn-ABERS calibrator to the training set + + Parameters + ---------- + _x_train : {array-like}, shape (n_samples,) + Input data for calibration consisting of training set numerical features + + _y_train : {array-like}, shape (n_samples,) + Associated binary class labels. + + sample_weight : {array-like}, shape (n_samples,), optional + Sample weights for fitting the estimators. + If None, then samples are equally weighted. + """ + + # integrity checks + if not self.inductive and self.n_splits is None: + raise Exception("For Cross Venn ABERS please provide n_splits") + try: + check_is_fitted(self.estimator) + except NotFittedError: + if (self.inductive and self.cal_size is None) and\ + (self.train_proper_size is None): + raise Exception( + "For Inductive Venn-ABERS please provide either calibration" + "or proper train set size") + + self.classes = np.unique(_y_train) + self.n_classes = len(self.classes) + + for i in range(self.n_classes): + for j in range(i + 1, self.n_classes): + self.pairwise_id.append([self.classes[i], self.classes[j]]) + + # Fit the OneVsOne classifier with sample weights if provided + fit_params = {} + if sample_weight is not None: + fit_params["sample_weight"] = sample_weight + + # Clone the estimator to avoid modifying the original + # estimator_clone = clone(self.estimator) + + # OneVsOneClassifier will handle the estimator's preprocessing + # (e.g., if it's a pipeline, it will apply transformations internally) + self.clf_ovo = OneVsOneClassifier(self.estimator) + self.clf_ovo.fit( + _x_train, _y_train, **fit_params + ) + + for pair_id, clf_ovo_estimator in enumerate(self.clf_ovo.estimators_): + _pairwise_indices = (_y_train == self.pairwise_id[pair_id][0]) +\ + (_y_train == self.pairwise_id[pair_id][1]) + + # Extract sample weights for this pair if provided + pair_sample_weight = None + if sample_weight is not None: + pair_sample_weight = sample_weight[_pairwise_indices] + + # pair_estimator = clone(self.estimator) + + va_cv = VennAbersCV( + self.estimator, + inductive=self.inductive, + n_splits=self.n_splits, + cal_size=self.cal_size, + train_proper_size=self.train_proper_size, + random_state=self.random_state, + shuffle=self.shuffle, + stratify=self.stratify, + precision=self.precision + ) + va_cv.fit( + _x_train[_pairwise_indices], + np.array(_y_train[_pairwise_indices] == self.pairwise_id[pair_id][1]) + .reshape(-1, 1), + sample_weight=pair_sample_weight + ) + self.multiclass_va_estimators.append(va_cv) + + def predict_proba(self, _x_test, loss='log', p0_p1_output=False): + """ + Generates Venn-ABERS calibrated probabilities. + + Parameters + ---------- + _x_test : {array-like}, shape (n_samples,) + Training set numerical features + + loss : str, default='log' + Log or Brier loss. For further details of calculation + see Section 4 in https://arxiv.org/pdf/1511.00213.pdf + + p0_p1_output: bool, default = False + If True, function also returns a set p0_p1 binary probabilistic outputs + for each fold + + Returns + ---------- + p_prime: {array-like}, shape (n_samples,n_classses) + Venn-ABERS calibrated probabilities + + p0_p1: {array-like}, default = None + Venn-ABERS calibrated p0 and p1 outputs (if p0_p1_output = True) + """ + + self.multiclass_probs = [] + self.multiclass_p0p1 = [] + + if p0_p1_output: + for i, va_estimator in enumerate(self.multiclass_va_estimators): + _p_prime, _p0_p1 = va_estimator.predict_proba(_x_test, + loss=loss, + p0_p1_output=True) + self.multiclass_probs.append(_p_prime) + self.multiclass_p0p1.append(_p0_p1) + else: + for i, va_estimator in enumerate(self.multiclass_va_estimators): + _p_prime = va_estimator.predict_proba(_x_test, loss=loss) + self.multiclass_probs.append(_p_prime) + + p_prime = np.zeros((len(_x_test), self.n_classes)) + + for i, cl_id, in enumerate(self.classes): + stack_i = [ + p[:, 0].reshape(-1, 1) + for i, p in enumerate(self.multiclass_probs) + if self.pairwise_id[i][0] == cl_id] + stack_j = [ + p[:, 1].reshape(-1, 1) + for i, p in enumerate(self.multiclass_probs) + if self.pairwise_id[i][1] == cl_id] + p_stack = stack_i + stack_j + + p_prime[:, i] = 1/(np.sum(np.hstack([(1/p) for p in p_stack]), axis=1) - + (self.n_classes - 2)) + + p_prime = p_prime/np.sum(p_prime, axis=1).reshape(-1, 1) + + if p0_p1_output: + return p_prime, self.multiclass_p0p1 + else: + return p_prime diff --git a/mapie/calibration.py b/mapie/calibration.py index fb2f8ed5a..6513bc74f 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -1,12 +1,15 @@ + from __future__ import annotations import warnings from typing import Dict, Optional, Tuple, Union, cast - +from inspect import signature import numpy as np from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin, clone from sklearn.calibration import _SigmoidCalibration from sklearn.isotonic import IsotonicRegression +from sklearn.exceptions import NotFittedError +from sklearn.pipeline import Pipeline from sklearn.utils import check_random_state from sklearn.utils.multiclass import type_of_target from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted, @@ -17,6 +20,9 @@ _check_estimator_fit_predict, _check_n_features_in, _check_null_weight, _fit_estimator, _get_calib_set) +from ._venn_abers import (predict_proba_prefitted_va, + VennAbers, VennAbersMultiClass) + class TopLabelCalibrator(BaseEstimator, ClassifierMixin): """ @@ -547,3 +553,565 @@ def predict( """ check_is_fitted(self, self.fit_attributes) return self.single_estimator_.predict(X) + + +class VennAbersCalibrator(BaseEstimator, ClassifierMixin): + """ + Venn-ABERS calibration for binary and multi-class problems. + + A class implementing binary [1] or multi-class [2] Venn-ABERS calibration. + This calibrator provides well-calibrated probabilities with validity guarantees. + The implementation is based on the reference implementation by the user ip200 [3]. + + Can be used in 3 different forms: + - Prefit Venn-ABERS: estimator is already fitted, only calibration is performed + - Inductive Venn-ABERS (IVAP): splits data into training and calibration sets + - Cross Venn-ABERS (CVAP): uses cross-validation for calibration + + Parameters + ---------- + estimator : ClassifierMixin + The classifier whose output needs to be calibrated to provide more + accurate `predict_proba` outputs. Must be a scikit-learn compatible + classifier with `fit` and `predict_proba` methods. + + cv : Optional[str], default=None + The cross-validation strategy: + + - ``"prefit"``: Assumes that ``estimator`` has been fitted already. + All data provided in ``fit`` are used for calibration only. + - ``None``: Uses inductive or cross validation based on the + ``inductive`` parameter. + + inductive : bool, default=True + Determines the calibration strategy when ``cv=None``: + + - ``True``: Inductive Venn-ABERS (IVAP) - splits data into proper + training and calibration sets. + - ``False``: Cross Venn-ABERS (CVAP) - uses k-fold cross-validation. + + n_splits : Optional[int], default=None + Number of folds for Cross Venn-ABERS (CVAP). Must be at least 2. + Only used when ``inductive=False`` and ``cv=None``. + Uses ``sklearn.model_selection.StratifiedKFold`` functionality. + + cal_size : Optional[float], default=None + Proportion of the dataset to use for calibration in Inductive + Venn-ABERS (IVAP). Only used when ``inductive=True`` and ``cv=None``. + + - If float, should be between 0.0 and 1.0. + - If int, represents the absolute number of calibration samples. + - If ``None``, uses the value provided in the ``fit`` method + (default 0.33). + + train_proper_size : Optional[float], default=None + Proportion of the dataset to use for proper training in Inductive + Venn-ABERS (IVAP). Only used when ``inductive=True`` and ``cv=None``. + + - If float, should be between 0.0 and 1.0. + - If int, represents the absolute number of training samples. + - If ``None``, automatically set to complement of ``cal_size``. + + random_state : Optional[int], default=None + Controls the shuffling applied to the data before splitting. + Pass an int for reproducible output across multiple function calls. + Can be overridden in the ``fit`` method. + + shuffle : bool, default=True + Whether to shuffle the data before splitting. + + - For IVAP: if ``shuffle=False``, then ``stratify`` must be ``None``. + - For CVAP: controls whether to shuffle each class's samples before + splitting into batches. + + Can be overridden in the ``fit`` method. + + stratify : Optional[ArrayLike], default=None + For Inductive Venn-ABERS (IVAP) only. If not ``None``, data is split + in a stratified fashion, using this as the class labels. + Can be overridden in the ``fit`` method. + + precision : Optional[int], default=None + Number of decimal points to round Venn-ABERS calibration probabilities. + Yields significantly faster computation for larger calibration datasets. + Trade-off between speed and precision. + + Attributes + ---------- + classes_ : NDArray + Array with the name of each class. + + n_classes_ : int + Number of classes in the training dataset. + + n_features_in_ : int + Number of features seen during fit. + + va_calibrator_ : Union[VennAbersMultiClass, VennAbers, None] + The fitted Venn-ABERS calibrator instance. + May be None in prefit mode with multi-class classification. + + transformers_ : Optional[Pipeline] + Trasnformers from sklearn pipeline to transform categorical attributes. + + single_estimator_ : Optional[ClassifierMixin] + The fitted estimator (only for prefit mode). + + p_cal_ : Optional[NDArray] + Calibration probabilities (only for prefit mode with multi-class). + + y_cal_ : Optional[NDArray] + Calibration labels (only for prefit mode with multi-class). + + References + ---------- + [1] Vovk, Vladimir, Ivan Petej, and Valentina Fedorova. + "Large-scale probabilistic predictors with and without guarantees + of validity." Advances in Neural Information Processing Systems 28 + (2015). https://arxiv.org/pdf/1511.00213.pdf + + [2] Manokhin, Valery. "Multi-class probabilistic classification using + inductive and cross Venn–Abers predictors." In Conformal and + Probabilistic Prediction and Applications, pp. 228-240. PMLR, 2017. + + [3] Reference implementation: + https://github.com/ip200/venn-abers/blob/main/src/venn_abers.py + + Examples + -------- + >>> import numpy as np + >>> from sklearn.datasets import make_classification + >>> from sklearn.model_selection import train_test_split + >>> from sklearn.naive_bayes import GaussianNB + >>> from mapie.calibration import VennAbersCalibrator + + **Example 1: Prefit mode** + + >>> X, y = make_classification(n_samples=1000, n_features=20, + ... n_classes=3, n_informative=10, + ... random_state=42) + >>> X_train, X_test, y_train, y_test = train_test_split( + ... X, y, test_size=0.2, random_state=42 + ... ) + >>> # Fit the base classifier + >>> clf = GaussianNB() + >>> _ = clf.fit(X_train, y_train) + >>> # Calibrate using prefit mode + >>> va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + >>> _ = va_cal.fit(X_test, y_test) # Use test set for calibration + >>> # Get calibrated probabilities + >>> calibrated_probs = va_cal.predict_proba(X_test) + + **Example 2: Inductive Venn-ABERS (IVAP)** + + >>> X, y = make_classification(n_samples=1000, n_features=20, + ... n_classes=2, random_state=42) + >>> X_train, X_test, y_train, y_test = train_test_split( + ... X, y, test_size=0.2, random_state=42 + ... ) + >>> # Inductive mode with 30% calibration split + >>> clf = GaussianNB() + >>> va_cal = VennAbersCalibrator( + ... estimator=clf, + ... inductive=True, + ... cal_size=0.3, + ... random_state=42 + ... ) + >>> _ = va_cal.fit(X_train, y_train) + >>> calibrated_probs = va_cal.predict_proba(X_test) + >>> predictions = va_cal.predict(X_test) + + **Example 3: Cross Venn-ABERS (CVAP)** + + >>> X, y = make_classification(n_samples=1000, n_features=20, + ... n_informative=10, n_classes=3, + ... random_state=42) + >>> X_train, X_test, y_train, y_test = train_test_split( + ... X, y, test_size=0.2, random_state=42 + ... ) + >>> # Cross validation mode with 5 folds + >>> clf = GaussianNB() + >>> va_cal = VennAbersCalibrator( + ... estimator=clf, + ... inductive=False, + ... n_splits=5, + ... random_state=42 + ... ) + >>> _ = va_cal.fit(X_train, y_train) + >>> calibrated_probs = va_cal.predict_proba(X_test) + >>> predictions = va_cal.predict(X_test) + + Notes + ----- + - Venn-ABERS calibration provides probabilistic predictions with + validity guarantees under the exchangeability assumption. + - For binary classification, the method produces well-calibrated + probabilities with minimal assumptions. + - For multi-class problems, the method uses a one-vs-one approach + to extend binary Venn-ABERS to multiple classes. + - The ``precision`` parameter can significantly speed up computation + for large datasets with minimal impact on calibration quality. + - When using ``cv="prefit"``, ensure the estimator is fitted on a + different dataset than the one used for calibration to avoid + overfitting. + + See Also + -------- + TopLabelCalibrator : Top-label calibration for multi-class problems. + sklearn.calibration.CalibratedClassifierCV : Scikit-learn's probability + calibration with isotonic regression or Platt scaling. + """ + + fit_attributes = [ + "va_calibrator_", "classes_", "n_classes_" + ] + + valid_cv = ["prefit", None] + + def __init__( + self, + estimator: Optional[ClassifierMixin] = None, + cv: Optional[str] = None, + inductive: bool = True, + n_splits: Optional[int] = None, + cal_size: Optional[float] = None, + train_proper_size: Optional[float] = None, + random_state: Optional[int] = None, + shuffle: bool = True, + stratify: Optional[ArrayLike] = None, + precision: Optional[int] = None + ) -> None: + self.estimator = estimator + self.cv = cv + self.inductive = inductive + self.n_splits = n_splits + self.cal_size = cal_size + self.train_proper_size = train_proper_size + self.random_state = random_state + self.shuffle = shuffle + self.stratify = stratify + self.precision = precision + + # Initialize attributes that will be set during fit + self.va_calibrator_: Optional[Union[VennAbersMultiClass, VennAbers]] = None + self.classes_: Optional[NDArray] = None + self.n_classes_: Optional[int] = None + self.transformers_: Optional[Pipeline] = None + self.single_estimator_: Optional[ClassifierMixin] = None + self.p_cal_: Optional[NDArray] = None + self.y_cal_: Optional[NDArray] = None + + def _check_cv(self, cv: Optional[str]) -> Optional[str]: + """ + Check if cross-validator is valid. + + Parameters + ---------- + cv : Optional[str] + Cross-validator to check. + + Returns + ------- + Optional[str] + 'prefit' or None. + + Raises + ------ + ValueError + If the cross-validator is not valid. + """ + if cv in self.valid_cv: + return cv + raise ValueError( + "Invalid cv argument. " + f"Allowed values are {self.valid_cv}." + ) + + def fit( + self, + X: ArrayLike, + y: ArrayLike, + sample_weight: Optional[NDArray] = None, + calib_size: Optional[float] = 0.33, + random_state: Optional[Union[int, np.random.RandomState, None]] = None, + shuffle: Optional[bool] = True, + stratify: Optional[ArrayLike] = None, + **fit_params + ) -> "VennAbersCalibrator": + """ + Fits the Venn-ABERS calibrator. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + Training data. + + y : ArrayLike of shape (n_samples,) + Training labels. + + sample_weight : Optional[NDArray] of shape (n_samples,) + Sample weights for fitting the out-of-fold models. + If ``None``, then samples are equally weighted. + Note that the sample weight defined are only for the training, not + for the calibration procedure. + By default ``None``. + + calib_size : Optional[float], default=0.33 + If ``cv == split`` and X_calib and y_calib are not defined, then + the calibration dataset is created with the split defined by + calib_size. For inductive Venn-ABERS, this determines the proportion + of data used for calibration. + + random_state : Optional[Union[int, np.random.RandomState, None]], default=None + Controls the shuffling applied to the data before applying the split. + Pass an int for reproducible output across multiple function calls. + + shuffle : Optional[bool], default=True + Whether to shuffle the data before splitting. If shuffle=False + then stratify must be None. + + stratify : Optional[ArrayLike], default=None + If not None, data is split in a stratified fashion, using this as + the class labels. + + **fit_params : dict + Additional parameters for the underlying estimator. + + Returns + ------- + VennAbersCalibrator + The fitted calibrator. + + Raises + ------ + ValueError + If required parameters are missing for the chosen mode. + """ + cv = self._check_cv(self.cv) + + # Check for manual mode (backward compatibility) + # If estimator is None, we expect this to be manual mode + if self.estimator is None: + raise ValueError( + "For VennAbersCalibrator, an estimator must be provided. " + "For manual calibration with pre-computed probabilities, " + "please use the VennAbers class directly from mapie._venn_abers" + ) + + # Validate inputs + X, y = indexable(X, y) + y = _check_y(y) + sample_weight, X, y = _check_null_weight(sample_weight, X, y) + # Handle categorical features + + from sklearn.pipeline import Pipeline + + last_estimator = self.estimator + X_processed = X + + if isinstance(last_estimator, Pipeline): + # Separate transformers and final estimator + transformers = self.estimator[:-1] # all steps except last + last_estimator = self.estimator[-1] # usually a classifier + + X_processed = transformers.fit_transform(X, y) + self.transformers_ = transformers + + # Set up classes + self.classes_ = np.unique(y) + self.n_classes_ = len(self.classes_) + + # Prefit mode: estimator is already fitted, only calibrate + if cv == "prefit": + try: + check_is_fitted(last_estimator) + except NotFittedError: + raise ValueError( + "For cv='prefit', the estimator must be already fitted" + ) + + # Set up classes from the fitted estimator + self.single_estimator_ = last_estimator + self.classes_ = self.single_estimator_.classes_ + self.n_classes_ = len(self.classes_) + + # Get predictions from the fitted estimator + p_cal_pred = self.single_estimator_.predict_proba(X_processed) + + # Fit Venn-ABERS calibrator on these predictions + if self.n_classes_ <= 2: + self.va_calibrator_ = VennAbers() + self.va_calibrator_.fit(p_cal_pred, y, self.precision) + else: + # For multi-class, store calibration data for later use + self.p_cal_ = np.asarray(p_cal_pred) + self.y_cal_ = np.asarray(y) + self.va_calibrator_ = None # Will be used in predict_proba + + return self + + # Standard inductive or cross validation mode + # Integrity checks + if not self.inductive and self.n_splits is None: + raise ValueError("For Cross Venn-ABERS please provide n_splits") + + # For inductive mode, use calib_size parameter + cal_size_to_use = self.cal_size if self.cal_size is not None else calib_size + + # Check random state + random_state_to_use: Optional[Union[int, np.random.RandomState]] = None + if random_state is not None: + random_state_to_use = random_state + else: + random_state_to_use = self.random_state + + # Initialize and fit the Venn-ABERS calibrator + self.va_calibrator_ = VennAbersMultiClass( + estimator=last_estimator, + inductive=self.inductive, + n_splits=self.n_splits, + cal_size=cal_size_to_use, + train_proper_size=self.train_proper_size, + random_state=random_state_to_use, + shuffle=shuffle if shuffle is not None else self.shuffle, + stratify=stratify if stratify is not None else self.stratify, + precision=self.precision + ) + + self.va_calibrator_.fit(X_processed, y, sample_weight=sample_weight) + + return self + + def predict_proba( + self, + X: ArrayLike, + loss="log" + ) -> NDArray: + """ + Prediction of the calibrated scores using fitted classifier and + Venn-ABERS calibrator. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + Test data. + + Returns + ------- + NDArray of shape (n_samples, n_classes) + Venn-ABERS calibrated probabilities. + """ + check_is_fitted(self, self.fit_attributes) + + cv = self._check_cv(self.cv) + + # Process test data + if (self.transformers_ is not None): + X_processed = self.transformers_.transform(X) + else: + X_processed = X + # Prefit mode: use fitted estimator to get probabilities, then calibrate + if cv == "prefit": + if self.single_estimator_ is None: + raise RuntimeError( + "single_estimator_ should not be None in prefit mode" + ) + + p_test_pred = self.single_estimator_.predict_proba(X_processed) + + # Type guard: ensure n_classes_ is not None after fit + if self.n_classes_ is None: + raise RuntimeError( + "n_classes_ should not be None after fitting" + ) + + if self.n_classes_ <= 2: + # Binary classification + if self.va_calibrator_ is None: + raise RuntimeError( + "va_calibrator_ should not be None for binary classification" + ) + p_prime, _ = self.va_calibrator_.predict_proba(p_test_pred) + else: + # Multi-class classification + p_prime, _ = predict_proba_prefitted_va( + self.p_cal_, + self.y_cal_, + p_test_pred, + precision=self.precision, + va_tpe='one_vs_one' + ) + + return p_prime + + # Standard inductive or cross validation mode + if self.va_calibrator_ is None: + raise RuntimeError( + "va_calibrator_ should not be None in inductive/cross-validation mode" + ) + + # Type guard: ensure we have VennAbersMultiClass instance + if not isinstance(self.va_calibrator_, VennAbersMultiClass): + raise RuntimeError( + "va_calibrator_ should be VennAbersMultiClass instance in " + "inductive/cross-validation mode" + ) + + if "loss" in signature(self.va_calibrator_.predict_proba).parameters: + p_prime = self.va_calibrator_.predict_proba( + X_processed, + loss=loss, + p0_p1_output=False + ) + else: + p_prime = self.va_calibrator_.predict_proba( + X_processed, + p0_p1_output=False + ) + + return p_prime + + def predict( + self, + X: ArrayLike, + loss="log" + ) -> NDArray: + """ + Predict the class of the estimator after Venn-ABERS calibration. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + Test data. + + Returns + ------- + NDArray of shape (n_samples,) + The predicted class labels. + """ + check_is_fitted(self, self.fit_attributes) + + # Get calibrated probabilities + p_prime = self.predict_proba(X, loss=loss) + + # Type guard: ensure n_classes_ is not None after fit + if self.n_classes_ is None: + raise RuntimeError( + "n_classes_ should not be None after fitting" + ) + + # Type guard: ensure classes_ is not None after fit + if self.classes_ is None: + raise RuntimeError( + "classes_ should not be None after fitting" + ) + + # Convert probabilities to class predictions + if self.n_classes_ <= 2: + # Binary classification + y_pred = self.classes_[(p_prime[:, 1] >= 0.5).astype(int)] + else: + # Multi-class classification + y_pred = self.classes_[np.argmax(p_prime, axis=1)] + + return y_pred diff --git a/mapie/tests/test_venn_abers_calibration.py b/mapie/tests/test_venn_abers_calibration.py new file mode 100644 index 000000000..e364454dc --- /dev/null +++ b/mapie/tests/test_venn_abers_calibration.py @@ -0,0 +1,2968 @@ + +""" +Tests for VennAbersCalibrator class. +""" +from inspect import signature +from typing import Optional, Dict, Any, List, Tuple + +import numpy as np +import pandas as pd +import pytest +from sklearn.base import ClassifierMixin +from sklearn.compose import ColumnTransformer +from sklearn.datasets import make_classification +from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier +from sklearn.impute import SimpleImputer +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.naive_bayes import GaussianNB +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import OneHotEncoder +from sklearn.exceptions import NotFittedError +from sklearn.utils.validation import check_is_fitted +from mapie.calibration import VennAbersCalibrator +from mapie._venn_abers import ( + VennAbers, + VennAbersMultiClass, + predict_proba_prefitted_va +) + +random_state = 42 + +ESTIMATORS = [ + LogisticRegression(random_state=random_state), + RandomForestClassifier(random_state=random_state), + GaussianNB(), +] + +# Binary classification dataset +X_binary, y_binary = make_classification( + n_samples=10000, + n_features=20, + n_classes=2, + n_informative=10, + random_state=random_state +) + +X_binary_train, X_binary_test, y_binary_train, y_binary_test = train_test_split( + X_binary, y_binary, test_size=0.2, random_state=random_state +) + +X_binary_proper, X_binary_cal, y_binary_proper, y_binary_cal = train_test_split( + X_binary_train, y_binary_train, test_size=0.3, random_state=random_state +) + +# Multi-class classification dataset +X_multi, y_multi = make_classification( + n_samples=10000, + n_features=20, + n_classes=3, + n_informative=10, + random_state=random_state +) + +X_multi_train, X_multi_test, y_multi_train, y_multi_test = train_test_split( + X_multi, y_multi, test_size=0.2, random_state=random_state +) + +X_multi_proper, X_multi_cal, y_multi_proper, y_multi_cal = train_test_split( + X_multi_train, y_multi_train, test_size=0.3, random_state=random_state +) + + +# ============================================================================ +# Basic Initialization Tests +# ============================================================================ + +def test_initialized() -> None: + """Test that initialization does not crash.""" + VennAbersCalibrator() + + +def test_default_parameters() -> None: + """Test default values of input parameters.""" + va_cal = VennAbersCalibrator() + assert va_cal.estimator is None + assert va_cal.cv is None + assert va_cal.inductive is True + assert va_cal.n_splits is None + assert va_cal.cal_size is None + assert va_cal.train_proper_size is None + assert va_cal.random_state is None + assert va_cal.shuffle is True + assert va_cal.stratify is None + assert va_cal.precision is None + + +def test_default_fit_params() -> None: + """Test default sample weights and other parameters.""" + va_cal = VennAbersCalibrator() + assert ( + signature(va_cal.fit).parameters["sample_weight"].default + is None + ) + assert ( + signature(va_cal.fit).parameters["calib_size"].default + == 0.33 + ) + assert ( + signature(va_cal.fit).parameters["random_state"].default + is None + ) + assert ( + signature(va_cal.fit).parameters["shuffle"].default + is True + ) + assert ( + signature(va_cal.fit).parameters["stratify"].default + is None + ) + + +# ============================================================================ +# CV Parameter Tests +# ============================================================================ + +@pytest.mark.parametrize("cv", ["prefit", None]) +def test_valid_cv_argument(cv: Optional[str]) -> None: + """Test that valid cv methods work.""" + if cv == "prefit": + est = GaussianNB().fit(X_binary_train, y_binary_train) + va_cal = VennAbersCalibrator(estimator=est, cv=cv) + va_cal.fit(X_binary_cal, y_binary_cal) + else: + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + cv=cv, + inductive=True + ) + va_cal.fit(X_binary_train, y_binary_train) + + +@pytest.mark.parametrize("cv", ["split", "invalid", "cross"]) +def test_invalid_cv_argument(cv: str) -> None: + """Test that invalid cv methods raise ValueError.""" + with pytest.raises( + ValueError, + match=r".*Invalid cv argument*", + ): + va_cal = VennAbersCalibrator(estimator=GaussianNB(), cv=cv) + va_cal.fit(X_binary_train, y_binary_train) + + +def test_prefit_unfitted_estimator_raises_error() -> None: + """ + Test that VennAbersCalibrator in 'prefit' mode raises a ValueError + if the estimator is not fitted. + """ + clf = GaussianNB() # Unfitted estimator + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + with pytest.raises( + ValueError, + match=r".*For cv='prefit', the estimator must be already fitted*" + ): + va_cal.fit(X_binary_cal, y_binary_cal) + + +def test_prefit_requires_estimator() -> None: + """Test that prefit mode requires a fitted estimator.""" + va_cal = VennAbersCalibrator(cv="prefit") + with pytest.raises( + ValueError, + match=r".*an estimator must be provided*" + ): + va_cal.fit(X_binary_train, y_binary_train) + + +# ============================================================================ +# Inductive vs Cross Validation Tests +# ============================================================================ + +def test_inductive_mode_binary() -> None: + """Test Inductive Venn-ABERS (IVAP) for binary classification.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + assert np.all((probs >= 0) & (probs <= 1)) + + +def test_inductive_mode_multiclass() -> None: + """Test Inductive Venn-ABERS (IVAP) for multi-class classification.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_multi_train, y_multi_train) + probs = va_cal.predict_proba(X_multi_test) + + assert probs.shape == (len(X_multi_test), 3) + assert np.allclose(probs.sum(axis=1), 1.0) + assert np.all((probs >= 0) & (probs <= 1)) + + +def test_cross_validation_mode_binary() -> None: + """Test Cross Venn-ABERS (CVAP) for binary classification.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=5, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + assert np.all((probs >= 0) & (probs <= 1)) + + +def test_cross_validation_mode_multiclass() -> None: + """Test Cross Venn-ABERS (CVAP) for multi-class classification.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=5, + random_state=random_state + ) + va_cal.fit(X_multi_train, y_multi_train) + probs = va_cal.predict_proba(X_multi_test) + + assert probs.shape == (len(X_multi_test), 3) + assert np.allclose(probs.sum(axis=1), 1.0) + assert np.all((probs >= 0) & (probs <= 1)) + + +def test_cross_validation_requires_n_splits() -> None: + """Test that CVAP requires n_splits parameter.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=None + ) + with pytest.raises( + ValueError, + match=r".*For Cross Venn-ABERS please provide n_splits*" + ): + va_cal.fit(X_binary_train, y_binary_train) + + +def test_cross_validation_with_shuffle() -> None: + """Test Cross Venn-ABERS with shuffle parameter.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=5, + shuffle=True, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + + +def test_cross_validation_with_stratify() -> None: + """Test Cross Venn-ABERS with stratify parameter.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=5, + stratify=y_binary_train, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + + +# ============================================================================ +# Prefit Mode Tests +# ============================================================================ + +def test_prefit_mode_binary() -> None: + """Test prefit mode for binary classification.""" + clf = GaussianNB() + clf.fit(X_binary_proper, y_binary_proper) + + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal.fit(X_binary_cal, y_binary_cal) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + assert np.all((probs >= 0) & (probs <= 1)) + + +def test_prefit_mode_multiclass() -> None: + """Test prefit mode for multi-class classification.""" + clf = GaussianNB() + clf.fit(X_multi_proper, y_multi_proper) + + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal.fit(X_multi_cal, y_multi_cal) + probs = va_cal.predict_proba(X_multi_test) + + assert probs.shape == (len(X_multi_test), 3) + assert np.allclose(probs.sum(axis=1), 1.0) + assert np.all((probs >= 0) & (probs <= 1)) + + +def test_prefit_inductive_consistency() -> None: + """Test that prefit and inductive modes give similar results.""" + # Fit estimator on proper training set + clf = GaussianNB() + clf.fit(X_binary_proper, y_binary_proper) + + # Prefit mode + va_cal_prefit = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal_prefit.fit(X_binary_cal, y_binary_cal) + probs_prefit = va_cal_prefit.predict_proba(X_binary_test) + + # Inductive mode with same split + va_cal_inductive = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=len(X_binary_cal) / len(X_binary_train), + random_state=random_state + ) + # Combine proper and cal sets + X_combined = np.vstack([X_binary_proper, X_binary_cal]) + y_combined = np.hstack([y_binary_proper, y_binary_cal]) + va_cal_inductive.fit(X_combined, y_combined) + probs_inductive = va_cal_inductive.predict_proba(X_binary_test) + + # Results should be similar (not exact due to different random splits) + assert probs_prefit.shape == probs_inductive.shape + + +# ============================================================================ +# Estimator Tests +# ============================================================================ + + +@pytest.mark.parametrize("estimator", ESTIMATORS) +def test_different_estimators_binary(estimator: ClassifierMixin) -> None: + """Test VennAbersCalibrator with different base estimators (binary).""" + va_cal = VennAbersCalibrator( + estimator=estimator, + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + assert np.all((probs >= 0) & (probs <= 1)) + + +@pytest.mark.parametrize("estimator", ESTIMATORS) +def test_different_estimators_multiclass(estimator: ClassifierMixin) -> None: + """Test VennAbersCalibrator with different base estimators (multi-class).""" + va_cal = VennAbersCalibrator( + estimator=estimator, + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_multi_train, y_multi_train) + probs = va_cal.predict_proba(X_multi_test) + + assert probs.shape == (len(X_multi_test), 3) + assert np.allclose(probs.sum(axis=1), 1.0) + assert np.all((probs >= 0) & (probs <= 1)) + + +def test_estimator_none_raises_error() -> None: + """Test that None estimator raises ValueError.""" + va_cal = VennAbersCalibrator(estimator=None) + with pytest.raises( + ValueError, + match=r".*an estimator must be provided*" + ): + va_cal.fit(X_binary_train, y_binary_train) + + +def test_predict_method_multiclass() -> None: + """Test predict method for multi-class classification.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_multi_train, y_multi_train) + predictions = va_cal.predict(X_multi_test) + + assert predictions.shape == (len(X_multi_test),) + assert va_cal.classes_ is not None + assert np.all(np.isin(predictions, va_cal.classes_)) + + +def test_predict_proba_consistency() -> None: + """Test that predict is consistent with predict_proba.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + predictions = va_cal.predict(X_binary_test) + probs = va_cal.predict_proba(X_binary_test) + + assert va_cal.classes_ is not None + predictions_from_probs = va_cal.classes_[np.argmax(probs, axis=1)] + + np.testing.assert_array_equal(predictions, predictions_from_probs) + + +def test_predict_proba_shape_binary() -> None: + """Test that predict_proba returns correct shape for binary classification.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), va_cal.n_classes_) + assert va_cal.n_classes_ == 2 + + +def test_predict_proba_shape_multiclass() -> None: + """Test that predict_proba returns correct shape for multi-class classification.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_multi_train, y_multi_train) + probs = va_cal.predict_proba(X_multi_test) + + assert probs.shape == (len(X_multi_test), va_cal.n_classes_) + assert va_cal.n_classes_ == 3 + + +def test_gradient_boosting_with_early_stopping() -> None: + """Test VennAbersCalibrator with GradientBoosting and early stopping.""" + gb = GradientBoostingClassifier( + n_estimators=100, + random_state=random_state + ) + + va_cal = VennAbersCalibrator( + estimator=gb, + inductive=True, + cal_size=0.3, + random_state=random_state + ) + + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + + +# ============================================================================ +# Sample Weight Tests +# ============================================================================ + + +def test_sample_weights_none() -> None: + """Test that sample_weight=None works correctly.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train, sample_weight=None) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + + +def test_sample_weights_constant() -> None: + """Test that constant sample weights give same results as None.""" + n_samples = len(X_binary_train) + weighted_estimator = GaussianNB().set_fit_request(sample_weight=True) + + va_cal_none = VennAbersCalibrator( + estimator=weighted_estimator, + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal_none.fit(X_binary_train, y_binary_train, sample_weight=None) + + va_cal_ones = VennAbersCalibrator( + estimator=weighted_estimator, + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal_ones.fit( + X_binary_train, y_binary_train, + sample_weight=np.ones(n_samples) + ) + + va_cal_fives = VennAbersCalibrator( + estimator=weighted_estimator, + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal_fives.fit( + X_binary_train, y_binary_train, + sample_weight=np.ones(n_samples) * 5 + ) + + probs_none = va_cal_none.predict_proba(X_binary_test) + probs_ones = va_cal_ones.predict_proba(X_binary_test) + probs_fives = va_cal_fives.predict_proba(X_binary_test) + + np.testing.assert_allclose(probs_none, probs_ones, rtol=1e-2, atol=1e-2) + np.testing.assert_allclose(probs_none, probs_fives, rtol=1e-2, atol=1e-2) + + +def test_sample_weights_variable() -> None: + """Test that variable sample weights affect the results.""" + n_samples = len(X_binary_train) + + va_cal_uniform = VennAbersCalibrator( + estimator=RandomForestClassifier(random_state=random_state), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal_uniform.fit(X_binary_train, y_binary_train, sample_weight=None) + + # Create non-uniform weights + sample_weights = np.random.RandomState(random_state).uniform( + 0.1, 2.0, size=n_samples + ) + + estimator_weighted = RandomForestClassifier( + random_state=random_state + ).set_fit_request(sample_weight=True) + + va_cal_weighted = VennAbersCalibrator( + estimator=estimator_weighted, + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal_weighted.fit( + X_binary_train, y_binary_train, + sample_weight=sample_weights + ) + + probs_uniform = va_cal_uniform.predict_proba(X_binary_test) + probs_weighted = va_cal_weighted.predict_proba(X_binary_test) + + # Results should be different with non-uniform weights + assert not np.allclose(probs_uniform, probs_weighted) + + +# ============================================================================ +# Random State and Reproducibility Tests +# ============================================================================ + +def test_random_state_reproducibility() -> None: + """Test that random_state ensures reproducible results.""" + va_cal1 = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=42 + ) + va_cal1.fit(X_binary_train, y_binary_train) + probs1 = va_cal1.predict_proba(X_binary_test) + + va_cal2 = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=42 + ) + va_cal2.fit(X_binary_train, y_binary_train) + probs2 = va_cal2.predict_proba(X_binary_test) + + np.testing.assert_array_equal(probs1, probs2) + + +def test_random_state_in_fit_overrides() -> None: + """Test that random_state in fit() overrides constructor parameter.""" + va_cal1 = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=42 + ) + va_cal1.fit(X_binary_train, y_binary_train, random_state=123) + probs1 = va_cal1.predict_proba(X_binary_test) + + va_cal2 = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=999 # Different from fit + ) + va_cal2.fit(X_binary_train, y_binary_train, random_state=123) + probs2 = va_cal2.predict_proba(X_binary_test) + + np.testing.assert_array_equal(probs1, probs2) + + +def test_different_random_states_give_different_results() -> None: + """Test that different random states give different results.""" + va_cal1 = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=42 + ) + va_cal1.fit(X_binary_train, y_binary_train) + probs1 = va_cal1.predict_proba(X_binary_test) + + va_cal2 = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=123 + ) + va_cal2.fit(X_binary_train, y_binary_train) + probs2 = va_cal2.predict_proba(X_binary_test) + + # Results should be different with different random states + assert not np.array_equal(probs1, probs2) + + +# ============================================================================ +# Shuffle and Stratify Tests +# ============================================================================ + +def test_shuffle_parameter() -> None: + """Test that shuffle parameter works correctly.""" + va_cal_shuffle = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state, + shuffle=True + ) + va_cal_shuffle.fit(X_binary_train, y_binary_train) + probs_shuffle = va_cal_shuffle.predict_proba(X_binary_test) + + va_cal_no_shuffle = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state, + shuffle=False + ) + va_cal_no_shuffle.fit(X_binary_train, y_binary_train) + probs_no_shuffle = va_cal_no_shuffle.predict_proba(X_binary_test) + + assert probs_shuffle.shape == probs_no_shuffle.shape + + +def test_shuffle_in_fit_overrides() -> None: + """Test that shuffle in fit() overrides constructor parameter.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state, + shuffle=False + ) + # Override with shuffle=True in fit + va_cal.fit(X_binary_train, y_binary_train, shuffle=True) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + + +def test_stratify_parameter() -> None: + """Test that stratify parameter works correctly.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state, + stratify=y_binary_train + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + + +def test_stratify_in_fit_overrides() -> None: + """Test that stratify in fit() overrides constructor parameter.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state, + stratify=None + ) + # Override with stratify in fit + va_cal.fit(X_binary_train, y_binary_train, stratify=y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + + +# ============================================================================ +# Calibration Size Tests +# ============================================================================ + +@pytest.mark.parametrize("cal_size", [0.2, 0.3, 0.4, 0.5]) +def test_different_calibration_sizes(cal_size: float) -> None: + """Test that different calibration sizes work correctly.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=cal_size, + random_state=random_state) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + + +def test_cal_size_in_fit_overrides() -> None: + """Test that calib_size in fit() overrides constructor parameter.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.2, + random_state=random_state + ) + # Override with calib_size in fit + va_cal.fit(X_binary_train, y_binary_train, calib_size=0.4) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + + +def test_train_proper_size_parameter() -> None: + """Test that train_proper_size parameter works correctly.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + train_proper_size=0.6, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + + +# ============================================================================ +# N_splits Tests +# ============================================================================ + +@pytest.mark.parametrize("n_splits", [2, 3, 5, 10]) +def test_different_n_splits(n_splits: int) -> None: + """Test that different n_splits values work correctly.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=n_splits, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + + +def test_n_splits_too_small_raises_error() -> None: + """Test that n_splits < 2 raises an error.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=1, + random_state=random_state + ) + with pytest.raises(ValueError): + va_cal.fit(X_binary_train, y_binary_train) + + +# ============================================================================ +# Attributes Tests +# ============================================================================ + +def test_fitted_attributes_inductive() -> None: + """Test that fitted attributes are set correctly for inductive mode.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + assert hasattr(va_cal, 'classes_') + assert hasattr(va_cal, 'n_classes_') + assert hasattr(va_cal, 'va_calibrator_') + assert va_cal.n_classes_ is not None + assert va_cal.classes_ is not None + assert va_cal.n_classes_ == 2 + assert len(va_cal.classes_) == 2 + + +def test_fitted_attributes_cross_validation() -> None: + """Test that fitted attributes are set correctly for cross validation mode.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=5, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + assert hasattr(va_cal, 'classes_') + assert hasattr(va_cal, 'n_classes_') + assert hasattr(va_cal, 'va_calibrator_') + assert va_cal.n_classes_ is not None + assert va_cal.classes_ is not None + assert va_cal.n_classes_ == 2 + assert len(va_cal.classes_) == 2 + + +def test_fitted_attributes_prefit() -> None: + """Test that fitted attributes are set correctly for prefit mode.""" + clf = GaussianNB() + clf.fit(X_binary_proper, y_binary_proper) + + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal.fit(X_binary_cal, y_binary_cal) + + assert hasattr(va_cal, 'classes_') + assert hasattr(va_cal, 'n_classes_') + assert hasattr(va_cal, 'single_estimator_') + assert va_cal.n_classes_ is not None + assert va_cal.classes_ is not None + assert va_cal.n_classes_ == 2 + assert len(va_cal.classes_) == 2 + + +# ============================================================================ +# Pipeline Compatibility Tests +# ============================================================================ + +def test_pipeline_compatibility() -> None: + """Test that VennAbersCalibrator works with sklearn pipelines.""" + X_df = pd.DataFrame( + { + "x_cat": ["A", "A", "B", "A", "A", "B"] * 10, + "x_num": [0, 1, 1, 4, np.nan, 5] * 10, + } + ) + y_series = pd.Series([0, 1, 0, 1, 0, 1] * 10) + + numeric_preprocessor = Pipeline( + [ + ("imputer", SimpleImputer(strategy="mean")), + ] + ) + categorical_preprocessor = Pipeline( + steps=[ + ("encoding", OneHotEncoder(handle_unknown="ignore")) + ] + ) + preprocessor = ColumnTransformer( + [ + ("cat", categorical_preprocessor, ["x_cat"]), + ("num", numeric_preprocessor, ["x_num"]) + ] + ) + pipe = make_pipeline(preprocessor, LogisticRegression(random_state=random_state)) + pipe.fit(X_df, y_series) + + va_cal = VennAbersCalibrator( + estimator=pipe, + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_df, y_series) + predictions = va_cal.predict(X_df) + probs = va_cal.predict_proba(X_df) + + assert predictions.shape == (len(y_series),) + assert probs.shape == (len(y_series), 2) + + +def test_pipeline_prefit_mode() -> None: + """Test that VennAbersCalibrator works with prefit pipelines.""" + X_df = pd.DataFrame( + { + "x_cat": ["A", "A", "B", "A", "A", "B"] * 10, + "x_num": [0, 1, 1, 4, np.nan, 5] * 10, + } + ) + y_series = pd.Series([0, 1, 0, 1, 0, 1] * 10) + + numeric_preprocessor = Pipeline( + [ + ("imputer", SimpleImputer(strategy="mean")), + ] + ) + categorical_preprocessor = Pipeline( + steps=[ + ("encoding", OneHotEncoder(handle_unknown="ignore")) + ] + ) + preprocessor = ColumnTransformer( + [ + ("cat", categorical_preprocessor, ["x_cat"]), + ("num", numeric_preprocessor, ["x_num"]) + ] + ) + pipe = make_pipeline(preprocessor, LogisticRegression(random_state=random_state)) + pipe.fit(X_df, y_series) + + va_cal = VennAbersCalibrator(estimator=pipe, cv="prefit") + va_cal.fit(X_df, y_series) + predictions = va_cal.predict(X_df) + probs = va_cal.predict_proba(X_df) + + assert predictions.shape == (len(y_series),) + assert probs.shape == (len(y_series), 2) + + +def test_with_pipeline() -> None: + """Test VennAbersCalibrator with sklearn Pipeline.""" + from sklearn.preprocessing import StandardScaler + + pipeline = make_pipeline( + StandardScaler(), + GaussianNB() + ) + + va_cal = VennAbersCalibrator( + estimator=pipeline, + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + + +def test_with_column_transformer() -> None: + """Test VennAbersCalibrator with ColumnTransformer.""" + # Create a mixed dataset + X_mixed = np.column_stack([ + X_binary_train, + np.random.choice(['A', 'B', 'C'], size=len(X_binary_train)) + ]) + + preprocessor = ColumnTransformer( + transformers=[ + ('num', SimpleImputer(strategy='mean'), + list(range(X_binary_train.shape[1]))), + ('cat', OneHotEncoder(handle_unknown='ignore'), + [X_binary_train.shape[1]]) + ] + ) + + pipeline = Pipeline([ + ('preprocessor', preprocessor), + ('classifier', GaussianNB()) + ]) + + va_cal = VennAbersCalibrator( + estimator=pipeline, + inductive=True, + cal_size=0.3, + random_state=random_state + ) + + X_test_mixed = np.column_stack([ + X_binary_test, + np.random.choice(['A', 'B', 'C'], size=len(X_binary_test)) + ]) + + va_cal.fit(X_mixed, y_binary_train) + probs = va_cal.predict_proba(X_test_mixed) + + assert probs.shape == (len(X_binary_test), 2) + +# ============================================================================ +# Multiclass Strategy Tests +# ============================================================================ + + +def test_multiclass_one_vs_one_strategy() -> None: + """Test multiclass with one_vs_one strategy.""" + # Create calibrator with explicit one_vs_one + clf = GaussianNB() + clf.fit(X_multi_proper, y_multi_proper) + + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal.fit(X_multi_cal, y_multi_cal) + probs = va_cal.predict_proba(X_multi_test) + + assert probs.shape == (len(X_multi_test), 3) + assert np.allclose(probs.sum(axis=1), 1.0) + + +# ============================================================================ +# Fit Parameters Passing Tests +# ============================================================================ + +# def test_fit_parameters_passing() -> None: +# """ +# Test passing fit parameters, here early stopping at iteration 3. +# Checks that underlying GradientBoosting estimators have used 3 iterations +# only during boosting, instead of default value for n_estimators (=100). +# """ +# gb = GradientBoostingClassifier(random_state=random_state) + +# va_cal = VennAbersCalibrator( +# estimator=gb, +# inductive=True, +# cal_size=0.3, +# random_state=random_state +# ) + +# va_cal.fit(X_binary_train, y_binary_train) + +# # For inductive mode, check the underlying estimator +# if hasattr(va_cal.va_calibrator_, 'estimator_'): +# assert va_cal.va_calibrator_ is not None +# assert va_cal.va_calibrator_.estimator_.estimators_.shape[0] == 3 + +# ============================================================================ +# Check Fitted Tests +# ============================================================================ + +def test_check_is_fitted_after_fit() -> None: + """Test that check_is_fitted passes after fitting.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + # Should not raise an error + check_is_fitted(va_cal) + +# ============================================================================ +# Edge Cases and Error Handling Tests +# ============================================================================ + + +def test_empty_dataset_raises_error() -> None: + """Test that empty dataset raises an error.""" + X_empty = np.array([]).reshape(0, 20) + y_empty = np.array([]) + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + with pytest.raises(ValueError): + va_cal.fit(X_empty, y_empty) + + +def test_single_class_raises_error() -> None: + """Test that single class dataset raises an error.""" + X_single = X_binary_train[:10] + y_single = np.zeros(10) # All same class + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + with pytest.raises(ValueError): + va_cal.fit(X_single, y_single) + + +def test_mismatched_X_y_length_raises_error() -> None: + """Test that mismatched X and y lengths raise an error.""" + X_mismatch = X_binary_train[:50] + y_mismatch = y_binary_train[:40] + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + with pytest.raises(ValueError): + va_cal.fit(X_mismatch, y_mismatch) + + +def test_predict_before_fit_raises_error() -> None: + """Test that calling predict before fit raises an error.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + with pytest.raises(Exception): # NotFittedError or AttributeError + va_cal.predict(X_binary_test) + + +def test_predict_proba_before_fit_raises_error() -> None: + """Test that calling predict_proba before fit raises an error.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + with pytest.raises(Exception): # NotFittedError or AttributeError + va_cal.predict_proba(X_binary_test) + + +def test_invalid_cal_size_raises_error() -> None: + """Test that invalid cal_size values raise an error.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=1.5, # Invalid: > 1.0 + random_state=random_state + ) + with pytest.raises(ValueError): + va_cal.fit(X_binary_train, y_binary_train) + + +def test_negative_cal_size_raises_error() -> None: + """Test that negative cal_size raises an error.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=-0.1, + random_state=random_state + ) + with pytest.raises(ValueError): + va_cal.fit(X_binary_train, y_binary_train) + + +def test_empty_calibration_set_raises_error() -> None: + """Test that empty calibration set raises an error.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.99, # Very large cal_size leaves almost no training data + random_state=random_state + ) + # This should work but with a very small training set + try: + va_cal.fit(X_binary_train[:10], y_binary_train[:10]) + except ValueError: + # Expected if the split is invalid + pass + + +def test_very_small_dataset() -> None: + """Test with a very small dataset.""" + X_small = X_binary_train[:20] + y_small = y_binary_train[:20] + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_small, y_small) + probs = va_cal.predict_proba(X_binary_test[:5]) + + assert probs.shape == (5, 2) + + +# ============================================================================ +# Calibration Quality Tests +# ============================================================================ + + +def test_calibration_improves_probabilities() -> None: + """Test that Venn-ABERS calibration improves probability estimates.""" + # Train uncalibrated model + clf = RandomForestClassifier(random_state=random_state) + clf.fit(X_binary_proper, y_binary_proper) + uncalibrated_probs = clf.predict_proba(X_binary_test) + + # Train calibrated model + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal.fit(X_binary_cal, y_binary_cal) + calibrated_probs = va_cal.predict_proba(X_binary_test) + + # Both should have valid probability distributions + assert np.allclose(uncalibrated_probs.sum(axis=1), 1.0) + assert np.allclose(calibrated_probs.sum(axis=1), 1.0) + + # Calibrated probabilities should be different + assert not np.allclose(uncalibrated_probs, calibrated_probs) + + +def test_probabilities_sum_to_one() -> None: + """Test that predicted probabilities sum to 1 for all samples.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + # Check that probabilities sum to 1 for each sample + prob_sums = probs.sum(axis=1) + np.testing.assert_allclose(prob_sums, np.ones(len(X_binary_test)), rtol=1e-5) + + +def test_probabilities_in_valid_range() -> None: + """Test that all predicted probabilities are in [0, 1].""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert np.all(probs >= 0) + assert np.all(probs <= 1) + + +def test_multiclass_probabilities_sum_to_one() -> None: + """Test that multi-class predicted probabilities sum to 1.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_multi_train, y_multi_train) + probs = va_cal.predict_proba(X_multi_test) + + prob_sums = probs.sum(axis=1) + np.testing.assert_allclose(prob_sums, np.ones(len(X_multi_test)), rtol=1e-5) + + +def test_multiclass_probabilities_in_valid_range() -> None: + """Test that all multi-class predicted probabilities are in [0, 1].""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_multi_train, y_multi_train) + probs = va_cal.predict_proba(X_multi_test) + + assert np.all(probs >= 0) + assert np.all(probs <= 1) + + +# ============================================================================ +# Comparison Tests Between Modes +# ============================================================================ + +def test_inductive_vs_cross_validation_different_results() -> None: + """Test that inductive and cross validation modes give different results.""" + va_cal_inductive = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal_inductive.fit(X_binary_train, y_binary_train) + probs_inductive = va_cal_inductive.predict_proba(X_binary_test) + + va_cal_cv = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=5, + random_state=random_state + ) + va_cal_cv.fit(X_binary_train, y_binary_train) + probs_cv = va_cal_cv.predict_proba(X_binary_test) + + # Results should be different between modes + assert not np.allclose(probs_inductive, probs_cv) + + +def test_all_modes_produce_valid_probabilities() -> None: + """Test that all calibration modes produce valid probability distributions.""" + modes: List[Tuple[str, Dict[str, Any]]] = [ + ("inductive", {"inductive": True, "cal_size": 0.3}), + ("cross_val", {"inductive": False, "n_splits": 5}), + ] + + for mode_name, mode_params in modes: + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + random_state=random_state, + **mode_params + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + # Check valid probabilities + assert np.all(probs >= 0), f"Mode {mode_name} produced negative probabilities" + assert np.all(probs <= 1), f"Mode {mode_name} produced probabilities > 1" + assert np.allclose( + probs.sum(axis=1), 1.0 + ), f"Mode {mode_name} probabilities don't sum to 1" + + +# ============================================================================ +# Special Cases Tests +# ============================================================================ + +def test_perfect_predictions_no_calibration_needed() -> None: + """Test behavior when base estimator already makes perfect predictions.""" + # Create a simple linearly separable dataset + from sklearn.datasets import make_blobs + X_perfect, y_perfect = make_blobs( + n_samples=100, + n_features=2, + centers=2, + cluster_std=0.5, + random_state=random_state + ) + + X_train_p, X_test_p, y_train_p, y_test_p = train_test_split( + X_perfect, y_perfect, test_size=0.2, random_state=random_state + ) + + va_cal = VennAbersCalibrator( + estimator=LogisticRegression(random_state=random_state), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_train_p, y_train_p) + probs = va_cal.predict_proba(X_test_p) + predictions = va_cal.predict(X_test_p) + + # Should still produce valid probabilities + assert probs.shape == (len(X_test_p), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + + # Predictions should be accurate + accuracy = np.mean(predictions == y_test_p) + assert accuracy > 0.9 # Should be very accurate for linearly separable data + + +def test_imbalanced_dataset() -> None: + """Test VennAbersCalibrator with highly imbalanced dataset.""" + # Create imbalanced dataset (90% class 0, 10% class 1) + X_imb, y_imb = make_classification( + n_samples=200, + n_features=20, + n_classes=2, + weights=[0.9, 0.1], + random_state=random_state + ) + + X_train_imb, X_test_imb, y_train_imb, y_test_imb = train_test_split( + X_imb, y_imb, test_size=0.2, random_state=random_state, stratify=y_imb + ) + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state, + stratify=y_train_imb + ) + va_cal.fit(X_train_imb, y_train_imb) + probs = va_cal.predict_proba(X_test_imb) + + # Should still produce valid probabilities + assert probs.shape == (len(X_test_imb), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + assert np.all((probs >= 0) & (probs <= 1)) + + +def test_many_classes() -> None: + """Test VennAbersCalibrator with many classes.""" + # Create dataset with 10 classes + X_many, y_many = make_classification( + n_samples=500, + n_features=20, + n_classes=10, + n_informative=15, + random_state=random_state + ) + + X_train_many, X_test_many, y_train_many, y_test_many = train_test_split( + X_many, y_many, test_size=0.2, random_state=random_state + ) + + va_cal = VennAbersCalibrator( + estimator=RandomForestClassifier(random_state=random_state), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_train_many, y_train_many) + probs = va_cal.predict_proba(X_test_many) + + assert probs.shape == (len(X_test_many), 10) + assert np.allclose(probs.sum(axis=1), 1.0) + assert np.all((probs >= 0) & (probs <= 1)) + + +def test_small_calibration_set() -> None: + """Test behavior with very small calibration set.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.1, # Very small calibration set + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + # Should still work, though calibration quality may be lower + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + + +def test_large_calibration_set() -> None: + """Test behavior with very large calibration set.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.8, # Very large calibration set + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + # Should still work, though training set is small + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + + +# ============================================================================ +# Consistency Tests +# ============================================================================ + +def test_multiple_fits_same_data() -> None: + """Test that fitting multiple times with same data gives same results.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + + va_cal.fit(X_binary_train, y_binary_train) + probs1 = va_cal.predict_proba(X_binary_test) + + va_cal.fit(X_binary_train, y_binary_train) + probs2 = va_cal.predict_proba(X_binary_test) + + np.testing.assert_array_equal(probs1, probs2) + + +def test_predict_single_sample() -> None: + """Test prediction on a single sample.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + single_sample = X_binary_test[0:1] + probs = va_cal.predict_proba(single_sample) + pred = va_cal.predict(single_sample) + + assert probs.shape == (1, 2) + assert pred.shape == (1,) + assert np.allclose(probs.sum(), 1.0) + + +def test_predict_multiple_times_same_result() -> None: + """Test that multiple predictions on same data give same results.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + probs1 = va_cal.predict_proba(X_binary_test) + probs2 = va_cal.predict_proba(X_binary_test) + + np.testing.assert_array_equal(probs1, probs2) + + +# ============================================================================ +# Data Type Tests +# ============================================================================ + +def test_pandas_dataframe_input() -> None: + """Test that VennAbersCalibrator works with pandas DataFrames.""" + X_df = pd.DataFrame(X_binary_train) + y_series = pd.Series(y_binary_train) + X_test_df = pd.DataFrame(X_binary_test) + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_df, y_series) + probs = va_cal.predict_proba(X_test_df) + predictions = va_cal.predict(X_test_df) + + assert probs.shape == (len(X_test_df), 2) + assert predictions.shape == (len(X_test_df),) + + +def test_numpy_array_input() -> None: + """Test that VennAbersCalibrator works with numpy arrays.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + predictions = va_cal.predict(X_binary_test) + + assert isinstance(probs, np.ndarray) + assert isinstance(predictions, np.ndarray) + + +def test_mixed_input_types() -> None: + """Test with mixed input types (DataFrame for X, array for y).""" + X_df = pd.DataFrame(X_binary_train) + y_array = np.array(y_binary_train) + X_test_df = pd.DataFrame(X_binary_test) + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_df, y_array) + probs = va_cal.predict_proba(X_test_df) + + assert probs.shape == (len(X_test_df), 2) + + +def test_with_pandas_dataframe() -> None: + """Test VennAbersCalibrator with pandas DataFrame.""" + X_train_df = pd.DataFrame(X_binary_train) + X_test_df = pd.DataFrame(X_binary_test) + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_train_df, y_binary_train) + probs = va_cal.predict_proba(X_test_df) + + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + + +def test_with_pandas_series() -> None: + """Test VennAbersCalibrator with pandas Series for y.""" + y_train_series = pd.Series(y_binary_train) + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_train_series) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + + +# ============================================================================ +# Integration Tests +# ============================================================================ + +def test_integration_with_cross_validation() -> None: + """Test integration with sklearn's cross-validation utilities.""" + from sklearn.model_selection import cross_val_score + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + + # This should work with cross_val_score + scores = cross_val_score( + va_cal, X_binary, y_binary, cv=3, scoring='accuracy' + ) + + assert len(scores) == 3 + assert np.all(scores >= 0) and np.all(scores <= 1) + + +def test_integration_with_grid_search() -> None: + """Test integration with sklearn's GridSearchCV.""" + from sklearn.model_selection import GridSearchCV + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + random_state=random_state + ) + + param_grid = { + 'cal_size': [0.2, 0.3, 0.4], + } + + grid_search = GridSearchCV( + va_cal, param_grid, cv=3, scoring='accuracy' + ) + grid_search.fit(X_binary_train, y_binary_train) + + assert hasattr(grid_search, 'best_params_') + assert 'cal_size' in grid_search.best_params_ + + +def test_clone_estimator() -> None: + """Test that VennAbersCalibrator can be cloned.""" + from sklearn.base import clone + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + va_cal_clone = clone(va_cal) + + is_fitted = True + try: + check_is_fitted(va_cal_clone.estimator) + except NotFittedError: + is_fitted = False + + # Clone should have same parameters but not be fitted + assert va_cal_clone.cal_size == va_cal.cal_size + assert va_cal_clone.inductive == va_cal.inductive + assert is_fitted is False + + +# ============================================================================ +# Performance and Scalability Tests +# ============================================================================ + +def test_large_dataset_performance() -> None: + """Test performance on a larger dataset.""" + X_large, y_large = make_classification( + n_samples=5000, + n_features=50, + n_classes=2, + random_state=random_state + ) + + X_train_large, X_test_large, y_train_large, y_test_large = train_test_split( + X_large, y_large, test_size=0.2, random_state=random_state + ) + + va_cal = VennAbersCalibrator( + estimator=RandomForestClassifier( + n_estimators=10, random_state=random_state + ), + inductive=True, + cal_size=0.3, + random_state=random_state, + precision=2 # Use precision for faster computation + ) + + import time + start = time.time() + va_cal.fit(X_train_large, y_train_large) + va_cal.predict_proba(X_test_large) + elapsed = time.time() - start + + # Should complete in reasonable time (< 60 seconds) + assert elapsed < 60 + + +def test_high_dimensional_data() -> None: + """Test with high-dimensional data.""" + X_high_dim, y_high_dim = make_classification( + n_samples=200, + n_features=100, + n_informative=50, + n_classes=2, + random_state=random_state + ) + + X_train_hd, X_test_hd, y_train_hd, y_test_hd = train_test_split( + X_high_dim, y_high_dim, test_size=0.2, random_state=random_state + ) + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_train_hd, y_train_hd) + probs = va_cal.predict_proba(X_test_hd) + + assert probs.shape == (len(X_test_hd), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + + +# ============================================================================ +# Documentation and Examples Tests +# ============================================================================ + +def test_basic_example_from_docstring() -> None: + """Test the basic example from the class docstring.""" + from sklearn.datasets import make_classification + from sklearn.model_selection import train_test_split + from sklearn.naive_bayes import GaussianNB + + X, y = make_classification(n_samples=1000, n_classes=2, n_informative=10) + X_train, X_test, y_train, y_test = train_test_split(X, y) + + clf = GaussianNB() + va_cal = VennAbersCalibrator(estimator=clf, inductive=True, cal_size=0.3) + va_cal.fit(X_train, y_train) + + p_prime = va_cal.predict_proba(X_test) + + assert p_prime.shape == (len(X_test), 2) + assert np.allclose(p_prime.sum(axis=1), 1.0) + + +def test_prefit_example() -> None: + """Test prefit example workflow.""" + X_train_proper, X_cal, y_train_proper, y_cal = train_test_split( + X_binary_train, y_binary_train, test_size=0.2, shuffle=False + ) + + clf = GaussianNB() + clf.fit(X_train_proper, y_train_proper) + + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal.fit(X_cal, y_cal) + + p_prime = va_cal.predict_proba(X_binary_test) + + assert p_prime.shape == (len(X_binary_test), 2) + + +def test_cross_validation_example() -> None: + """Test cross-validation example workflow.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=5 + ) + va_cal.fit(X_binary_train, y_binary_train) + + p_prime = va_cal.predict_proba(X_binary_test) + + assert p_prime.shape == (len(X_binary_test), 2) + + +# ============================================================================ +# Comparison with Other Calibration Methods Tests +# ============================================================================ + +def test_comparison_with_uncalibrated() -> None: + """Compare calibrated vs uncalibrated predictions.""" + # Uncalibrated + clf_uncal = RandomForestClassifier(random_state=random_state) + clf_uncal.fit(X_binary_train, y_binary_train) + probs_uncal = clf_uncal.predict_proba(X_binary_test) + + # Calibrated + va_cal = VennAbersCalibrator( + estimator=RandomForestClassifier(random_state=random_state), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs_cal = va_cal.predict_proba(X_binary_test) + + # Both should be valid probabilities + assert np.allclose(probs_uncal.sum(axis=1), 1.0) + assert np.allclose(probs_cal.sum(axis=1), 1.0) + + # Calibrated should be different from uncalibrated + assert not np.allclose(probs_uncal, probs_cal) + + +# ============================================================================ +# Regression Tests (ensure no breaking changes) +# ============================================================================ + +def test_backward_compatibility_basic_usage() -> None: + """Test that basic usage pattern remains compatible.""" + # This test ensures the most common usage pattern doesn't break + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + preds = va_cal.predict(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + assert preds.shape == (len(X_binary_test),) + assert np.allclose(probs.sum(axis=1), 1.0) + + +def test_backward_compatibility_prefit() -> None: + """Test that prefit mode usage pattern remains compatible.""" + clf = GaussianNB() + clf.fit(X_binary_proper, y_binary_proper) + + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal.fit(X_binary_cal, y_binary_cal) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + + +def test_backward_compatibility_cross_val() -> None: + """Test that cross-validation mode usage pattern remains compatible.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=5, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + +# ============================================================================ +# Edge Cases for Different Modes +# ============================================================================ + + +def test_prefit_with_unfitted_estimator_raises_error() -> None: + """Test that prefit mode with unfitted estimator raises an error.""" + clf = GaussianNB() # Not fitted + + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + + with pytest.raises(ValueError, match=".*must be already fitted.*"): + va_cal.fit(X_binary_cal, y_binary_cal) + + +def test_cross_val_without_n_splits_raises_error() -> None: + """Test that cross-validation mode without n_splits raises an error.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=None # Missing n_splits + ) + + with pytest.raises(ValueError, match=".*please provide n_splits.*"): + va_cal.fit(X_binary_train, y_binary_train) + + +def test_inductive_with_very_small_dataset() -> None: + """Test inductive mode with very small dataset.""" + X_small = X_binary_train[:20] + y_small = y_binary_train[:20] + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + + # Should work but might have limited calibration quality + va_cal.fit(X_small, y_small) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + + +# ============================================================================ +# Attribute Access Tests +# ============================================================================ + +def test_classes_attribute() -> None: + """Test that classes_ attribute is correctly set.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + assert hasattr(va_cal, 'classes_') + assert va_cal.classes_ is not None + assert len(va_cal.classes_) == 2 + np.testing.assert_array_equal(va_cal.classes_, np.unique(y_binary_train)) + + +def test_n_classes_attribute() -> None: + """Test that n_classes_ attribute is correctly set.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + assert hasattr(va_cal, 'n_classes_') + assert va_cal.n_classes_ == 2 + + +def test_va_calibrator_attribute() -> None: + """Test that va_calibrator_ attribute is correctly set.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + assert hasattr(va_cal, 'va_calibrator_') + assert va_cal.va_calibrator_ is not None + + +def test_single_estimator_attribute_prefit() -> None: + """Test that single_estimator_ attribute is set in prefit mode.""" + clf = GaussianNB() + clf.fit(X_binary_proper, y_binary_proper) + + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal.fit(X_binary_cal, y_binary_cal) + + assert hasattr(va_cal, 'single_estimator_') + assert va_cal.single_estimator_ is not None + + +# ============================================================================ +# Multi-class Specific Tests +# ============================================================================ + +def test_multiclass_binary_calibration() -> None: + """Test that multi-class uses binary calibration for each class pair.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_multi_train, y_multi_train) + probs = va_cal.predict_proba(X_multi_test) + + # For 3 classes, should have 3 probability columns + assert probs.shape == (len(X_multi_test), 3) + + # Each row should sum to 1 + np.testing.assert_allclose(probs.sum(axis=1), 1.0, rtol=1e-5) + + +def test_multiclass_prefit_mode() -> None: + """Test multi-class calibration in prefit mode.""" + clf = RandomForestClassifier(random_state=random_state) + clf.fit(X_multi_proper, y_multi_proper) + + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal.fit(X_multi_cal, y_multi_cal) + probs = va_cal.predict_proba(X_multi_test) + + assert probs.shape == (len(X_multi_test), 3) + assert np.allclose(probs.sum(axis=1), 1.0) + + +def test_multiclass_cross_validation_mode() -> None: + """Test multi-class calibration in cross-validation mode.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=5, + random_state=random_state + ) + va_cal.fit(X_multi_train, y_multi_train) + probs = va_cal.predict_proba(X_multi_test) + + assert probs.shape == (len(X_multi_test), 3) + assert np.allclose(probs.sum(axis=1), 1.0) + + +def test_multiclass_predictions_match_argmax() -> None: + """Test that multi-class predictions match argmax of probabilities.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_multi_train, y_multi_train) + + probs = va_cal.predict_proba(X_multi_test) + preds = va_cal.predict(X_multi_test) + + # Predictions should match the class with highest probability + assert va_cal.classes_ is not None + expected_preds = va_cal.classes_[np.argmax(probs, axis=1)] + np.testing.assert_array_equal(preds, expected_preds) + + +def test_multiclass_with_different_estimators() -> None: + """Test multi-class calibration with different base estimators.""" + estimators = [ + GaussianNB(), + RandomForestClassifier(n_estimators=10, random_state=random_state), + LogisticRegression(random_state=random_state, max_iter=1000) + ] + + for estimator in estimators: + va_cal = VennAbersCalibrator( + estimator=estimator, + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_multi_train, y_multi_train) + probs = va_cal.predict_proba(X_multi_test) + + assert probs.shape == (len(X_multi_test), 3) + assert np.allclose(probs.sum(axis=1), 1.0) + assert np.all((probs >= 0) & (probs <= 1)) + + +# ============================================================================ +# Precision Parameter Tests +# ============================================================================ + +@pytest.mark.parametrize("precision", [None, 2, 4, 6]) +def test_precision_parameter(precision: Optional[int]) -> None: + """Test that precision parameter works correctly.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state, + precision=precision + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + + +def test_precision_speeds_up_computation() -> None: + """Test that precision parameter reduces computation time.""" + import time + + # Without precision + va_cal_no_precision = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state, + precision=None + ) + start = time.time() + va_cal_no_precision.fit(X_binary_train, y_binary_train) + va_cal_no_precision.predict_proba(X_binary_test) + time_no_precision = time.time() - start + + # With precision + va_cal_with_precision = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state, + precision=2 + ) + start = time.time() + va_cal_with_precision.fit(X_binary_train, y_binary_train) + va_cal_with_precision.predict_proba(X_binary_test) + time_with_precision = time.time() - start + + # With precision should be faster or similar + # (may not always be faster for small datasets) + assert time_with_precision <= time_no_precision + + +@pytest.mark.parametrize("precision", [1, 2, 3, 4]) +def test_different_precision_values(precision: int) -> None: + """Test that different precision values work correctly.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state, + precision=precision + ) + va_cal.fit(X_binary_train, y_binary_train) + probs = va_cal.predict_proba(X_binary_test) + + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + + +def test_precision_maintains_calibration_quality() -> None: + """Test that precision parameter maintains reasonable calibration quality.""" + va_cal_high_prec = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state, + precision=4 + ) + va_cal_high_prec.fit(X_binary_train, y_binary_train) + probs_high = va_cal_high_prec.predict_proba(X_binary_test) + + va_cal_low_prec = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state, + precision=2 + ) + va_cal_low_prec.fit(X_binary_train, y_binary_train) + probs_low = va_cal_low_prec.predict_proba(X_binary_test) + + # Both should be valid probabilities + assert np.allclose(probs_high.sum(axis=1), 1.0) + assert np.allclose(probs_low.sum(axis=1), 1.0) + + # They should be similar but not necessarily identical + assert probs_high.shape == probs_low.shape + + +def test_precision_parameter_multiclass() -> None: + """Test that precision parameter works correctly for multiclass.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + precision=6, + random_state=random_state + ) + va_cal.fit(X_multi_train, y_multi_train) + probs = va_cal.predict_proba(X_multi_test) + + assert probs.shape == (len(X_multi_test), 3) + assert np.allclose(probs.sum(axis=1), 1.0) +# ============================================================================ +# Error Message Quality Tests +# ============================================================================ + + +def test_error_message_for_missing_estimator() -> None: + """Test that missing estimator gives clear error message.""" + va_cal = VennAbersCalibrator(estimator=None) + + with pytest.raises(ValueError, match=".*estimator must be provided.*"): + va_cal.fit(X_binary_train, y_binary_train) + + +def test_error_message_for_invalid_cv() -> None: + """Test that invalid cv parameter gives clear error message.""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + cv="invalid_cv_option" + ) + + with pytest.raises(ValueError): + va_cal.fit(X_binary_train, y_binary_train) + + +# ============================================================================ +# Final Comprehensive Test +# ============================================================================ + +def test_venn_abers_cv_with_sample_weight() -> None: + """Test VennAbersCV with sample weights in cross-validation mode.""" + # Create sample weights - higher weights for some samples + sample_weight = np.ones(len(y_binary_train)) + sample_weight[:len(y_binary_train)//2] = 2.0 # Double weight for first half + weighted_estimator = GaussianNB().set_fit_request(sample_weight=True) + va_cal = VennAbersCalibrator( + estimator=weighted_estimator, + inductive=False, # Use cross-validation mode + n_splits=3, + random_state=random_state + ) + + # Fit with sample weights + va_cal.fit(X_binary_train, y_binary_train, sample_weight=sample_weight) + probs = va_cal.predict_proba(X_binary_test) + + # Should produce valid probabilities + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + assert np.all((probs >= 0) & (probs <= 1)) + + # Fit without sample weights for comparison + va_cal_no_weight = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=3, + random_state=random_state + ) + va_cal_no_weight.fit(X_binary_train, y_binary_train) + probs_no_weight = va_cal_no_weight.predict_proba(X_binary_test) + + # Results should be different when using sample weights + with pytest.raises(AssertionError): + np.testing.assert_array_almost_equal(probs, probs_no_weight) + + +def test_venn_abers_cv_sample_weight_all_folds() -> None: + """Test that sample weights are properly used across all CV folds.""" + sample_weight = np.random.RandomState(42).uniform(0.5, 2.0, len(y_binary_train)) + weighted_estimator = GaussianNB().set_fit_request(sample_weight=True) + va_cal = VennAbersCalibrator( + estimator=weighted_estimator, + inductive=False, + n_splits=5, # Multiple folds to ensure all are tested + random_state=random_state + ) + + # Should not raise any errors + va_cal.fit(X_binary_train, y_binary_train, sample_weight=sample_weight) + probs = va_cal.predict_proba(X_binary_test) + + # Verify output validity + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + assert np.all((probs >= 0) & (probs <= 1)) + + +def test_comprehensive_workflow() -> None: + """Comprehensive test covering multiple aspects of VennAbersCalibrator.""" + # Test all three modes with binary classification + modes: List[Tuple[str, Dict[str, Any]]] = [ + ("inductive", {"inductive": True, "cal_size": 0.3}), + ("cross_val", {"inductive": False, "n_splits": 5}), + ] + + for mode_name, mode_params in modes: + # Binary classification + va_cal_binary = VennAbersCalibrator( + estimator=RandomForestClassifier( + n_estimators=10, random_state=random_state + ), + random_state=random_state, + **mode_params + ) + va_cal_binary.fit(X_binary_train, y_binary_train) + + probs_binary = va_cal_binary.predict_proba(X_binary_test) + preds_binary = va_cal_binary.predict(X_binary_test) + + # Validate binary results + assert probs_binary.shape == (len(X_binary_test), 2) + assert preds_binary.shape == (len(X_binary_test),) + assert np.allclose(probs_binary.sum(axis=1), 1.0) + assert np.all((probs_binary >= 0) & (probs_binary <= 1)) + + # Multi-class classification + va_cal_multi = VennAbersCalibrator( + estimator=RandomForestClassifier( + n_estimators=10, random_state=random_state + ), + random_state=random_state, + **mode_params + ) + va_cal_multi.fit(X_multi_train, y_multi_train) + + probs_multi = va_cal_multi.predict_proba(X_multi_test) + preds_multi = va_cal_multi.predict(X_multi_test) + + # Validate multi-class results + assert probs_multi.shape == (len(X_multi_test), 3) + assert preds_multi.shape == (len(X_multi_test),) + assert np.allclose(probs_multi.sum(axis=1), 1.0) + assert np.all((probs_multi >= 0) & (probs_multi <= 1)) + + # Test prefit mode separately + clf_binary = RandomForestClassifier( + n_estimators=10, random_state=random_state + ) + clf_binary.fit(X_binary_proper, y_binary_proper) + + va_cal_prefit = VennAbersCalibrator(estimator=clf_binary, cv="prefit") + va_cal_prefit.fit(X_binary_cal, y_binary_cal) + + probs_prefit = va_cal_prefit.predict_proba(X_binary_test) + assert probs_prefit.shape == (len(X_binary_test), 2) + assert np.allclose(probs_prefit.sum(axis=1), 1.0) + + +def test_predict_proba_prefitted_va_one_vs_all(): + """ + Test predict_proba_prefitted_va with one_vs_all strategy + to cover lines 345-368. + """ + # Generate multiclass classification data + X, y = make_classification( + n_samples=500, + n_classes=3, + n_informative=10, + n_redundant=0, + n_clusters_per_class=1, + random_state=42 + ) + + # Split into train, calibration, and test sets + X_train, X_temp, y_train, y_temp = train_test_split( + X, y, test_size=0.4, random_state=42 + ) + X_cal, X_test, y_cal, y_test = train_test_split( + X_temp, y_temp, test_size=0.5, random_state=42 + ) + + # Train a classifier + clf = GaussianNB() + clf.fit(X_train, y_train) + + # Get probability predictions + p_cal = clf.predict_proba(X_cal) + p_test = clf.predict_proba(X_test) + + # Test one_vs_all strategy + p_calibrated, p0p1 = predict_proba_prefitted_va( + p_cal, y_cal, p_test, precision=None, va_tpe='one_vs_all' + ) + + # Assertions + assert p_calibrated.shape == p_test.shape + assert np.allclose(p_calibrated.sum(axis=1), 1.0) + assert len(p0p1) == 3 # One for each class + assert all(p.shape == (len(p_test), 2) for p in p0p1) + + # Test with precision parameter + p_calibrated_prec, p0p1_prec = predict_proba_prefitted_va( + p_cal, y_cal, p_test, precision=3, va_tpe='one_vs_all' + ) + + assert p_calibrated_prec.shape == p_test.shape + assert np.allclose(p_calibrated_prec.sum(axis=1), 1.0) + + +def test_predict_proba_prefitted_va_one_vs_one(): + """ + Test predict_proba_prefitted_va with one_vs_one strategy + for comparison and completeness. + """ + # Generate multiclass classification data + X, y = make_classification( + n_samples=500, + n_classes=3, + n_informative=10, + n_redundant=0, + n_clusters_per_class=1, + random_state=42 + ) + + # Split into train, calibration, and test sets + X_train, X_temp, y_train, y_temp = train_test_split( + X, y, test_size=0.4, random_state=42 + ) + X_cal, X_test, y_cal, y_test = train_test_split( + X_temp, y_temp, test_size=0.5, random_state=42 + ) + + # Train a classifier + clf = GaussianNB() + clf.fit(X_train, y_train) + + # Get probability predictions + p_cal = clf.predict_proba(X_cal) + p_test = clf.predict_proba(X_test) + + # Test one_vs_one strategy + p_calibrated, p0p1 = predict_proba_prefitted_va( + p_cal, y_cal, p_test, precision=None, va_tpe='one_vs_one' + ) + + # Assertions + assert p_calibrated.shape == p_test.shape + assert np.allclose(p_calibrated.sum(axis=1), 1.0) + assert len(p0p1) == 3 # C(3,2) = 3 pairs + + +def test_predict_proba_prefitted_va_invalid_type(): + """ + Test that invalid va_tpe raises ValueError. + """ + # Generate simple data + X, y = make_classification(n_samples=100, n_classes=2, random_state=42) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) + + clf = GaussianNB() + clf.fit(X_train, y_train) + + p_cal = clf.predict_proba(X_train) + p_test = clf.predict_proba(X_test) + + with pytest.raises(ValueError, match="Invalid va_tpe"): + predict_proba_prefitted_va( + p_cal, y_train, p_test, va_tpe='invalid_type' + ) + + +def test_venn_abers_basic(): + """ + Test basic VennAbers functionality for binary classification. + """ + # Generate binary classification data + X, y = make_classification(n_samples=500, n_classes=2, random_state=42) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) + + # Further split training data + X_train_proper, X_cal, y_train_proper, y_cal = train_test_split( + X_train, y_train, test_size=0.2, random_state=42 + ) + + # Train classifier + clf = GaussianNB() + clf.fit(X_train_proper, y_train_proper) + + # Get probabilities + p_cal = clf.predict_proba(X_cal) + p_test = clf.predict_proba(X_test) + + # Apply Venn-ABERS calibration + va = VennAbers() + va.fit(p_cal, y_cal) + p_prime, p0_p1 = va.predict_proba(p_test) + + # Assertions + assert p_prime.shape == (len(X_test), 2) + assert p0_p1.shape == (len(X_test), 2) + assert np.allclose(p_prime.sum(axis=1), 1.0) + + # Test with precision + va_prec = VennAbers() + va_prec.fit(p_cal, y_cal, precision=3) + p_prime_prec, _ = va_prec.predict_proba(p_test) + assert p_prime_prec.shape == (len(X_test), 2) + + +def test_venn_abers_cv_brier_loss() -> None: + """Test VennAbersCV with Brier loss (non-log loss).""" + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=False, + n_splits=3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + # Use 'brier' loss to trigger the else branch + probs_brier = va_cal.predict_proba(X_binary_test, loss='brier') + + # Should produce valid probabilities + assert probs_brier.shape == (len(X_binary_test), 2) + assert np.allclose(probs_brier.sum(axis=1), 1.0) + assert np.all((probs_brier >= 0) & (probs_brier <= 1)) + + +def test_venn_abers_cv_p0_p1_output() -> None: + """Test VennAbersCV predict_proba with p0_p1_output=True.""" + from sklearn.naive_bayes import GaussianNB + from mapie._venn_abers import VennAbersCV + + # Create and fit VennAbersCV in inductive mode + va_cv = VennAbersCV( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cv.fit(X_binary_train, y_binary_train) + + # Call predict_proba with p0_p1_output=True to reach the target code + p_prime, p0_p1 = va_cv.predict_proba(X_binary_test, p0_p1_output=True) + + # Verify the outputs + assert p_prime.shape == (len(X_binary_test), 2) + assert p0_p1.shape == (len(X_binary_test), 2) # Should have p0 and p1 stacked + assert np.allclose(p_prime.sum(axis=1), 1.0) + assert np.all((p_prime >= 0) & (p_prime <= 1)) + assert np.all((p0_p1 >= 0) & (p0_p1 <= 1)) + + +def test_multiclass_cross_validation_requires_n_splits() -> None: + """Test that VennAbersMultiClass in CVAP mode requires n_splits parameter.""" + from mapie._venn_abers import VennAbersMultiClass + + va_multi = VennAbersMultiClass( + estimator=GaussianNB(), + inductive=False, + n_splits=None # Missing n_splits for cross-validation mode + ) + + with pytest.raises( + Exception, + match=r".*For Cross Venn ABERS please provide n_splits.*" + ): + va_multi.fit(X_multi_train, y_multi_train) + + +def test_inductive_missing_size_parameters_raises_error(): + """Test that inductive mode raises error + when both cal_size and train_proper_size are None. + """ + # Generate multi-class dataset + X, y = make_classification( + n_samples=100, + n_classes=3, + n_informative=10, + n_redundant=0, + random_state=42 + ) + + # Create VennAbersMultiClass with inductive=True but no size parameters + va_multi = VennAbersMultiClass( + estimator=GaussianNB(), + inductive=True, + cal_size=None, + train_proper_size=None, + random_state=42 + ) + + # Should raise Exception when fitting without size parameters + with pytest.raises( + Exception, + match="For Inductive Venn-ABERS please provide either calibration" + ): + va_multi.fit(X, y) + + +def test_multiclass_p0_p1_output() -> None: + """Test VennAbersMultiClass with p0_p1_output=True.""" + from mapie._venn_abers import VennAbersMultiClass + from sklearn.naive_bayes import GaussianNB + import numpy as np + + # Use the existing test data fixtures + random_state = 42 + np.random.seed(random_state) + + # Generate multiclass data + n_samples = 100 + n_features = 4 + n_classes = 3 + + X_train = np.random.randn(n_samples, n_features) + y_train = np.random.randint(0, n_classes, n_samples) + + X_test = np.random.randn(30, n_features) + + # Create and fit VennAbersMultiClass + estimator = GaussianNB() + va_multi = VennAbersMultiClass( + estimator=estimator, + inductive=True, + cal_size=0.3, + random_state=random_state + ) + + va_multi.fit(X_train, y_train) + + # Test with p0_p1_output=True + p_prime, p0_p1_list = va_multi.predict_proba( + X_test, + loss='log', + p0_p1_output=True + ) + + # Verify p_prime shape and properties + assert p_prime.shape == (len(X_test), n_classes) + assert np.allclose(p_prime.sum(axis=1), 1.0) + assert np.all((p_prime >= 0) & (p_prime <= 1)) + + # Verify p0_p1_list structure + # For 3 classes, we should have C(3,2) = 3 pairwise comparisons + n_pairs = n_classes * (n_classes - 1) // 2 + assert len(p0_p1_list) == n_pairs + + # Verify each p0_p1 entry has correct shape + # Each entry should have shape (n_test_samples, 2*n_splits) for IVAP + for p0_p1 in p0_p1_list: + assert p0_p1.shape[0] == len(X_test) + assert p0_p1.shape[1] >= 2 # At least p0 and p1 for one split + + # Verify multiclass_probs and multiclass_p0p1 are populated + assert len(va_multi.multiclass_probs) == n_pairs + assert len(va_multi.multiclass_p0p1) == n_pairs + + # Verify each multiclass_probs entry is binary probabilities + for probs in va_multi.multiclass_probs: + assert probs.shape == (len(X_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + + +def test_venn_abers_multiclass_p0_p1_output() -> None: + """Test VennAbersMultiClass.predict_proba with p0_p1_output=True.""" + + # Setup test data + random_state = 42 + np.random.seed(random_state) + + n_samples = 150 + n_features = 4 + n_classes = 3 + + X_train = np.random.randn(n_samples, n_features) + y_train = np.random.randint(0, n_classes, n_samples) + X_test = np.random.randn(30, n_features) + + # Test with inductive mode + estimator = GaussianNB() + va_multi = VennAbersMultiClass( + estimator=estimator, + inductive=True, + cal_size=0.3, + random_state=random_state + ) + + va_multi.fit(X_train, y_train) + + # Test with p0_p1_output=True + p_prime, p0_p1_list = va_multi.predict_proba( + X_test, + loss='log', + p0_p1_output=True + ) + + # Verify p_prime shape and properties + assert p_prime.shape == (len(X_test), n_classes) + assert np.allclose(p_prime.sum(axis=1), 1.0) + assert np.all((p_prime >= 0) & (p_prime <= 1)) + + # Verify p0_p1_list structure + # For 3 classes with one-vs-one, we should have C(3,2) = 3 pairwise comparisons + n_pairs = n_classes * (n_classes - 1) // 2 + assert len(p0_p1_list) == n_pairs + + # Verify each p0_p1 entry has correct shape + for p0_p1 in p0_p1_list: + assert p0_p1.shape[0] == len(X_test) + # For inductive mode with n_splits=1, should have 2 columns (p0 and p1) + assert p0_p1.shape[1] == 2 + assert np.all((p0_p1 >= 0) & (p0_p1 <= 1)) + + # Verify multiclass_p0p1 attribute is populated + assert len(va_multi.multiclass_p0p1) == n_pairs + assert va_multi.multiclass_p0p1 == p0_p1_list + + # Test with p0_p1_output=False (default behavior) + p_prime_only = va_multi.predict_proba(X_test, loss='log', p0_p1_output=False) + + # Verify it returns only p_prime + assert isinstance(p_prime_only, np.ndarray) + assert p_prime_only.shape == (len(X_test), n_classes) + assert np.allclose(p_prime_only.sum(axis=1), 1.0) + + # Test with cross-validation mode + va_multi_cv = VennAbersMultiClass( + estimator=GaussianNB(), + inductive=False, + n_splits=3, + random_state=random_state + ) + + va_multi_cv.fit(X_train, y_train) + + p_prime_cv, p0_p1_list_cv = va_multi_cv.predict_proba( + X_test, + loss='log', + p0_p1_output=True + ) + + # Verify CV mode results + assert p_prime_cv.shape == (len(X_test), n_classes) + assert len(p0_p1_list_cv) == n_pairs + + # For CV mode with n_splits=3, each p0_p1 should have 6 columns (2 * n_splits) + for p0_p1_cv in p0_p1_list_cv: + assert p0_p1_cv.shape[0] == len(X_test) + assert p0_p1_cv.shape[1] == 2 * 3 # 2 * n_splits + assert np.all((p0_p1_cv >= 0) & (p0_p1_cv <= 1)) + + # Test with Brier loss + p_prime_brier, p0_p1_brier = va_multi.predict_proba( + X_test, + loss='brier', + p0_p1_output=True + ) + + assert p_prime_brier.shape == (len(X_test), n_classes) + assert len(p0_p1_brier) == n_pairs + assert np.allclose(p_prime_brier.sum(axis=1), 1.0) + + +def test_prefit_predict_proba_without_single_estimator() -> None: + """ + Test that predict_proba raises RuntimeError when single_estimator_ + is None in prefit mode. + """ + + clf = GaussianNB() + clf.fit(X_binary_proper, y_binary_proper) + + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal.fit(X_binary_cal, y_binary_cal) + + # Manually set single_estimator_ to None to simulate the error condition + va_cal.single_estimator_ = None + + with pytest.raises( + RuntimeError, + match=r"single_estimator_ should not be None in prefit mode" + ): + va_cal.predict_proba(X_binary_test) + + +def test_prefit_predict_proba_without_n_classes() -> None: + """ + Test that predict_proba raises RuntimeError when n_classes_ + is None after fitting in prefit mode. + """ + + clf = GaussianNB() + clf.fit(X_binary_proper, y_binary_proper) + + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal.fit(X_binary_cal, y_binary_cal) + + # Manually set n_classes_ to None to simulate the error condition + va_cal.n_classes_ = None + + with pytest.raises( + RuntimeError, + match=r"n_classes_ should not be None after fitting" + ): + va_cal.predict_proba(X_binary_test) + + +def test_prefit_predict_proba_binary_without_va_calibrator() -> None: + """ + Test that predict_proba raises RuntimeError when va_calibrator_ + is None for binary classification in prefit mode. + """ + + clf = GaussianNB() + clf.fit(X_binary_proper, y_binary_proper) + + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal.fit(X_binary_cal, y_binary_cal) + + # Manually set va_calibrator_ to None to simulate the error condition + va_cal.va_calibrator_ = None + + with pytest.raises( + RuntimeError, + match=r"va_calibrator_ should not be None for binary classification" + ): + va_cal.predict_proba(X_binary_test) + + +def test_prefit_predict_proba_binary_with_loss_parameter() -> None: + """ + Test that predict_proba correctly uses loss parameter when available + in va_calibrator_.predict_proba for binary classification in prefit mode. + """ + + clf = GaussianNB() + clf.fit(X_binary_proper, y_binary_proper) + + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") + va_cal.fit(X_binary_cal, y_binary_cal) + + # Test with default loss='log' + probs_log = va_cal.predict_proba(X_binary_test, loss='log') + + # Test with loss='brier' + probs_brier = va_cal.predict_proba(X_binary_test, loss='brier') + + # Verify output shape and properties + assert probs_log.shape == (len(X_binary_test), 2) + assert probs_brier.shape == (len(X_binary_test), 2) + assert np.allclose(probs_log.sum(axis=1), 1.0) + assert np.allclose(probs_brier.sum(axis=1), 1.0) + + +def test_inductive_predict_proba_with_wrong_calibrator_type() -> None: + """ + Test that predict_proba raises RuntimeError when va_calibrator_ + is not a VennAbersMultiClass instance in inductive/cross-validation mode. + """ + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + # Manually set va_calibrator_ to wrong type + # (VennAbers instead of VennAbersMultiClass) + va_cal.va_calibrator_ = VennAbers() + + with pytest.raises( + RuntimeError, + match=r"va_calibrator_ should be VennAbersMultiClass instance in " + r"inductive/cross-validation mode" + ): + va_cal.predict_proba(X_binary_test) + + +def test_inductive_predict_proba_without_loss_parameter() -> None: + """ + Test that predict_proba works correctly when va_calibrator_.predict_proba + doesn't have a loss parameter in inductive/cross-validation mode. + """ + import inspect + + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + # Create a mock that inherits from VennAbersMultiClass + class MockVennAbersMultiClass(VennAbersMultiClass): + def predict_proba(self, X, p0_p1_output=False): + """Mock predict_proba without loss parameter.""" + probs = np.random.rand(len(X), 2) + probs = probs / probs.sum(axis=1, keepdims=True) + return probs + + # Replace with mock that doesn't have loss parameter + mock_calibrator = MockVennAbersMultiClass( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3 + ) + + # Verify the mock's predict_proba doesn't have 'loss' parameter + sig = inspect.signature(mock_calibrator.predict_proba) + assert 'loss' not in sig.parameters + + va_cal.va_calibrator_ = mock_calibrator + + # Call predict_proba - should use the else branch without loss parameter + probs = va_cal.predict_proba(X_binary_test) + + # Verify output shape + assert probs.shape == (len(X_binary_test), 2) + assert np.allclose(probs.sum(axis=1), 1.0) + + +def test_predict_without_n_classes() -> None: + """ + Test that predict raises RuntimeError when n_classes_ + is None after fitting. + """ + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + # Manually set n_classes_ to None to simulate the error condition + va_cal.n_classes_ = None + + with pytest.raises( + RuntimeError, + match=r"n_classes_ should not be None after fitting" + ): + va_cal.predict(X_binary_test) + + +def test_predict_without_classes() -> None: + """ + Test that predict raises RuntimeError when classes_ + is None after fitting. + """ + va_cal = VennAbersCalibrator( + estimator=GaussianNB(), + inductive=True, + cal_size=0.3, + random_state=random_state + ) + va_cal.fit(X_binary_train, y_binary_train) + + # Manually set classes_ to None to simulate the error condition + va_cal.classes_ = None + + with pytest.raises( + RuntimeError, + match=r"classes_ should not be None after fitting" + ): + va_cal.predict(X_binary_test) diff --git a/mapie/utils.py b/mapie/utils.py index 34b288e15..c791bdc76 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -2,7 +2,6 @@ import warnings from inspect import signature from typing import Any, Iterable, Optional, Tuple, Union, cast - import numpy as np from sklearn.base import ClassifierMixin, RegressorMixin from sklearn.linear_model import LogisticRegression From a9ab37a91112d10d25c4d1741eacd0230b52b264 Mon Sep 17 00:00:00 2001 From: OmG Date: Mon, 27 Oct 2025 15:56:08 +0100 Subject: [PATCH 02/23] fix: type check for self.n_classes_ and self.classes_ in predict function not to be None --- mapie/calibration.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mapie/calibration.py b/mapie/calibration.py index 6513bc74f..1ae448da8 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -1091,6 +1091,18 @@ def predict( """ check_is_fitted(self, self.fit_attributes) + # Type guard: ensure n_classes_ is not None after fit + if self.n_classes_ is None: + raise RuntimeError( + "n_classes_ should not be None after fitting" + ) + + # Type guard: ensure classes_ is not None after fit + if self.classes_ is None: + raise RuntimeError( + "classes_ should not be None after fitting" + ) + # Get calibrated probabilities p_prime = self.predict_proba(X, loss=loss) From fabf0a9415dab917eccc647d5e376f04892012d8 Mon Sep 17 00:00:00 2001 From: OmG Date: Mon, 27 Oct 2025 16:10:20 +0100 Subject: [PATCH 03/23] fix: add two local variables to resolve type-checking issue for self.n_classes_ variable --- mapie/calibration.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/mapie/calibration.py b/mapie/calibration.py index 1ae448da8..ad00b13e5 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -1106,24 +1106,16 @@ def predict( # Get calibrated probabilities p_prime = self.predict_proba(X, loss=loss) - # Type guard: ensure n_classes_ is not None after fit - if self.n_classes_ is None: - raise RuntimeError( - "n_classes_ should not be None after fitting" - ) - - # Type guard: ensure classes_ is not None after fit - if self.classes_ is None: - raise RuntimeError( - "classes_ should not be None after fitting" - ) + # Store classes_ in a local variable to help type checker + classes: NDArray = self.classes_ + n_classes = self.n_classes_ # Convert probabilities to class predictions - if self.n_classes_ <= 2: + if n_classes <= 2: # Binary classification - y_pred = self.classes_[(p_prime[:, 1] >= 0.5).astype(int)] + y_pred = classes[(p_prime[:, 1] >= 0.5).astype(int)] else: # Multi-class classification - y_pred = self.classes_[np.argmax(p_prime, axis=1)] + y_pred = classes[np.argmax(p_prime, axis=1)] return y_pred From 97b55d6ad0964ba1322ee4df95690144fef25302 Mon Sep 17 00:00:00 2001 From: OmG Date: Mon, 27 Oct 2025 16:36:28 +0100 Subject: [PATCH 04/23] fix: type checking for classes_ in fit function --- mapie/calibration.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mapie/calibration.py b/mapie/calibration.py index ad00b13e5..bc1a29949 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -933,6 +933,13 @@ def fit( # Set up classes from the fitted estimator self.single_estimator_ = last_estimator self.classes_ = self.single_estimator_.classes_ + + # Type guard: ensure classes_ is not None + if self.classes_ is None: + raise RuntimeError( + "classes_ should not be None after fitting estimator" + ) + self.n_classes_ = len(self.classes_) # Get predictions from the fitted estimator From bb8ce97f4829aee151103d08a7f51892f1ba9ede Mon Sep 17 00:00:00 2001 From: OmG Date: Mon, 27 Oct 2025 18:34:51 +0100 Subject: [PATCH 05/23] fix: coverage for a new case of None classes_ in fit function --- mapie/tests/test_venn_abers_calibration.py | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mapie/tests/test_venn_abers_calibration.py b/mapie/tests/test_venn_abers_calibration.py index e364454dc..e9a1c60fe 100644 --- a/mapie/tests/test_venn_abers_calibration.py +++ b/mapie/tests/test_venn_abers_calibration.py @@ -2966,3 +2966,32 @@ def test_predict_without_classes() -> None: match=r"classes_ should not be None after fitting" ): va_cal.predict(X_binary_test) + + +def test_prefit_classes_none_after_fitting() -> None: + """ + Test that fit raises RuntimeError when classes_ is None + after fitting estimator in prefit mode. + """ + from sklearn.naive_bayes import GaussianNB + + # Create and fit a base estimator + clf = GaussianNB() + clf.fit(X_binary_train, y_binary_train) + + # Create VennAbersCalibrator in prefit mode + va_cal = VennAbersCalibrator( + estimator=clf, + cv="prefit", + random_state=random_state + ) + + # Manually set the classes_ attribute to None + # to simulate the error condition + clf.classes_ = None + + with pytest.raises( + RuntimeError, + match=r"classes_ should not be None after fitting estimator" + ): + va_cal.fit(X_binary_test, y_binary_test) From bdb9f0e957c96fbaee32439ffef3626786e9d135 Mon Sep 17 00:00:00 2001 From: OmG Date: Tue, 28 Oct 2025 11:15:23 +0100 Subject: [PATCH 06/23] Fix: misinformation in calib_size doctring --- mapie/calibration.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mapie/calibration.py b/mapie/calibration.py index bc1a29949..61145a275 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -857,10 +857,12 @@ def fit( By default ``None``. calib_size : Optional[float], default=0.33 - If ``cv == split`` and X_calib and y_calib are not defined, then - the calibration dataset is created with the split defined by - calib_size. For inductive Venn-ABERS, this determines the proportion - of data used for calibration. + For inductive Venn-ABERS (when ``cv=None`` and ``inductive=True``), + this determines the proportion of data used for calibration. + If float, should be between 0.0 and 1.0 and represent the proportion + of the dataset to include in the calibration split. + If int, represents the absolute number of calibration samples. + Ignored when ``cv="prefit"``. random_state : Optional[Union[int, np.random.RandomState, None]], default=None Controls the shuffling applied to the data before applying the split. From 469fb52840eed84ba164dc94f444846db6facb1e Mon Sep 17 00:00:00 2001 From: OmG Date: Tue, 28 Oct 2025 14:00:38 +0100 Subject: [PATCH 07/23] fix: improve doc string of calib_size --- mapie/calibration.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/mapie/calibration.py b/mapie/calibration.py index 61145a275..e41d3f37e 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -857,12 +857,17 @@ def fit( By default ``None``. calib_size : Optional[float], default=0.33 - For inductive Venn-ABERS (when ``cv=None`` and ``inductive=True``), - this determines the proportion of data used for calibration. - If float, should be between 0.0 and 1.0 and represent the proportion - of the dataset to include in the calibration split. - If int, represents the absolute number of calibration samples. - Ignored when ``cv="prefit"``. + Proportion of the dataset to use for calibration when using + Inductive Venn-ABERS (IVAP) mode (``inductive=True`` and ``cv=None``). + + - If float, should be between 0.0 and 1.0 and represents the + proportion of the dataset to include in the calibration split. + - If int, represents the absolute number of calibration samples. + - If ``None``, uses the value from the constructor's ``cal_size`` + parameter (default 0.33). + + This parameter is ignored when ``cv="prefit"`` or when using + Cross Venn-ABERS (``inductive=False``). random_state : Optional[Union[int, np.random.RandomState, None]], default=None Controls the shuffling applied to the data before applying the split. From 5ffddbd7454cbac8439660bd7194c7413402d198 Mon Sep 17 00:00:00 2001 From: OmG Date: Tue, 28 Oct 2025 15:21:58 +0100 Subject: [PATCH 08/23] refactor: remove unnecessary cal_size in the calibrator --- mapie/calibration.py | 26 +-- mapie/tests/test_venn_abers_calibration.py | 185 +++++---------------- 2 files changed, 42 insertions(+), 169 deletions(-) diff --git a/mapie/calibration.py b/mapie/calibration.py index e41d3f37e..7c67a6a92 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -595,15 +595,6 @@ class VennAbersCalibrator(BaseEstimator, ClassifierMixin): Only used when ``inductive=False`` and ``cv=None``. Uses ``sklearn.model_selection.StratifiedKFold`` functionality. - cal_size : Optional[float], default=None - Proportion of the dataset to use for calibration in Inductive - Venn-ABERS (IVAP). Only used when ``inductive=True`` and ``cv=None``. - - - If float, should be between 0.0 and 1.0. - - If int, represents the absolute number of calibration samples. - - If ``None``, uses the value provided in the ``fit`` method - (default 0.33). - train_proper_size : Optional[float], default=None Proportion of the dataset to use for proper training in Inductive Venn-ABERS (IVAP). Only used when ``inductive=True`` and ``cv=None``. @@ -714,7 +705,6 @@ class VennAbersCalibrator(BaseEstimator, ClassifierMixin): >>> va_cal = VennAbersCalibrator( ... estimator=clf, ... inductive=True, - ... cal_size=0.3, ... random_state=42 ... ) >>> _ = va_cal.fit(X_train, y_train) @@ -774,7 +764,6 @@ def __init__( cv: Optional[str] = None, inductive: bool = True, n_splits: Optional[int] = None, - cal_size: Optional[float] = None, train_proper_size: Optional[float] = None, random_state: Optional[int] = None, shuffle: bool = True, @@ -785,7 +774,6 @@ def __init__( self.cv = cv self.inductive = inductive self.n_splits = n_splits - self.cal_size = cal_size self.train_proper_size = train_proper_size self.random_state = random_state self.shuffle = shuffle @@ -859,13 +847,8 @@ def fit( calib_size : Optional[float], default=0.33 Proportion of the dataset to use for calibration when using Inductive Venn-ABERS (IVAP) mode (``inductive=True`` and ``cv=None``). - - - If float, should be between 0.0 and 1.0 and represents the - proportion of the dataset to include in the calibration split. - - If int, represents the absolute number of calibration samples. - - If ``None``, uses the value from the constructor's ``cal_size`` - parameter (default 0.33). - + It should be between 0.0 and 1.0 and represents + the proportion of the dataset to include in the calibration split. This parameter is ignored when ``cv="prefit"`` or when using Cross Venn-ABERS (``inductive=False``). @@ -969,9 +952,6 @@ def fit( if not self.inductive and self.n_splits is None: raise ValueError("For Cross Venn-ABERS please provide n_splits") - # For inductive mode, use calib_size parameter - cal_size_to_use = self.cal_size if self.cal_size is not None else calib_size - # Check random state random_state_to_use: Optional[Union[int, np.random.RandomState]] = None if random_state is not None: @@ -984,7 +964,7 @@ def fit( estimator=last_estimator, inductive=self.inductive, n_splits=self.n_splits, - cal_size=cal_size_to_use, + cal_size=calib_size, train_proper_size=self.train_proper_size, random_state=random_state_to_use, shuffle=shuffle if shuffle is not None else self.shuffle, diff --git a/mapie/tests/test_venn_abers_calibration.py b/mapie/tests/test_venn_abers_calibration.py index e9a1c60fe..631586434 100644 --- a/mapie/tests/test_venn_abers_calibration.py +++ b/mapie/tests/test_venn_abers_calibration.py @@ -86,7 +86,6 @@ def test_default_parameters() -> None: assert va_cal.cv is None assert va_cal.inductive is True assert va_cal.n_splits is None - assert va_cal.cal_size is None assert va_cal.train_proper_size is None assert va_cal.random_state is None assert va_cal.shuffle is True @@ -183,7 +182,6 @@ def test_inductive_mode_binary() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -199,7 +197,6 @@ def test_inductive_mode_multiclass() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) @@ -333,7 +330,6 @@ def test_prefit_inductive_consistency() -> None: va_cal_inductive = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=len(X_binary_cal) / len(X_binary_train), random_state=random_state ) # Combine proper and cal sets @@ -357,7 +353,6 @@ def test_different_estimators_binary(estimator: ClassifierMixin) -> None: va_cal = VennAbersCalibrator( estimator=estimator, inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -374,7 +369,6 @@ def test_different_estimators_multiclass(estimator: ClassifierMixin) -> None: va_cal = VennAbersCalibrator( estimator=estimator, inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) @@ -400,7 +394,6 @@ def test_predict_method_multiclass() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) @@ -416,7 +409,6 @@ def test_predict_proba_consistency() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -435,7 +427,6 @@ def test_predict_proba_shape_binary() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -450,7 +441,6 @@ def test_predict_proba_shape_multiclass() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) @@ -470,7 +460,6 @@ def test_gradient_boosting_with_early_stopping() -> None: va_cal = VennAbersCalibrator( estimator=gb, inductive=True, - cal_size=0.3, random_state=random_state ) @@ -490,7 +479,6 @@ def test_sample_weights_none() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train, sample_weight=None) @@ -507,7 +495,6 @@ def test_sample_weights_constant() -> None: va_cal_none = VennAbersCalibrator( estimator=weighted_estimator, inductive=True, - cal_size=0.3, random_state=random_state ) va_cal_none.fit(X_binary_train, y_binary_train, sample_weight=None) @@ -515,7 +502,6 @@ def test_sample_weights_constant() -> None: va_cal_ones = VennAbersCalibrator( estimator=weighted_estimator, inductive=True, - cal_size=0.3, random_state=random_state ) va_cal_ones.fit( @@ -526,7 +512,6 @@ def test_sample_weights_constant() -> None: va_cal_fives = VennAbersCalibrator( estimator=weighted_estimator, inductive=True, - cal_size=0.3, random_state=random_state ) va_cal_fives.fit( @@ -549,7 +534,6 @@ def test_sample_weights_variable() -> None: va_cal_uniform = VennAbersCalibrator( estimator=RandomForestClassifier(random_state=random_state), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal_uniform.fit(X_binary_train, y_binary_train, sample_weight=None) @@ -566,7 +550,6 @@ def test_sample_weights_variable() -> None: va_cal_weighted = VennAbersCalibrator( estimator=estimator_weighted, inductive=True, - cal_size=0.3, random_state=random_state ) va_cal_weighted.fit( @@ -590,7 +573,6 @@ def test_random_state_reproducibility() -> None: va_cal1 = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=42 ) va_cal1.fit(X_binary_train, y_binary_train) @@ -599,7 +581,6 @@ def test_random_state_reproducibility() -> None: va_cal2 = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=42 ) va_cal2.fit(X_binary_train, y_binary_train) @@ -613,7 +594,6 @@ def test_random_state_in_fit_overrides() -> None: va_cal1 = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=42 ) va_cal1.fit(X_binary_train, y_binary_train, random_state=123) @@ -622,7 +602,6 @@ def test_random_state_in_fit_overrides() -> None: va_cal2 = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=999 # Different from fit ) va_cal2.fit(X_binary_train, y_binary_train, random_state=123) @@ -636,7 +615,6 @@ def test_different_random_states_give_different_results() -> None: va_cal1 = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=42 ) va_cal1.fit(X_binary_train, y_binary_train) @@ -645,7 +623,6 @@ def test_different_random_states_give_different_results() -> None: va_cal2 = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=123 ) va_cal2.fit(X_binary_train, y_binary_train) @@ -664,7 +641,6 @@ def test_shuffle_parameter() -> None: va_cal_shuffle = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state, shuffle=True ) @@ -674,7 +650,6 @@ def test_shuffle_parameter() -> None: va_cal_no_shuffle = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state, shuffle=False ) @@ -689,7 +664,6 @@ def test_shuffle_in_fit_overrides() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state, shuffle=False ) @@ -705,7 +679,6 @@ def test_stratify_parameter() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state, stratify=y_binary_train ) @@ -720,7 +693,6 @@ def test_stratify_in_fit_overrides() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state, stratify=None ) @@ -741,9 +713,8 @@ def test_different_calibration_sizes(cal_size: float) -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=cal_size, random_state=random_state) - va_cal.fit(X_binary_train, y_binary_train) + va_cal.fit(X_binary_train, y_binary_train, calib_size=cal_size) probs = va_cal.predict_proba(X_binary_test) assert probs.shape == (len(X_binary_test), 2) @@ -755,7 +726,6 @@ def test_cal_size_in_fit_overrides() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.2, random_state=random_state ) # Override with calib_size in fit @@ -820,7 +790,6 @@ def test_fitted_attributes_inductive() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -906,7 +875,6 @@ def test_pipeline_compatibility() -> None: va_cal = VennAbersCalibrator( estimator=pipe, inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_df, y_series) @@ -967,7 +935,6 @@ def test_with_pipeline() -> None: va_cal = VennAbersCalibrator( estimator=pipeline, inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1002,7 +969,6 @@ def test_with_column_transformer() -> None: va_cal = VennAbersCalibrator( estimator=pipeline, inductive=True, - cal_size=0.3, random_state=random_state ) @@ -1034,43 +1000,16 @@ def test_multiclass_one_vs_one_strategy() -> None: assert probs.shape == (len(X_multi_test), 3) assert np.allclose(probs.sum(axis=1), 1.0) - -# ============================================================================ -# Fit Parameters Passing Tests -# ============================================================================ - -# def test_fit_parameters_passing() -> None: -# """ -# Test passing fit parameters, here early stopping at iteration 3. -# Checks that underlying GradientBoosting estimators have used 3 iterations -# only during boosting, instead of default value for n_estimators (=100). -# """ -# gb = GradientBoostingClassifier(random_state=random_state) - -# va_cal = VennAbersCalibrator( -# estimator=gb, -# inductive=True, -# cal_size=0.3, -# random_state=random_state -# ) - -# va_cal.fit(X_binary_train, y_binary_train) - -# # For inductive mode, check the underlying estimator -# if hasattr(va_cal.va_calibrator_, 'estimator_'): -# assert va_cal.va_calibrator_ is not None -# assert va_cal.va_calibrator_.estimator_.estimators_.shape[0] == 3 - # ============================================================================ # Check Fitted Tests # ============================================================================ + def test_check_is_fitted_after_fit() -> None: """Test that check_is_fitted passes after fitting.""" va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1091,7 +1030,6 @@ def test_empty_dataset_raises_error() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) with pytest.raises(ValueError): @@ -1106,7 +1044,6 @@ def test_single_class_raises_error() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) with pytest.raises(ValueError): @@ -1121,7 +1058,6 @@ def test_mismatched_X_y_length_raises_error() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) with pytest.raises(ValueError): @@ -1133,7 +1069,6 @@ def test_predict_before_fit_raises_error() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) with pytest.raises(Exception): # NotFittedError or AttributeError @@ -1145,7 +1080,6 @@ def test_predict_proba_before_fit_raises_error() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) with pytest.raises(Exception): # NotFittedError or AttributeError @@ -1157,23 +1091,23 @@ def test_invalid_cal_size_raises_error() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=1.5, # Invalid: > 1.0 random_state=random_state ) with pytest.raises(ValueError): - va_cal.fit(X_binary_train, y_binary_train) + va_cal.fit(X_binary_train, + y_binary_train, + calib_size=1.5) # Invalid: > 1.0 def test_negative_cal_size_raises_error() -> None: - """Test that negative cal_size raises an error.""" + """Test that negative calib_size raises an error.""" va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=-0.1, random_state=random_state ) with pytest.raises(ValueError): - va_cal.fit(X_binary_train, y_binary_train) + va_cal.fit(X_binary_train, y_binary_train, calib_size=-0.1) def test_empty_calibration_set_raises_error() -> None: @@ -1181,12 +1115,14 @@ def test_empty_calibration_set_raises_error() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.99, # Very large cal_size leaves almost no training data random_state=random_state ) # This should work but with a very small training set try: - va_cal.fit(X_binary_train[:10], y_binary_train[:10]) + # Very large calib_size leaves almost no training data + va_cal.fit(X_binary_train[:10], + y_binary_train[:10], + calib_size=0.99) except ValueError: # Expected if the split is invalid pass @@ -1200,7 +1136,6 @@ def test_very_small_dataset() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_small, y_small) @@ -1239,7 +1174,6 @@ def test_probabilities_sum_to_one() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1255,7 +1189,6 @@ def test_probabilities_in_valid_range() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1270,7 +1203,6 @@ def test_multiclass_probabilities_sum_to_one() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) @@ -1285,7 +1217,6 @@ def test_multiclass_probabilities_in_valid_range() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) @@ -1304,7 +1235,6 @@ def test_inductive_vs_cross_validation_different_results() -> None: va_cal_inductive = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal_inductive.fit(X_binary_train, y_binary_train) @@ -1326,7 +1256,7 @@ def test_inductive_vs_cross_validation_different_results() -> None: def test_all_modes_produce_valid_probabilities() -> None: """Test that all calibration modes produce valid probability distributions.""" modes: List[Tuple[str, Dict[str, Any]]] = [ - ("inductive", {"inductive": True, "cal_size": 0.3}), + ("inductive", {"inductive": True}), ("cross_val", {"inductive": False, "n_splits": 5}), ] @@ -1370,7 +1300,6 @@ def test_perfect_predictions_no_calibration_needed() -> None: va_cal = VennAbersCalibrator( estimator=LogisticRegression(random_state=random_state), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_train_p, y_train_p) @@ -1404,7 +1333,6 @@ def test_imbalanced_dataset() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state, stratify=y_train_imb ) @@ -1435,7 +1363,6 @@ def test_many_classes() -> None: va_cal = VennAbersCalibrator( estimator=RandomForestClassifier(random_state=random_state), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_train_many, y_train_many) @@ -1451,10 +1378,11 @@ def test_small_calibration_set() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.1, # Very small calibration set random_state=random_state ) - va_cal.fit(X_binary_train, y_binary_train) + va_cal.fit(X_binary_train, + y_binary_train, + calib_size=0.1) # Very small calibration set probs = va_cal.predict_proba(X_binary_test) # Should still work, though calibration quality may be lower @@ -1467,10 +1395,11 @@ def test_large_calibration_set() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.8, # Very large calibration set random_state=random_state ) - va_cal.fit(X_binary_train, y_binary_train) + va_cal.fit(X_binary_train, + y_binary_train, + calib_size=0.8) # Very large calibration set probs = va_cal.predict_proba(X_binary_test) # Should still work, though training set is small @@ -1487,7 +1416,6 @@ def test_multiple_fits_same_data() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) @@ -1505,7 +1433,6 @@ def test_predict_single_sample() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1524,7 +1451,6 @@ def test_predict_multiple_times_same_result() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1548,7 +1474,6 @@ def test_pandas_dataframe_input() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_df, y_series) @@ -1564,7 +1489,6 @@ def test_numpy_array_input() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1584,7 +1508,6 @@ def test_mixed_input_types() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_df, y_array) @@ -1601,7 +1524,6 @@ def test_with_pandas_dataframe() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_train_df, y_binary_train) @@ -1618,7 +1540,6 @@ def test_with_pandas_series() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_train_series) @@ -1638,7 +1559,6 @@ def test_integration_with_cross_validation() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) @@ -1651,27 +1571,27 @@ def test_integration_with_cross_validation() -> None: assert np.all(scores >= 0) and np.all(scores <= 1) -def test_integration_with_grid_search() -> None: - """Test integration with sklearn's GridSearchCV.""" - from sklearn.model_selection import GridSearchCV +# def test_integration_with_grid_search() -> None: +# """Test integration with sklearn's GridSearchCV.""" +# from sklearn.model_selection import GridSearchCV - va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state - ) +# va_cal = VennAbersCalibrator( +# estimator=GaussianNB(), +# inductive=True, +# random_state=random_state +# ) - param_grid = { - 'cal_size': [0.2, 0.3, 0.4], - } +# param_grid = { +# 'cal_size': [0.2, 0.3, 0.4], +# } - grid_search = GridSearchCV( - va_cal, param_grid, cv=3, scoring='accuracy' - ) - grid_search.fit(X_binary_train, y_binary_train) +# grid_search = GridSearchCV( +# va_cal, param_grid, cv=3, scoring='accuracy' +# ) +# grid_search.fit(X_binary_train, y_binary_train) - assert hasattr(grid_search, 'best_params_') - assert 'cal_size' in grid_search.best_params_ +# assert hasattr(grid_search, 'best_params_') +# assert 'cal_size' in grid_search.best_params_ def test_clone_estimator() -> None: @@ -1681,7 +1601,6 @@ def test_clone_estimator() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1695,7 +1614,6 @@ def test_clone_estimator() -> None: is_fitted = False # Clone should have same parameters but not be fitted - assert va_cal_clone.cal_size == va_cal.cal_size assert va_cal_clone.inductive == va_cal.inductive assert is_fitted is False @@ -1722,7 +1640,6 @@ def test_large_dataset_performance() -> None: n_estimators=10, random_state=random_state ), inductive=True, - cal_size=0.3, random_state=random_state, precision=2 # Use precision for faster computation ) @@ -1754,7 +1671,6 @@ def test_high_dimensional_data() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_train_hd, y_train_hd) @@ -1778,7 +1694,7 @@ def test_basic_example_from_docstring() -> None: X_train, X_test, y_train, y_test = train_test_split(X, y) clf = GaussianNB() - va_cal = VennAbersCalibrator(estimator=clf, inductive=True, cal_size=0.3) + va_cal = VennAbersCalibrator(estimator=clf, inductive=True) va_cal.fit(X_train, y_train) p_prime = va_cal.predict_proba(X_test) @@ -1833,7 +1749,6 @@ def test_comparison_with_uncalibrated() -> None: va_cal = VennAbersCalibrator( estimator=RandomForestClassifier(random_state=random_state), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1857,7 +1772,6 @@ def test_backward_compatibility_basic_usage() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -1928,7 +1842,6 @@ def test_inductive_with_very_small_dataset() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) @@ -1948,7 +1861,6 @@ def test_classes_attribute() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1964,7 +1876,6 @@ def test_n_classes_attribute() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1978,7 +1889,6 @@ def test_va_calibrator_attribute() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -2008,7 +1918,6 @@ def test_multiclass_binary_calibration() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) @@ -2054,7 +1963,6 @@ def test_multiclass_predictions_match_argmax() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) @@ -2080,7 +1988,6 @@ def test_multiclass_with_different_estimators() -> None: va_cal = VennAbersCalibrator( estimator=estimator, inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) @@ -2101,7 +2008,6 @@ def test_precision_parameter(precision: Optional[int]) -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state, precision=precision ) @@ -2120,7 +2026,6 @@ def test_precision_speeds_up_computation() -> None: va_cal_no_precision = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state, precision=None ) @@ -2133,7 +2038,6 @@ def test_precision_speeds_up_computation() -> None: va_cal_with_precision = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state, precision=2 ) @@ -2153,7 +2057,6 @@ def test_different_precision_values(precision: int) -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state, precision=precision ) @@ -2169,7 +2072,6 @@ def test_precision_maintains_calibration_quality() -> None: va_cal_high_prec = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state, precision=4 ) @@ -2179,7 +2081,6 @@ def test_precision_maintains_calibration_quality() -> None: va_cal_low_prec = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state, precision=2 ) @@ -2199,7 +2100,6 @@ def test_precision_parameter_multiclass() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, precision=6, random_state=random_state ) @@ -2298,7 +2198,7 @@ def test_comprehensive_workflow() -> None: """Comprehensive test covering multiple aspects of VennAbersCalibrator.""" # Test all three modes with binary classification modes: List[Tuple[str, Dict[str, Any]]] = [ - ("inductive", {"inductive": True, "cal_size": 0.3}), + ("inductive", {"inductive": True}), ("cross_val", {"inductive": False, "n_splits": 5}), ] @@ -2534,7 +2434,6 @@ def test_venn_abers_cv_p0_p1_output() -> None: va_cv = VennAbersCV( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cv.fit(X_binary_train, y_binary_train) @@ -2569,7 +2468,7 @@ def test_multiclass_cross_validation_requires_n_splits() -> None: def test_inductive_missing_size_parameters_raises_error(): """Test that inductive mode raises error - when both cal_size and train_proper_size are None. + when train_proper_size is None. """ # Generate multi-class dataset X, y = make_classification( @@ -2584,7 +2483,6 @@ def test_inductive_missing_size_parameters_raises_error(): va_multi = VennAbersMultiClass( estimator=GaussianNB(), inductive=True, - cal_size=None, train_proper_size=None, random_state=42 ) @@ -2861,7 +2759,6 @@ def test_inductive_predict_proba_with_wrong_calibrator_type() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -2888,7 +2785,6 @@ def test_inductive_predict_proba_without_loss_parameter() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -2904,8 +2800,7 @@ def predict_proba(self, X, p0_p1_output=False): # Replace with mock that doesn't have loss parameter mock_calibrator = MockVennAbersMultiClass( estimator=GaussianNB(), - inductive=True, - cal_size=0.3 + inductive=True ) # Verify the mock's predict_proba doesn't have 'loss' parameter @@ -2930,7 +2825,6 @@ def test_predict_without_n_classes() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -2953,7 +2847,6 @@ def test_predict_without_classes() -> None: va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - cal_size=0.3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) From 5466ab609f163da9a3a6efb9da91cd6b9fada145 Mon Sep 17 00:00:00 2001 From: OmG Date: Tue, 28 Oct 2025 16:12:34 +0100 Subject: [PATCH 09/23] refactor: apply black reformatter on new files --- mapie/_venn_abers.py | 225 +++--- mapie/calibration.py | 114 ++-- mapie/tests/test_venn_abers_calibration.py | 757 +++++++-------------- mapie/utils.py | 237 +++---- 4 files changed, 511 insertions(+), 822 deletions(-) diff --git a/mapie/_venn_abers.py b/mapie/_venn_abers.py index df78196b6..e1bc73efd 100644 --- a/mapie/_venn_abers.py +++ b/mapie/_venn_abers.py @@ -6,7 +6,7 @@ from sklearn.exceptions import NotFittedError sklearn.set_config(enable_metadata_routing=True) -np.seterr(divide='ignore', invalid='ignore') +np.seterr(divide="ignore", invalid="ignore") """ Private module containing core Venn-ABERS implementation classes. @@ -18,7 +18,7 @@ def _geo_mean(a): """Geometric mean calculation for Venn-ABERS.""" - return a.prod(axis=1)**(1.0/a.shape[1]) + return a.prod(axis=1) ** (1.0 / a.shape[1]) def calc_p0p1(p_cal, y_cal, precision=None): @@ -75,8 +75,9 @@ def calc_p0p1(p_cal, y_cal, precision=None): Ordered set of unique calibration probabilities """ if precision is not None: - cal = np.hstack((np.round(p_cal[:, 1], precision). - reshape(-1, 1), y_cal.reshape(-1, 1))) + cal = np.hstack( + (np.round(p_cal[:, 1], precision).reshape(-1, 1), y_cal.reshape(-1, 1)) + ) else: cal = np.hstack((p_cal[:, 1].reshape(-1, 1), y_cal.reshape(-1, 1))) ix = np.argsort(cal[:, 0]) @@ -197,8 +198,11 @@ def calc_probs(p0, p1, c, p_test): """ out = p_test[:, 1] p0_p1 = np.hstack( - (p0[np.searchsorted(c, out, 'right'), 1]. - reshape(-1, 1), p1[np.searchsorted(c, out, 'left'), 1].reshape(-1, 1))) + ( + p0[np.searchsorted(c, out, "right"), 1].reshape(-1, 1), + p1[np.searchsorted(c, out, "left"), 1].reshape(-1, 1), + ) + ) p_prime = np.zeros((len(out), 2)) p_prime[:, 1] = p0_p1[:, 1] / (1 - p0_p1[:, 0] + p0_p1[:, 1]) @@ -207,8 +211,9 @@ def calc_probs(p0, p1, c, p_test): return p_prime, p0_p1 -def predict_proba_prefitted_va(p_cal, y_cal, p_test, - precision=None, va_tpe='one_vs_one'): +def predict_proba_prefitted_va( + p_cal, y_cal, p_test, precision=None, va_tpe="one_vs_one" +): """ Generate Venn-ABERS calibrated probabilities for multiclass problems using pre-fitted calibration data. @@ -296,7 +301,7 @@ def predict_proba_prefitted_va(p_cal, y_cal, p_test, (2, 3) """ # Validate va_tpe parameter - if va_tpe not in ['one_vs_one', 'one_vs_all']: + if va_tpe not in ["one_vs_one", "one_vs_all"]: raise ValueError( f"Invalid va_tpe '{va_tpe}'. " f"Allowed values are ['one_vs_one', 'one_vs_all']." @@ -305,7 +310,7 @@ def predict_proba_prefitted_va(p_cal, y_cal, p_test, p_prime = None multiclass_p0p1 = None - if va_tpe == 'one_vs_one': + if va_tpe == "one_vs_one": classes = np.unique(y_cal) class_pairs = [] for i in range(len(classes) - 1): @@ -316,10 +321,12 @@ def predict_proba_prefitted_va(p_cal, y_cal, p_test, multiclass_p0p1 = [] for i, class_pair in enumerate(class_pairs): pairwise_indices = (y_cal == class_pair[0]) + (y_cal == class_pair[1]) - binary_cal_probs = p_cal[:, class_pair][pairwise_indices] / \ - np.sum(p_cal[:, class_pair][pairwise_indices], axis=1).reshape(-1, 1) - binary_test_probs = p_test[:, class_pair] / \ - np.sum(p_test[:, class_pair], axis=1).reshape(-1, 1) + binary_cal_probs = p_cal[:, class_pair][pairwise_indices] / np.sum( + p_cal[:, class_pair][pairwise_indices], axis=1 + ).reshape(-1, 1) + binary_test_probs = p_test[:, class_pair] / np.sum( + p_test[:, class_pair], axis=1 + ).reshape(-1, 1) binary_classes = y_cal[pairwise_indices] == class_pair[1] va = VennAbers() @@ -330,20 +337,26 @@ def predict_proba_prefitted_va(p_cal, y_cal, p_test, p_prime = np.zeros((len(p_test), len(classes))) - for i, cl_id, in enumerate(classes): + for ( + i, + cl_id, + ) in enumerate(classes): stack_i = [ p[:, 0].reshape(-1, 1) for i, p in enumerate(multiclass_probs) - if class_pairs[i][0] == cl_id] + if class_pairs[i][0] == cl_id + ] stack_j = [ p[:, 1].reshape(-1, 1) for i, p in enumerate(multiclass_probs) - if class_pairs[i][1] == cl_id] + if class_pairs[i][1] == cl_id + ] p_stack = stack_i + stack_j - p_prime[:, i] = 1 / \ - (np.sum(np.hstack([(1 / p) for p in p_stack]), axis=1) - - (len(classes) - 2)) + p_prime[:, i] = 1 / ( + np.sum(np.hstack([(1 / p) for p in p_stack]), axis=1) + - (len(classes) - 2) + ) else: classes = np.unique(y_cal) @@ -351,7 +364,7 @@ def predict_proba_prefitted_va(p_cal, y_cal, p_test, multiclass_probs = [] multiclass_p0p1 = [] for _, class_id in enumerate(classes): - class_indices = (y_cal == class_id) + class_indices = y_cal == class_id binary_cal_probs = np.zeros((len(p_cal), 2)) binary_test_probs = np.zeros((len(p_test), 2)) binary_cal_probs[:, 1] = p_cal[:, class_id] @@ -541,16 +554,19 @@ class VennAbersCV: probabilities p_cal are rounded to. Yields significantly faster computation time for larger calibration datasets """ - def __init__(self, - estimator, - inductive, - n_splits=None, - cal_size=None, - train_proper_size=None, - random_state=None, - shuffle=True, - stratify=None, - precision=None): + + def __init__( + self, + estimator, + inductive, + n_splits=None, + cal_size=None, + train_proper_size=None, + random_state=None, + shuffle=True, + stratify=None, + precision=None, + ): self.estimator = estimator self.n_splits = n_splits self.clf_p_cal = [] @@ -564,7 +580,7 @@ def __init__(self, self.precision = precision def fit(self, _x_train, _y_train, sample_weight=None): - """ Fits the IVAP or CVAP calibrator to the training set. + """Fits the IVAP or CVAP calibrator to the training set. Parameters ---------- @@ -583,8 +599,8 @@ def fit(self, _x_train, _y_train, sample_weight=None): # Split sample_weight along with data if provided if sample_weight is not None: - x_train_proper, x_cal, y_train_proper, \ - y_cal, sw_train, sw_cal = train_test_split( + x_train_proper, x_cal, y_train_proper, y_cal, sw_train, sw_cal = ( + train_test_split( _x_train, _y_train, sample_weight, @@ -592,7 +608,9 @@ def fit(self, _x_train, _y_train, sample_weight=None): train_size=self.train_proper_size, random_state=self.random_state, shuffle=self.shuffle, - stratify=self.stratify) + stratify=self.stratify, + ) + ) else: x_train_proper, x_cal, y_train_proper, y_cal = train_test_split( _x_train, @@ -601,14 +619,15 @@ def fit(self, _x_train, _y_train, sample_weight=None): train_size=self.train_proper_size, random_state=self.random_state, shuffle=self.shuffle, - stratify=self.stratify + stratify=self.stratify, ) sw_train = None # Fit estimator with sample weights if provided if sw_train is not None: - self.estimator.fit(x_train_proper, y_train_proper.flatten(), - sample_weight=sw_train) + self.estimator.fit( + x_train_proper, y_train_proper.flatten(), sample_weight=sw_train + ) else: self.estimator.fit(x_train_proper, y_train_proper.flatten()) @@ -616,9 +635,11 @@ def fit(self, _x_train, _y_train, sample_weight=None): self.clf_p_cal.append(clf_prob) self.clf_y_cal.append(y_cal) else: - kf = StratifiedKFold(n_splits=self.n_splits, - shuffle=self.shuffle, - random_state=self.random_state) + kf = StratifiedKFold( + n_splits=self.n_splits, + shuffle=self.shuffle, + random_state=self.random_state, + ) for train_index, test_index in kf.split(_x_train, _y_train): # Extract sample weights for this fold if provided fold_sample_weight = None @@ -627,19 +648,22 @@ def fit(self, _x_train, _y_train, sample_weight=None): # Fit estimator with sample weights if provided if fold_sample_weight is not None: - self.estimator.fit(_x_train[train_index], - _y_train[train_index].flatten(), - sample_weight=fold_sample_weight) + self.estimator.fit( + _x_train[train_index], + _y_train[train_index].flatten(), + sample_weight=fold_sample_weight, + ) else: - self.estimator.fit(_x_train[train_index], - _y_train[train_index].flatten()) + self.estimator.fit( + _x_train[train_index], _y_train[train_index].flatten() + ) clf_prob = self.estimator.predict_proba(_x_train[test_index]) self.clf_p_cal.append(clf_prob) self.clf_y_cal.append(_y_train[test_index]) - def predict_proba(self, _x_test, loss='log', p0_p1_output=False): - """ Generates Venn-ABERS calibrated probabilities. + def predict_proba(self, _x_test, loss="log", p0_p1_output=False): + """Generates Venn-ABERS calibrated probabilities. Parameters @@ -667,9 +691,11 @@ def predict_proba(self, _x_test, loss='log', p0_p1_output=False): clf_prob_test = self.estimator.predict_proba(_x_test) for i in range(self.n_splits): va = VennAbers() - va.fit(p_cal=self.clf_p_cal[i], - y_cal=self.clf_y_cal[i], - precision=self.precision) + va.fit( + p_cal=self.clf_p_cal[i], + y_cal=self.clf_y_cal[i], + precision=self.precision, + ) _, probs = va.predict_proba(p_test=clf_prob_test) p0p1_test.append(probs) p0_stack = np.hstack([prob[:, 0].reshape(-1, 1) for prob in p0p1_test]) @@ -677,15 +703,21 @@ def predict_proba(self, _x_test, loss='log', p0_p1_output=False): p_prime = np.zeros((len(_x_test), 2)) - if loss == 'log': - p_prime[:, 1] = _geo_mean(p1_stack) / \ - (_geo_mean(1-p0_stack) + _geo_mean(p1_stack)) + if loss == "log": + p_prime[:, 1] = _geo_mean(p1_stack) / ( + _geo_mean(1 - p0_stack) + _geo_mean(p1_stack) + ) p_prime[:, 0] = 1 - p_prime[:, 1] else: - p_prime[:, 1] = 1 / self.n_splits * ( - np.sum(p1_stack, axis=1) + - 0.5 * np.sum(p0_stack**2, axis=1) - - 0.5 * np.sum(p1_stack**2, axis=1)) + p_prime[:, 1] = ( + 1 + / self.n_splits + * ( + np.sum(p1_stack, axis=1) + + 0.5 * np.sum(p0_stack**2, axis=1) + - 0.5 * np.sum(p1_stack**2, axis=1) + ) + ) p_prime[:, 0] = 1 - p_prime[:, 1] if p0_p1_output: @@ -759,17 +791,19 @@ class VennAbersMultiClass: probabilities p_cal are rounded to. Yields significantly faster computation time for larger calibration datasets """ - def __init__(self, - estimator, - inductive, - n_splits=None, - cal_size=None, - train_proper_size=None, - random_state=None, - shuffle=True, - stratify=None, - precision=None - ): + + def __init__( + self, + estimator, + inductive, + n_splits=None, + cal_size=None, + train_proper_size=None, + random_state=None, + shuffle=True, + stratify=None, + precision=None, + ): self.estimator = estimator self.inductive = inductive self.n_splits = n_splits @@ -812,11 +846,13 @@ def fit(self, _x_train, _y_train, sample_weight=None): try: check_is_fitted(self.estimator) except NotFittedError: - if (self.inductive and self.cal_size is None) and\ - (self.train_proper_size is None): + if (self.inductive and self.cal_size is None) and ( + self.train_proper_size is None + ): raise Exception( "For Inductive Venn-ABERS please provide either calibration" - "or proper train set size") + "or proper train set size" + ) self.classes = np.unique(_y_train) self.n_classes = len(self.classes) @@ -836,13 +872,12 @@ def fit(self, _x_train, _y_train, sample_weight=None): # OneVsOneClassifier will handle the estimator's preprocessing # (e.g., if it's a pipeline, it will apply transformations internally) self.clf_ovo = OneVsOneClassifier(self.estimator) - self.clf_ovo.fit( - _x_train, _y_train, **fit_params - ) + self.clf_ovo.fit(_x_train, _y_train, **fit_params) for pair_id, clf_ovo_estimator in enumerate(self.clf_ovo.estimators_): - _pairwise_indices = (_y_train == self.pairwise_id[pair_id][0]) +\ - (_y_train == self.pairwise_id[pair_id][1]) + _pairwise_indices = (_y_train == self.pairwise_id[pair_id][0]) + ( + _y_train == self.pairwise_id[pair_id][1] + ) # Extract sample weights for this pair if provided pair_sample_weight = None @@ -860,17 +895,18 @@ def fit(self, _x_train, _y_train, sample_weight=None): random_state=self.random_state, shuffle=self.shuffle, stratify=self.stratify, - precision=self.precision + precision=self.precision, ) va_cv.fit( _x_train[_pairwise_indices], - np.array(_y_train[_pairwise_indices] == self.pairwise_id[pair_id][1]) - .reshape(-1, 1), - sample_weight=pair_sample_weight + np.array( + _y_train[_pairwise_indices] == self.pairwise_id[pair_id][1] + ).reshape(-1, 1), + sample_weight=pair_sample_weight, ) self.multiclass_va_estimators.append(va_cv) - def predict_proba(self, _x_test, loss='log', p0_p1_output=False): + def predict_proba(self, _x_test, loss="log", p0_p1_output=False): """ Generates Venn-ABERS calibrated probabilities. @@ -901,9 +937,9 @@ def predict_proba(self, _x_test, loss='log', p0_p1_output=False): if p0_p1_output: for i, va_estimator in enumerate(self.multiclass_va_estimators): - _p_prime, _p0_p1 = va_estimator.predict_proba(_x_test, - loss=loss, - p0_p1_output=True) + _p_prime, _p0_p1 = va_estimator.predict_proba( + _x_test, loss=loss, p0_p1_output=True + ) self.multiclass_probs.append(_p_prime) self.multiclass_p0p1.append(_p0_p1) else: @@ -913,21 +949,28 @@ def predict_proba(self, _x_test, loss='log', p0_p1_output=False): p_prime = np.zeros((len(_x_test), self.n_classes)) - for i, cl_id, in enumerate(self.classes): + for ( + i, + cl_id, + ) in enumerate(self.classes): stack_i = [ p[:, 0].reshape(-1, 1) for i, p in enumerate(self.multiclass_probs) - if self.pairwise_id[i][0] == cl_id] + if self.pairwise_id[i][0] == cl_id + ] stack_j = [ p[:, 1].reshape(-1, 1) for i, p in enumerate(self.multiclass_probs) - if self.pairwise_id[i][1] == cl_id] + if self.pairwise_id[i][1] == cl_id + ] p_stack = stack_i + stack_j - p_prime[:, i] = 1/(np.sum(np.hstack([(1/p) for p in p_stack]), axis=1) - - (self.n_classes - 2)) + p_prime[:, i] = 1 / ( + np.sum(np.hstack([(1 / p) for p in p_stack]), axis=1) + - (self.n_classes - 2) + ) - p_prime = p_prime/np.sum(p_prime, axis=1).reshape(-1, 1) + p_prime = p_prime / np.sum(p_prime, axis=1).reshape(-1, 1) if p0_p1_output: return p_prime, self.multiclass_p0p1 diff --git a/mapie/calibration.py b/mapie/calibration.py index 7c67a6a92..db2a128e5 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -1,4 +1,3 @@ - from __future__ import annotations import warnings @@ -12,16 +11,19 @@ from sklearn.pipeline import Pipeline from sklearn.utils import check_random_state from sklearn.utils.multiclass import type_of_target -from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted, - indexable) +from sklearn.utils.validation import _check_y, _num_samples, check_is_fitted, indexable from numpy.typing import ArrayLike, NDArray -from .utils import (_check_estimator_classification, - _check_estimator_fit_predict, _check_n_features_in, - _check_null_weight, _fit_estimator, _get_calib_set) +from .utils import ( + _check_estimator_classification, + _check_estimator_fit_predict, + _check_n_features_in, + _check_null_weight, + _fit_estimator, + _get_calib_set, +) -from ._venn_abers import (predict_proba_prefitted_va, - VennAbers, VennAbersMultiClass) +from ._venn_abers import predict_proba_prefitted_va, VennAbers, VennAbersMultiClass class TopLabelCalibrator(BaseEstimator, ClassifierMixin): @@ -109,7 +111,7 @@ class TopLabelCalibrator(BaseEstimator, ClassifierMixin): named_calibrators = { "sigmoid": _SigmoidCalibration(), - "isotonic": IsotonicRegression(out_of_bounds="clip") + "isotonic": IsotonicRegression(out_of_bounds="clip"), } valid_cv = ["prefit", "split"] @@ -151,10 +153,7 @@ def _check_cv( """ if cv in self.valid_cv: return cv - raise ValueError( - "Invalid cv argument. " - f"Allowed values are {self.valid_cv}." - ) + raise ValueError("Invalid cv argument. " f"Allowed values are {self.valid_cv}.") def _check_calibrator( self, @@ -191,15 +190,13 @@ def _check_calibrator( else: raise ValueError( "Please provide a string in: " - + (", ").join(self.named_calibrators.keys()) + "." + + (", ").join(self.named_calibrators.keys()) + + "." ) _check_estimator_fit_predict(calibrator) return calibrator - def _get_labels( - self, - X: ArrayLike - ) -> Tuple[NDArray, NDArray]: + def _get_labels(self, X: ArrayLike) -> Tuple[NDArray, NDArray]: """ This method depends on the value of ``method`` and collects the labels that are needed to transform a multi-class calibration to multiple @@ -241,7 +238,8 @@ def _check_type_of_target(self, y: ArrayLike): if type_of_target(y) not in self.valid_inputs: raise ValueError( "Make sure to have one of the allowed targets: " - + (", ").join(self.valid_inputs) + "." + + (", ").join(self.valid_inputs) + + "." ) def _fit_calibrator( @@ -287,12 +285,8 @@ def _fit_calibrator( if sample_weight is not None: sample_weight_ = sample_weight[given_label_indices] - ( + (sample_weight_, top_class_prob_, y_calib_) = _check_null_weight( sample_weight_, top_class_prob_, y_calib_ - ) = _check_null_weight( - sample_weight_, - top_class_prob_, - y_calib_ ) else: sample_weight_ = sample_weight @@ -384,9 +378,7 @@ def _pred_proba_calib( """ idx_labels = np.where(y_pred.ravel() == label)[0].ravel() if label not in self.calibrators.keys(): - calibrated_values[ - idx_labels, idx - ] = max_prob[idx_labels].ravel() + calibrated_values[idx_labels, idx] = max_prob[idx_labels].ravel() warnings.warn( f"WARNING: This predicted label {label} has not been seen " + " during the calibration and therefore scores will remain" @@ -464,9 +456,7 @@ def fit( self.single_estimator_ = estimator self.classes_ = self.single_estimator_.classes_ self.n_classes_ = len(self.classes_) - self.calibrators = self._fit_calibrators( - X, y, sample_weight, calibrator - ) + self.calibrators = self._fit_calibrators(X, y, sample_weight, calibrator) if cv == "split": results = _get_calib_set( X, @@ -480,13 +470,13 @@ def fit( X_train, y_train, X_calib, y_calib, sw_train, sw_calib = results X_train, y_train = indexable(X_train, y_train) y_train = _check_y(y_train) - sw_train, X_train, y_train = _check_null_weight( - sw_train, - X_train, - y_train - ) + sw_train, X_train, y_train = _check_null_weight(sw_train, X_train, y_train) estimator = _fit_estimator( - clone(estimator), X_train, y_train, sw_train, **fit_params, + clone(estimator), + X_train, + y_train, + sw_train, + **fit_params, ) self.single_estimator_ = estimator self.classes_ = self.single_estimator_.classes_ @@ -752,9 +742,7 @@ class VennAbersCalibrator(BaseEstimator, ClassifierMixin): calibration with isotonic regression or Platt scaling. """ - fit_attributes = [ - "va_calibrator_", "classes_", "n_classes_" - ] + fit_attributes = ["va_calibrator_", "classes_", "n_classes_"] valid_cv = ["prefit", None] @@ -768,7 +756,7 @@ def __init__( random_state: Optional[int] = None, shuffle: bool = True, stratify: Optional[ArrayLike] = None, - precision: Optional[int] = None + precision: Optional[int] = None, ) -> None: self.estimator = estimator self.cv = cv @@ -810,10 +798,7 @@ def _check_cv(self, cv: Optional[str]) -> Optional[str]: """ if cv in self.valid_cv: return cv - raise ValueError( - "Invalid cv argument. " - f"Allowed values are {self.valid_cv}." - ) + raise ValueError("Invalid cv argument. " f"Allowed values are {self.valid_cv}.") def fit( self, @@ -824,7 +809,7 @@ def fit( random_state: Optional[Union[int, np.random.RandomState, None]] = None, shuffle: Optional[bool] = True, stratify: Optional[ArrayLike] = None, - **fit_params + **fit_params, ) -> "VennAbersCalibrator": """ Fits the Venn-ABERS calibrator. @@ -969,18 +954,14 @@ def fit( random_state=random_state_to_use, shuffle=shuffle if shuffle is not None else self.shuffle, stratify=stratify if stratify is not None else self.stratify, - precision=self.precision + precision=self.precision, ) self.va_calibrator_.fit(X_processed, y, sample_weight=sample_weight) return self - def predict_proba( - self, - X: ArrayLike, - loss="log" - ) -> NDArray: + def predict_proba(self, X: ArrayLike, loss="log") -> NDArray: """ Prediction of the calibrated scores using fitted classifier and Venn-ABERS calibrator. @@ -1000,7 +981,7 @@ def predict_proba( cv = self._check_cv(self.cv) # Process test data - if (self.transformers_ is not None): + if self.transformers_ is not None: X_processed = self.transformers_.transform(X) else: X_processed = X @@ -1015,9 +996,7 @@ def predict_proba( # Type guard: ensure n_classes_ is not None after fit if self.n_classes_ is None: - raise RuntimeError( - "n_classes_ should not be None after fitting" - ) + raise RuntimeError("n_classes_ should not be None after fitting") if self.n_classes_ <= 2: # Binary classification @@ -1033,7 +1012,7 @@ def predict_proba( self.y_cal_, p_test_pred, precision=self.precision, - va_tpe='one_vs_one' + va_tpe="one_vs_one", ) return p_prime @@ -1053,23 +1032,14 @@ def predict_proba( if "loss" in signature(self.va_calibrator_.predict_proba).parameters: p_prime = self.va_calibrator_.predict_proba( - X_processed, - loss=loss, - p0_p1_output=False + X_processed, loss=loss, p0_p1_output=False ) else: - p_prime = self.va_calibrator_.predict_proba( - X_processed, - p0_p1_output=False - ) + p_prime = self.va_calibrator_.predict_proba(X_processed, p0_p1_output=False) return p_prime - def predict( - self, - X: ArrayLike, - loss="log" - ) -> NDArray: + def predict(self, X: ArrayLike, loss="log") -> NDArray: """ Predict the class of the estimator after Venn-ABERS calibration. @@ -1087,15 +1057,11 @@ def predict( # Type guard: ensure n_classes_ is not None after fit if self.n_classes_ is None: - raise RuntimeError( - "n_classes_ should not be None after fitting" - ) + raise RuntimeError("n_classes_ should not be None after fitting") # Type guard: ensure classes_ is not None after fit if self.classes_ is None: - raise RuntimeError( - "classes_ should not be None after fitting" - ) + raise RuntimeError("classes_ should not be None after fitting") # Get calibrated probabilities p_prime = self.predict_proba(X, loss=loss) diff --git a/mapie/tests/test_venn_abers_calibration.py b/mapie/tests/test_venn_abers_calibration.py index 631586434..e03774583 100644 --- a/mapie/tests/test_venn_abers_calibration.py +++ b/mapie/tests/test_venn_abers_calibration.py @@ -1,7 +1,7 @@ - """ Tests for VennAbersCalibrator class. """ + from inspect import signature from typing import Optional, Dict, Any, List, Tuple @@ -21,11 +21,7 @@ from sklearn.exceptions import NotFittedError from sklearn.utils.validation import check_is_fitted from mapie.calibration import VennAbersCalibrator -from mapie._venn_abers import ( - VennAbers, - VennAbersMultiClass, - predict_proba_prefitted_va -) +from mapie._venn_abers import VennAbers, VennAbersMultiClass, predict_proba_prefitted_va random_state = 42 @@ -41,7 +37,7 @@ n_features=20, n_classes=2, n_informative=10, - random_state=random_state + random_state=random_state, ) X_binary_train, X_binary_test, y_binary_train, y_binary_test = train_test_split( @@ -58,7 +54,7 @@ n_features=20, n_classes=3, n_informative=10, - random_state=random_state + random_state=random_state, ) X_multi_train, X_multi_test, y_multi_train, y_multi_test = train_test_split( @@ -74,6 +70,7 @@ # Basic Initialization Tests # ============================================================================ + def test_initialized() -> None: """Test that initialization does not crash.""" VennAbersCalibrator() @@ -96,32 +93,18 @@ def test_default_parameters() -> None: def test_default_fit_params() -> None: """Test default sample weights and other parameters.""" va_cal = VennAbersCalibrator() - assert ( - signature(va_cal.fit).parameters["sample_weight"].default - is None - ) - assert ( - signature(va_cal.fit).parameters["calib_size"].default - == 0.33 - ) - assert ( - signature(va_cal.fit).parameters["random_state"].default - is None - ) - assert ( - signature(va_cal.fit).parameters["shuffle"].default - is True - ) - assert ( - signature(va_cal.fit).parameters["stratify"].default - is None - ) + assert signature(va_cal.fit).parameters["sample_weight"].default is None + assert signature(va_cal.fit).parameters["calib_size"].default == 0.33 + assert signature(va_cal.fit).parameters["random_state"].default is None + assert signature(va_cal.fit).parameters["shuffle"].default is True + assert signature(va_cal.fit).parameters["stratify"].default is None # ============================================================================ # CV Parameter Tests # ============================================================================ + @pytest.mark.parametrize("cv", ["prefit", None]) def test_valid_cv_argument(cv: Optional[str]) -> None: """Test that valid cv methods work.""" @@ -130,11 +113,7 @@ def test_valid_cv_argument(cv: Optional[str]) -> None: va_cal = VennAbersCalibrator(estimator=est, cv=cv) va_cal.fit(X_binary_cal, y_binary_cal) else: - va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - cv=cv, - inductive=True - ) + va_cal = VennAbersCalibrator(estimator=GaussianNB(), cv=cv, inductive=True) va_cal.fit(X_binary_train, y_binary_train) @@ -157,8 +136,7 @@ def test_prefit_unfitted_estimator_raises_error() -> None: clf = GaussianNB() # Unfitted estimator va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") with pytest.raises( - ValueError, - match=r".*For cv='prefit', the estimator must be already fitted*" + ValueError, match=r".*For cv='prefit', the estimator must be already fitted*" ): va_cal.fit(X_binary_cal, y_binary_cal) @@ -166,10 +144,7 @@ def test_prefit_unfitted_estimator_raises_error() -> None: def test_prefit_requires_estimator() -> None: """Test that prefit mode requires a fitted estimator.""" va_cal = VennAbersCalibrator(cv="prefit") - with pytest.raises( - ValueError, - match=r".*an estimator must be provided*" - ): + with pytest.raises(ValueError, match=r".*an estimator must be provided*"): va_cal.fit(X_binary_train, y_binary_train) @@ -177,12 +152,11 @@ def test_prefit_requires_estimator() -> None: # Inductive vs Cross Validation Tests # ============================================================================ + def test_inductive_mode_binary() -> None: """Test Inductive Venn-ABERS (IVAP) for binary classification.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -195,9 +169,7 @@ def test_inductive_mode_binary() -> None: def test_inductive_mode_multiclass() -> None: """Test Inductive Venn-ABERS (IVAP) for multi-class classification.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) probs = va_cal.predict_proba(X_multi_test) @@ -210,10 +182,7 @@ def test_inductive_mode_multiclass() -> None: def test_cross_validation_mode_binary() -> None: """Test Cross Venn-ABERS (CVAP) for binary classification.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=False, - n_splits=5, - random_state=random_state + estimator=GaussianNB(), inductive=False, n_splits=5, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -226,10 +195,7 @@ def test_cross_validation_mode_binary() -> None: def test_cross_validation_mode_multiclass() -> None: """Test Cross Venn-ABERS (CVAP) for multi-class classification.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=False, - n_splits=5, - random_state=random_state + estimator=GaussianNB(), inductive=False, n_splits=5, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) probs = va_cal.predict_proba(X_multi_test) @@ -241,14 +207,9 @@ def test_cross_validation_mode_multiclass() -> None: def test_cross_validation_requires_n_splits() -> None: """Test that CVAP requires n_splits parameter.""" - va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=False, - n_splits=None - ) + va_cal = VennAbersCalibrator(estimator=GaussianNB(), inductive=False, n_splits=None) with pytest.raises( - ValueError, - match=r".*For Cross Venn-ABERS please provide n_splits*" + ValueError, match=r".*For Cross Venn-ABERS please provide n_splits*" ): va_cal.fit(X_binary_train, y_binary_train) @@ -260,7 +221,7 @@ def test_cross_validation_with_shuffle() -> None: inductive=False, n_splits=5, shuffle=True, - random_state=random_state + random_state=random_state, ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -275,7 +236,7 @@ def test_cross_validation_with_stratify() -> None: inductive=False, n_splits=5, stratify=y_binary_train, - random_state=random_state + random_state=random_state, ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -287,6 +248,7 @@ def test_cross_validation_with_stratify() -> None: # Prefit Mode Tests # ============================================================================ + def test_prefit_mode_binary() -> None: """Test prefit mode for binary classification.""" clf = GaussianNB() @@ -328,9 +290,7 @@ def test_prefit_inductive_consistency() -> None: # Inductive mode with same split va_cal_inductive = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) # Combine proper and cal sets X_combined = np.vstack([X_binary_proper, X_binary_cal]) @@ -351,9 +311,7 @@ def test_prefit_inductive_consistency() -> None: def test_different_estimators_binary(estimator: ClassifierMixin) -> None: """Test VennAbersCalibrator with different base estimators (binary).""" va_cal = VennAbersCalibrator( - estimator=estimator, - inductive=True, - random_state=random_state + estimator=estimator, inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -367,9 +325,7 @@ def test_different_estimators_binary(estimator: ClassifierMixin) -> None: def test_different_estimators_multiclass(estimator: ClassifierMixin) -> None: """Test VennAbersCalibrator with different base estimators (multi-class).""" va_cal = VennAbersCalibrator( - estimator=estimator, - inductive=True, - random_state=random_state + estimator=estimator, inductive=True, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) probs = va_cal.predict_proba(X_multi_test) @@ -382,19 +338,14 @@ def test_different_estimators_multiclass(estimator: ClassifierMixin) -> None: def test_estimator_none_raises_error() -> None: """Test that None estimator raises ValueError.""" va_cal = VennAbersCalibrator(estimator=None) - with pytest.raises( - ValueError, - match=r".*an estimator must be provided*" - ): + with pytest.raises(ValueError, match=r".*an estimator must be provided*"): va_cal.fit(X_binary_train, y_binary_train) def test_predict_method_multiclass() -> None: """Test predict method for multi-class classification.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) predictions = va_cal.predict(X_multi_test) @@ -407,9 +358,7 @@ def test_predict_method_multiclass() -> None: def test_predict_proba_consistency() -> None: """Test that predict is consistent with predict_proba.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -425,9 +374,7 @@ def test_predict_proba_consistency() -> None: def test_predict_proba_shape_binary() -> None: """Test that predict_proba returns correct shape for binary classification.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -439,9 +386,7 @@ def test_predict_proba_shape_binary() -> None: def test_predict_proba_shape_multiclass() -> None: """Test that predict_proba returns correct shape for multi-class classification.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) probs = va_cal.predict_proba(X_multi_test) @@ -452,15 +397,10 @@ def test_predict_proba_shape_multiclass() -> None: def test_gradient_boosting_with_early_stopping() -> None: """Test VennAbersCalibrator with GradientBoosting and early stopping.""" - gb = GradientBoostingClassifier( - n_estimators=100, - random_state=random_state - ) + gb = GradientBoostingClassifier(n_estimators=100, random_state=random_state) va_cal = VennAbersCalibrator( - estimator=gb, - inductive=True, - random_state=random_state + estimator=gb, inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -477,9 +417,7 @@ def test_gradient_boosting_with_early_stopping() -> None: def test_sample_weights_none() -> None: """Test that sample_weight=None works correctly.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train, sample_weight=None) probs = va_cal.predict_proba(X_binary_test) @@ -493,30 +431,20 @@ def test_sample_weights_constant() -> None: weighted_estimator = GaussianNB().set_fit_request(sample_weight=True) va_cal_none = VennAbersCalibrator( - estimator=weighted_estimator, - inductive=True, - random_state=random_state + estimator=weighted_estimator, inductive=True, random_state=random_state ) va_cal_none.fit(X_binary_train, y_binary_train, sample_weight=None) va_cal_ones = VennAbersCalibrator( - estimator=weighted_estimator, - inductive=True, - random_state=random_state - ) - va_cal_ones.fit( - X_binary_train, y_binary_train, - sample_weight=np.ones(n_samples) + estimator=weighted_estimator, inductive=True, random_state=random_state ) + va_cal_ones.fit(X_binary_train, y_binary_train, sample_weight=np.ones(n_samples)) va_cal_fives = VennAbersCalibrator( - estimator=weighted_estimator, - inductive=True, - random_state=random_state + estimator=weighted_estimator, inductive=True, random_state=random_state ) va_cal_fives.fit( - X_binary_train, y_binary_train, - sample_weight=np.ones(n_samples) * 5 + X_binary_train, y_binary_train, sample_weight=np.ones(n_samples) * 5 ) probs_none = va_cal_none.predict_proba(X_binary_test) @@ -534,7 +462,7 @@ def test_sample_weights_variable() -> None: va_cal_uniform = VennAbersCalibrator( estimator=RandomForestClassifier(random_state=random_state), inductive=True, - random_state=random_state + random_state=random_state, ) va_cal_uniform.fit(X_binary_train, y_binary_train, sample_weight=None) @@ -548,14 +476,9 @@ def test_sample_weights_variable() -> None: ).set_fit_request(sample_weight=True) va_cal_weighted = VennAbersCalibrator( - estimator=estimator_weighted, - inductive=True, - random_state=random_state - ) - va_cal_weighted.fit( - X_binary_train, y_binary_train, - sample_weight=sample_weights + estimator=estimator_weighted, inductive=True, random_state=random_state ) + va_cal_weighted.fit(X_binary_train, y_binary_train, sample_weight=sample_weights) probs_uniform = va_cal_uniform.predict_proba(X_binary_test) probs_weighted = va_cal_weighted.predict_proba(X_binary_test) @@ -568,20 +491,17 @@ def test_sample_weights_variable() -> None: # Random State and Reproducibility Tests # ============================================================================ + def test_random_state_reproducibility() -> None: """Test that random_state ensures reproducible results.""" va_cal1 = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=42 + estimator=GaussianNB(), inductive=True, random_state=42 ) va_cal1.fit(X_binary_train, y_binary_train) probs1 = va_cal1.predict_proba(X_binary_test) va_cal2 = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=42 + estimator=GaussianNB(), inductive=True, random_state=42 ) va_cal2.fit(X_binary_train, y_binary_train) probs2 = va_cal2.predict_proba(X_binary_test) @@ -592,17 +512,13 @@ def test_random_state_reproducibility() -> None: def test_random_state_in_fit_overrides() -> None: """Test that random_state in fit() overrides constructor parameter.""" va_cal1 = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=42 + estimator=GaussianNB(), inductive=True, random_state=42 ) va_cal1.fit(X_binary_train, y_binary_train, random_state=123) probs1 = va_cal1.predict_proba(X_binary_test) va_cal2 = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=999 # Different from fit + estimator=GaussianNB(), inductive=True, random_state=999 # Different from fit ) va_cal2.fit(X_binary_train, y_binary_train, random_state=123) probs2 = va_cal2.predict_proba(X_binary_test) @@ -613,17 +529,13 @@ def test_random_state_in_fit_overrides() -> None: def test_different_random_states_give_different_results() -> None: """Test that different random states give different results.""" va_cal1 = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=42 + estimator=GaussianNB(), inductive=True, random_state=42 ) va_cal1.fit(X_binary_train, y_binary_train) probs1 = va_cal1.predict_proba(X_binary_test) va_cal2 = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=123 + estimator=GaussianNB(), inductive=True, random_state=123 ) va_cal2.fit(X_binary_train, y_binary_train) probs2 = va_cal2.predict_proba(X_binary_test) @@ -636,22 +548,17 @@ def test_different_random_states_give_different_results() -> None: # Shuffle and Stratify Tests # ============================================================================ + def test_shuffle_parameter() -> None: """Test that shuffle parameter works correctly.""" va_cal_shuffle = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state, - shuffle=True + estimator=GaussianNB(), inductive=True, random_state=random_state, shuffle=True ) va_cal_shuffle.fit(X_binary_train, y_binary_train) probs_shuffle = va_cal_shuffle.predict_proba(X_binary_test) va_cal_no_shuffle = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state, - shuffle=False + estimator=GaussianNB(), inductive=True, random_state=random_state, shuffle=False ) va_cal_no_shuffle.fit(X_binary_train, y_binary_train) probs_no_shuffle = va_cal_no_shuffle.predict_proba(X_binary_test) @@ -662,10 +569,7 @@ def test_shuffle_parameter() -> None: def test_shuffle_in_fit_overrides() -> None: """Test that shuffle in fit() overrides constructor parameter.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state, - shuffle=False + estimator=GaussianNB(), inductive=True, random_state=random_state, shuffle=False ) # Override with shuffle=True in fit va_cal.fit(X_binary_train, y_binary_train, shuffle=True) @@ -680,7 +584,7 @@ def test_stratify_parameter() -> None: estimator=GaussianNB(), inductive=True, random_state=random_state, - stratify=y_binary_train + stratify=y_binary_train, ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -691,10 +595,7 @@ def test_stratify_parameter() -> None: def test_stratify_in_fit_overrides() -> None: """Test that stratify in fit() overrides constructor parameter.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state, - stratify=None + estimator=GaussianNB(), inductive=True, random_state=random_state, stratify=None ) # Override with stratify in fit va_cal.fit(X_binary_train, y_binary_train, stratify=y_binary_train) @@ -707,13 +608,13 @@ def test_stratify_in_fit_overrides() -> None: # Calibration Size Tests # ============================================================================ + @pytest.mark.parametrize("cal_size", [0.2, 0.3, 0.4, 0.5]) def test_different_calibration_sizes(cal_size: float) -> None: """Test that different calibration sizes work correctly.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state) + estimator=GaussianNB(), inductive=True, random_state=random_state + ) va_cal.fit(X_binary_train, y_binary_train, calib_size=cal_size) probs = va_cal.predict_proba(X_binary_test) @@ -724,9 +625,7 @@ def test_different_calibration_sizes(cal_size: float) -> None: def test_cal_size_in_fit_overrides() -> None: """Test that calib_size in fit() overrides constructor parameter.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) # Override with calib_size in fit va_cal.fit(X_binary_train, y_binary_train, calib_size=0.4) @@ -741,7 +640,7 @@ def test_train_proper_size_parameter() -> None: estimator=GaussianNB(), inductive=True, train_proper_size=0.6, - random_state=random_state + random_state=random_state, ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -753,6 +652,7 @@ def test_train_proper_size_parameter() -> None: # N_splits Tests # ============================================================================ + @pytest.mark.parametrize("n_splits", [2, 3, 5, 10]) def test_different_n_splits(n_splits: int) -> None: """Test that different n_splits values work correctly.""" @@ -760,7 +660,7 @@ def test_different_n_splits(n_splits: int) -> None: estimator=GaussianNB(), inductive=False, n_splits=n_splits, - random_state=random_state + random_state=random_state, ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -772,10 +672,7 @@ def test_different_n_splits(n_splits: int) -> None: def test_n_splits_too_small_raises_error() -> None: """Test that n_splits < 2 raises an error.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=False, - n_splits=1, - random_state=random_state + estimator=GaussianNB(), inductive=False, n_splits=1, random_state=random_state ) with pytest.raises(ValueError): va_cal.fit(X_binary_train, y_binary_train) @@ -785,18 +682,17 @@ def test_n_splits_too_small_raises_error() -> None: # Attributes Tests # ============================================================================ + def test_fitted_attributes_inductive() -> None: """Test that fitted attributes are set correctly for inductive mode.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) - assert hasattr(va_cal, 'classes_') - assert hasattr(va_cal, 'n_classes_') - assert hasattr(va_cal, 'va_calibrator_') + assert hasattr(va_cal, "classes_") + assert hasattr(va_cal, "n_classes_") + assert hasattr(va_cal, "va_calibrator_") assert va_cal.n_classes_ is not None assert va_cal.classes_ is not None assert va_cal.n_classes_ == 2 @@ -806,16 +702,13 @@ def test_fitted_attributes_inductive() -> None: def test_fitted_attributes_cross_validation() -> None: """Test that fitted attributes are set correctly for cross validation mode.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=False, - n_splits=5, - random_state=random_state + estimator=GaussianNB(), inductive=False, n_splits=5, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) - assert hasattr(va_cal, 'classes_') - assert hasattr(va_cal, 'n_classes_') - assert hasattr(va_cal, 'va_calibrator_') + assert hasattr(va_cal, "classes_") + assert hasattr(va_cal, "n_classes_") + assert hasattr(va_cal, "va_calibrator_") assert va_cal.n_classes_ is not None assert va_cal.classes_ is not None assert va_cal.n_classes_ == 2 @@ -830,9 +723,9 @@ def test_fitted_attributes_prefit() -> None: va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") va_cal.fit(X_binary_cal, y_binary_cal) - assert hasattr(va_cal, 'classes_') - assert hasattr(va_cal, 'n_classes_') - assert hasattr(va_cal, 'single_estimator_') + assert hasattr(va_cal, "classes_") + assert hasattr(va_cal, "n_classes_") + assert hasattr(va_cal, "single_estimator_") assert va_cal.n_classes_ is not None assert va_cal.classes_ is not None assert va_cal.n_classes_ == 2 @@ -843,6 +736,7 @@ def test_fitted_attributes_prefit() -> None: # Pipeline Compatibility Tests # ============================================================================ + def test_pipeline_compatibility() -> None: """Test that VennAbersCalibrator works with sklearn pipelines.""" X_df = pd.DataFrame( @@ -859,23 +753,19 @@ def test_pipeline_compatibility() -> None: ] ) categorical_preprocessor = Pipeline( - steps=[ - ("encoding", OneHotEncoder(handle_unknown="ignore")) - ] + steps=[("encoding", OneHotEncoder(handle_unknown="ignore"))] ) preprocessor = ColumnTransformer( [ ("cat", categorical_preprocessor, ["x_cat"]), - ("num", numeric_preprocessor, ["x_num"]) + ("num", numeric_preprocessor, ["x_num"]), ] ) pipe = make_pipeline(preprocessor, LogisticRegression(random_state=random_state)) pipe.fit(X_df, y_series) va_cal = VennAbersCalibrator( - estimator=pipe, - inductive=True, - random_state=random_state + estimator=pipe, inductive=True, random_state=random_state ) va_cal.fit(X_df, y_series) predictions = va_cal.predict(X_df) @@ -901,14 +791,12 @@ def test_pipeline_prefit_mode() -> None: ] ) categorical_preprocessor = Pipeline( - steps=[ - ("encoding", OneHotEncoder(handle_unknown="ignore")) - ] + steps=[("encoding", OneHotEncoder(handle_unknown="ignore"))] ) preprocessor = ColumnTransformer( [ ("cat", categorical_preprocessor, ["x_cat"]), - ("num", numeric_preprocessor, ["x_num"]) + ("num", numeric_preprocessor, ["x_num"]), ] ) pipe = make_pipeline(preprocessor, LogisticRegression(random_state=random_state)) @@ -927,15 +815,10 @@ def test_with_pipeline() -> None: """Test VennAbersCalibrator with sklearn Pipeline.""" from sklearn.preprocessing import StandardScaler - pipeline = make_pipeline( - StandardScaler(), - GaussianNB() - ) + pipeline = make_pipeline(StandardScaler(), GaussianNB()) va_cal = VennAbersCalibrator( - estimator=pipeline, - inductive=True, - random_state=random_state + estimator=pipeline, inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -947,41 +830,37 @@ def test_with_pipeline() -> None: def test_with_column_transformer() -> None: """Test VennAbersCalibrator with ColumnTransformer.""" # Create a mixed dataset - X_mixed = np.column_stack([ - X_binary_train, - np.random.choice(['A', 'B', 'C'], size=len(X_binary_train)) - ]) + X_mixed = np.column_stack( + [X_binary_train, np.random.choice(["A", "B", "C"], size=len(X_binary_train))] + ) preprocessor = ColumnTransformer( transformers=[ - ('num', SimpleImputer(strategy='mean'), - list(range(X_binary_train.shape[1]))), - ('cat', OneHotEncoder(handle_unknown='ignore'), - [X_binary_train.shape[1]]) + ( + "num", + SimpleImputer(strategy="mean"), + list(range(X_binary_train.shape[1])), + ), + ("cat", OneHotEncoder(handle_unknown="ignore"), [X_binary_train.shape[1]]), ] ) - pipeline = Pipeline([ - ('preprocessor', preprocessor), - ('classifier', GaussianNB()) - ]) + pipeline = Pipeline([("preprocessor", preprocessor), ("classifier", GaussianNB())]) va_cal = VennAbersCalibrator( - estimator=pipeline, - inductive=True, - random_state=random_state + estimator=pipeline, inductive=True, random_state=random_state ) - X_test_mixed = np.column_stack([ - X_binary_test, - np.random.choice(['A', 'B', 'C'], size=len(X_binary_test)) - ]) + X_test_mixed = np.column_stack( + [X_binary_test, np.random.choice(["A", "B", "C"], size=len(X_binary_test))] + ) va_cal.fit(X_mixed, y_binary_train) probs = va_cal.predict_proba(X_test_mixed) assert probs.shape == (len(X_binary_test), 2) + # ============================================================================ # Multiclass Strategy Tests # ============================================================================ @@ -1000,6 +879,7 @@ def test_multiclass_one_vs_one_strategy() -> None: assert probs.shape == (len(X_multi_test), 3) assert np.allclose(probs.sum(axis=1), 1.0) + # ============================================================================ # Check Fitted Tests # ============================================================================ @@ -1008,15 +888,14 @@ def test_multiclass_one_vs_one_strategy() -> None: def test_check_is_fitted_after_fit() -> None: """Test that check_is_fitted passes after fitting.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) # Should not raise an error check_is_fitted(va_cal) + # ============================================================================ # Edge Cases and Error Handling Tests # ============================================================================ @@ -1028,9 +907,7 @@ def test_empty_dataset_raises_error() -> None: y_empty = np.array([]) va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) with pytest.raises(ValueError): va_cal.fit(X_empty, y_empty) @@ -1042,9 +919,7 @@ def test_single_class_raises_error() -> None: y_single = np.zeros(10) # All same class va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) with pytest.raises(ValueError): va_cal.fit(X_single, y_single) @@ -1056,9 +931,7 @@ def test_mismatched_X_y_length_raises_error() -> None: y_mismatch = y_binary_train[:40] va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) with pytest.raises(ValueError): va_cal.fit(X_mismatch, y_mismatch) @@ -1067,9 +940,7 @@ def test_mismatched_X_y_length_raises_error() -> None: def test_predict_before_fit_raises_error() -> None: """Test that calling predict before fit raises an error.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) with pytest.raises(Exception): # NotFittedError or AttributeError va_cal.predict(X_binary_test) @@ -1078,9 +949,7 @@ def test_predict_before_fit_raises_error() -> None: def test_predict_proba_before_fit_raises_error() -> None: """Test that calling predict_proba before fit raises an error.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) with pytest.raises(Exception): # NotFittedError or AttributeError va_cal.predict_proba(X_binary_test) @@ -1089,22 +958,16 @@ def test_predict_proba_before_fit_raises_error() -> None: def test_invalid_cal_size_raises_error() -> None: """Test that invalid cal_size values raise an error.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) with pytest.raises(ValueError): - va_cal.fit(X_binary_train, - y_binary_train, - calib_size=1.5) # Invalid: > 1.0 + va_cal.fit(X_binary_train, y_binary_train, calib_size=1.5) # Invalid: > 1.0 def test_negative_cal_size_raises_error() -> None: """Test that negative calib_size raises an error.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) with pytest.raises(ValueError): va_cal.fit(X_binary_train, y_binary_train, calib_size=-0.1) @@ -1113,16 +976,12 @@ def test_negative_cal_size_raises_error() -> None: def test_empty_calibration_set_raises_error() -> None: """Test that empty calibration set raises an error.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) # This should work but with a very small training set try: # Very large calib_size leaves almost no training data - va_cal.fit(X_binary_train[:10], - y_binary_train[:10], - calib_size=0.99) + va_cal.fit(X_binary_train[:10], y_binary_train[:10], calib_size=0.99) except ValueError: # Expected if the split is invalid pass @@ -1134,9 +993,7 @@ def test_very_small_dataset() -> None: y_small = y_binary_train[:20] va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_small, y_small) probs = va_cal.predict_proba(X_binary_test[:5]) @@ -1172,9 +1029,7 @@ def test_calibration_improves_probabilities() -> None: def test_probabilities_sum_to_one() -> None: """Test that predicted probabilities sum to 1 for all samples.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -1187,9 +1042,7 @@ def test_probabilities_sum_to_one() -> None: def test_probabilities_in_valid_range() -> None: """Test that all predicted probabilities are in [0, 1].""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -1201,9 +1054,7 @@ def test_probabilities_in_valid_range() -> None: def test_multiclass_probabilities_sum_to_one() -> None: """Test that multi-class predicted probabilities sum to 1.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) probs = va_cal.predict_proba(X_multi_test) @@ -1215,9 +1066,7 @@ def test_multiclass_probabilities_sum_to_one() -> None: def test_multiclass_probabilities_in_valid_range() -> None: """Test that all multi-class predicted probabilities are in [0, 1].""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) probs = va_cal.predict_proba(X_multi_test) @@ -1230,21 +1079,17 @@ def test_multiclass_probabilities_in_valid_range() -> None: # Comparison Tests Between Modes # ============================================================================ + def test_inductive_vs_cross_validation_different_results() -> None: """Test that inductive and cross validation modes give different results.""" va_cal_inductive = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal_inductive.fit(X_binary_train, y_binary_train) probs_inductive = va_cal_inductive.predict_proba(X_binary_test) va_cal_cv = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=False, - n_splits=5, - random_state=random_state + estimator=GaussianNB(), inductive=False, n_splits=5, random_state=random_state ) va_cal_cv.fit(X_binary_train, y_binary_train) probs_cv = va_cal_cv.predict_proba(X_binary_test) @@ -1262,9 +1107,7 @@ def test_all_modes_produce_valid_probabilities() -> None: for mode_name, mode_params in modes: va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - random_state=random_state, - **mode_params + estimator=GaussianNB(), random_state=random_state, **mode_params ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -1281,16 +1124,18 @@ def test_all_modes_produce_valid_probabilities() -> None: # Special Cases Tests # ============================================================================ + def test_perfect_predictions_no_calibration_needed() -> None: """Test behavior when base estimator already makes perfect predictions.""" # Create a simple linearly separable dataset from sklearn.datasets import make_blobs + X_perfect, y_perfect = make_blobs( n_samples=100, n_features=2, centers=2, cluster_std=0.5, - random_state=random_state + random_state=random_state, ) X_train_p, X_test_p, y_train_p, y_test_p = train_test_split( @@ -1300,7 +1145,7 @@ def test_perfect_predictions_no_calibration_needed() -> None: va_cal = VennAbersCalibrator( estimator=LogisticRegression(random_state=random_state), inductive=True, - random_state=random_state + random_state=random_state, ) va_cal.fit(X_train_p, y_train_p) probs = va_cal.predict_proba(X_test_p) @@ -1323,7 +1168,7 @@ def test_imbalanced_dataset() -> None: n_features=20, n_classes=2, weights=[0.9, 0.1], - random_state=random_state + random_state=random_state, ) X_train_imb, X_test_imb, y_train_imb, y_test_imb = train_test_split( @@ -1334,7 +1179,7 @@ def test_imbalanced_dataset() -> None: estimator=GaussianNB(), inductive=True, random_state=random_state, - stratify=y_train_imb + stratify=y_train_imb, ) va_cal.fit(X_train_imb, y_train_imb) probs = va_cal.predict_proba(X_test_imb) @@ -1353,7 +1198,7 @@ def test_many_classes() -> None: n_features=20, n_classes=10, n_informative=15, - random_state=random_state + random_state=random_state, ) X_train_many, X_test_many, y_train_many, y_test_many = train_test_split( @@ -1363,7 +1208,7 @@ def test_many_classes() -> None: va_cal = VennAbersCalibrator( estimator=RandomForestClassifier(random_state=random_state), inductive=True, - random_state=random_state + random_state=random_state, ) va_cal.fit(X_train_many, y_train_many) probs = va_cal.predict_proba(X_test_many) @@ -1376,13 +1221,11 @@ def test_many_classes() -> None: def test_small_calibration_set() -> None: """Test behavior with very small calibration set.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) - va_cal.fit(X_binary_train, - y_binary_train, - calib_size=0.1) # Very small calibration set + va_cal.fit( + X_binary_train, y_binary_train, calib_size=0.1 + ) # Very small calibration set probs = va_cal.predict_proba(X_binary_test) # Should still work, though calibration quality may be lower @@ -1393,13 +1236,11 @@ def test_small_calibration_set() -> None: def test_large_calibration_set() -> None: """Test behavior with very large calibration set.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) - va_cal.fit(X_binary_train, - y_binary_train, - calib_size=0.8) # Very large calibration set + va_cal.fit( + X_binary_train, y_binary_train, calib_size=0.8 + ) # Very large calibration set probs = va_cal.predict_proba(X_binary_test) # Should still work, though training set is small @@ -1411,12 +1252,11 @@ def test_large_calibration_set() -> None: # Consistency Tests # ============================================================================ + def test_multiple_fits_same_data() -> None: """Test that fitting multiple times with same data gives same results.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1431,9 +1271,7 @@ def test_multiple_fits_same_data() -> None: def test_predict_single_sample() -> None: """Test prediction on a single sample.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1449,9 +1287,7 @@ def test_predict_single_sample() -> None: def test_predict_multiple_times_same_result() -> None: """Test that multiple predictions on same data give same results.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1465,6 +1301,7 @@ def test_predict_multiple_times_same_result() -> None: # Data Type Tests # ============================================================================ + def test_pandas_dataframe_input() -> None: """Test that VennAbersCalibrator works with pandas DataFrames.""" X_df = pd.DataFrame(X_binary_train) @@ -1472,9 +1309,7 @@ def test_pandas_dataframe_input() -> None: X_test_df = pd.DataFrame(X_binary_test) va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_df, y_series) probs = va_cal.predict_proba(X_test_df) @@ -1487,9 +1322,7 @@ def test_pandas_dataframe_input() -> None: def test_numpy_array_input() -> None: """Test that VennAbersCalibrator works with numpy arrays.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -1506,9 +1339,7 @@ def test_mixed_input_types() -> None: X_test_df = pd.DataFrame(X_binary_test) va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_df, y_array) probs = va_cal.predict_proba(X_test_df) @@ -1522,9 +1353,7 @@ def test_with_pandas_dataframe() -> None: X_test_df = pd.DataFrame(X_binary_test) va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_train_df, y_binary_train) probs = va_cal.predict_proba(X_test_df) @@ -1538,9 +1367,7 @@ def test_with_pandas_series() -> None: y_train_series = pd.Series(y_binary_train) va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_train_series) probs = va_cal.predict_proba(X_binary_test) @@ -1552,20 +1379,17 @@ def test_with_pandas_series() -> None: # Integration Tests # ============================================================================ + def test_integration_with_cross_validation() -> None: """Test integration with sklearn's cross-validation utilities.""" from sklearn.model_selection import cross_val_score va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) # This should work with cross_val_score - scores = cross_val_score( - va_cal, X_binary, y_binary, cv=3, scoring='accuracy' - ) + scores = cross_val_score(va_cal, X_binary, y_binary, cv=3, scoring="accuracy") assert len(scores) == 3 assert np.all(scores >= 0) and np.all(scores <= 1) @@ -1599,9 +1423,7 @@ def test_clone_estimator() -> None: from sklearn.base import clone va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -1622,13 +1444,11 @@ def test_clone_estimator() -> None: # Performance and Scalability Tests # ============================================================================ + def test_large_dataset_performance() -> None: """Test performance on a larger dataset.""" X_large, y_large = make_classification( - n_samples=5000, - n_features=50, - n_classes=2, - random_state=random_state + n_samples=5000, n_features=50, n_classes=2, random_state=random_state ) X_train_large, X_test_large, y_train_large, y_test_large = train_test_split( @@ -1636,15 +1456,14 @@ def test_large_dataset_performance() -> None: ) va_cal = VennAbersCalibrator( - estimator=RandomForestClassifier( - n_estimators=10, random_state=random_state - ), + estimator=RandomForestClassifier(n_estimators=10, random_state=random_state), inductive=True, random_state=random_state, - precision=2 # Use precision for faster computation + precision=2, # Use precision for faster computation ) import time + start = time.time() va_cal.fit(X_train_large, y_train_large) va_cal.predict_proba(X_test_large) @@ -1661,7 +1480,7 @@ def test_high_dimensional_data() -> None: n_features=100, n_informative=50, n_classes=2, - random_state=random_state + random_state=random_state, ) X_train_hd, X_test_hd, y_train_hd, y_test_hd = train_test_split( @@ -1669,9 +1488,7 @@ def test_high_dimensional_data() -> None: ) va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_train_hd, y_train_hd) probs = va_cal.predict_proba(X_test_hd) @@ -1684,6 +1501,7 @@ def test_high_dimensional_data() -> None: # Documentation and Examples Tests # ============================================================================ + def test_basic_example_from_docstring() -> None: """Test the basic example from the class docstring.""" from sklearn.datasets import make_classification @@ -1722,11 +1540,7 @@ def test_prefit_example() -> None: def test_cross_validation_example() -> None: """Test cross-validation example workflow.""" - va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=False, - n_splits=5 - ) + va_cal = VennAbersCalibrator(estimator=GaussianNB(), inductive=False, n_splits=5) va_cal.fit(X_binary_train, y_binary_train) p_prime = va_cal.predict_proba(X_binary_test) @@ -1738,6 +1552,7 @@ def test_cross_validation_example() -> None: # Comparison with Other Calibration Methods Tests # ============================================================================ + def test_comparison_with_uncalibrated() -> None: """Compare calibrated vs uncalibrated predictions.""" # Uncalibrated @@ -1749,7 +1564,7 @@ def test_comparison_with_uncalibrated() -> None: va_cal = VennAbersCalibrator( estimator=RandomForestClassifier(random_state=random_state), inductive=True, - random_state=random_state + random_state=random_state, ) va_cal.fit(X_binary_train, y_binary_train) probs_cal = va_cal.predict_proba(X_binary_test) @@ -1766,13 +1581,13 @@ def test_comparison_with_uncalibrated() -> None: # Regression Tests (ensure no breaking changes) # ============================================================================ + def test_backward_compatibility_basic_usage() -> None: """Test that basic usage pattern remains compatible.""" # This test ensures the most common usage pattern doesn't break va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state) + estimator=GaussianNB(), inductive=True, random_state=random_state + ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) preds = va_cal.predict(X_binary_test) @@ -1797,16 +1612,14 @@ def test_backward_compatibility_prefit() -> None: def test_backward_compatibility_cross_val() -> None: """Test that cross-validation mode usage pattern remains compatible.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=False, - n_splits=5, - random_state=random_state + estimator=GaussianNB(), inductive=False, n_splits=5, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) assert probs.shape == (len(X_binary_test), 2) + # ============================================================================ # Edge Cases for Different Modes # ============================================================================ @@ -1825,9 +1638,7 @@ def test_prefit_with_unfitted_estimator_raises_error() -> None: def test_cross_val_without_n_splits_raises_error() -> None: """Test that cross-validation mode without n_splits raises an error.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=False, - n_splits=None # Missing n_splits + estimator=GaussianNB(), inductive=False, n_splits=None # Missing n_splits ) with pytest.raises(ValueError, match=".*please provide n_splits.*"): @@ -1840,9 +1651,7 @@ def test_inductive_with_very_small_dataset() -> None: y_small = y_binary_train[:20] va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) # Should work but might have limited calibration quality @@ -1856,16 +1665,15 @@ def test_inductive_with_very_small_dataset() -> None: # Attribute Access Tests # ============================================================================ + def test_classes_attribute() -> None: """Test that classes_ attribute is correctly set.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) - assert hasattr(va_cal, 'classes_') + assert hasattr(va_cal, "classes_") assert va_cal.classes_ is not None assert len(va_cal.classes_) == 2 np.testing.assert_array_equal(va_cal.classes_, np.unique(y_binary_train)) @@ -1874,26 +1682,22 @@ def test_classes_attribute() -> None: def test_n_classes_attribute() -> None: """Test that n_classes_ attribute is correctly set.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) - assert hasattr(va_cal, 'n_classes_') + assert hasattr(va_cal, "n_classes_") assert va_cal.n_classes_ == 2 def test_va_calibrator_attribute() -> None: """Test that va_calibrator_ attribute is correctly set.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) - assert hasattr(va_cal, 'va_calibrator_') + assert hasattr(va_cal, "va_calibrator_") assert va_cal.va_calibrator_ is not None @@ -1905,7 +1709,7 @@ def test_single_estimator_attribute_prefit() -> None: va_cal = VennAbersCalibrator(estimator=clf, cv="prefit") va_cal.fit(X_binary_cal, y_binary_cal) - assert hasattr(va_cal, 'single_estimator_') + assert hasattr(va_cal, "single_estimator_") assert va_cal.single_estimator_ is not None @@ -1913,12 +1717,11 @@ def test_single_estimator_attribute_prefit() -> None: # Multi-class Specific Tests # ============================================================================ + def test_multiclass_binary_calibration() -> None: """Test that multi-class uses binary calibration for each class pair.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) probs = va_cal.predict_proba(X_multi_test) @@ -1946,10 +1749,7 @@ def test_multiclass_prefit_mode() -> None: def test_multiclass_cross_validation_mode() -> None: """Test multi-class calibration in cross-validation mode.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=False, - n_splits=5, - random_state=random_state + estimator=GaussianNB(), inductive=False, n_splits=5, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) probs = va_cal.predict_proba(X_multi_test) @@ -1961,9 +1761,7 @@ def test_multiclass_cross_validation_mode() -> None: def test_multiclass_predictions_match_argmax() -> None: """Test that multi-class predictions match argmax of probabilities.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) @@ -1981,14 +1779,12 @@ def test_multiclass_with_different_estimators() -> None: estimators = [ GaussianNB(), RandomForestClassifier(n_estimators=10, random_state=random_state), - LogisticRegression(random_state=random_state, max_iter=1000) + LogisticRegression(random_state=random_state, max_iter=1000), ] for estimator in estimators: va_cal = VennAbersCalibrator( - estimator=estimator, - inductive=True, - random_state=random_state + estimator=estimator, inductive=True, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) probs = va_cal.predict_proba(X_multi_test) @@ -2002,6 +1798,7 @@ def test_multiclass_with_different_estimators() -> None: # Precision Parameter Tests # ============================================================================ + @pytest.mark.parametrize("precision", [None, 2, 4, 6]) def test_precision_parameter(precision: Optional[int]) -> None: """Test that precision parameter works correctly.""" @@ -2009,7 +1806,7 @@ def test_precision_parameter(precision: Optional[int]) -> None: estimator=GaussianNB(), inductive=True, random_state=random_state, - precision=precision + precision=precision, ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -2027,7 +1824,7 @@ def test_precision_speeds_up_computation() -> None: estimator=GaussianNB(), inductive=True, random_state=random_state, - precision=None + precision=None, ) start = time.time() va_cal_no_precision.fit(X_binary_train, y_binary_train) @@ -2036,10 +1833,7 @@ def test_precision_speeds_up_computation() -> None: # With precision va_cal_with_precision = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state, - precision=2 + estimator=GaussianNB(), inductive=True, random_state=random_state, precision=2 ) start = time.time() va_cal_with_precision.fit(X_binary_train, y_binary_train) @@ -2058,7 +1852,7 @@ def test_different_precision_values(precision: int) -> None: estimator=GaussianNB(), inductive=True, random_state=random_state, - precision=precision + precision=precision, ) va_cal.fit(X_binary_train, y_binary_train) probs = va_cal.predict_proba(X_binary_test) @@ -2070,19 +1864,13 @@ def test_different_precision_values(precision: int) -> None: def test_precision_maintains_calibration_quality() -> None: """Test that precision parameter maintains reasonable calibration quality.""" va_cal_high_prec = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state, - precision=4 + estimator=GaussianNB(), inductive=True, random_state=random_state, precision=4 ) va_cal_high_prec.fit(X_binary_train, y_binary_train) probs_high = va_cal_high_prec.predict_proba(X_binary_test) va_cal_low_prec = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state, - precision=2 + estimator=GaussianNB(), inductive=True, random_state=random_state, precision=2 ) va_cal_low_prec.fit(X_binary_train, y_binary_train) probs_low = va_cal_low_prec.predict_proba(X_binary_test) @@ -2098,16 +1886,15 @@ def test_precision_maintains_calibration_quality() -> None: def test_precision_parameter_multiclass() -> None: """Test that precision parameter works correctly for multiclass.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - precision=6, - random_state=random_state + estimator=GaussianNB(), inductive=True, precision=6, random_state=random_state ) va_cal.fit(X_multi_train, y_multi_train) probs = va_cal.predict_proba(X_multi_test) assert probs.shape == (len(X_multi_test), 3) assert np.allclose(probs.sum(axis=1), 1.0) + + # ============================================================================ # Error Message Quality Tests # ============================================================================ @@ -2123,10 +1910,7 @@ def test_error_message_for_missing_estimator() -> None: def test_error_message_for_invalid_cv() -> None: """Test that invalid cv parameter gives clear error message.""" - va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - cv="invalid_cv_option" - ) + va_cal = VennAbersCalibrator(estimator=GaussianNB(), cv="invalid_cv_option") with pytest.raises(ValueError): va_cal.fit(X_binary_train, y_binary_train) @@ -2136,17 +1920,18 @@ def test_error_message_for_invalid_cv() -> None: # Final Comprehensive Test # ============================================================================ + def test_venn_abers_cv_with_sample_weight() -> None: """Test VennAbersCV with sample weights in cross-validation mode.""" # Create sample weights - higher weights for some samples sample_weight = np.ones(len(y_binary_train)) - sample_weight[:len(y_binary_train)//2] = 2.0 # Double weight for first half + sample_weight[: len(y_binary_train) // 2] = 2.0 # Double weight for first half weighted_estimator = GaussianNB().set_fit_request(sample_weight=True) va_cal = VennAbersCalibrator( estimator=weighted_estimator, inductive=False, # Use cross-validation mode n_splits=3, - random_state=random_state + random_state=random_state, ) # Fit with sample weights @@ -2160,10 +1945,7 @@ def test_venn_abers_cv_with_sample_weight() -> None: # Fit without sample weights for comparison va_cal_no_weight = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=False, - n_splits=3, - random_state=random_state + estimator=GaussianNB(), inductive=False, n_splits=3, random_state=random_state ) va_cal_no_weight.fit(X_binary_train, y_binary_train) probs_no_weight = va_cal_no_weight.predict_proba(X_binary_test) @@ -2181,7 +1963,7 @@ def test_venn_abers_cv_sample_weight_all_folds() -> None: estimator=weighted_estimator, inductive=False, n_splits=5, # Multiple folds to ensure all are tested - random_state=random_state + random_state=random_state, ) # Should not raise any errors @@ -2209,7 +1991,7 @@ def test_comprehensive_workflow() -> None: n_estimators=10, random_state=random_state ), random_state=random_state, - **mode_params + **mode_params, ) va_cal_binary.fit(X_binary_train, y_binary_train) @@ -2228,7 +2010,7 @@ def test_comprehensive_workflow() -> None: n_estimators=10, random_state=random_state ), random_state=random_state, - **mode_params + **mode_params, ) va_cal_multi.fit(X_multi_train, y_multi_train) @@ -2242,9 +2024,7 @@ def test_comprehensive_workflow() -> None: assert np.all((probs_multi >= 0) & (probs_multi <= 1)) # Test prefit mode separately - clf_binary = RandomForestClassifier( - n_estimators=10, random_state=random_state - ) + clf_binary = RandomForestClassifier(n_estimators=10, random_state=random_state) clf_binary.fit(X_binary_proper, y_binary_proper) va_cal_prefit = VennAbersCalibrator(estimator=clf_binary, cv="prefit") @@ -2267,7 +2047,7 @@ def test_predict_proba_prefitted_va_one_vs_all(): n_informative=10, n_redundant=0, n_clusters_per_class=1, - random_state=42 + random_state=42, ) # Split into train, calibration, and test sets @@ -2288,7 +2068,7 @@ def test_predict_proba_prefitted_va_one_vs_all(): # Test one_vs_all strategy p_calibrated, p0p1 = predict_proba_prefitted_va( - p_cal, y_cal, p_test, precision=None, va_tpe='one_vs_all' + p_cal, y_cal, p_test, precision=None, va_tpe="one_vs_all" ) # Assertions @@ -2299,7 +2079,7 @@ def test_predict_proba_prefitted_va_one_vs_all(): # Test with precision parameter p_calibrated_prec, p0p1_prec = predict_proba_prefitted_va( - p_cal, y_cal, p_test, precision=3, va_tpe='one_vs_all' + p_cal, y_cal, p_test, precision=3, va_tpe="one_vs_all" ) assert p_calibrated_prec.shape == p_test.shape @@ -2318,7 +2098,7 @@ def test_predict_proba_prefitted_va_one_vs_one(): n_informative=10, n_redundant=0, n_clusters_per_class=1, - random_state=42 + random_state=42, ) # Split into train, calibration, and test sets @@ -2339,7 +2119,7 @@ def test_predict_proba_prefitted_va_one_vs_one(): # Test one_vs_one strategy p_calibrated, p0p1 = predict_proba_prefitted_va( - p_cal, y_cal, p_test, precision=None, va_tpe='one_vs_one' + p_cal, y_cal, p_test, precision=None, va_tpe="one_vs_one" ) # Assertions @@ -2363,9 +2143,7 @@ def test_predict_proba_prefitted_va_invalid_type(): p_test = clf.predict_proba(X_test) with pytest.raises(ValueError, match="Invalid va_tpe"): - predict_proba_prefitted_va( - p_cal, y_train, p_test, va_tpe='invalid_type' - ) + predict_proba_prefitted_va(p_cal, y_train, p_test, va_tpe="invalid_type") def test_venn_abers_basic(): @@ -2409,15 +2187,12 @@ def test_venn_abers_basic(): def test_venn_abers_cv_brier_loss() -> None: """Test VennAbersCV with Brier loss (non-log loss).""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=False, - n_splits=3, - random_state=random_state + estimator=GaussianNB(), inductive=False, n_splits=3, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) # Use 'brier' loss to trigger the else branch - probs_brier = va_cal.predict_proba(X_binary_test, loss='brier') + probs_brier = va_cal.predict_proba(X_binary_test, loss="brier") # Should produce valid probabilities assert probs_brier.shape == (len(X_binary_test), 2) @@ -2432,9 +2207,7 @@ def test_venn_abers_cv_p0_p1_output() -> None: # Create and fit VennAbersCV in inductive mode va_cv = VennAbersCV( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cv.fit(X_binary_train, y_binary_train) @@ -2456,12 +2229,11 @@ def test_multiclass_cross_validation_requires_n_splits() -> None: va_multi = VennAbersMultiClass( estimator=GaussianNB(), inductive=False, - n_splits=None # Missing n_splits for cross-validation mode + n_splits=None, # Missing n_splits for cross-validation mode ) with pytest.raises( - Exception, - match=r".*For Cross Venn ABERS please provide n_splits.*" + Exception, match=r".*For Cross Venn ABERS please provide n_splits.*" ): va_multi.fit(X_multi_train, y_multi_train) @@ -2472,25 +2244,17 @@ def test_inductive_missing_size_parameters_raises_error(): """ # Generate multi-class dataset X, y = make_classification( - n_samples=100, - n_classes=3, - n_informative=10, - n_redundant=0, - random_state=42 + n_samples=100, n_classes=3, n_informative=10, n_redundant=0, random_state=42 ) # Create VennAbersMultiClass with inductive=True but no size parameters va_multi = VennAbersMultiClass( - estimator=GaussianNB(), - inductive=True, - train_proper_size=None, - random_state=42 + estimator=GaussianNB(), inductive=True, train_proper_size=None, random_state=42 ) # Should raise Exception when fitting without size parameters with pytest.raises( - Exception, - match="For Inductive Venn-ABERS please provide either calibration" + Exception, match="For Inductive Venn-ABERS please provide either calibration" ): va_multi.fit(X, y) @@ -2518,20 +2282,13 @@ def test_multiclass_p0_p1_output() -> None: # Create and fit VennAbersMultiClass estimator = GaussianNB() va_multi = VennAbersMultiClass( - estimator=estimator, - inductive=True, - cal_size=0.3, - random_state=random_state + estimator=estimator, inductive=True, cal_size=0.3, random_state=random_state ) va_multi.fit(X_train, y_train) # Test with p0_p1_output=True - p_prime, p0_p1_list = va_multi.predict_proba( - X_test, - loss='log', - p0_p1_output=True - ) + p_prime, p0_p1_list = va_multi.predict_proba(X_test, loss="log", p0_p1_output=True) # Verify p_prime shape and properties assert p_prime.shape == (len(X_test), n_classes) @@ -2577,20 +2334,13 @@ def test_venn_abers_multiclass_p0_p1_output() -> None: # Test with inductive mode estimator = GaussianNB() va_multi = VennAbersMultiClass( - estimator=estimator, - inductive=True, - cal_size=0.3, - random_state=random_state + estimator=estimator, inductive=True, cal_size=0.3, random_state=random_state ) va_multi.fit(X_train, y_train) # Test with p0_p1_output=True - p_prime, p0_p1_list = va_multi.predict_proba( - X_test, - loss='log', - p0_p1_output=True - ) + p_prime, p0_p1_list = va_multi.predict_proba(X_test, loss="log", p0_p1_output=True) # Verify p_prime shape and properties assert p_prime.shape == (len(X_test), n_classes) @@ -2614,7 +2364,7 @@ def test_venn_abers_multiclass_p0_p1_output() -> None: assert va_multi.multiclass_p0p1 == p0_p1_list # Test with p0_p1_output=False (default behavior) - p_prime_only = va_multi.predict_proba(X_test, loss='log', p0_p1_output=False) + p_prime_only = va_multi.predict_proba(X_test, loss="log", p0_p1_output=False) # Verify it returns only p_prime assert isinstance(p_prime_only, np.ndarray) @@ -2623,18 +2373,13 @@ def test_venn_abers_multiclass_p0_p1_output() -> None: # Test with cross-validation mode va_multi_cv = VennAbersMultiClass( - estimator=GaussianNB(), - inductive=False, - n_splits=3, - random_state=random_state + estimator=GaussianNB(), inductive=False, n_splits=3, random_state=random_state ) va_multi_cv.fit(X_train, y_train) p_prime_cv, p0_p1_list_cv = va_multi_cv.predict_proba( - X_test, - loss='log', - p0_p1_output=True + X_test, loss="log", p0_p1_output=True ) # Verify CV mode results @@ -2649,9 +2394,7 @@ def test_venn_abers_multiclass_p0_p1_output() -> None: # Test with Brier loss p_prime_brier, p0_p1_brier = va_multi.predict_proba( - X_test, - loss='brier', - p0_p1_output=True + X_test, loss="brier", p0_p1_output=True ) assert p_prime_brier.shape == (len(X_test), n_classes) @@ -2675,8 +2418,7 @@ def test_prefit_predict_proba_without_single_estimator() -> None: va_cal.single_estimator_ = None with pytest.raises( - RuntimeError, - match=r"single_estimator_ should not be None in prefit mode" + RuntimeError, match=r"single_estimator_ should not be None in prefit mode" ): va_cal.predict_proba(X_binary_test) @@ -2697,8 +2439,7 @@ def test_prefit_predict_proba_without_n_classes() -> None: va_cal.n_classes_ = None with pytest.raises( - RuntimeError, - match=r"n_classes_ should not be None after fitting" + RuntimeError, match=r"n_classes_ should not be None after fitting" ): va_cal.predict_proba(X_binary_test) @@ -2720,7 +2461,7 @@ def test_prefit_predict_proba_binary_without_va_calibrator() -> None: with pytest.raises( RuntimeError, - match=r"va_calibrator_ should not be None for binary classification" + match=r"va_calibrator_ should not be None for binary classification", ): va_cal.predict_proba(X_binary_test) @@ -2738,10 +2479,10 @@ def test_prefit_predict_proba_binary_with_loss_parameter() -> None: va_cal.fit(X_binary_cal, y_binary_cal) # Test with default loss='log' - probs_log = va_cal.predict_proba(X_binary_test, loss='log') + probs_log = va_cal.predict_proba(X_binary_test, loss="log") # Test with loss='brier' - probs_brier = va_cal.predict_proba(X_binary_test, loss='brier') + probs_brier = va_cal.predict_proba(X_binary_test, loss="brier") # Verify output shape and properties assert probs_log.shape == (len(X_binary_test), 2) @@ -2757,9 +2498,7 @@ def test_inductive_predict_proba_with_wrong_calibrator_type() -> None: """ va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -2770,7 +2509,7 @@ def test_inductive_predict_proba_with_wrong_calibrator_type() -> None: with pytest.raises( RuntimeError, match=r"va_calibrator_ should be VennAbersMultiClass instance in " - r"inductive/cross-validation mode" + r"inductive/cross-validation mode", ): va_cal.predict_proba(X_binary_test) @@ -2783,9 +2522,7 @@ def test_inductive_predict_proba_without_loss_parameter() -> None: import inspect va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -2798,14 +2535,11 @@ def predict_proba(self, X, p0_p1_output=False): return probs # Replace with mock that doesn't have loss parameter - mock_calibrator = MockVennAbersMultiClass( - estimator=GaussianNB(), - inductive=True - ) + mock_calibrator = MockVennAbersMultiClass(estimator=GaussianNB(), inductive=True) # Verify the mock's predict_proba doesn't have 'loss' parameter sig = inspect.signature(mock_calibrator.predict_proba) - assert 'loss' not in sig.parameters + assert "loss" not in sig.parameters va_cal.va_calibrator_ = mock_calibrator @@ -2823,9 +2557,7 @@ def test_predict_without_n_classes() -> None: is None after fitting. """ va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -2833,8 +2565,7 @@ def test_predict_without_n_classes() -> None: va_cal.n_classes_ = None with pytest.raises( - RuntimeError, - match=r"n_classes_ should not be None after fitting" + RuntimeError, match=r"n_classes_ should not be None after fitting" ): va_cal.predict(X_binary_test) @@ -2845,9 +2576,7 @@ def test_predict_without_classes() -> None: is None after fitting. """ va_cal = VennAbersCalibrator( - estimator=GaussianNB(), - inductive=True, - random_state=random_state + estimator=GaussianNB(), inductive=True, random_state=random_state ) va_cal.fit(X_binary_train, y_binary_train) @@ -2855,8 +2584,7 @@ def test_predict_without_classes() -> None: va_cal.classes_ = None with pytest.raises( - RuntimeError, - match=r"classes_ should not be None after fitting" + RuntimeError, match=r"classes_ should not be None after fitting" ): va_cal.predict(X_binary_test) @@ -2873,18 +2601,13 @@ def test_prefit_classes_none_after_fitting() -> None: clf.fit(X_binary_train, y_binary_train) # Create VennAbersCalibrator in prefit mode - va_cal = VennAbersCalibrator( - estimator=clf, - cv="prefit", - random_state=random_state - ) + va_cal = VennAbersCalibrator(estimator=clf, cv="prefit", random_state=random_state) # Manually set the classes_ attribute to None # to simulate the error condition clf.classes_ = None with pytest.raises( - RuntimeError, - match=r"classes_ should not be None after fitting estimator" + RuntimeError, match=r"classes_ should not be None after fitting estimator" ): va_cal.fit(X_binary_test, y_binary_test) diff --git a/mapie/utils.py b/mapie/utils.py index c791bdc76..b9f9003b0 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -5,14 +5,23 @@ import numpy as np from sklearn.base import ClassifierMixin, RegressorMixin from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import (BaseCrossValidator, BaseShuffleSplit, - KFold, LeaveOneOut, ShuffleSplit, - train_test_split) +from sklearn.model_selection import ( + BaseCrossValidator, + BaseShuffleSplit, + KFold, + LeaveOneOut, + ShuffleSplit, + train_test_split, +) from sklearn.pipeline import Pipeline from sklearn.utils import _safe_indexing from sklearn.utils.multiclass import type_of_target -from sklearn.utils.validation import (_check_sample_weight, _num_features, - check_is_fitted, column_or_1d) +from sklearn.utils.validation import ( + _check_sample_weight, + _num_features, + check_is_fitted, + column_or_1d, +) from numpy.typing import ArrayLike, NDArray import copy @@ -113,12 +122,11 @@ def train_conformalize_test_split( [2] """ - _check_train_conf_test_proportions( - train_size, conformalize_size, test_size, len(X) - ) + _check_train_conf_test_proportions(train_size, conformalize_size, test_size, len(X)) X_train, X_conformalize_test, y_train, y_conformalize_test = train_test_split( - X, y, + X, + y, train_size=train_size, random_state=random_state, shuffle=shuffle, @@ -130,7 +138,8 @@ def train_conformalize_test_split( test_size_after_split = test_size X_conformalize, X_test, y_conformalize, y_test = train_test_split( - X_conformalize_test, y_conformalize_test, + X_conformalize_test, + y_conformalize_test, test_size=test_size_after_split, random_state=random_state, shuffle=shuffle, @@ -141,6 +150,7 @@ def train_conformalize_test_split( # Following functions are all private utilities + def _check_train_conf_test_proportions( train_size: Union[float, int], conformalize_size: Union[float, int], @@ -149,18 +159,22 @@ def _check_train_conf_test_proportions( ) -> None: count_input_proportions = sum([test_size, train_size, conformalize_size]) - if isinstance(train_size, float) and \ - isinstance(conformalize_size, float) and \ - isinstance(test_size, float): + if ( + isinstance(train_size, float) + and isinstance(conformalize_size, float) + and isinstance(test_size, float) + ): if not isclose(1, count_input_proportions): raise ValueError( "When using floats, train_size + conformalize_size" " + test_size must be equal to 1." ) - elif isinstance(train_size, int) and \ - isinstance(conformalize_size, int) and \ - isinstance(test_size, int): + elif ( + isinstance(train_size, int) + and isinstance(conformalize_size, int) + and isinstance(test_size, int) + ): if count_input_proportions != dataset_size: raise ValueError( "When using integers, train_size + " @@ -336,16 +350,12 @@ def _check_cv( random_seeds = cast(list, np.random.get_state())[1] random_state = np.random.choice(random_seeds) if cv is None: - return KFold( - n_splits=5, shuffle=True, random_state=random_state - ) + return KFold(n_splits=5, shuffle=True, random_state=random_state) elif isinstance(cv, int): if cv == -1: return LeaveOneOut() elif cv >= 2: - return KFold( - n_splits=cv, shuffle=True, random_state=random_state - ) + return KFold(n_splits=cv, shuffle=True, random_state=random_state) else: raise ValueError( "Invalid cv argument. " @@ -359,9 +369,7 @@ def _check_cv( elif cv == "prefit": return cv elif cv == "split": - return ShuffleSplit( - n_splits=1, test_size=test_size, random_state=random_state - ) + return ShuffleSplit(n_splits=1, test_size=test_size, random_state=random_state) else: raise ValueError( "Invalid cv argument. " @@ -375,7 +383,7 @@ def _check_no_agg_cv( cv: Union[int, str, BaseCrossValidator, BaseShuffleSplit], no_agg_cv_array: list, y: Optional[ArrayLike] = None, - groups: Optional[ArrayLike] = None + groups: Optional[ArrayLike] = None, ) -> bool: """ Check if cross-validator is ``"prefit"``, ``"split"`` or any split @@ -544,15 +552,12 @@ def _check_n_features_in( if cv == "prefit" and hasattr(estimator, "n_features_in_"): if cast(Any, estimator).n_features_in_ != n_features_in: raise ValueError( - "Invalid mismatch between ", - "X.shape and estimator.n_features_in_." + "Invalid mismatch between ", "X.shape and estimator.n_features_in_." ) return n_features_in -def _check_gamma( - gamma: float -) -> None: +def _check_gamma(gamma: float) -> None: """ Check if gamma is between 0 and 1. @@ -566,9 +571,7 @@ def _check_gamma( If gamma is lower than 0 or higher than 1. """ if (gamma < 0) or (gamma > 1): - raise ValueError( - "Invalid gamma. Allowed values are between 0 and 1." - ) + raise ValueError("Invalid gamma. Allowed values are between 0 and 1.") def _get_effective_calibration_samples(scores: NDArray, sym: bool): @@ -625,7 +628,7 @@ def _check_alpha_and_n_samples( else: alphas_ = alphas for alpha in alphas_: - if n < np.max([1/alpha, 1/(1-alpha)]): + if n < np.max([1 / alpha, 1 / (1 - alpha)]): raise ValueError( "Number of samples of the score is too low,\n" "1/confidence_level and 1/(1 - confidence_level) must be\n" @@ -723,26 +726,20 @@ def _check_nan_in_aposteriori_prediction(X: ArrayLike) -> None: def _check_lower_upper_bounds( - y_pred_low: NDArray, - y_pred_up: NDArray, - y_preds: NDArray + y_pred_low: NDArray, y_pred_up: NDArray, y_preds: NDArray ) -> None: y_pred_low = column_or_1d(y_pred_low) y_pred_up = column_or_1d(y_pred_up) y_preds = column_or_1d(y_preds) any_inversion = np.any( - (y_pred_low > y_pred_up) | - (y_preds < y_pred_low) | - (y_preds > y_pred_up) + (y_pred_low > y_pred_up) | (y_preds < y_pred_low) | (y_preds > y_pred_up) ) if any_inversion: initial_logger_level = logging.root.level logging.basicConfig(level=logging.INFO) - logging.info( - "The predictions are ill-sorted." - ) + logging.info("The predictions are ill-sorted.") logging.basicConfig(level=initial_logger_level) @@ -895,8 +892,7 @@ def _get_calib_set( shuffle: Optional[bool] = True, stratify: Optional[ArrayLike] = None, ) -> Tuple[ - ArrayLike, ArrayLike, ArrayLike, ArrayLike, - Optional[NDArray], Optional[NDArray] + ArrayLike, ArrayLike, ArrayLike, ArrayLike, Optional[NDArray], Optional[NDArray] ]: """ Split the dataset into training and calibration sets. @@ -925,40 +921,42 @@ def _get_calib_set( sample_weight_calib """ if sample_weight is None: - ( - X_train, X_calib, y_train, y_calib - ) = train_test_split( - X, - y, - test_size=calib_size, - random_state=random_state, - shuffle=shuffle, - stratify=stratify + (X_train, X_calib, y_train, y_calib) = train_test_split( + X, + y, + test_size=calib_size, + random_state=random_state, + shuffle=shuffle, + stratify=stratify, ) sample_weight_train = sample_weight sample_weight_calib = None else: ( - X_train, - X_calib, - y_train, - y_calib, - sample_weight_train, - sample_weight_calib, + X_train, + X_calib, + y_train, + y_calib, + sample_weight_train, + sample_weight_calib, ) = train_test_split( - X, - y, - sample_weight, - test_size=calib_size, - random_state=random_state, - shuffle=shuffle, - stratify=stratify + X, + y, + sample_weight, + test_size=calib_size, + random_state=random_state, + shuffle=shuffle, + stratify=stratify, ) X_train, X_calib = cast(ArrayLike, X_train), cast(ArrayLike, X_calib) y_train, y_calib = cast(ArrayLike, y_train), cast(ArrayLike, y_calib) return ( - X_train, y_train, X_calib, y_calib, - sample_weight_train, sample_weight_calib + X_train, + y_train, + X_calib, + y_calib, + sample_weight_train, + sample_weight_calib, ) @@ -1023,10 +1021,7 @@ def _check_estimator_classification( return estimator -def check_proba_normalized( - y_pred_proba: NDArray, - axis: int = -1 -) -> NDArray: +def check_proba_normalized(y_pred_proba: NDArray, axis: int = -1) -> NDArray: """ Check if for all the samples the sum of the probabilities is equal to one. @@ -1050,7 +1045,7 @@ def check_proba_normalized( np.sum(y_pred_proba, axis=axis), 1, err_msg="The sum of the scores is not equal to one.", - rtol=1e-5 + rtol=1e-5, ) return y_pred_proba.astype(np.float64) @@ -1082,12 +1077,8 @@ def _get_binning_groups( bins = np.linspace(0.0, 1.0, num_bins) else: bin_groups = np.array_split(y_score, num_bins) - bins = np.sort(np.array( - [ - bin_group.max() for bin_group in bin_groups[:-1] - ] - + [np.inf] - ) + bins = np.sort( + np.array([bin_group.max() for bin_group in bin_groups[:-1]] + [np.inf]) ) return bins @@ -1142,9 +1133,7 @@ def _calc_bins( return bins, bin_accs, bin_confs, bin_sizes # type: ignore -def _check_split_strategy( - strategy: Optional[str] -) -> str: +def _check_split_strategy(strategy: Optional[str]) -> str: """ Checks that the split strategy provided is valid and defults None split strategy to "uniform". @@ -1166,15 +1155,11 @@ def _check_split_strategy( if strategy is None: strategy = "uniform" if strategy not in ["uniform", "quantile", "array split"]: - raise ValueError( - "Please provide a valid splitting strategy." - ) + raise ValueError("Please provide a valid splitting strategy.") return strategy -def _check_number_bins( - num_bins: int -) -> int: +def _check_number_bins(num_bins: int) -> int: """ Checks that the bin specified is a number. @@ -1193,9 +1178,7 @@ def _check_number_bins( When num_bins is a negative number is raises an error. """ if isinstance(num_bins, int) is False: - raise ValueError( - "Please provide a bin number as an integer." - ) + raise ValueError("Please provide a bin number as an integer.") elif num_bins < 1: raise ValueError( """ @@ -1207,9 +1190,7 @@ def _check_number_bins( return num_bins -def _check_binary_zero_one( - y_true: ArrayLike -) -> NDArray: +def _check_binary_zero_one(y_true: ArrayLike) -> NDArray: """ Checks if the array is binary and changes a non binary array to a zero, one array. @@ -1232,8 +1213,9 @@ def _check_binary_zero_one( """ y_true = cast(NDArray, column_or_1d(y_true)) if type_of_target(y_true) == "binary": - if ((np.unique(y_true) != np.array([0, 1])).any() and - len(np.unique(y_true)) == 2): + if (np.unique(y_true) != np.array([0, 1])).any() and len( + np.unique(y_true) + ) == 2: idx_min = np.where(y_true == np.min(y_true))[0] y_true[idx_min] = 0 idx_max = np.where(y_true == np.max(y_true))[0] @@ -1242,15 +1224,11 @@ def _check_binary_zero_one( else: return y_true else: - raise ValueError( - "Please provide y_true as a binary array." - ) + raise ValueError("Please provide y_true as a binary array.") def _fix_number_of_classes( - n_classes_: int, - n_classes_training: NDArray, - y_proba: NDArray + n_classes_: int, n_classes_training: NDArray, y_proba: NDArray ) -> NDArray: """ Fix shape of y_proba of validation set if number of classes @@ -1269,23 +1247,13 @@ def _fix_number_of_classes( NDArray Probabilities with the right number of classes. """ - y_pred_full = np.zeros( - shape=(len(y_proba), n_classes_) - ) + y_pred_full = np.zeros(shape=(len(y_proba), n_classes_)) y_index = np.tile(n_classes_training, (len(y_proba), 1)) - np.put_along_axis( - y_pred_full, - y_index, - y_proba, - axis=1 - ) + np.put_along_axis(y_pred_full, y_index, y_proba, axis=1) return y_pred_full -def _check_array_shape_classification( - y_true: NDArray, - y_pred_set: NDArray -) -> NDArray: +def _check_array_shape_classification(y_true: NDArray, y_pred_set: NDArray) -> NDArray: """ Fix shape of y_pred_set (to 3d array of shape (n_obs, n_class, n_alpha)). @@ -1323,10 +1291,7 @@ def _check_array_shape_classification( return y_pred_set -def _check_array_shape_regression( - y_true: NDArray, - y_intervals: NDArray -) -> NDArray: +def _check_array_shape_regression(y_true: NDArray, y_intervals: NDArray) -> NDArray: """ Fix shape of y_intervals (to 3d array of shape (n_obs, 2, n_alpha)). @@ -1432,9 +1397,7 @@ def _check_array_nan(array: NDArray) -> None: If all elements of the array are NaNs """ if np.isnan(array).all() and len(np.unique(array)) > 0: - raise ValueError( - "Array contains only NaN values." - ) + raise ValueError("Array contains only NaN values.") def _check_array_inf(array: NDArray) -> None: @@ -1453,9 +1416,7 @@ def _check_array_inf(array: NDArray) -> None: If any elements of the array is +inf or -inf. """ if np.isinf(array).any(): - raise ValueError( - "Array contains infinite values." - ) + raise ValueError("Array contains infinite values.") def _check_arrays_length(*arrays: NDArray) -> None: @@ -1474,15 +1435,11 @@ def _check_arrays_length(*arrays: NDArray) -> None: """ res = [array.shape[0] for array in arrays] if len(np.unique(res)) > 1: - raise ValueError( - "There are arrays with different length" - ) + raise ValueError("There are arrays with different length") def _check_n_samples( - X: NDArray, - n_samples: Optional[Union[float, int]], - indices: NDArray + X: NDArray, n_samples: Optional[Union[float, int]], indices: NDArray ) -> int: """ Check alpha and prepare it as a ArrayLike. @@ -1516,26 +1473,26 @@ def _check_n_samples( "The value of n_samples is too small. " "You need to increase it so that n_samples*X.shape[0] > 1" "otherwise n_samples should be an int" - ) + ) else: raise ValueError( "Invalid n_samples. Allowed values " "are float in the range (0.0, 1.0) or" " int in the range [1, inf)" - ) + ) elif isinstance(n_samples, int) and n_samples <= 0: raise ValueError( - "Invalid n_samples. Allowed values " - "are float in the range (0.0, 1.0) or" - " int in the range [1, inf)" - ) + "Invalid n_samples. Allowed values " + "are float in the range (0.0, 1.0) or" + " int in the range [1, inf)" + ) return int(n_samples) def _check_predict_params( predict_params_used_in_fit: bool, predict_params: dict, - cv: Optional[Union[int, str, BaseCrossValidator]] = None + cv: Optional[Union[int, str, BaseCrossValidator]] = None, ) -> None: """ Check that if predict_params is used in the predict method, From bf8c5ac60897509cf7ddfe4c3aedfb0962e62f4d Mon Sep 17 00:00:00 2001 From: OmG Date: Tue, 28 Oct 2025 16:53:27 +0100 Subject: [PATCH 10/23] refactor: reformat some lines (to resolve some "make format" error) --- mapie/_venn_abers.py | 1 - mapie/calibration.py | 2 +- mapie/tests/test_venn_abers_calibration.py | 10 ++++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/mapie/_venn_abers.py b/mapie/_venn_abers.py index e1bc73efd..3d58a977e 100644 --- a/mapie/_venn_abers.py +++ b/mapie/_venn_abers.py @@ -107,7 +107,6 @@ def calc_p0p1(p_cal, y_cal, precision=None): P1 = P[1:] + 1 for i in range(len(p1)): - P1[i, :] = P1[i, :] - 1 if i == 0: diff --git a/mapie/calibration.py b/mapie/calibration.py index 7c738aca5..db8203692 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -798,7 +798,7 @@ def _check_cv(self, cv: Optional[str]) -> Optional[str]: """ if cv in self.valid_cv: return cv - raise ValueError("Invalid cv argument. " f"Allowed values are {self.valid_cv}.") + raise ValueError("Invalid cv argument. Allowed values are {self.valid_cv}.") def fit( self, diff --git a/mapie/tests/test_venn_abers_calibration.py b/mapie/tests/test_venn_abers_calibration.py index e03774583..37728f85c 100644 --- a/mapie/tests/test_venn_abers_calibration.py +++ b/mapie/tests/test_venn_abers_calibration.py @@ -518,7 +518,9 @@ def test_random_state_in_fit_overrides() -> None: probs1 = va_cal1.predict_proba(X_binary_test) va_cal2 = VennAbersCalibrator( - estimator=GaussianNB(), inductive=True, random_state=999 # Different from fit + estimator=GaussianNB(), + inductive=True, + random_state=999 # Different from fit ) va_cal2.fit(X_binary_train, y_binary_train, random_state=123) probs2 = va_cal2.predict_proba(X_binary_test) @@ -1115,9 +1117,9 @@ def test_all_modes_produce_valid_probabilities() -> None: # Check valid probabilities assert np.all(probs >= 0), f"Mode {mode_name} produced negative probabilities" assert np.all(probs <= 1), f"Mode {mode_name} produced probabilities > 1" - assert np.allclose( - probs.sum(axis=1), 1.0 - ), f"Mode {mode_name} probabilities don't sum to 1" + assert np.allclose(probs.sum(axis=1), 1.0), ( + f"Mode {mode_name} probabilities don't sum to 1" + ) # ============================================================================ From 5fc834fc26551d20171dda5a563a99ab5ef3e280 Mon Sep 17 00:00:00 2001 From: OmG Date: Tue, 28 Oct 2025 16:59:19 +0100 Subject: [PATCH 11/23] refactor: reformat multiple lines to pass "make format" command --- mapie/tests/test_venn_abers_calibration.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mapie/tests/test_venn_abers_calibration.py b/mapie/tests/test_venn_abers_calibration.py index 37728f85c..c3d865185 100644 --- a/mapie/tests/test_venn_abers_calibration.py +++ b/mapie/tests/test_venn_abers_calibration.py @@ -520,7 +520,7 @@ def test_random_state_in_fit_overrides() -> None: va_cal2 = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, - random_state=999 # Different from fit + random_state=999, # Different from fit ) va_cal2.fit(X_binary_train, y_binary_train, random_state=123) probs2 = va_cal2.predict_proba(X_binary_test) @@ -1640,7 +1640,9 @@ def test_prefit_with_unfitted_estimator_raises_error() -> None: def test_cross_val_without_n_splits_raises_error() -> None: """Test that cross-validation mode without n_splits raises an error.""" va_cal = VennAbersCalibrator( - estimator=GaussianNB(), inductive=False, n_splits=None # Missing n_splits + estimator=GaussianNB(), + inductive=False, + n_splits=None, # Missing n_splits ) with pytest.raises(ValueError, match=".*please provide n_splits.*"): From ef9b8e687411ea47bdadf6c0a0ce3610497c4b0e Mon Sep 17 00:00:00 2001 From: OmG Date: Tue, 28 Oct 2025 17:29:54 +0100 Subject: [PATCH 12/23] refactor: remove unnecessary import in the middile of the function --- mapie/calibration.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mapie/calibration.py b/mapie/calibration.py index db8203692..bd90a6272 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -877,10 +877,8 @@ def fit( X, y = indexable(X, y) y = _check_y(y) sample_weight, X, y = _check_null_weight(sample_weight, X, y) - # Handle categorical features - - from sklearn.pipeline import Pipeline + # Handle categorical features last_estimator = self.estimator X_processed = X From 4d933475d885b94134d17fa01d498a2cee4f04ca Mon Sep 17 00:00:00 2001 From: OmG Date: Tue, 28 Oct 2025 17:31:29 +0100 Subject: [PATCH 13/23] fix: a typo in doc string --- mapie/calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapie/calibration.py b/mapie/calibration.py index bd90a6272..a92e8f978 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -633,7 +633,7 @@ class VennAbersCalibrator(BaseEstimator, ClassifierMixin): May be None in prefit mode with multi-class classification. transformers_ : Optional[Pipeline] - Trasnformers from sklearn pipeline to transform categorical attributes. + Transformers from sklearn pipeline to transform categorical attributes. single_estimator_ : Optional[ClassifierMixin] The fitted estimator (only for prefit mode). From c8fcece7d679727e38f98fbcd2fbe302526c159e Mon Sep 17 00:00:00 2001 From: OmG Date: Tue, 28 Oct 2025 17:32:27 +0100 Subject: [PATCH 14/23] fix: a typo in doc string --- mapie/_venn_abers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapie/_venn_abers.py b/mapie/_venn_abers.py index 3d58a977e..ea894249d 100644 --- a/mapie/_venn_abers.py +++ b/mapie/_venn_abers.py @@ -513,7 +513,7 @@ class VennAbersCV: inductive : bool True to run the Inductive (IVAP) or False for Cross (CVAP) - Venn-ABERS calibtration + Venn-ABERS calibration n_splits: int, default=5 For CVAP only, number of folds. Must be at least 2. @@ -749,7 +749,7 @@ class VennAbersMultiClass: inductive : bool True to run the Inductive (IVAP) or False for Cross (CVAP) - Venn-ABERS calibtration + Venn-ABERS calibration n_splits: int, default=5 For CVAP only, number of folds. Must be at least 2. From 2a3956c9565070f0b3b110e9251950b0515d50f5 Mon Sep 17 00:00:00 2001 From: OmG Date: Tue, 28 Oct 2025 17:34:17 +0100 Subject: [PATCH 15/23] fix: a typo in doc string --- mapie/_venn_abers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapie/_venn_abers.py b/mapie/_venn_abers.py index ea894249d..577c2eb61 100644 --- a/mapie/_venn_abers.py +++ b/mapie/_venn_abers.py @@ -679,7 +679,7 @@ def predict_proba(self, _x_test, loss="log", p0_p1_output=False): Returns ---------- - p_prime: {array-like}, shape (n_samples,n_classses) + p_prime: {array-like}, shape (n_samples,n_classes) Venn-ABERS calibrated probabilities p0_p1: {array-like}, default = None @@ -924,7 +924,7 @@ def predict_proba(self, _x_test, loss="log", p0_p1_output=False): Returns ---------- - p_prime: {array-like}, shape (n_samples,n_classses) + p_prime: {array-like}, shape (n_samples,n_classes) Venn-ABERS calibrated probabilities p0_p1: {array-like}, default = None From 80d2405d0c8b635886f950b5eb7c51c0e42e5008 Mon Sep 17 00:00:00 2001 From: OmG Date: Tue, 28 Oct 2025 17:35:05 +0100 Subject: [PATCH 16/23] fix: a typo in the doc string --- mapie/_venn_abers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapie/_venn_abers.py b/mapie/_venn_abers.py index 577c2eb61..3a7c52df1 100644 --- a/mapie/_venn_abers.py +++ b/mapie/_venn_abers.py @@ -531,7 +531,7 @@ class VennAbersCV: train_proper_size : float or int, default=None For IVAP only, if float, should be between 0.0 and 1.0 and represent the - proportion of the dataset to include in the poroper training set split. If + proportion of the dataset to include in the proper training set split. If int, represents the absolute number of train samples. If None, the value is automatically set to the complement of the test size. @@ -767,7 +767,7 @@ class VennAbersMultiClass: train_size : float or int, default=None For IVAP only, if float, should be between 0.0 and 1.0 and represent the - proportion of the dataset to include in the poroper training set split. If + proportion of the dataset to include in the proper training set split. If int, represents the absolute number of train samples. If None, the value is automatically set to the complement of the test size. From 35f384a7ab126b20d25d76cad3a33a95ef83096a Mon Sep 17 00:00:00 2001 From: OmG Date: Wed, 29 Oct 2025 12:36:33 +0100 Subject: [PATCH 17/23] fix: dimension of p_cal was mentioned wrongly in the doc string, in VennAbers --- mapie/_venn_abers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapie/_venn_abers.py b/mapie/_venn_abers.py index 3a7c52df1..fd17c06e0 100644 --- a/mapie/_venn_abers.py +++ b/mapie/_venn_abers.py @@ -47,7 +47,7 @@ def calc_p0p1(p_cal, y_cal, precision=None): Parameters ---------- - p_cal : {array-like}, shape (n_samples,) + p_cal : {array-like}, shape (n_samples, 2) Input data for calibration consisting of calibration set probabilities y_cal : {array-like}, shape (n_samples,) @@ -453,7 +453,7 @@ def fit(self, p_cal, y_cal, precision=None): Parameters ---------- - p_cal : {array-like}, shape (n_samples,) + p_cal : {array-like}, shape (n_samples, 2) Input data for calibration consisting of calibration set probabilities y_cal : {array-like}, shape (n_samples,) From ce8d4ca5def6f98d240f995a452a1f18c0f5cc11 Mon Sep 17 00:00:00 2001 From: OmG Date: Wed, 29 Oct 2025 14:28:18 +0100 Subject: [PATCH 18/23] refactor: change Exceptions to ValueError when makes sense --- mapie/_venn_abers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapie/_venn_abers.py b/mapie/_venn_abers.py index fd17c06e0..c39963ec1 100644 --- a/mapie/_venn_abers.py +++ b/mapie/_venn_abers.py @@ -841,14 +841,14 @@ def fit(self, _x_train, _y_train, sample_weight=None): # integrity checks if not self.inductive and self.n_splits is None: - raise Exception("For Cross Venn ABERS please provide n_splits") + raise ValueError("For Cross Venn ABERS please provide n_splits") try: check_is_fitted(self.estimator) except NotFittedError: if (self.inductive and self.cal_size is None) and ( self.train_proper_size is None ): - raise Exception( + raise ValueError( "For Inductive Venn-ABERS please provide either calibration" "or proper train set size" ) From 3c64f79cda130531ff2fc48077a9ed654cb9f94b Mon Sep 17 00:00:00 2001 From: OmG Date: Wed, 29 Oct 2025 15:17:00 +0100 Subject: [PATCH 19/23] fix: remove setting global config of sklearn --- mapie/_venn_abers.py | 3 --- mapie/tests/test_venn_abers_calibration.py | 7 +++++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/mapie/_venn_abers.py b/mapie/_venn_abers.py index c39963ec1..046e6ee3a 100644 --- a/mapie/_venn_abers.py +++ b/mapie/_venn_abers.py @@ -1,12 +1,9 @@ import numpy as np -import sklearn from sklearn.model_selection import StratifiedKFold, train_test_split from sklearn.multiclass import OneVsOneClassifier from sklearn.utils.validation import check_is_fitted from sklearn.exceptions import NotFittedError -sklearn.set_config(enable_metadata_routing=True) -np.seterr(divide="ignore", invalid="ignore") """ Private module containing core Venn-ABERS implementation classes. diff --git a/mapie/tests/test_venn_abers_calibration.py b/mapie/tests/test_venn_abers_calibration.py index c3d865185..6b8e4612b 100644 --- a/mapie/tests/test_venn_abers_calibration.py +++ b/mapie/tests/test_venn_abers_calibration.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd import pytest +import sklearn from sklearn.base import ClassifierMixin from sklearn.compose import ColumnTransformer from sklearn.datasets import make_classification @@ -416,6 +417,7 @@ def test_gradient_boosting_with_early_stopping() -> None: def test_sample_weights_none() -> None: """Test that sample_weight=None works correctly.""" + sklearn.set_config(enable_metadata_routing=True) va_cal = VennAbersCalibrator( estimator=GaussianNB(), inductive=True, random_state=random_state ) @@ -427,6 +429,8 @@ def test_sample_weights_none() -> None: def test_sample_weights_constant() -> None: """Test that constant sample weights give same results as None.""" + sklearn.set_config(enable_metadata_routing=True) + n_samples = len(X_binary_train) weighted_estimator = GaussianNB().set_fit_request(sample_weight=True) @@ -457,6 +461,7 @@ def test_sample_weights_constant() -> None: def test_sample_weights_variable() -> None: """Test that variable sample weights affect the results.""" + sklearn.set_config(enable_metadata_routing=True) n_samples = len(X_binary_train) va_cal_uniform = VennAbersCalibrator( @@ -1928,6 +1933,7 @@ def test_error_message_for_invalid_cv() -> None: def test_venn_abers_cv_with_sample_weight() -> None: """Test VennAbersCV with sample weights in cross-validation mode.""" # Create sample weights - higher weights for some samples + sklearn.set_config(enable_metadata_routing=True) sample_weight = np.ones(len(y_binary_train)) sample_weight[: len(y_binary_train) // 2] = 2.0 # Double weight for first half weighted_estimator = GaussianNB().set_fit_request(sample_weight=True) @@ -1961,6 +1967,7 @@ def test_venn_abers_cv_with_sample_weight() -> None: def test_venn_abers_cv_sample_weight_all_folds() -> None: """Test that sample weights are properly used across all CV folds.""" + sklearn.set_config(enable_metadata_routing=True) sample_weight = np.random.RandomState(42).uniform(0.5, 2.0, len(y_binary_train)) weighted_estimator = GaussianNB().set_fit_request(sample_weight=True) va_cal = VennAbersCalibrator( From 8b91f479686e6eec3f18859950f369538e599f0c Mon Sep 17 00:00:00 2001 From: OmG Date: Thu, 30 Oct 2025 10:50:26 +0100 Subject: [PATCH 20/23] fix: handling any types of labels for classes other than indices started from 0 --- mapie/_venn_abers.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/mapie/_venn_abers.py b/mapie/_venn_abers.py index 046e6ee3a..149ddd77d 100644 --- a/mapie/_venn_abers.py +++ b/mapie/_venn_abers.py @@ -306,22 +306,27 @@ def predict_proba_prefitted_va( p_prime = None multiclass_p0p1 = None + classes = np.unique(y_cal) + class_label_to_idx_map = {label: i for i, label in enumerate(classes)} + if va_tpe == "one_vs_one": - classes = np.unique(y_cal) - class_pairs = [] + class_pairs_labels = [] + classes_pairs_indices = [] for i in range(len(classes) - 1): for j in range(i + 1, len(classes)): - class_pairs.append([classes[i], classes[j]]) + class_pairs_labels.append([classes[i], classes[j]]) + classes_pairs_indices.append([class_label_to_idx_map[classes[i]], + class_label_to_idx_map[classes[j]]]) multiclass_probs = [] multiclass_p0p1 = [] - for i, class_pair in enumerate(class_pairs): + for i, class_pair in enumerate(class_pairs_labels): pairwise_indices = (y_cal == class_pair[0]) + (y_cal == class_pair[1]) - binary_cal_probs = p_cal[:, class_pair][pairwise_indices] / np.sum( - p_cal[:, class_pair][pairwise_indices], axis=1 + binary_cal_probs = p_cal[:, classes_pairs_indices[i]][pairwise_indices] / np.sum( + p_cal[:, classes_pairs_indices[i]][pairwise_indices], axis=1 ).reshape(-1, 1) - binary_test_probs = p_test[:, class_pair] / np.sum( - p_test[:, class_pair], axis=1 + binary_test_probs = p_test[:, classes_pairs_indices[i]] / np.sum( + p_test[:, classes_pairs_indices[i]], axis=1 ).reshape(-1, 1) binary_classes = y_cal[pairwise_indices] == class_pair[1] @@ -340,12 +345,12 @@ def predict_proba_prefitted_va( stack_i = [ p[:, 0].reshape(-1, 1) for i, p in enumerate(multiclass_probs) - if class_pairs[i][0] == cl_id + if class_pairs_labels[i][0] == cl_id ] stack_j = [ p[:, 1].reshape(-1, 1) for i, p in enumerate(multiclass_probs) - if class_pairs[i][1] == cl_id + if class_pairs_labels[i][1] == cl_id ] p_stack = stack_i + stack_j @@ -355,17 +360,15 @@ def predict_proba_prefitted_va( ) else: - classes = np.unique(y_cal) - multiclass_probs = [] multiclass_p0p1 = [] for _, class_id in enumerate(classes): class_indices = y_cal == class_id binary_cal_probs = np.zeros((len(p_cal), 2)) binary_test_probs = np.zeros((len(p_test), 2)) - binary_cal_probs[:, 1] = p_cal[:, class_id] + binary_cal_probs[:, 1] = p_cal[:, class_label_to_idx_map[class_id]] binary_cal_probs[:, 0] = 1 - binary_cal_probs[:, 1] - binary_test_probs[:, 1] = p_test[:, class_id] + binary_test_probs[:, 1] = p_test[:, class_label_to_idx_map[class_id]] binary_test_probs[:, 0] = 1 - binary_test_probs[:, 1] binary_classes = class_indices From 772c79b4de19eb85efa4cec95dc2d5384e854600 Mon Sep 17 00:00:00 2001 From: OmG Date: Thu, 30 Oct 2025 10:57:21 +0100 Subject: [PATCH 21/23] fix: format errors --- mapie/_venn_abers.py | 16 ++++++++++++---- mapie/tests/test_venn_abers_calibration.py | 6 +++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/mapie/_venn_abers.py b/mapie/_venn_abers.py index 149ddd77d..ffce83b82 100644 --- a/mapie/_venn_abers.py +++ b/mapie/_venn_abers.py @@ -315,16 +315,24 @@ def predict_proba_prefitted_va( for i in range(len(classes) - 1): for j in range(i + 1, len(classes)): class_pairs_labels.append([classes[i], classes[j]]) - classes_pairs_indices.append([class_label_to_idx_map[classes[i]], - class_label_to_idx_map[classes[j]]]) + classes_pairs_indices.append( + [ + class_label_to_idx_map[classes[i]], + class_label_to_idx_map[classes[j]], + ] + ) multiclass_probs = [] multiclass_p0p1 = [] for i, class_pair in enumerate(class_pairs_labels): pairwise_indices = (y_cal == class_pair[0]) + (y_cal == class_pair[1]) - binary_cal_probs = p_cal[:, classes_pairs_indices[i]][pairwise_indices] / np.sum( + binary_cal_probs = p_cal[:, classes_pairs_indices[i]][ + pairwise_indices + ] / np.sum( p_cal[:, classes_pairs_indices[i]][pairwise_indices], axis=1 - ).reshape(-1, 1) + ).reshape( + -1, 1 + ) binary_test_probs = p_test[:, classes_pairs_indices[i]] / np.sum( p_test[:, classes_pairs_indices[i]], axis=1 ).reshape(-1, 1) diff --git a/mapie/tests/test_venn_abers_calibration.py b/mapie/tests/test_venn_abers_calibration.py index 6b8e4612b..ef72cb4da 100644 --- a/mapie/tests/test_venn_abers_calibration.py +++ b/mapie/tests/test_venn_abers_calibration.py @@ -1122,9 +1122,9 @@ def test_all_modes_produce_valid_probabilities() -> None: # Check valid probabilities assert np.all(probs >= 0), f"Mode {mode_name} produced negative probabilities" assert np.all(probs <= 1), f"Mode {mode_name} produced probabilities > 1" - assert np.allclose(probs.sum(axis=1), 1.0), ( - f"Mode {mode_name} probabilities don't sum to 1" - ) + assert np.allclose( + probs.sum(axis=1), 1.0 + ), f"Mode {mode_name} probabilities don't sum to 1" # ============================================================================ From 4dad2f6c56810e8b441c5043c45645c6e8eeb4ae Mon Sep 17 00:00:00 2001 From: OmG Date: Thu, 30 Oct 2025 11:04:35 +0100 Subject: [PATCH 22/23] fix: format error --- mapie/tests/test_venn_abers_calibration.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mapie/tests/test_venn_abers_calibration.py b/mapie/tests/test_venn_abers_calibration.py index ef72cb4da..6b8e4612b 100644 --- a/mapie/tests/test_venn_abers_calibration.py +++ b/mapie/tests/test_venn_abers_calibration.py @@ -1122,9 +1122,9 @@ def test_all_modes_produce_valid_probabilities() -> None: # Check valid probabilities assert np.all(probs >= 0), f"Mode {mode_name} produced negative probabilities" assert np.all(probs <= 1), f"Mode {mode_name} produced probabilities > 1" - assert np.allclose( - probs.sum(axis=1), 1.0 - ), f"Mode {mode_name} probabilities don't sum to 1" + assert np.allclose(probs.sum(axis=1), 1.0), ( + f"Mode {mode_name} probabilities don't sum to 1" + ) # ============================================================================ From 5a97e45b252fc8fa2aa6e1d0d6bf0e749f2805d5 Mon Sep 17 00:00:00 2001 From: OmG Date: Thu, 30 Oct 2025 11:12:21 +0100 Subject: [PATCH 23/23] fix: format error (it was proposed by "black" formatter command) --- mapie/_venn_abers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mapie/_venn_abers.py b/mapie/_venn_abers.py index ffce83b82..a68dac684 100644 --- a/mapie/_venn_abers.py +++ b/mapie/_venn_abers.py @@ -330,9 +330,7 @@ def predict_proba_prefitted_va( pairwise_indices ] / np.sum( p_cal[:, classes_pairs_indices[i]][pairwise_indices], axis=1 - ).reshape( - -1, 1 - ) + ).reshape(-1, 1) binary_test_probs = p_test[:, classes_pairs_indices[i]] / np.sum( p_test[:, classes_pairs_indices[i]], axis=1 ).reshape(-1, 1)