|
20 | 20 | from ._typing import ArrayLike |
21 | 21 | from .aggregation_functions import aggregate_all, phi2D |
22 | 22 | from .subsample import Subsample |
23 | | -from .dre import DensityRatioEstimator, ProbClassificationDRE |
24 | 23 | from .utils import ( |
25 | 24 | check_cv, |
26 | 25 | check_alpha, |
|
30 | 29 | check_nan_in_aposteriori_prediction, |
31 | 30 | check_null_weight, |
32 | 31 | check_verbose, |
33 | | - fit_estimator, |
34 | | - empirical_quantile |
| 32 | + fit_estimator |
35 | 33 | ) |
36 | 34 |
|
37 | 35 |
|
@@ -678,360 +676,3 @@ def predict( |
678 | 676 | if ensemble: |
679 | 677 | y_pred = aggregate_all(self.agg_function, y_pred_multi) |
680 | 678 | return y_pred, np.stack([y_pred_low, y_pred_up], axis=1) |
681 | | - |
682 | | - |
class MapieCovShiftRegressor(MapieRegressor):  # type: ignore
    """
    Prediction intervals under covariate shift, from weighted residuals.

    The residuals stored by ``MapieRegressor.fit`` are combined with
    density-ratio estimates given by ``dr_estimator``. For every test
    point ``x``, each calibration residual ``i`` receives the weight
    ``w(X_i) / (sum_j w(X_j) + w(x))`` and an extra residual equal to
    ``+inf`` receives the weight ``w(x) / (sum_j w(X_j) + w(x))``, where
    ``w`` is ``dr_estimator.predict``. The weighted empirical quantile of
    level ``1 - alpha`` of these residuals is the half-width of the
    interval centred on the prediction of ``single_estimator_``.

    Only ``cv="prefit"`` is currently supported: any other value raises
    ``NotImplementedError`` at construction time.

    Parameters
    ----------
    estimator : Optional[RegressorMixin]
        Any regressor with scikit-learn API
        (i.e. with fit and predict methods), by default ``None``.
        If ``None``, estimator defaults to a ``LinearRegression`` instance.

    dr_estimator : Optional[DensityRatioEstimator]
        Any density ratio estimator with scikit-learn API
        (i.e. with fit and predict methods), by default ``None``.
        If ``None``, dr_estimator defaults to a ``ProbClassificationDRE``
        instance built with ``clip_min=0.01`` and ``clip_max=0.99``.
        Since ``cv`` must be ``"prefit"``, the estimator is expected to be
        fitted already.

    method: str, optional
        Method to choose for prediction interval estimates.
        Choose among:

        - "naive", based on training set residuals,
        - "base", based on validation sets residuals.

        By default "base".

    cv: Optional[Union[int, str, BaseCrossValidator]]
        The cross-validation strategy for computing residuals.
        Only ``"prefit"`` is implemented: ``estimator`` and
        ``dr_estimator`` are assumed to be fitted already, and all data
        provided in the ``fit`` method is used for computing residuals
        and density-ratio estimates only.
        The user has to take care manually that data for model fitting and
        residual estimate are disjoint.

        By default ``None``, which raises ``NotImplementedError``.

    n_jobs: Optional[int]
        Number of jobs for parallel processing using joblib
        via the "loky" backend.
        If ``-1`` all CPUs are used.
        If ``1`` is given, no parallel computing code is used at all,
        which is useful for debugging.
        For n_jobs below ``-1``, ``(n_cpus + 1 + n_jobs)`` are used.
        None is a marker for `unset` that will be interpreted as ``n_jobs=1``
        (sequential execution).

        By default ``None``.

    agg_function : str
        Determines how to aggregate predictions from perturbed models, both at
        training and prediction time.
        If cv is ``"prefit"``, ``agg_function`` is ignored.

        By default "mean".

    verbose : int, optional
        The verbosity level, used with joblib for multiprocessing.
        The frequency of the messages increases with the verbosity level.
        If it is more than ``10``, all iterations are reported.
        Above ``50``, the output is sent to stdout.

        By default ``0``.

    Attributes
    ----------
    valid_methods_: List[str]
        List of all valid methods.

    single_estimator_ : sklearn.RegressorMixin
        Estimator fitted on the whole training set.

    estimators_ : list
        List of out-of-folds estimators.

    residuals_ : ArrayLike of shape (n_samples_train,)
        Residuals between ``y_train`` and ``y_pred``.

    dr_estimator_ : DensityRatioEstimator
        Density ratio estimator validated by ``fit`` and used by
        ``predict``.

    residuals_dre_ : ArrayLike
        Output of ``dr_estimator_.predict`` on the data passed to ``fit``.

    k_ : ArrayLike
        - Array of nans, of shape (len(y), 1) if cv is ``"prefit"``
          (defined but not used)
        - Dummy array of folds containing each training sample, otherwise.
          Of shape (n_samples_train, cv.get_n_splits(X_train, y_train)).

    n_features_in_: int
        Number of features passed to the fit method.

    n_samples_: List[int]
        Number of samples passed to the fit method.

    References
    ----------
    Ryan J. Tibshirani, Rina Foygel Barber, Emmanuel J. Candes and
    Aaditya Ramdas.
    "Conformal Prediction Under Covariate Shift."
    Advances in Neural Information Processing Systems 32, 2019.
    """
    valid_methods_ = ["naive", "base"]
    valid_agg_functions_ = [None, "median", "mean"]
    fit_attributes = [
        "single_estimator_",
        "estimators_",
        "k_",
        "residuals_",
        "residuals_dre_",
        "dr_estimator_",
        "n_features_in_",
        "n_samples_",
    ]

    def __init__(
        self,
        estimator: Optional[RegressorMixin] = None,
        dr_estimator: Optional[DensityRatioEstimator] = None,
        method: str = "base",
        cv: Optional[Union[int, str, BaseCrossValidator]] = None,
        n_jobs: Optional[int] = None,
        agg_function: Optional[str] = "mean",
        verbose: int = 0,
    ) -> None:
        """
        Store the parameters and forward the shared ones to
        ``MapieRegressor``.

        Raises
        ------
        NotImplementedError
            If ``cv`` is not ``"prefit"``.
        """
        self.dr_estimator = dr_estimator
        if cv != "prefit":
            raise NotImplementedError(
                "MapieCovShiftRegressor only supports cv='prefit'."
            )
        super().__init__(
            estimator=estimator,
            method=method,
            cv=cv,
            n_jobs=n_jobs,
            agg_function=agg_function,
            verbose=verbose,
        )

    def _check_dr_estimator(
        self,
        dr_estimator: Optional[DensityRatioEstimator] = None
    ) -> DensityRatioEstimator:
        """
        Check if estimator is ``None``, and returns a ``ProbClassificationDRE``
        instance if necessary.
        If the ``cv`` attribute is ``"prefit"``, a user-provided estimator
        is additionally asked to confirm that it is fitted, through its
        ``check_is_fitted`` method.

        Parameters
        ----------
        dr_estimator : Optional[DensityRatioEstimator], optional
            Estimator to check, by default ``None``.

        Returns
        -------
        DensityRatioEstimator
            The estimator itself or a default ``ProbClassificationDRE``
            instance built with ``clip_min=0.01`` and ``clip_max=0.99``.

        Raises
        ------
        ValueError
            If the estimator is not ``None``
            and lacks a fit or a predict method.

        NotFittedError
            If the estimator is not fitted and ``cv`` attribute is "prefit"
            (raised by ``dr_estimator.check_is_fitted()``; presumably a
            ``NotFittedError`` -- confirm against ``DensityRatioEstimator``).
        """
        if dr_estimator is None:
            # NOTE(review): the default instance is returned without any
            # fitted check, exactly as before.
            return ProbClassificationDRE(clip_min=0.01, clip_max=0.99)
        has_api = (
            hasattr(dr_estimator, "fit")
            and hasattr(dr_estimator, "predict")
        )
        if not has_api:
            raise ValueError(
                "Invalid estimator. "
                "Please provide a density ratio estimator with fit "
                "and predict methods."
            )
        if self.cv == "prefit":
            dr_estimator.check_is_fitted()

        return dr_estimator

    def fit(
        self,
        X: ArrayLike,
        y: ArrayLike,
        sample_weight: Optional[ArrayLike] = None,
    ) -> MapieRegressor:
        """
        Compute the residuals and the density-ratio estimates used for
        prediction intervals.

        The density ratio estimator is validated and stored under the
        ``dr_estimator_`` attribute, ``MapieRegressor.fit`` is run on the
        given data, and the output of ``dr_estimator_.predict(X)`` is
        stored under the ``residuals_dre_`` attribute.

        Parameters
        ----------
        X : ArrayLike of shape (n_samples, n_features)
            Training data.

        y : ArrayLike of shape (n_samples,)
            Training labels.

        sample_weight : Optional[ArrayLike] of shape (n_samples,)
            Sample weights, forwarded unchanged to ``MapieRegressor.fit``.

            By default ``None``.

        Returns
        -------
        MapieRegressor
            The model itself.
        """
        # Resolve the ``None`` default and validate the estimator up front,
        # so that an invalid estimator fails before any residual is computed.
        self.dr_estimator_ = self._check_dr_estimator(self.dr_estimator)
        super().fit(X=X, y=y, sample_weight=sample_weight)
        self.residuals_dre_ = self.dr_estimator_.predict(X)
        return self

    def predict(
        self,
        X: ArrayLike,
        ensemble: bool = False,
        alpha: Optional[Union[float, Iterable[float]]] = None,
    ) -> Union[ArrayLike, Tuple[ArrayLike, ArrayLike]]:
        """
        Predict target on new samples with confidence intervals.

        For a given ``alpha``, the interval of each test point is its
        prediction plus or minus the weighted empirical quantile of level
        ``1 - alpha`` of the residuals, the weights being derived from the
        density-ratio estimates of the calibration data and of the test
        point itself.

        Parameters
        ----------
        X : ArrayLike of shape (n_samples, n_features)
            Test data.

        ensemble: bool
            Checked with ``_check_ensemble`` but otherwise unused:
            predictions are always those of ``single_estimator_``.

            By default ``False``.

        alpha: Optional[Union[float, Iterable[float]]]
            Can be a float, a list of floats, or a ``ArrayLike`` of floats.
            Between 0 and 1, represents the uncertainty of the confidence
            interval.
            Lower ``alpha`` produce larger (more conservative) prediction
            intervals.
            ``alpha`` is the complement of the target coverage level.

            By default ``None``.

        Returns
        -------
        Union[ArrayLike, Tuple[ArrayLike, ArrayLike]]

        - ArrayLike of shape (n_samples,) if alpha is None.

        - Tuple[ArrayLike, ArrayLike] of shapes
          (n_samples,) and (n_samples, 2, n_alpha) if alpha is not None.

            - [:, 0, :]: Lower bound of the prediction interval.
            - [:, 1, :]: Upper bound of the prediction interval.

        Raises
        ------
        NotImplementedError
            If ``method`` is neither "naive" nor "base" while ``cv`` is not
            ``"prefit"``.
        """
        # Checks
        check_is_fitted(self, self.fit_attributes)
        self._check_ensemble(ensemble)
        alpha_ = check_alpha(alpha)

        y_pred = self.single_estimator_.predict(X)
        if alpha is None:
            # Point predictions only: no density ratio is needed.
            return np.array(y_pred)

        alpha_ = cast(ArrayLike, alpha_)
        check_alpha_and_n_samples(alpha_, self.residuals_.shape[0])
        if self.method not in ("naive", "base") and self.cv != "prefit":
            raise NotImplementedError(
                "Only the 'naive' and 'base' methods or cv='prefit' "
                "are supported."
            )

        dre_calib = self.residuals_dre_
        dre_pred = self.dr_estimator_.predict(X)

        # Normalising constant of the weights: one value per test point,
        # because the test point's own density ratio enters the sum.
        denom = dre_calib.sum() + dre_pred

        y_pred_low = np.empty(
            (y_pred.shape[0], len(alpha_)), dtype=y_pred.dtype
        )
        y_pred_up = np.empty_like(y_pred_low, dtype=y_pred.dtype)
        for i, (test_dre, norm) in enumerate(zip(dre_pred, denom)):
            # The test point carries its own weight, attached to an
            # infinite residual appended after the calibration residuals.
            # Fresh arrays are built at every iteration in case
            # ``empirical_quantile`` modifies its inputs.
            quantile = empirical_quantile(
                np.hstack([self.residuals_, np.array([np.inf])]),
                alphas=1 - alpha_,
                weights=np.hstack(
                    [dre_calib / norm, np.array([test_dre / norm])]
                ),
            )
            y_pred_low[i, :] = y_pred[i] - quantile
            y_pred_up[i, :] = y_pred[i] + quantile

        return y_pred, np.stack([y_pred_low, y_pred_up], axis=1)
0 commit comments