From d7d34aec206020c736c1aa1c76bc998c639d6455 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Bostr=C3=B6m?=
 <henrikbostrom@users.noreply.github.com>
Date: Tue, 28 Jun 2022 16:25:49 +0200
Subject: [PATCH] crepes 0.1.0

---
 src/crepes/__init__.py |   9 +-
 src/crepes/base.py     | 500 ++++++++++++++++++++++++++++++-----------
 src/crepes/fillings.py | 139 ++++++++++--
 3 files changed, 497 insertions(+), 151 deletions(-)

diff --git a/src/crepes/__init__.py b/src/crepes/__init__.py
index e4ece93..19a425a 100644
--- a/src/crepes/__init__.py
+++ b/src/crepes/__init__.py
@@ -1,14 +1,15 @@
 """Conformal regressors and predictive systems (crepes)
 
-Routines that implement conformal regressors and conformal predictive
+Classes implementing conformal regressors and conformal predictive
 systems, which transform point predictions into prediction intervals
-and cumulative distributions, respectively.
+and cumulative distribution functions, respectively.
 
 Author: Henrik Boström (bostromh@kth.se)
 
-Copyright 2021 Henrik Boström
+Copyright 2022 Henrik Boström
 
 License: BSD 3 clause
 """
-from crepes.base import ConformalRegressor, ConformalPredictiveSystem
+from crepes.base import ConformalRegressor, ConformalPredictiveSystem, __version__
+
 
diff --git a/src/crepes/base.py b/src/crepes/base.py
index 33c1cde..225ccbd 100644
--- a/src/crepes/base.py
+++ b/src/crepes/base.py
@@ -1,29 +1,30 @@
 """Conformal regressors and predictive systems (crepes)
 
-Routines that implement conformal regressors and conformal predictive
+Classes implementing conformal regressors and conformal predictive
 systems, which transform point predictions into prediction intervals
-and cumulative distributions, respectively.
+and cumulative distribution functions, respectively.
 
 Author: Henrik Boström (bostromh@kth.se)
 
-Copyright 2021 Henrik Boström
+Copyright 2022 Henrik Boström
 
 License: BSD 3 clause
 """
 
-# To do:
-#
-# - error messages
-# - commenting and documentation 
-# - test for uniformity of p-values (in evaluate)
-
 import numpy as np
 import pandas as pd
 import time
 
+__version__ = "0.1.0"
 
 class ConformalPredictor():
+    """
+    Conformal Predictor.
 
+    The class contains two sub-classes: ConformalRegressor 
+    and ConformalPredictiveSystem.
+    
+    """
     
     def __init__(self):
         self.alphas = None
@@ -39,16 +40,18 @@ class ConformalRegressor(ConformalPredictor):
     """
     Conformal Regressor.
 
-    A conformal regressor transforms point predictions (regression values) into
-    prediction intervals, for a certain confidence level.
+    A conformal regressor transforms point predictions (regression 
+    values) into prediction intervals, for a certain confidence level.  
     
     """
     
     def __repr__(self):
         if self.fitted:
-            return "ConformalRegressor(fitted={}, normalized={}, mondrian={})".format(self.fitted, self.normalized, self.mondrian)
+            return (f"ConformalRegressor(fitted={self.fitted}, "
+                    f"normalized={self.normalized}, "
+                    f"mondrian={self.mondrian})")
         else:
-            return "ConformalRegressor(fitted={})".format(self.fitted)
+            return f"ConformalRegressor(fitted={self.fitted})"
     
     def fit(self, residuals=None, sigmas=None, bins=None):
         """
@@ -56,12 +59,12 @@ def fit(self, residuals=None, sigmas=None, bins=None):
 
         Parameters
         ----------
-        residuals : array-like of shape (n_values,)
-            Residuals; actual - predicted
-        sigmas: array-like of shape (n_values,)
-            Sigmas; difficulty estimates
-        bins : array-like of shape (n_values,)
-            Bins; Mondrian categories
+        residuals : array-like of shape (n_values,), default=None
+            actual values - predicted values
+        sigmas: array-like of shape (n_values,), default=None
+            difficulty estimates
+        bins : array-like of shape (n_values,), default=None
+            Mondrian categories
 
         Returns
         -------
@@ -84,38 +87,42 @@ def fit(self, residuals=None, sigmas=None, bins=None):
             bin_values = np.unique(bins)
             if sigmas is None:            
                 self.normalized = False
-                self.alphas = (bin_values,[np.sort(abs_residuals[bins==b])[::-1] for b in bin_values])
+                self.alphas = (bin_values,[np.sort(
+                    abs_residuals[bins==b])[::-1] for b in bin_values])
             else:
                 self.normalized = True
-                self.alphas = (bin_values, [np.sort(abs_residuals[bins==b]/sigmas[bins==b])[::-1] for b in bin_values])                
+                self.alphas = (bin_values, [np.sort(
+                    abs_residuals[bins==b]/sigmas[bins==b])[::-1]
+                                           for b in bin_values])                
         self.fitted = True
         toc = time.time()
         self.time_fit = toc-tic
         return self
 
-    def predict(self, y_hat=None, sigmas=None, bins=None, confidence=0.95, y_min=-np.inf, y_max=np.inf):
+    def predict(self, y_hat=None, sigmas=None, bins=None, confidence=0.95,
+                y_min=-np.inf, y_max=np.inf):
         """
-        Predict using the conformal regressor.
+        Predict using conformal regressor.
 
         Parameters
         ----------
-        y_hat : array-like of shape (n_values,)
-            predicted (regression) values
-        sigmas : array-like of shape (n_values,)
-            Sigmas; difficulty estimates
-        bins : array-like of shape (n_values,)
-            Bins; Mondrian categories
-        confidence : float in range (0,1), default = 0.95
-            The confidence level.
-        y_min : float or int, default = -np.inf
-            The minimum value to include in prediction intervals.
-        y_max : float or int, default = np.inf
-            The maximum value to include in prediction intervals.
+        y_hat : array-like of shape (n_values,), default=None
+            predicted values
+        sigmas : array-like of shape (n_values,), default=None
+            difficulty estimates
+        bins : array-like of shape (n_values,), default=None
+            Mondrian categories
+        confidence : float in range (0,1), default=0.95
+            confidence level
+        y_min : float or int, default=-np.inf
+            minimum value to include in prediction intervals
+        y_max : float or int, default=np.inf
+            maximum value to include in prediction intervals
 
         Returns
         -------
         intervals : ndarray of shape (n_values, 2)
-            Prediction intervals.
+            prediction intervals
         """
 
         tic = time.time()
@@ -131,21 +138,31 @@ def predict(self, y_hat=None, sigmas=None, bins=None, confidence=0.95, y_min=-np
                     intervals[:,0] = y_hat-alpha
                     intervals[:,1] = y_hat+alpha
             else:
-                intervals[:,0] = -np.inf # If the no. of calibration instances is too small for the chosen confidence level, 
-                intervals[:,1] = np.inf  # then the intervals will be of maximum size
+                intervals[:,0] = -np.inf 
+                intervals[:,1] = np.inf
+                # If the no. of calibration examples is too small for
+                # the chosen confidence level, then the intervals will
+                # be of maximum size
         else:           
             bin_values, bin_alphas = self.alphas
             bin_indexes = [np.argwhere(bins == b).T[0] for b in bin_values]
-            alpha_indexes = [int((1-confidence)*(len(bin_alphas[b])+1))-1 for b in range(len(bin_values))]
-            bin_alpha = [bin_alphas[b][alpha_indexes[b]] if alpha_indexes[b]>=0 else np.inf for b in range(len(bin_values))]
+            alpha_indexes = [int((1-confidence)*(len(bin_alphas[b])+1))-1
+                             for b in range(len(bin_values))]
+            bin_alpha = [bin_alphas[b][alpha_indexes[b]]
+                         if alpha_indexes[b]>=0 else np.inf
+                         for b in range(len(bin_values))]
             if self.normalized:
                 for b in range(len(bin_values)):
-                    intervals[bin_indexes[b],0] = y_hat[bin_indexes[b]]-bin_alpha[b]*sigmas[bin_indexes[b]]
-                    intervals[bin_indexes[b],1] = y_hat[bin_indexes[b]]+bin_alpha[b]*sigmas[bin_indexes[b]]
+                    intervals[bin_indexes[b],0] = y_hat[bin_indexes[b]] \
+                        - bin_alpha[b]*sigmas[bin_indexes[b]]
+                    intervals[bin_indexes[b],1] = y_hat[bin_indexes[b]] \
+                        + bin_alpha[b]*sigmas[bin_indexes[b]]
             else:
                 for b in range(len(bin_values)):
-                    intervals[bin_indexes[b],0] = y_hat[bin_indexes[b]]-bin_alpha[b]
-                    intervals[bin_indexes[b],1] = y_hat[bin_indexes[b]]+bin_alpha[b]                
+                    intervals[bin_indexes[b],0] = y_hat[bin_indexes[b]] \
+                        - bin_alpha[b]
+                    intervals[bin_indexes[b],1] = y_hat[bin_indexes[b]] \
+                        + bin_alpha[b]                
         if y_min > -np.inf:
             intervals[intervals<y_min] = y_min
         if y_max < np.inf:
@@ -154,31 +171,33 @@ def predict(self, y_hat=None, sigmas=None, bins=None, confidence=0.95, y_min=-np
         self.time_predict = toc-tic            
         return intervals
 
-    def evaluate(self, y_hat=None, y=None, sigmas=None, bins=None, confidence=0.95, y_min=-np.inf, y_max=np.inf, metrics=None):
+    def evaluate(self, y_hat=None, y=None, sigmas=None, bins=None,
+                 confidence=0.95, y_min=-np.inf, y_max=np.inf, metrics=None):
         """
-        Evaluate the conformal regressor.
+        Evaluate conformal regressor.
 
         Parameters
         ----------
-        y_hat : array-like of shape (n_values,)
-            predicted (regression) values
-        sigmas : array-like of shape (n_values,)
-            Sigmas; difficulty estimates
-        bins : array-like of shape (n_values,)
-            Bins; Mondrian categories
-        confidence : float in range (0,1), default = 0.95
-            The confidence level.
-        y_min : float or int, default = -np.inf
-            The minimum value to include in prediction intervals.
-        y_max : float or int, default = np.inf
-            The maximum value to include in prediction intervals.
-        metrics : a string or a list of strings, default = list of all metrics
-            Evaluation metrics: "error","efficiency", "time_fit","time_evaluate"
+        y_hat : array-like of shape (n_values,), default=None
+            predicted values
+        sigmas : array-like of shape (n_values,), default=None
+            difficulty estimates
+        bins : array-like of shape (n_values,), default=None
+            Mondrian categories
+        confidence : float in range (0,1), default=0.95
+            confidence level
+        y_min : float or int, default=-np.inf
+            minimum value to include in prediction intervals
+        y_max : float or int, default=np.inf
+            maximum value to include in prediction intervals
+        metrics : a string or a list of strings, 
+                  default=list of all metrics, i.e., 
+                  ["error", "efficiency", "time_fit", "time_evaluate"]
         
         Returns
         -------
         results : dictionary with a key for each selected metric 
-            Estimated performance using the metrics.
+            estimated performance using the metrics
         """
 
         tic = time.time()
@@ -187,7 +206,8 @@ def evaluate(self, y_hat=None, y=None, sigmas=None, bins=None, confidence=0.95,
         test_results = {}
         intervals = self.predict(y_hat, sigmas, bins, confidence, y_min, y_max)
         if "error" in metrics:
-            test_results["error"] = 1-np.mean(np.logical_and(intervals[:,0]<=y,y<=intervals[:,1]))
+            test_results["error"] = 1-np.mean(
+                np.logical_and(intervals[:,0]<=y,y<=intervals[:,1]))
         if "efficiency" in metrics:            
             test_results["efficiency"] = np.mean(intervals[:,1]-intervals[:,0])
         if "time_fit" in metrics:
@@ -202,17 +222,20 @@ class ConformalPredictiveSystem(ConformalPredictor):
     """
     Conformal Predictive System.
 
-    A conformal predictive system transforms point predictions (regression values) into
-    cumulative distributions (conformal predictive distributions).
+    A conformal predictive system transforms point predictions 
+    (regression values) into cumulative distributions (conformal 
+    predictive distributions).
     
     """
     
 
     def __repr__(self):
         if self.fitted:
-            return "ConformalPredictiveSystem(fitted={}, normalized={}, mondrian={})".format(self.fitted, self.normalized, self.mondrian)
+            return (f"ConformalPredictiveSystem(fitted={self.fitted}, "
+                    f"normalized={self.normalized}, "
+                    f"mondrian={self.mondrian})")
         else:
-            return "ConformalPredictiveSystem(fitted={})".format(self.fitted)
+            return f"ConformalPredictiveSystem(fitted={self.fitted})"
 
     def fit(self, residuals=None, sigmas=None, bins=None):
         """
@@ -220,12 +243,12 @@ def fit(self, residuals=None, sigmas=None, bins=None):
 
         Parameters
         ----------
-        residuals : array-like of shape (n_values,)
-            Residuals; actual - predicted
-        sigmas: array-like of shape (n_values,)
-            Sigmas; difficulty estimates
-        bins : array-like of shape (n_values,)
-            Bins; Mondrian categories
+        residuals : array-like of shape (n_values,), default=None
+            actual values - predicted values
+        sigmas: array-like of shape (n_values,), default=None
+            difficulty estimates
+        bins : array-like of shape (n_values,), default=None
+            Mondrian categories
 
         Returns
         -------
@@ -247,96 +270,251 @@ def fit(self, residuals=None, sigmas=None, bins=None):
             bin_values = np.unique(bins)
             if sigmas is None:            
                 self.normalized = False
-                self.alphas = (bin_values,[np.sort(residuals[bins==b]) for b in bin_values])
+                self.alphas = (bin_values, [np.sort(
+                    residuals[bins==b]) for b in bin_values])
             else:
                 self.normalized = True
-                self.alphas = (bin_values, [np.sort(residuals[bins==b]/sigmas[bins==b]) for b in bin_values])                
+                self.alphas = (bin_values, [np.sort(
+                    residuals[bins==b]/sigmas[bins==b]) for b in bin_values])                
         self.fitted = True
         toc = time.time()
         self.time_fit = toc-tic
         return self
         
     def predict(self, y_hat=None, sigmas=None, bins=None,
-                y=[], lower_percentiles=[], higher_percentiles=[], y_min=-np.inf,
-                y_max=np.inf, return_cpds=False):    
+                y=None, lower_percentiles=None, higher_percentiles=None,
+                y_min=-np.inf, y_max=np.inf, return_cpds=False,
+                cpds_by_bins=False):    
+        """
+        Predict using conformal predictive system.
+
+        Parameters
+        ----------
+        y_hat : array-like of shape (n_values,), default=None
+            predicted values
+        sigmas : array-like of shape (n_values,), default=None
+            difficulty estimates
+        bins : array-like of shape (n_values,), default=None
+            Mondrian categories
+        y : float, int or array-like of shape (n_values,), default=None
+            values for which cumulative probabilities should be returned
+        lower_percentiles : array-like of shape (l_values,), default=None
+            percentiles for which a lower value will be output 
+            in case a percentile lies between two values
+            (similar to `interpolation="lower"` in `numpy.percentile`)
+        higher_percentiles : array-like of shape (h_values,), default=None
+            percentiles for which a higher value will be output 
+            in case a percentile lies between two values
+            (similar to `interpolation="higher"` in `numpy.percentile`)
+        y_min : float or int, default=-np.inf
+            The minimum value to include in prediction intervals.
+        y_max : float or int, default=np.inf
+            The maximum value to include in prediction intervals.
+        return_cpds : Boolean, default=False
+            specifies whether conformal predictive distributions (cpds)
+            should be output or not
+        cpds_by_bins : Boolean, default=False
+            specifies whether the output cpds should be grouped by bin or not; 
+            only applicable when bins is not None and return_cpds = True
+
+        Returns
+        -------
+
+        results : ndarray of shape (n_values, p_values+l_values+h_values)
+            where p_values = 1 if y is not None and 0 otherwise. A matrix
+            where the first column contains p-values, if p_values = 1,
+            the following l_values columns contain lower percentiles, and
+            the following h_values columns contain higher percentiles.
+            Only returned if p_values + l_values + h_values > 0.
+        cpds : ndarray of shape (n_values, c_values), ndarray of shape
+               (n_values,) or list of ndarrays
+            conformal predictive distributions. Only returned if 
+            return_cpds == True. If bins is None, the distributions are
+            represented by a single array, where the number of columns
+            (c_values) is determined by the number of residuals of the fitted
+            conformal predictive system. Otherwise, the distributions
+            are represented by a vector of arrays, if cpds_by_bins = False,
+            or a list of arrays, with one element for each bin, if 
+            cpds_by_bins = True.
+        """
+
         tic = time.time()
         if not self.mondrian:
                 if self.normalized:
-                    cpds = np.array([y_hat[i]+sigmas[i]*self.alphas for i in range(len(y_hat))])
+                    cpds = np.array([y_hat[i]+sigmas[i]*self.alphas
+                                     for i in range(len(y_hat))])
                 else:
-                    cpds = np.array([y_hat[i]+self.alphas for i in range(len(y_hat))])
+                    cpds = np.array([y_hat[i]+self.alphas
+                                     for i in range(len(y_hat))])
         else:           
             bin_values, bin_alphas = self.alphas
             bin_indexes = [np.argwhere(bins == b).T[0] for b in bin_values]
             if self.normalized:
-                cpds = [np.array([y_hat[i]+sigmas[i]*bin_alphas[b] for i in bin_indexes[b]]) for b in range(len(bin_values))]
+                cpds = [np.array([y_hat[i]+sigmas[i]*bin_alphas[b]
+                                  for i in bin_indexes[b]])
+                        for b in range(len(bin_values))]
             else:
-                cpds = [np.array([y_hat[i]+bin_alphas[b] for i in bin_indexes[b]]) for b in range(len(bin_values))]
+                cpds = [np.array([y_hat[i]+bin_alphas[b] for
+                                  i in bin_indexes[b]])
+                        for b in range(len(bin_values))]
         no_prec_result_cols = 0
-        if type(lower_percentiles) == int or type(lower_percentiles) == float:
+        if type(lower_percentiles) in [int, float]:
             lower_percentiles = [lower_percentiles]
-        if type(higher_percentiles) == int or type(higher_percentiles) == float:
+        if type(higher_percentiles) in [int, float]:
             higher_percentiles = [higher_percentiles]
-        no_result_columns = (y != [])+len(lower_percentiles)+len(higher_percentiles)
+        if lower_percentiles is None:
+            lower_percentiles = []
+        if higher_percentiles is None:
+            higher_percentiles = []
+        no_result_columns = \
+            (y is not None) + len(lower_percentiles) + len(higher_percentiles)
         if no_result_columns > 0:
             result = np.zeros((len(y_hat),no_result_columns))
-        if len(y) > 0:
+        if y is not None:
             no_prec_result_cols += 1
             gammas = np.random.rand(len(y_hat))
-            if type(y) == int or type(y) == float:
+            if type(y) in [int, float]:
                 if not self.mondrian:
-                    result[:,0] = np.array([(len(np.argwhere(cpds[i]<y))+gammas[i])/(len(cpds[i])+1) for i in range(len(cpds))])
+                    result[:,0] = np.array([(len(np.argwhere(cpds[i]<y)) \
+                                             + gammas[i])/(len(cpds[i])+1)
+                                            for i in range(len(cpds))])
                 else:
                     for b in range(len(bin_values)):
-                        result[bin_indexes[b],0] = np.array([(len(np.argwhere(cpds[b][i]<y))+gammas[bin_indexes[b]][i])/(len(cpds[b])+1)
-                                                             for i in range(len(cpds[b]))])
-            elif (type(y) == list or type(y) == np.ndarray) and len(y) == len(y_hat):
+                        result[bin_indexes[b],0] = np.array(
+                            [(len(np.argwhere(cpds[b][i]<y)) \
+                              + gammas[bin_indexes[b]][i])/(len(cpds[b])+1)
+                             for i in range(len(cpds[b]))])
+            elif type(y) in [list, np.ndarray] and len(y) == len(y_hat):
                 if not self.mondrian:
-                    result[:,0] = np.array([(len(np.argwhere(cpds[i]<y[i]))+gammas[i])/(len(cpds[i])+1) for i in range(len(cpds))])
+                    result[:,0] = np.array([(len(np.argwhere(cpds[i]<y[i])) \
+                                             + gammas[i])/(len(cpds[i])+1)
+                                            for i in range(len(cpds))])
                 else:
                     for b in range(len(bin_values)):
-                        result[bin_indexes[b],0] = np.array([(len(np.argwhere(cpds[b][i]<y[bin_indexes[b]][i]))+gammas[bin_indexes[b]][i])/(len(cpds[b][0])+1)
-                                                             for i in range(len(cpds[b]))])
+                        result[bin_indexes[b],0] = \
+                            np.array([(len(np.argwhere(
+                                cpds[b][i]<y[bin_indexes[b]][i])) \
+                                       + gammas[bin_indexes[b]][i]) \
+                                      /(len(cpds[b][0])+1)
+                                      for i in range(len(cpds[b]))])
             else:
-                raise ValueError("y must either be a single int or float or a list/numpy array of the same length as the number of point predictions")
+                raise ValueError(("y must either be a single int, float or "
+                                  "a list/numpy array of the same length as "
+                                  "the residuals"))
         if len(lower_percentiles) > 0:
                 if not self.mondrian:
-                    lower_indexes = [int(lower_percentile/100*(len(self.alphas)+1))-1 for lower_percentile in lower_percentiles]                
-                    result[:,no_prec_result_cols:no_prec_result_cols+len(lower_percentiles)] = cpds[:,lower_indexes]
-                    y_min_columns = [no_prec_result_cols+i for i in range(len(lower_indexes)) if lower_indexes[i]<0]
+                    lower_indexes = [int(lower_percentile/100 \
+                                         * (len(self.alphas)+1))-1
+                                     for lower_percentile in lower_percentiles]
+                    result[:,no_prec_result_cols:no_prec_result_cols \
+                           + len(lower_percentiles)] = cpds[:,lower_indexes]
+                    y_min_columns = [no_prec_result_cols+i
+                                     for i in range(len(lower_indexes))
+                                     if lower_indexes[i]<0]
                     result[:,y_min_columns] = y_min
                 else:
                     for b in range(len(bin_values)):
-                        lower_indexes = [int(lower_percentile/100*(len(bin_alphas[b])+1))-1 for lower_percentile in lower_percentiles]                
-                        result[bin_indexes[b],no_prec_result_cols:no_prec_result_cols+len(lower_indexes)] = cpds[b][:,lower_indexes]
-                    y_min_columns = [no_prec_result_cols+i for i in range(len(lower_indexes)) if lower_indexes[i]<0]
+                        lower_indexes = [int(lower_percentile/100 \
+                                             * (len(bin_alphas[b])+1))-1
+                                         for lower_percentile
+                                         in lower_percentiles]                
+                        result[bin_indexes[b],
+                               no_prec_result_cols:no_prec_result_cols \
+                               + len(lower_indexes)] = cpds[b][:,lower_indexes]
+                    y_min_columns = [no_prec_result_cols+i
+                                     for i in range(len(lower_indexes))
+                                     if lower_indexes[i]<0]
                     result[:,y_min_columns] = y_min                    
         if y_min > -np.inf:
-            result[:,no_prec_result_cols:no_prec_result_cols+len(lower_percentiles)][result[:,no_prec_result_cols:no_prec_result_cols+len(lower_percentiles)]<y_min] = y_min
+            result[:,
+                   no_prec_result_cols:no_prec_result_cols \
+                   + len(lower_percentiles)]\
+                   [result[:,no_prec_result_cols:no_prec_result_cols \
+                           + len(lower_percentiles)]<y_min] = y_min
         no_prec_result_cols += len(lower_percentiles)
         if len(higher_percentiles) > 0:
                 if not self.mondrian:
-                    higher_indexes = np.array([int(np.ceil(higher_percentile/100*(len(self.alphas)+1)))-1 for higher_percentile in higher_percentiles], dtype=int)
-                    too_high_indexes = np.array([i for i in range(len(higher_indexes)) if higher_indexes[i] > len(self.alphas)-1], dtype=int)
+                    higher_indexes = np.array(
+                        [int(np.ceil(higher_percentile/100 \
+                                     * (len(self.alphas)+1)))-1
+                         for higher_percentile in higher_percentiles],
+                        dtype=int)
+                    too_high_indexes = np.array(
+                        [i for i in range(len(higher_indexes))
+                         if higher_indexes[i] > len(self.alphas)-1], dtype=int)
                     higher_indexes[too_high_indexes] = len(self.alphas)-1
-                    result[:,no_prec_result_cols:no_prec_result_cols+len(higher_indexes)] = cpds[:,higher_indexes]
+                    result[:,no_prec_result_cols:no_prec_result_cols \
+                           + len(higher_indexes)] = cpds[:,higher_indexes]
                     result[:,no_prec_result_cols+too_high_indexes] = y_max
                 else:
                     for b in range(len(bin_values)):
-                        higher_indexes = [int(np.ceil(higher_percentile/100*(len(bin_alphas[b])+1)))-1 for higher_percentile in higher_percentiles]
-                        result[bin_indexes[b],no_prec_result_cols:no_prec_result_cols+len(higher_indexes)] = cpds[b][:,higher_indexes]
+                        higher_indexes = [
+                            int(np.ceil(higher_percentile/100 \
+                                        * (len(bin_alphas[b])+1)))-1
+                            for higher_percentile in higher_percentiles]
+                        result[bin_indexes[b],
+                               no_prec_result_cols:no_prec_result_cols \
+                               + len(higher_indexes)] = cpds[b]\
+                                   [:,higher_indexes]
         if y_max < np.inf:
-            result[:,no_prec_result_cols:no_prec_result_cols+len(higher_percentiles)][result[:,no_prec_result_cols:no_prec_result_cols+len(higher_percentiles)]>y_max] = y_max
+            result[:,no_prec_result_cols:no_prec_result_cols\
+                   + len(higher_percentiles)]\
+                   [result[:,no_prec_result_cols:no_prec_result_cols \
+                           + len(higher_percentiles)]>y_max] = y_max
         toc = time.time()
         self.time_predict = toc-tic            
         if no_result_columns > 0 and return_cpds:
-            return result, cpds
+            if not self.mondrian or cpds_by_bins:
+                cpds_out = cpds
+            else:
+                cpds_out = np.empty(len(y_hat), dtype=object)
+                for b in range(len(bin_values)):
+                    cpds_out[bin_indexes[b]] = [cpds[b][i]
+                                                for i in range(len(cpds[b]))]
+            return result, cpds_out
         elif no_result_columns > 0:
             return result
         elif return_cpds:
-            return cpds
+            if not self.mondrian or cpds_by_bins:
+                cpds_out = cpds
+            else:
+                cpds_out = np.empty(len(y_hat), dtype=object)
+                for b in range(len(bin_values)):
+                    cpds_out[bin_indexes[b]] = [cpds[b][i]
+                                                for i in range(len(cpds[b]))]
+            return cpds_out
+
+    def evaluate(self, y_hat=None, y=None, sigmas=None, bins=None,
+                 confidence=0.95, y_min=-np.inf, y_max=np.inf, metrics=None):
+        """
+        Evaluate conformal predictive system.
+
+        Parameters
+        ----------
+        y_hat : array-like of shape (n_values,), default=None
+            predicted values
+        y : array-like of shape (n_values,), default=None
+            correct target values
+        sigmas : array-like of shape (n_values,), default=None
+            difficulty estimates
+        bins : array-like of shape (n_values,), default=None
+            Mondrian categories
+        confidence : float in range (0,1), default=0.95
+            confidence level
+        y_min : float or int, default=-np.inf
+            minimum value to include in prediction intervals
+        y_max : float or int, default=np.inf
+            maximum value to include in prediction intervals
+        metrics : a string or a list of strings, default=list of all 
+            metrics; ["error", "efficiency", "CRPS", "time_fit", 
+                      "time_evaluate"]
+        
+        Returns
+        -------
+        results : dictionary with a key for each selected metric 
+            estimated performance using the metrics
+        """
 
-    def evaluate(self, y_hat=None, y=None, sigmas=None, bins=None, confidence=0.95, y_min=-np.inf, y_max=np.inf, metrics=None):
         tic = time.time()
         if metrics is None:
             metrics = ["error","efficiency","CRPS","time_fit","time_evaluate"]
@@ -344,32 +522,48 @@ def evaluate(self, y_hat=None, y=None, sigmas=None, bins=None, confidence=0.95,
         higher_percentile = (confidence+(1-confidence)/2)*100
         test_results = {}
         if "CRPS" in metrics:
-            results, cpds = self.predict(y_hat, sigmas=sigmas, bins=bins, y=y, lower_percentiles=lower_percentile,
-                                         higher_percentiles=higher_percentile, y_min=y_min, y_max=y_max, return_cpds=True)
+            results, cpds = self.predict(y_hat, sigmas=sigmas, bins=bins, y=y,
+                                         lower_percentiles=lower_percentile,
+                                         higher_percentiles=higher_percentile,
+                                         y_min=y_min, y_max=y_max,
+                                         return_cpds=True, cpds_by_bins=True)
             intervals = results[:,[1,2]]
             if not self.mondrian:
                 if self.normalized:
-                    crps = calculate_crps(cpds, self.alphas, y_hat, sigmas, y)
+                    crps = calculate_crps(cpds, self.alphas, sigmas, y)
                 else:
-                    crps = calculate_crps(cpds, self.alphas, y_hat, np.ones(len(y_hat)), y)
+                    crps = calculate_crps(cpds, self.alphas,
+                                          np.ones(len(y_hat)), y)
             else:
                 bin_values, bin_alphas = self.alphas
-                bin_indexes = [np.argwhere(bins == b).T[0] for b in bin_values]                
+                bin_indexes = [np.argwhere(bins == b).T[0]
+                               for b in bin_values]
                 if self.normalized:
-                    crps = np.sum([calculate_crps(cpds[b], bin_alphas[b], y_hat[bin_indexes[b]], sigmas[bin_indexes[b]], y[bin_indexes[b]])*len(bin_indexes[b])
+                    crps = np.sum([calculate_crps(cpds[b],
+                                                  bin_alphas[b],
+                                                  sigmas[bin_indexes[b]],
+                                                  y[bin_indexes[b]]) \
+                                   * len(bin_indexes[b])
                                    for b in range(len(bin_values))])/len(y)
                 else:
-                    crps = np.sum([calculate_crps(cpds[b], bin_alphas[b], y_hat[bin_indexes[b]], np.ones(len(bin_indexes[b])), y[bin_indexes[b]])*len(bin_indexes[b])
+                    crps = np.sum([calculate_crps(cpds[b],
+                                                  bin_alphas[b],
+                                                  np.ones(len(bin_indexes[b])),
+                                                  y[bin_indexes[b]]) \
+                                   * len(bin_indexes[b])
                                    for b in range(len(bin_values))])/len(y)
-            
         else:
-            intervals = self.predict(y_hat, sigmas=sigmas, bins=bins, lower_percentiles=lower_percentile,
-                                     higher_percentiles=higher_percentile, y_min=y_min, y_max=y_max, return_CRPS=False)
+            intervals = self.predict(y_hat, sigmas=sigmas, bins=bins,
+                                     lower_percentiles=lower_percentile,
+                                     higher_percentiles=higher_percentile,
+                                     y_min=y_min, y_max=y_max,
+                                     return_CRPS=False)
         if "error" in metrics:
-            test_results["error"] = 1-np.mean(np.logical_and(intervals[:,0]<=y,y<=intervals[:,1]))
+            test_results["error"] = 1-np.mean(np.logical_and(
+                intervals[:,0]<=y,y<=intervals[:,1]))
         if "efficiency" in metrics:            
             test_results["efficiency"] = np.mean(intervals[:,1]-intervals[:,0])
-        if "CRPS" in metrics:            
+        if "CRPS" in metrics:
             test_results["CRPS"] = crps
         if "time_fit" in metrics:
             test_results["time_fit"] = self.time_fit
@@ -379,23 +573,73 @@ def evaluate(self, y_hat=None, y=None, sigmas=None, bins=None, confidence=0.95,
             test_results["time_evaluate"] = self.time_evaluate
         return test_results
         
-def calculate_crps(cpds, alphas, predictions, sigmas, y):
+def calculate_crps(cpds, alphas, sigmas, y):
+    """
+    Calculate mean continuous-ranked probability score (crps)
+    for a set of conformal predictive distributions.
+
+    Parameters
+    ----------
+    cpds : array-like of shape (n_values, c_values)
+        conformal predictive distributions
+    alphas : array-like of shape (c_values,)
+        sorted (normalized) residuals of the calibration examples 
+    sigmas : array-like of shape (n_values,),
+        difficulty estimates
+    y : array-like of shape (n_values,)
+        correct target values
+        
+    Returns
+    -------
+    crps : float
+        mean continuous-ranked probability score for the conformal
+        predictive distributions 
+    """
     widths = np.array([alphas[i+1]-alphas[i] for i in range(len(alphas)-1)])
     cum_probs = np.cumsum([1/len(alphas) for i in range(len(alphas)-1)])
     lower_errors = cum_probs**2
-    upper_errors = (1-cum_probs)**2
+    higher_errors = (1-cum_probs)**2
     cpd_indexes = [np.argwhere(cpds[i]<y[i]) for i in range(len(y))]
     cpd_indexes = [-1 if len(c)==0 else c[-1][0] for c in cpd_indexes]
-    return np.mean([get_crps(cpd_indexes[i], lower_errors, upper_errors, widths, sigmas[i], cpds[i], y[i]) for i in range(len(y))])
+    return np.mean([get_crps(cpd_indexes[i], lower_errors, higher_errors,
+                             widths, sigmas[i], cpds[i], y[i])
+                    for i in range(len(y))])
         
-def get_crps(cpd_index, lower_errors, upper_errors, widths, sigma, cpd, y):
+def get_crps(cpd_index, lower_errors, higher_errors, widths, sigma, cpd, y):
+    """
+    Calculate continuous-ranked probability score (crps) for a single
+    conformal predictive distribution. 
+
+    Parameters
+    ----------
+    cpd_index : int
+        highest index for which y is higher than the corresponding cpd value
+    lower_errors : array-like of shape (c_values-1,)
+        values to add to crps for values less than y
+    higher_errors : array-like of shape (c_values-1,)
+        values to add to crps for values higher than y
+    widths : array-like of shape (c_values-1,),
+        differences between consecutive pairs of sorted (normalized) residuals 
+        of the calibration examples 
+    sigma : int or float
+        difficulty estimate for single object
+    cpd : array-like of shape (c_values,)
+        conformal predictive distribution
+    y : int or float
+        correct target value
+        
+    Returns
+    -------
+    crps : float
+        continuous-ranked probability score
+    """
     if cpd_index == -1:
-        score = np.sum(upper_errors*widths*sigma)+(cpd[0]-y) 
+        score = np.sum(higher_errors*widths*sigma)+(cpd[0]-y) 
     elif cpd_index == len(cpd)-1:
         score = np.sum(lower_errors*widths*sigma)+(y-cpd[-1]) 
     else:
         score = np.sum(lower_errors[:cpd_index]*widths[:cpd_index]*sigma) +\
-            np.sum(upper_errors[cpd_index+1:]*widths[cpd_index+1:]*sigma) +\
+            np.sum(higher_errors[cpd_index+1:]*widths[cpd_index+1:]*sigma) +\
             lower_errors[cpd_index]*(y-cpd[cpd_index])*sigma +\
-            upper_errors[cpd_index]*(cpd[cpd_index+1]-y)*sigma
+            higher_errors[cpd_index]*(cpd[cpd_index+1]-y)*sigma
     return score
diff --git a/src/crepes/fillings.py b/src/crepes/fillings.py
index 210ac0e..c1db9e7 100644
--- a/src/crepes/fillings.py
+++ b/src/crepes/fillings.py
@@ -1,21 +1,16 @@
 """Conformal regressors and predictive systems (crepes) fillings
 
 Helper functions to generate residuals and sigmas, with and without
-out-of-bag calibration, for conformal regressors and conformal
+out-of-bag predictions, for conformal regressors and conformal
 predictive systems.
 
 Author: Henrik Boström (bostromh@kth.se)
 
-Copyright 2021 Henrik Boström
+Copyright 2022 Henrik Boström
 
 License: BSD 3 clause
-"""
 
-# To do:
-#
-# - "min-bin-size" as alternative to "no_bins" for the helper function "binning"
-# - error messages
-# - commenting and documentation 
+"""
 
 import numpy as np
 import pandas as pd
@@ -24,13 +19,52 @@
 from sklearn.preprocessing import MinMaxScaler
 
 def sigma_variance(X=None, learner=None, beta=0.01):
+    """
+    Provides difficulty estimates for a set of objects
+    using the variance of the predictions by a learner.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features), default=None
+        set of objects
+    learner : an object with the attribute learner.estimators_, default=None
+        an ensemble model where each model m in learner.estimators_ has a
+        method m.predict
+    beta : int or float, default=0.01 
+        value to add to the difficulty estimates
+        
+    Returns
+    -------
+    sigmas : array-like of shape (n_samples,)
+        difficulty estimates 
+    """
     try:
         learner.estimators_
     except:
         raise ValueError("The learner is missing the attribute estimators_")
-    return np.var([model.predict(X) for model in learner.estimators_], axis=0) + beta
+    return np.var([model.predict(X) for model in learner.estimators_],
+                  axis=0) + beta
 
 def sigma_variance_oob(X=None, learner=None, beta=0.01):
+    """
+    Provides difficulty estimates for a set of objects
+    using the variance of the out-of-bag predictions by a learner.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features), default=None
+        set of objects
+    learner : an object with the attribute learner.estimators_, default=None
+        an ensemble model where each model m in learner.estimators_ has an
+        attribute m.random_state
+    beta : int or float, default=0.01 
+        value to add to the difficulty estimates
+        
+    Returns
+    -------
+    sigmas : array-like of shape (n_samples,)
+        difficulty estimates 
+    """
     try:
         learner.estimators_
     except:
@@ -38,17 +72,59 @@ def sigma_variance_oob(X=None, learner=None, beta=0.01):
     try:
         learner.estimators_[0].random_state
     except:
-        raise ValueError("The learner.estimators_ is missing the attribute random_state")
+        raise ValueError(("The learner.estimators_ is missing the attribute"
+                          "random_state"))
     predictions = np.array([model.predict(X) for model in learner.estimators_])
-    oob_masks = np.array([get_oob_mask(learner.estimators_[i].random_state,len(X))
+    oob_masks = np.array([get_oob(learner.estimators_[i].random_state, len(X))
                           for i in range(len(learner.estimators_))])
-    return np.array([np.var(predictions[oob_masks[:,i],i]) for i in range(len(X))]) + beta
+    return np.array([np.var(predictions[oob_masks[:,i],i])
+                     for i in range(len(X))]) + beta
 
-def get_oob_mask(seed, n):
-    return np.bincount(np.random.RandomState(seed).randint(0, n, n),
-                       minlength=n) == 0
+def get_oob(seed, n_samples):
+    """
+    Provides out-of-bag samples from a random seed and sample size.
+
+    Parameters
+    ----------
+    seed : int
+        random seed
+    n_samples : int
+        sample size
+        
+    Returns
+    -------
+    oob : array-like of shape (n_samples,)
+        boolean vector, True for each sample that is out-of-bag
+    """
+    return np.bincount(np.random.RandomState(seed).randint(0, n_samples,
+                                                           n_samples),
+                       minlength=n_samples) == 0
 
 def sigma_knn(X=None, residuals=None, X_test=None, k=5, beta=0.01):
+    """
+    Provides difficulty estimates for a set of objects using the absolute 
+    residuals of the nearest neighbors.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features), default=None
+        set of objects
+    residuals : array-like of shape (n_samples,), default=None
+        residuals of the objects X
+    X_test : array-like of shape (n_test_samples, n_features), default=None
+        set of test objects
+    k: int, default=5
+        number of neighbors
+    beta : int or float, default=0.01 
+        value to add to the difficulty estimates
+        
+    Returns
+    -------
+    sigmas : array-like of shape (n_samples,) or (n_test_samples,)
+        difficulty estimates; if X_test is None, sigmas will contain one int 
+        or float for each object in X; otherwise, sigmas will contain one int 
+        or float for each object in X_test
+    """
     nn = NearestNeighbors(n_neighbors=k, n_jobs=-1)
     scaler = MinMaxScaler(clip=True)
     scaler.fit(X)
@@ -59,16 +135,41 @@ def sigma_knn(X=None, residuals=None, X_test=None, k=5, beta=0.01):
     else:
         X_test_scaled = scaler.transform(X_test)
         neighbor_indexes = nn.kneighbors(X_test_scaled, return_distance=False)
-    return np.array([np.mean(np.abs(residuals[indexes])) for indexes in neighbor_indexes]) + beta
+    return np.array([np.mean(np.abs(residuals[indexes]))
+                     for indexes in neighbor_indexes]) + beta
 
 def binning(values=None, bins=10):
-    mod_values = values+np.random.rand(len(values))*1e-9 # Adding a very small random number, which a.s. avoids ties
-                                                         # without affecting performance
+    """
+    Provides bins for a set of values.
+
+    Parameters
+    ----------
+    values : array-like of shape (n_samples,), default=None
+        set of values
+    bins : int or array-like of shape (n_bins,), default=10
+        number of bins to use for equal-sized binning or threshold values 
+        to use for binning
+        
+    Returns
+    -------
+    assigned_bins : array-like of shape (n_samples,)
+        bins to which values have been assigned
+    boundaries : array-like of shape (bins+1,)
+        threshold values for the bins; the first is always -np.inf and
+        the last is np.inf. Returned only if bins is an int.
+    """
+    mod_values = values+np.random.rand(len(values))*1e-9
+    # Add a very small random number, which almost surely avoids ties
+    # without affecting performance
     if type(bins) == int:
-        assigned_bins, bin_boundaries = pd.qcut(mod_values,bins+1,labels=False,retbins=True,duplicates="drop",precision=12)
+        assigned_bins, bin_boundaries = pd.qcut(mod_values,bins,
+                                                labels=False,retbins=True,
+                                                duplicates="drop",
+                                                precision=12)
         bin_boundaries[0] = -np.inf
         bin_boundaries[-1] = np.inf
         return assigned_bins, bin_boundaries
     else:
         assigned_bins = pd.cut(mod_values,bins,labels=False,retbins=False)
         return assigned_bins
+