krmayankb · krmayankb · Dec 24, 2022 · Dec 24, 2022 · Dec 24, 2022 · Dec 24, 2022
diff --git a/cleanlab/__init__.py b/cleanlab/__init__.py
@@ -8,3 +8,4 @@
 from . import multiannotator
 from . import outlier
 from . import token_classification
+from . import regression
diff --git a/cleanlab/regression/__init__.py b/cleanlab/regression/__init__.py
@@ -0,0 +1 @@
+from . import rank
diff --git a/cleanlab/regression/rank.py b/cleanlab/regression/rank.py
@@ -0,0 +1,48 @@
+import numpy as np
+
+""" generate label quality score for regression dataset"""
+
+
+def get_label_quality_scores(labels: np.ndarray, pred_labels: np.ndarray) -> np.ndarray:
+    """
+    Returns a label quality score for each example in the regression dataset.
+
+    Each score is computed as ``exp(-|pred_label - label|)``, a continuous value in the range [0, 1]:
+    1 - clean label (given label is likely correct).
+    0 - dirty label (given label is likely incorrect).
+
+    Parameters
+    ----------
+    labels:
+        Raw labels from the original dataset.
+        Array of shape ``(N,)`` containing the given labels, where N is the number of datapoints in the regression dataset.
+
+    pred_labels:
+        Predicted labels from a regressor fitted on the dataset.
+        Array of shape ``(N,)`` containing the predicted labels; it must have the same shape as ``labels``
+        (checked with an ``assert``, which fails with ``AssertionError`` on a shape mismatch).
+
+    Returns
+    -------
+    label_quality_scores:
+        Array of shape ``(N,)`` of scores between 0 and 1, one per datapoint in the dataset.
+        Lower scores indicate datapoints that are more likely to contain a label issue.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from cleanlab.regression.rank import get_label_quality_scores
+    >>> labels = np.array([1,2,3,4])
+    >>> pred_labels = np.array([2,2,5,4.1])
+    >>> label_quality_scores = get_label_quality_scores(labels, pred_labels)
+    >>> label_quality_scores
+    array([0.36787944, 1.        , 0.13533528, 0.90483742])
+    """
+
+    assert (
+        labels.shape == pred_labels.shape
+    ), f"shape of label {labels.shape} and predicted labels {pred_labels.shape} are not same."
+
+    residual = pred_labels - labels
+    quality_scores = np.exp(-abs(residual))  # zero residual -> 1.0; larger |residual| -> closer to 0
+    return quality_scores
diff --git a/docs/source/cleanlab/regression.rst b/docs/source/cleanlab/regression.rst
@@ -0,0 +1,8 @@
+regression
+==============
+
+.. automodule:: cleanlab.regression
+   :autosummary:
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/tutorials/index.rst b/docs/source/tutorials/index.rst
@@ -15,3 +15,4 @@ Tutorials
    token_classification
    pred_probs_cross_val
    faq
+   regression
diff --git a/docs/source/tutorials/regression.ipynb b/docs/source/tutorials/regression.ipynb