From d30dd7aa69b9148d99d51b02b1a487a4e70cfb91 Mon Sep 17 00:00:00 2001
From: Sterling Suggs <sterling.suggs@twosixtech.com>
Date: Fri, 27 May 2022 11:37:33 -0600
Subject: [PATCH 01/12] one way to add per-class accuracy

---
 armory/instrument/config.py     | 7 +++++++
 armory/utils/config_schema.json | 3 ++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/armory/instrument/config.py b/armory/instrument/config.py
index 1c20dd208..97e8144f0 100644
--- a/armory/instrument/config.py
+++ b/armory/instrument/config.py
@@ -216,6 +216,8 @@ def _write(self, name, batch, result):
         elif any(m in name for m in QUANTITY_METRICS):
             # Don't include % symbol
             f_result = f"{np.mean(result):.2}"
+        elif isinstance(result, dict):
+            f_result = f"{result}"
         else:
             f_result = f"{np.mean(result):.2%}"
         log.success(
@@ -253,6 +255,11 @@ def _task_metric(
     elif name == "word_error_rate":
         final = metrics.get("total_wer")
         final_suffix = "total_word_error_rate"
+    elif name == "per_class_mean_accuracy":
+        metric = metrics.get("identity_unzip")
+        func = metrics.get("per_class_mean_accuracy")
+        final = lambda x: func(*metrics.task.identity_zip(x))
+        final_suffix = name
     elif use_mean:
         final = np.mean
         final_suffix = f"mean_{name}"
diff --git a/armory/utils/config_schema.json b/armory/utils/config_schema.json
index c2da4be7c..5144712c4 100644
--- a/armory/utils/config_schema.json
+++ b/armory/utils/config_schema.json
@@ -261,7 +261,8 @@
                 "carla_od_disappearance_rate",
                 "carla_od_hallucinations_per_image",
                 "carla_od_misclassification_rate",
-                "carla_od_true_positive_rate"
+                "carla_od_true_positive_rate",
+                "per_class_mean_accuracy"
             ]
         },
         "sysconfig": {

From 6ea83fb8aa328203ac4b8098cbf95c05fc28b6e0 Mon Sep 17 00:00:00 2001
From: Sterling Suggs <sterling.suggs@twosixtech.com>
Date: Mon, 30 May 2022 10:24:29 -0600
Subject: [PATCH 02/12] replace lambda with normal function

---
 armory/instrument/config.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/armory/instrument/config.py b/armory/instrument/config.py
index 97e8144f0..d6a23077a 100644
--- a/armory/instrument/config.py
+++ b/armory/instrument/config.py
@@ -258,7 +258,10 @@ def _task_metric(
     elif name == "per_class_mean_accuracy":
         metric = metrics.get("identity_unzip")
         func = metrics.get("per_class_mean_accuracy")
-        final = lambda x: func(*metrics.task.identity_zip(x))
+
+        def final(x):
+            return func(*metrics.task.identity_zip(x))
+
         final_suffix = name
     elif use_mean:
         final = np.mean

From 0874baec2341c3391941bbafc057859c1fd0e32c Mon Sep 17 00:00:00 2001
From: Sterling Suggs <sterling.suggs@twosixtech.com>
Date: Mon, 30 May 2022 12:18:02 -0600
Subject: [PATCH 03/12] add confusion matrix metric

---
 armory/instrument/config.py     | 10 ++++++++++
 armory/metrics/task.py          | 18 ++++++++++++++++++
 armory/utils/config_schema.json |  3 ++-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/armory/instrument/config.py b/armory/instrument/config.py
index d6a23077a..a550965cd 100644
--- a/armory/instrument/config.py
+++ b/armory/instrument/config.py
@@ -205,6 +205,8 @@ def _write(self, name, batch, result):
                 f"neutral: {result['neutral']}/{total}, "
                 f"entailment: {result['entailment']}/{total}"
             )
+        elif "confusion_matrix" in name:
+            f_result = f"{result}"
         elif any(m in name for m in MEAN_AP_METRICS):
             if "input_to" in name:
                 for m in MEAN_AP_METRICS:
@@ -259,6 +261,14 @@ def _task_metric(
         metric = metrics.get("identity_unzip")
         func = metrics.get("per_class_mean_accuracy")
 
+        def final(x):
+            return func(*metrics.task.identity_zip(x))
+
+        final_suffix = name
+    elif name == "confusion_matrix":
+        metric = metrics.get("identity_unzip")
+        func = metrics.get("confusion_matrix")
+
         def final(x):
             return func(*metrics.task.identity_zip(x))
 
diff --git a/armory/metrics/task.py b/armory/metrics/task.py
index 5f0f76800..63084ac3c 100644
--- a/armory/metrics/task.py
+++ b/armory/metrics/task.py
@@ -230,6 +230,24 @@ def __call__(self, values, **kwargs):
         return {"mean": mean_ap, "class": ap}
 
 
+@populationwise
+def confusion_matrix(y, y_pred):
+    """
+    Produce a matrix C such that C[i,j] is the percentage of class i that was classified as j
+    """
+    # Assumes that every class is represented in y
+    y = np.array(y)
+    y_pred = np.argmax(y_pred, axis=1)
+    N = len(np.unique(y))
+    C = np.zeros((N, N))
+    for i in range(N):
+        for j in range(N):
+            C[i, j] = np.sum(y_pred[y == i] == j)
+    sums = np.sum(C, axis=1)
+    C = C / sums[:, np.newaxis]
+    return C
+
+
 @populationwise
 def tpr_fpr(actual_conditions, predicted_conditions):
     """
diff --git a/armory/utils/config_schema.json b/armory/utils/config_schema.json
index 5144712c4..dc29db22e 100644
--- a/armory/utils/config_schema.json
+++ b/armory/utils/config_schema.json
@@ -262,7 +262,8 @@
                 "carla_od_hallucinations_per_image",
                 "carla_od_misclassification_rate",
                 "carla_od_true_positive_rate",
-                "per_class_mean_accuracy"
+                "per_class_mean_accuracy",
+                "confusion_matrix"
             ]
         },
         "sysconfig": {

From 4a79ccdec6b268b22999d71fbf0405a30beab1d6 Mon Sep 17 00:00:00 2001
From: Sterling Suggs <sterling.suggs@twosixtech.com>
Date: Tue, 31 May 2022 07:07:49 -0600
Subject: [PATCH 04/12] add precision and recall

---
 armory/instrument/config.py     | 10 ++++++++++
 armory/metrics/statistical.py   | 28 ++++++++++++++++++++++++++++
 armory/utils/config_schema.json |  3 ++-
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/armory/instrument/config.py b/armory/instrument/config.py
index a550965cd..4b7a8712c 100644
--- a/armory/instrument/config.py
+++ b/armory/instrument/config.py
@@ -273,6 +273,16 @@ def final(x):
             return func(*metrics.task.identity_zip(x))
 
         final_suffix = name
+    elif name == "precision_and_recall":
+        metric = metrics.get("identity_unzip")
+        func = metrics.get("precision_and_recall")
+
+        def final(x):
+            return func(*metrics.task.identity_zip(x))
+
+        final_suffix = name
+
+
     elif use_mean:
         final = np.mean
         final_suffix = f"mean_{name}"
diff --git a/armory/metrics/statistical.py b/armory/metrics/statistical.py
index c827dba50..785066cbf 100644
--- a/armory/metrics/statistical.py
+++ b/armory/metrics/statistical.py
@@ -10,6 +10,7 @@
 from sklearn.metrics import silhouette_samples
 
 from armory.metrics.perturbation import MetricNameSpace, set_namespace
+from armory.metrics.task import populationwise
 
 registered = MetricNameSpace()
 
@@ -21,6 +22,33 @@ def register(metric):
     return set_namespace(registered, metric)
 
 
+@populationwise
+def precision_and_recall(y, y_pred):
+    """
+    Produce a dictionary whose keys are class labels, and values are (precision, recall) for that class
+    """
+    # Assumes that every class is represented in y
+
+    C = confusion_matrix(y, y_pred, normalize=False)
+    # breakpoint()
+    N = C.shape[0]
+    D = {}
+    for class_ in range(N):
+        # precision: true positives / number of items identified as class_
+        tp = C[class_, class_]
+        total_selected = C[:, class_].sum()
+        precision = tp / total_selected
+
+        #recall: true positives / number of actual items in class_
+        total_class_ = C[class_, :].sum()
+        recall = tp / total_class_
+
+        D[class_] = (precision, recall)
+
+    return D
+
+
+
 @register
 def chi2_p_value(contingency_table: np.ndarray) -> List[float]:
     """
diff --git a/armory/utils/config_schema.json b/armory/utils/config_schema.json
index dc29db22e..20b7c7f2e 100644
--- a/armory/utils/config_schema.json
+++ b/armory/utils/config_schema.json
@@ -263,7 +263,8 @@
                 "carla_od_misclassification_rate",
                 "carla_od_true_positive_rate",
                 "per_class_mean_accuracy",
-                "confusion_matrix"
+                "confusion_matrix",
+                "precision_and_recall"
             ]
         },
         "sysconfig": {

From 416906d5555a00303582e942b32569d8905606ea Mon Sep 17 00:00:00 2001
From: Sterling Suggs <sterling.suggs@twosixtech.com>
Date: Tue, 31 May 2022 07:09:30 -0600
Subject: [PATCH 05/12] move confusion_matrix from task.py to statistical.py

---
 armory/metrics/statistical.py | 19 +++++++++++++++++++
 armory/metrics/task.py        | 18 ------------------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/armory/metrics/statistical.py b/armory/metrics/statistical.py
index 785066cbf..3b0cc3b90 100644
--- a/armory/metrics/statistical.py
+++ b/armory/metrics/statistical.py
@@ -48,6 +48,25 @@ def precision_and_recall(y, y_pred):
     return D
 
 
+@populationwise
+def confusion_matrix(y, y_pred, normalize=True):
+    """
+    Produce a matrix C such that C[i,j] is the percentage of class i that was classified as j
+    If normalize is False, C[i,j] is the actual number of such elements, rather than the percentage
+    """
+    # Assumes that every class is represented in y
+    y = np.array(y)
+    y_pred = np.argmax(y_pred, axis=1)
+    N = len(np.unique(y))
+    C = np.zeros((N, N))
+    for i in range(N):
+        for j in range(N):
+            C[i, j] = np.sum(y_pred[y == i] == j)
+    if normalize:
+        sums = np.sum(C, axis=1)
+        C = C / sums[:, np.newaxis]
+    return C
+
 
 @register
 def chi2_p_value(contingency_table: np.ndarray) -> List[float]:
diff --git a/armory/metrics/task.py b/armory/metrics/task.py
index 63084ac3c..5f0f76800 100644
--- a/armory/metrics/task.py
+++ b/armory/metrics/task.py
@@ -230,24 +230,6 @@ def __call__(self, values, **kwargs):
         return {"mean": mean_ap, "class": ap}
 
 
-@populationwise
-def confusion_matrix(y, y_pred):
-    """
-    Produce a matrix C such that C[i,j] is the percentage of class i that was classified as j
-    """
-    # Assumes that every class is represented in y
-    y = np.array(y)
-    y_pred = np.argmax(y_pred, axis=1)
-    N = len(np.unique(y))
-    C = np.zeros((N, N))
-    for i in range(N):
-        for j in range(N):
-            C[i, j] = np.sum(y_pred[y == i] == j)
-    sums = np.sum(C, axis=1)
-    C = C / sums[:, np.newaxis]
-    return C
-
-
 @populationwise
 def tpr_fpr(actual_conditions, predicted_conditions):
     """

From 0431ffbe77a3cc529c75850fbca33d3f6b1c6a0a Mon Sep 17 00:00:00 2001
From: Sterling Suggs <sterling.suggs@twosixtech.com>
Date: Tue, 31 May 2022 07:45:20 -0600
Subject: [PATCH 06/12] add tests for confusion matrix and precision/recall

---
 armory/instrument/config.py            |  1 -
 armory/metrics/statistical.py          |  6 ++++--
 tests/unit/test_statistical_metrics.py | 29 ++++++++++++++++++++++++++
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/armory/instrument/config.py b/armory/instrument/config.py
index 4b7a8712c..784ec9fa3 100644
--- a/armory/instrument/config.py
+++ b/armory/instrument/config.py
@@ -282,7 +282,6 @@ def final(x):
 
         final_suffix = name
 
-
     elif use_mean:
         final = np.mean
         final_suffix = f"mean_{name}"
diff --git a/armory/metrics/statistical.py b/armory/metrics/statistical.py
index 3b0cc3b90..6e8213f15 100644
--- a/armory/metrics/statistical.py
+++ b/armory/metrics/statistical.py
@@ -39,7 +39,7 @@ def precision_and_recall(y, y_pred):
         total_selected = C[:, class_].sum()
         precision = tp / total_selected
 
-        #recall: true positives / number of actual items in class_
+        # recall: true positives / number of actual items in class_
         total_class_ = C[class_, :].sum()
         recall = tp / total_class_
 
@@ -56,7 +56,9 @@ def confusion_matrix(y, y_pred, normalize=True):
     """
     # Assumes that every class is represented in y
     y = np.array(y)
-    y_pred = np.argmax(y_pred, axis=1)
+    y_pred = np.array(y_pred)
+    if y_pred.ndim == 2:
+        y_pred = np.argmax(y_pred, axis=1)
     N = len(np.unique(y))
     C = np.zeros((N, N))
     for i in range(N):
diff --git a/tests/unit/test_statistical_metrics.py b/tests/unit/test_statistical_metrics.py
index 2f4213ed0..cd17a9a14 100644
--- a/tests/unit/test_statistical_metrics.py
+++ b/tests/unit/test_statistical_metrics.py
@@ -11,6 +11,35 @@
 pytestmark = pytest.mark.unit
 
 
+def test_confusion_matrix():
+    y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
+    y_pred = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 0])
+    assert statistical.confusion_matrix(y, y) == pytest.approx(
+        np.array([[1, 0], [0, 1]])
+    )
+    assert statistical.confusion_matrix(y, y_pred) == pytest.approx(
+        np.array([[0.6, 0.4], [0.2, 0.8]])
+    )
+    assert statistical.confusion_matrix(y, y_pred, normalize=False) == pytest.approx(
+        np.array([[3, 2], [1, 4]])
+    )
+
+
+def test_precision_and_recall():
+    y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
+    y_pred = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 0])
+    D = statistical.precision_and_recall(y, y_pred)
+    assert D[0] == pytest.approx((0.75, 0.6))
+    assert D[1] == pytest.approx((0.66666667, 0.8))
+
+    y = np.array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2])
+    y_pred = np.array([0, 0, 0, 0, 1, 1, 2, 1, 2, 2, 0, 1])
+    D = statistical.precision_and_recall(y, y_pred)
+    assert D[0] == pytest.approx((0.8, 1))
+    assert D[1] == pytest.approx((0.75, 0.75))
+    assert D[2] == pytest.approx((0.666666667, 0.5))
+
+
 def test_chi2_p_value():
 
     table1 = np.array([[2, 3], [4, 6]])

From f63fe0381f8c7ba41e6251ec1076dc53468dedfc Mon Sep 17 00:00:00 2001
From: Sterling Suggs <sterling.suggs@twosixtech.com>
Date: Thu, 2 Jun 2022 09:38:14 -0600
Subject: [PATCH 07/12] move metrics from statistical to task

---
 armory/metrics/statistical.py | 49 -----------------------------------
 armory/metrics/task.py        | 48 ++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/armory/metrics/statistical.py b/armory/metrics/statistical.py
index 6e8213f15..c827dba50 100644
--- a/armory/metrics/statistical.py
+++ b/armory/metrics/statistical.py
@@ -10,7 +10,6 @@
 from sklearn.metrics import silhouette_samples
 
 from armory.metrics.perturbation import MetricNameSpace, set_namespace
-from armory.metrics.task import populationwise
 
 registered = MetricNameSpace()
 
@@ -22,54 +21,6 @@ def register(metric):
     return set_namespace(registered, metric)
 
 
-@populationwise
-def precision_and_recall(y, y_pred):
-    """
-    Produce a dictionary whose keys are class labels, and values are (precision, recall) for that class
-    """
-    # Assumes that every class is represented in y
-
-    C = confusion_matrix(y, y_pred, normalize=False)
-    # breakpoint()
-    N = C.shape[0]
-    D = {}
-    for class_ in range(N):
-        # precision: true positives / number of items identified as class_
-        tp = C[class_, class_]
-        total_selected = C[:, class_].sum()
-        precision = tp / total_selected
-
-        # recall: true positives / number of actual items in class_
-        total_class_ = C[class_, :].sum()
-        recall = tp / total_class_
-
-        D[class_] = (precision, recall)
-
-    return D
-
-
-@populationwise
-def confusion_matrix(y, y_pred, normalize=True):
-    """
-    Produce a matrix C such that C[i,j] is the percentage of class i that was classified as j
-    If normalize is False, C[i,j] is the actual number of such elements, rather than the percentage
-    """
-    # Assumes that every class is represented in y
-    y = np.array(y)
-    y_pred = np.array(y_pred)
-    if y_pred.ndim == 2:
-        y_pred = np.argmax(y_pred, axis=1)
-    N = len(np.unique(y))
-    C = np.zeros((N, N))
-    for i in range(N):
-        for j in range(N):
-            C[i, j] = np.sum(y_pred[y == i] == j)
-    if normalize:
-        sums = np.sum(C, axis=1)
-        C = C / sums[:, np.newaxis]
-    return C
-
-
 @register
 def chi2_p_value(contingency_table: np.ndarray) -> List[float]:
     """
diff --git a/armory/metrics/task.py b/armory/metrics/task.py
index 5f0f76800..c1f9ba2bc 100644
--- a/armory/metrics/task.py
+++ b/armory/metrics/task.py
@@ -286,6 +286,54 @@ def tpr_fpr(actual_conditions, predicted_conditions):
     )
 
 
+@populationwise
+def precision_and_recall(y, y_pred):
+    """
+    Produce a dictionary whose keys are class labels, and values are (precision, recall) for that class
+    """
+    # Assumes that every class is represented in y
+
+    C = confusion_matrix(y, y_pred, normalize=False)
+    # breakpoint()
+    N = C.shape[0]
+    D = {}
+    for class_ in range(N):
+        # precision: true positives / number of items identified as class_
+        tp = C[class_, class_]
+        total_selected = C[:, class_].sum()
+        precision = tp / total_selected
+
+        # recall: true positives / number of actual items in class_
+        total_class_ = C[class_, :].sum()
+        recall = tp / total_class_
+
+        D[class_] = (precision, recall)
+
+    return D
+
+
+@populationwise
+def confusion_matrix(y, y_pred, normalize=True):
+    """
+    Produce a matrix C such that C[i,j] is the percentage of class i that was classified as j
+    If normalize is False, C[i,j] is the actual number of such elements, rather than the percentage
+    """
+    # Assumes that every class is represented in y
+    y = np.array(y)
+    y_pred = np.array(y_pred)
+    if y_pred.ndim == 2:
+        y_pred = np.argmax(y_pred, axis=1)
+    N = len(np.unique(y))
+    C = np.zeros((N, N))
+    for i in range(N):
+        for j in range(N):
+            C[i, j] = np.sum(y_pred[y == i] == j)
+    if normalize:
+        sums = np.sum(C, axis=1)
+        C = C / sums[:, np.newaxis]
+    return C
+
+
 @batchwise
 def per_class_accuracy(y, y_pred):
     """

From d581625f90689d5550c8de04b6b7c0bb278ca6cb Mon Sep 17 00:00:00 2001
From: Sterling Suggs <sterling.suggs@twosixtech.com>
Date: Thu, 2 Jun 2022 09:42:28 -0600
Subject: [PATCH 08/12] move tests from statistical to task

---
 tests/unit/test_statistical_metrics.py | 29 --------------------------
 tests/unit/test_task_metrics.py        | 29 ++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/tests/unit/test_statistical_metrics.py b/tests/unit/test_statistical_metrics.py
index cd17a9a14..2f4213ed0 100644
--- a/tests/unit/test_statistical_metrics.py
+++ b/tests/unit/test_statistical_metrics.py
@@ -11,35 +11,6 @@
 pytestmark = pytest.mark.unit
 
 
-def test_confusion_matrix():
-    y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
-    y_pred = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 0])
-    assert statistical.confusion_matrix(y, y) == pytest.approx(
-        np.array([[1, 0], [0, 1]])
-    )
-    assert statistical.confusion_matrix(y, y_pred) == pytest.approx(
-        np.array([[0.6, 0.4], [0.2, 0.8]])
-    )
-    assert statistical.confusion_matrix(y, y_pred, normalize=False) == pytest.approx(
-        np.array([[3, 2], [1, 4]])
-    )
-
-
-def test_precision_and_recall():
-    y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
-    y_pred = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 0])
-    D = statistical.precision_and_recall(y, y_pred)
-    assert D[0] == pytest.approx((0.75, 0.6))
-    assert D[1] == pytest.approx((0.66666667, 0.8))
-
-    y = np.array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2])
-    y_pred = np.array([0, 0, 0, 0, 1, 1, 2, 1, 2, 2, 0, 1])
-    D = statistical.precision_and_recall(y, y_pred)
-    assert D[0] == pytest.approx((0.8, 1))
-    assert D[1] == pytest.approx((0.75, 0.75))
-    assert D[2] == pytest.approx((0.666666667, 0.5))
-
-
 def test_chi2_p_value():
 
     table1 = np.array([[2, 3], [4, 6]])
diff --git a/tests/unit/test_task_metrics.py b/tests/unit/test_task_metrics.py
index 5b680ccac..869cc14c3 100644
--- a/tests/unit/test_task_metrics.py
+++ b/tests/unit/test_task_metrics.py
@@ -12,6 +12,35 @@
 pytestmark = pytest.mark.unit
 
 
+def test_confusion_matrix():
+    y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
+    y_pred = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 0])
+    assert task.confusion_matrix(y, y) == pytest.approx(
+        np.array([[1, 0], [0, 1]])
+    )
+    assert task.confusion_matrix(y, y_pred) == pytest.approx(
+        np.array([[0.6, 0.4], [0.2, 0.8]])
+    )
+    assert task.confusion_matrix(y, y_pred, normalize=False) == pytest.approx(
+        np.array([[3, 2], [1, 4]])
+    )
+
+
+def test_precision_and_recall():
+    y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
+    y_pred = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 0])
+    D = task.precision_and_recall(y, y_pred)
+    assert D[0] == pytest.approx((0.75, 0.6))
+    assert D[1] == pytest.approx((0.66666667, 0.8))
+
+    y = np.array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2])
+    y_pred = np.array([0, 0, 0, 0, 1, 1, 2, 1, 2, 2, 0, 1])
+    D = task.precision_and_recall(y, y_pred)
+    assert D[0] == pytest.approx((0.8, 1))
+    assert D[1] == pytest.approx((0.75, 0.75))
+    assert D[2] == pytest.approx((0.666666667, 0.5))
+
+
 @pytest.mark.docker_required
 @pytest.mark.pytorch_deepspeech
 @pytest.mark.slow

From 873a9d1df07f0d0150b96003d82751e0cc090cde Mon Sep 17 00:00:00 2001
From: Sterling Suggs <sterling.suggs@twosixtech.com>
Date: Thu, 2 Jun 2022 11:35:09 -0600
Subject: [PATCH 09/12] rename function and update comments

---
 armory/metrics/task.py          | 18 +++++++++++-------
 tests/unit/test_task_metrics.py | 10 ++++------
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/armory/metrics/task.py b/armory/metrics/task.py
index c1f9ba2bc..cf2e8fa6b 100644
--- a/armory/metrics/task.py
+++ b/armory/metrics/task.py
@@ -287,7 +287,7 @@ def tpr_fpr(actual_conditions, predicted_conditions):
 
 
 @populationwise
-def precision_and_recall(y, y_pred):
+def per_class_precision_and_recall(y, y_pred):
     """
     Produce a dictionary whose keys are class labels, and values are (precision, recall) for that class
     """
@@ -313,22 +313,26 @@ def precision_and_recall(y, y_pred):
 
 
 @populationwise
-def confusion_matrix(y, y_pred, normalize=True):
+def confusion_matrix(y, y_pred, normalize_rows=True):
     """
-    Produce a matrix C such that C[i,j] is the percentage of class i that was classified as j
-    If normalize is False, C[i,j] is the actual number of such elements, rather than the percentage
+    Produce a matrix C such that C[i,j] describes how often class i is classified as class j.
+    If normalize_rows is False, C[i,j] is the actual number of i's classified as j.
+    If normalize_rows is True (default), the rows are normalized in L1, so that C[i,j] is the percentage of class i that was marked as j.
     """
     # Assumes that every class is represented in y
+
     y = np.array(y)
     y_pred = np.array(y_pred)
-    if y_pred.ndim == 2:
+    if y_pred.ndim == 2:  # if y_pred is logits
         y_pred = np.argmax(y_pred, axis=1)
-    N = len(np.unique(y))
+    N = len(np.unique(y))  # number of classes
     C = np.zeros((N, N))
     for i in range(N):
         for j in range(N):
+            # count items of class i that were classified as j
             C[i, j] = np.sum(y_pred[y == i] == j)
-    if normalize:
+    if normalize_rows:
+        # divide rows by their sum so that each element is a percentage of class i, not a count
         sums = np.sum(C, axis=1)
         C = C / sums[:, np.newaxis]
     return C
diff --git a/tests/unit/test_task_metrics.py b/tests/unit/test_task_metrics.py
index 869cc14c3..010eaab43 100644
--- a/tests/unit/test_task_metrics.py
+++ b/tests/unit/test_task_metrics.py
@@ -15,9 +15,7 @@
 def test_confusion_matrix():
     y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
     y_pred = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 0])
-    assert task.confusion_matrix(y, y) == pytest.approx(
-        np.array([[1, 0], [0, 1]])
-    )
+    assert task.confusion_matrix(y, y) == pytest.approx(np.array([[1, 0], [0, 1]]))
     assert task.confusion_matrix(y, y_pred) == pytest.approx(
         np.array([[0.6, 0.4], [0.2, 0.8]])
     )
@@ -26,16 +24,16 @@ def test_confusion_matrix():
     )
 
 
-def test_precision_and_recall():
+def test_per_class_precision_and_recall():
     y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
     y_pred = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 0])
-    D = task.precision_and_recall(y, y_pred)
+    D = task.per_class_precision_and_recall(y, y_pred)
     assert D[0] == pytest.approx((0.75, 0.6))
     assert D[1] == pytest.approx((0.66666667, 0.8))
 
     y = np.array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2])
     y_pred = np.array([0, 0, 0, 0, 1, 1, 2, 1, 2, 2, 0, 1])
-    D = task.precision_and_recall(y, y_pred)
+    D = task.per_class_precision_and_recall(y, y_pred)
     assert D[0] == pytest.approx((0.8, 1))
     assert D[1] == pytest.approx((0.75, 0.75))
     assert D[2] == pytest.approx((0.666666667, 0.5))

From 8153642ca5321302dd51ed2c72ffb40256683876 Mon Sep 17 00:00:00 2001
From: Sterling Suggs <sterling.suggs@twosixtech.com>
Date: Thu, 2 Jun 2022 14:01:38 -0600
Subject: [PATCH 10/12] additional name updates and consolidate repeated code

---
 armory/instrument/config.py     | 20 ++------------------
 armory/metrics/task.py          |  2 +-
 armory/utils/config_schema.json |  2 +-
 3 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/armory/instrument/config.py b/armory/instrument/config.py
index 784ec9fa3..4ccac7e2e 100644
--- a/armory/instrument/config.py
+++ b/armory/instrument/config.py
@@ -257,25 +257,9 @@ def _task_metric(
     elif name == "word_error_rate":
         final = metrics.get("total_wer")
         final_suffix = "total_word_error_rate"
-    elif name == "per_class_mean_accuracy":
+    elif name in ["per_class_mean_accuracy", "per_class_precision_and_recall", "confusion_matrix"]:
         metric = metrics.get("identity_unzip")
-        func = metrics.get("per_class_mean_accuracy")
-
-        def final(x):
-            return func(*metrics.task.identity_zip(x))
-
-        final_suffix = name
-    elif name == "confusion_matrix":
-        metric = metrics.get("identity_unzip")
-        func = metrics.get("confusion_matrix")
-
-        def final(x):
-            return func(*metrics.task.identity_zip(x))
-
-        final_suffix = name
-    elif name == "precision_and_recall":
-        metric = metrics.get("identity_unzip")
-        func = metrics.get("precision_and_recall")
+        func = metrics.get(name)
 
         def final(x):
             return func(*metrics.task.identity_zip(x))
diff --git a/armory/metrics/task.py b/armory/metrics/task.py
index cf2e8fa6b..e931c5e70 100644
--- a/armory/metrics/task.py
+++ b/armory/metrics/task.py
@@ -293,7 +293,7 @@ def per_class_precision_and_recall(y, y_pred):
     """
     # Assumes that every class is represented in y
 
-    C = confusion_matrix(y, y_pred, normalize=False)
+    C = confusion_matrix(y, y_pred, normalize_rows=False)
     # breakpoint()
     N = C.shape[0]
     D = {}
diff --git a/armory/utils/config_schema.json b/armory/utils/config_schema.json
index 20b7c7f2e..d6b50d907 100644
--- a/armory/utils/config_schema.json
+++ b/armory/utils/config_schema.json
@@ -264,7 +264,7 @@
                 "carla_od_true_positive_rate",
                 "per_class_mean_accuracy",
                 "confusion_matrix",
-                "precision_and_recall"
+                "per_class_precision_and_recall"
             ]
         },
         "sysconfig": {

From e34f632aebf7f8cbfa57fc02cb12975c224be3eb Mon Sep 17 00:00:00 2001
From: Sterling Suggs <sterling.suggs@twosixtech.com>
Date: Thu, 2 Jun 2022 14:33:57 -0600
Subject: [PATCH 11/12] apply black formatting to armory/instrument/config.py

---
 armory/instrument/config.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/armory/instrument/config.py b/armory/instrument/config.py
index 4ccac7e2e..23b215fe3 100644
--- a/armory/instrument/config.py
+++ b/armory/instrument/config.py
@@ -257,7 +257,11 @@ def _task_metric(
     elif name == "word_error_rate":
         final = metrics.get("total_wer")
         final_suffix = "total_word_error_rate"
-    elif name in ["per_class_mean_accuracy", "per_class_precision_and_recall", "confusion_matrix"]:
+    elif name in [
+        "per_class_mean_accuracy",
+        "per_class_precision_and_recall",
+        "confusion_matrix",
+    ]:
         metric = metrics.get("identity_unzip")
         func = metrics.get(name)
 

From 07a38788651550770fd8ff021f5960998c15b4d5 Mon Sep 17 00:00:00 2001
From: Sterling Suggs <sterling.suggs@twosixtech.com>
Date: Fri, 3 Jun 2022 09:37:21 -0600
Subject: [PATCH 12/12] fix keyword argument in confusion matrix test (normalize -> normalize_rows)

---
 tests/unit/test_task_metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_task_metrics.py b/tests/unit/test_task_metrics.py
index 010eaab43..aaa01b48d 100644
--- a/tests/unit/test_task_metrics.py
+++ b/tests/unit/test_task_metrics.py
@@ -19,7 +19,7 @@ def test_confusion_matrix():
     assert task.confusion_matrix(y, y_pred) == pytest.approx(
         np.array([[0.6, 0.4], [0.2, 0.8]])
     )
-    assert task.confusion_matrix(y, y_pred, normalize=False) == pytest.approx(
+    assert task.confusion_matrix(y, y_pred, normalize_rows=False) == pytest.approx(
         np.array([[3, 2], [1, 4]])
     )