villagecomputing · Codyk12 · Apr 18, 2024 · Apr 19, 2024 · Apr 29, 2024 · May 1, 2024
diff --git a/superpipe/grid_search.py b/superpipe/grid_search.py
@@ -115,6 +115,8 @@ def run(self, df: pd.DataFrame, output_dir=None, verbose=False, styled=True):
             result = {
                 **GridSearch._flatten_params_dict(params),
                 'score': self.pipeline.score,
+                'labels': self.pipeline.labels,
+                'Confusion_matrix': self.pipeline.cm,
                 'input_cost': self.pipeline.statistics.input_cost,
                 'output_cost': self.pipeline.statistics.output_cost,
                 'total_latency': self.pipeline.statistics.total_latency,

diff --git a/superpipe/pipeline.py b/superpipe/pipeline.py
@@ -7,11 +7,13 @@
 from prettytable import PrettyTable
 from superpipe.steps import Step, LLMStep, LLMStructuredStep, LLMStructuredCompositeStep
 from superpipe.config import is_dev, studio_enabled
+from sklearn.metrics import confusion_matrix
 
 
 @dataclass
 class PipelineStatistics:
     score: Optional[float] = None
+    cm = None
     input_tokens: dict = field(default_factory=lambda: defaultdict(int))
     output_tokens: dict = field(default_factory=lambda: defaultdict(int))
     input_cost: float = 0.0
@@ -68,6 +70,8 @@ def __init__(self,
         self.output_fields = output_fields or steps[-1].output_fields()
         self.data = None
         self.score = None
+        self.labels = None
+        self.cm = None
         self.name = name or self.__class__.__name__
         self.statistics = PipelineStatistics()
 
@@ -138,6 +142,10 @@ def evaluate(self, evaluation_fn=None):
         results = self.data.apply(lambda row: evaluation_fn(row), axis=1)
         self.data[f"__{evaluation_fn.__name__}__"] = results
         self.score = results.sum() / len(results)
+        labels = self.data.label.str.lower()
+        predicts = self.data.predict.str.lower()
+        self.labels = sorted(list(set(labels).union(predicts)))
+        self.cm = confusion_matrix(labels, predicts)
         return self.score
 
     def _aggregate_statistics(self, data: Union[pd.DataFrame, Dict]):