-import numpy as np
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+import torch
+import numpy as np


class ImageOCR:
    def __init__(self, model="microsoft/trocr-base-printed"):
        self.processor = TrOCRProcessor.from_pretrained(model)
        self.model = VisionEncoderDecoderModel.from_pretrained(model)

-    def image_to_text(self, segments: dict[str, np.ndarray]) -> dict[str, str]:
-        digitized: dict[str, str] = {}
+    def image_to_text(self, segments: dict[str, np.ndarray]) -> dict[str, tuple[str, float]]:
+        digitized: dict[str, tuple[str, float]] = {}
        for label, image in segments.items():
            if image is None:
                continue

            pixel_values = self.processor(images=image, return_tensors="pt").pixel_values

-            generated_ids = self.model.generate(pixel_values)
-            generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)
-            digitized[label] = generated_text[0]
+            with torch.no_grad():
+                outputs = self.model.generate(pixel_values, output_scores=True, return_dict_in_generate=True)
+
+            generated_text = self.processor.batch_decode(outputs.sequences, skip_special_tokens=True)[0]
+
+            # Calculate confidence score
+            confidence = self.calculate_confidence(outputs)
+
+            digitized[label] = (generated_text, confidence)

        return digitized
+
+    def calculate_confidence(self, outputs):
+        # Stack the per-step scores into shape (num_generated_tokens, batch, vocab_size)
+        scores = torch.stack(outputs.scores, dim=0)
+        probs = torch.softmax(scores, dim=-1)
+        # Probability assigned to the chosen (greedy) token at each generation step
+        max_probs = torch.max(probs, dim=-1).values
+
+        # Average confidence across all generated tokens
+        avg_confidence = torch.mean(max_probs).item()
+
+        # Convert to percentage
+        confidence_percentage = avg_confidence * 100
+
+        return confidence_percentage
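
A minimal usage sketch of the new return shape (label -> (text, confidence) instead of label -> text). The segment label and the random placeholder crop below are hypothetical, not part of this change:

import numpy as np

# Hypothetical input: label -> image crop (random placeholder pixels for illustration)
segments = {"total_field": np.random.randint(0, 255, (64, 256, 3), dtype=np.uint8)}

ocr = ImageOCR()
results = ocr.image_to_text(segments)

for label, (text, confidence) in results.items():
    print(f"{label}: {text!r} ({confidence:.1f}% confidence)")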