-import numpy as np
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+import torch
+import numpy as np


class ImageOCR:
    def __init__(self, model="microsoft/trocr-base-printed"):
        self.processor = TrOCRProcessor.from_pretrained(model)
        self.model = VisionEncoderDecoderModel.from_pretrained(model)

-    def image_to_text(self, segments: dict[str, np.ndarray]) -> dict[str, str]:
-        digitized: dict[str, str] = {}
+    def image_to_text(self, segments: dict[str, np.ndarray]) -> dict[str, tuple[str, float]]:
+        digitized: dict[str, tuple[str, float]] = {}
        for label, image in segments.items():
            if image is None:
                continue

            pixel_values = self.processor(images=image, return_tensors="pt").pixel_values

-            generated_ids = self.model.generate(pixel_values)
-            generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)
-            digitized[label] = generated_text[0]
+            with torch.no_grad():
+                outputs = self.model.generate(pixel_values, output_scores=True, return_dict_in_generate=True)
+
+            generated_text = self.processor.batch_decode(outputs.sequences, skip_special_tokens=True)[0]
+
+            # Calculate confidence score
+            confidence = self.calculate_confidence(outputs)
+
+            digitized[label] = (generated_text, confidence)

        return digitized
+
+    def calculate_confidence(self, outputs):
+        # Stack the per-step scores into shape (num_generated_tokens, batch, vocab_size)
+        scores = torch.stack(outputs.scores, dim=0)
+        probs = torch.softmax(scores, dim=-1)
+        # Probability assigned to the chosen (greedy) token at each generation step
+        max_probs = torch.max(probs, dim=-1).values
+
+        # Average confidence across all generated tokens
+        avg_confidence = torch.mean(max_probs).item()
+
+        # Convert to percentage
+        confidence_percentage = avg_confidence * 100
+
+        return confidence_percentage
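
A minimal usage sketch of the new return shape (label -> (text, confidence) instead of label -> text). The segment label and the random placeholder crop below are hypothetical, not part of this change:

import numpy as np

# Hypothetical input: label -> image crop (random placeholder pixels for illustration)
segments = {"total_field": np.random.randint(0, 255, (64, 256, 3), dtype=np.uint8)}

ocr = ImageOCR()
results = ocr.image_to_text(segments)

for label, (text, confidence) in results.items():
    print(f"{label}: {text!r} ({confidence:.1f}% confidence)")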