updated core logic

sivakumar-mahalingam · sivakumar-mahalingam · commit ee9158fd5dd8 · 2025-01-06T23:29:08.000+04:00
diff --git a/README.md b/README.md
@@ -28,12 +28,12 @@ This repository extracts the Machine Readable Zone (MRZ) from document images. T
 ![NumPy](https://img.shields.io/badge/numpy-316192?style=for-the-badge&logo=numpy&logoColor=white)
 ![ONNX](https://img.shields.io/badge/ONNX-7B7B7B?style=for-the-badge&logo=onnx&logoColor=white)
 
-## ⚙️Installation
-
+## 🚨Prerequisites
+- Install the [Tesseract OCR](https://tesseract-ocr.github.io/tessdoc/Installation.html) engine, add the directory containing its executable to your `PATH` environment variable, and verify that `tesseract` can be run from the command line.
 
-1. Install [Tesseract OCR](https://tesseract-ocr.github.io/tessdoc/Installation.html) engine. And set `PATH` variable with the executable. 
+## ⚙️Installation
 
-2. Install `fastmrz`
+1. Install `fastmrz`
     ```bash
     pip install fastmrz
     ```
@@ -44,7 +44,7 @@ This repository extracts the Machine Readable Zone (MRZ) from document images. T
      conda activate fastmrz
      ```
 
-3. Copy  the `mrz.traineddata` file from the `tessdata` folder of the [repository](https://github.com/sivakumar-mahalingam/fastmrz/raw/main/tessdata/mrz.traineddata) into the `tessdata` folder of the Tesseract installation on **YOUR MACHINE**
+2. Copy the `mrz.traineddata` file from the `tessdata` folder of the [repository](https://github.com/sivakumar-mahalingam/fastmrz/raw/main/tessdata/mrz.traineddata) into the `tessdata` folder of the Tesseract installation on **YOUR MACHINE**.
 
 ## 💡Example
 
@@ -114,7 +114,7 @@ Now, based on the example of a national passport, let us take a closer look at t
 
 ![MRZ GIF](https://raw.githubusercontent.com/sivakumar-mahalingam/fastmrz/main/docs/mrz.gif)
 
-## 🗹ToDo
+## ✅ToDo
 
 - [x] Include mrva and mrvb documents
 - [x] Add wiki page
@@ -130,6 +130,7 @@ Now, based on the example of a national passport, let us take a closer look at t
 - [x] Add function - validate_mrz
 - [ ] Add function - generate_mrz
 - [ ] Extract face image
+- [ ] Add documentation page
 
 ## ⚖️License
 
diff --git a/fastmrz/fastmrz.py b/fastmrz/fastmrz.py
@@ -31,15 +31,6 @@ def _get_roi(self, output_data, image_path):
             pytesseract.pytesseract.tesseract_cmd = self.tesseract_path
         image = cv2.imread(image_path, cv2.IMREAD_COLOR) if isinstance(image_path, str) else image_path
 
-        # Add preprocessing steps
-        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-        # Increase contrast
-        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
-        gray = clahe.apply(gray)
-
-        # Denoise
-        denoised = cv2.fastNlMeansDenoising(gray)
-
         output_data = (output_data[0, :, :, 0] > 0.25) * 1
         output_data = np.uint8(output_data * 255)
         altered_image = cv2.resize(output_data, (image.shape[1], image.shape[0]))
@@ -58,18 +49,14 @@ def _get_roi(self, output_data, image_path):
         x, y, w, h = cv2.boundingRect(contours[np.argmax(c_area)])
 
         # Add padding to the ROI
-        padding = 10  # Adjust this value as needed
+        padding = 10
         x_start = max(0, x - padding)
         y_start = max(0, y - padding)
         x_end = min(image.shape[1], x + w + padding)
         y_end = min(image.shape[0], y + h + padding)
 
         roi_arr = image[y_start:y_end, x_start:x_end].copy()
 
-        # Optional: Show the ROI for debugging
-        # cv2.imshow("ROI", roi_arr)
-        # cv2.waitKey(0)
-
         # Apply additional preprocessing to ROI before OCR
         roi_gray = cv2.cvtColor(roi_arr, cv2.COLOR_BGR2GRAY)
         roi_thresh = cv2.threshold(roi_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
@@ -283,7 +270,12 @@ def get_details(self, input_data, input_type="imagepath", ignore_parse=False, in
         if input_type == "imagepath":
             if not self._is_valid(input_data):
                 return {"status": "FAILURE", "message": "Invalid input image"}
-            mrz_text = self._get_mrz(input_data)
+            image_file = open(input_data, "rb")
+            image_data = image_file.read()
+            image_file.close()
+            base64_string = base64.b64encode(image_data).decode("utf-8")
+            image_array = self._base64_to_image_array(base64_string)
+            mrz_text = self._get_mrz(image_array)
 
             return mrz_text if ignore_parse else self._parse_mrz(mrz_text)
         elif input_type == "numpy":