1
1
import numpy as np
2
2
import cv2
3
3
import pytesseract
4
- from datetime import datetime
5
4
import os
6
-
5
+ import base64
6
+ from datetime import datetime
7
+ from io import BytesIO
8
+ from PIL import Image
7
9
8
10
class FastMRZ :
9
11
def __init__ (self , tesseract_path = "" ):
@@ -89,6 +91,16 @@ def _format_date(self, input_date):
89
91
90
92
return formatted_date
91
93
94
+ def _get_birth_date (self , birth_date_str , expiry_date_str ):
95
+ birth_year = int (birth_date_str [:4 ])
96
+ expiry_year = int (expiry_date_str [:4 ])
97
+
98
+ if expiry_year > birth_year :
99
+ return birth_date_str
100
+ adjusted_year = birth_year - 100
101
+
102
+ return f"{ adjusted_year } -{ birth_date_str [5 :]} "
103
+
92
104
def _is_valid (self , image ):
93
105
if isinstance (image , str ):
94
106
return bool (os .path .isfile (image ))
@@ -103,23 +115,21 @@ def _get_mrz(self, image):
103
115
104
116
return self ._cleanse_roi (mrz_roi )
105
117
106
- def get_details (self , image , ignore_parse = False ):
118
def _base64_to_image_array(self, base64_string):
    """Decode a base64-encoded image into a NumPy array.

    Args:
        base64_string: Base64 text of an encoded image file. A data URI
            (``data:<mime>;base64,<payload>``) is also accepted; its
            header is dropped before decoding.

    Returns:
        numpy.ndarray: The pixel data as produced by ``np.array`` on the
        opened PIL image (channel order and dtype are whatever PIL yields
        for the image's mode).

    Raises:
        binascii.Error: If the payload is not valid base64.
        PIL.UnidentifiedImageError: If the decoded bytes are not an image
            PIL can open.
    """
    # b64decode silently discards characters outside the base64 alphabet,
    # so a data-URI header would not raise; its letters would be decoded
    # as extra payload and corrupt the image. Plain base64 never contains
    # a comma, which makes this split safe for existing callers.
    if isinstance(base64_string, str) and base64_string.startswith("data:"):
        base64_string = base64_string.partition(",")[2]

    image_data = base64.b64decode(base64_string)

    # np.array() forces the lazy PIL loader to read the pixels, so the
    # image can be closed as soon as the array has been built.
    with Image.open(BytesIO(image_data)) as image:
        return np.array(image)
125
+
126
def get_details_old(self, image, ignore_parse=False, include_checkdigit=True):
    """Extract MRZ data from an image (previous entry point).

    Args:
        image: Image input that is checked with ``_is_valid`` and then
            handed to ``_get_mrz``.
        ignore_parse: When true, return the raw MRZ text from ``_get_mrz``
            without parsing it.
        include_checkdigit: Accepted for signature compatibility; this
            method does not read it.

    Returns:
        A FAILURE dict when the input is rejected by ``_is_valid``;
        otherwise the raw MRZ text or the result of ``_parse_mrz``.
    """
    if self._is_valid(image):
        extracted_text = self._get_mrz(image)
        if ignore_parse:
            return extracted_text
        return self._parse_mrz(extracted_text)

    return {"status": "FAILURE", "message": "Invalid input image"}
112
132
113
- def _get_birth_date (self , birth_date_str , expiry_date_str ):
114
- birth_year = int (birth_date_str [:4 ])
115
- expiry_year = int (expiry_date_str [:4 ])
116
-
117
- if expiry_year > birth_year :
118
- return birth_date_str
119
- adjusted_year = birth_year - 100
120
-
121
- return f"{ adjusted_year } -{ birth_date_str [5 :]} "
122
-
123
133
def _parse_mrz (self , mrz_text ):
124
134
if not mrz_text :
125
135
return {"status" : "FAILURE" , "message" : "No MRZ detected" }
@@ -149,22 +159,22 @@ def _parse_mrz(self, mrz_text):
149
159
mrz_code_dict ["document_number" ] = mrz_lines [1 ][:9 ].replace ("<" , "" )
150
160
mrz_code_dict ["document_number_checkdigit" ] = self ._get_check_digit (mrz_code_dict ["document_number" ])
151
161
if mrz_code_dict ["document_number_checkdigit" ] != mrz_lines [1 ][9 ]:
152
- return {"status" : "FAILURE" , "message" : "document number checksum is not matching" }
162
+ return {"status" : "FAILURE" , "message" : "Document number checksum is not matching" }
153
163
154
164
mrz_code_dict ["nationality_code" ] = mrz_lines [1 ][10 :13 ]
155
165
if not mrz_code_dict ["nationality_code" ].isalpha ():
156
166
return {"status" : "FAILURE" , "message" : "Invalid MRZ format" }
157
167
158
168
mrz_code_dict ["birth_date" ] = mrz_lines [1 ][13 :19 ]
159
169
if self ._get_check_digit (mrz_code_dict ["birth_date" ]) != mrz_lines [1 ][19 ]:
160
- return {"status" : "FAILURE" , "message" : "date of birth checksum is not matching" }
170
+ return {"status" : "FAILURE" , "message" : "Date of birth checksum is not matching" }
161
171
mrz_code_dict ["birth_date" ] = self ._format_date (mrz_code_dict ["birth_date" ])
162
172
163
173
mrz_code_dict ["sex" ] = mrz_lines [1 ][20 ]
164
174
165
175
mrz_code_dict ["expiry_date" ] = mrz_lines [1 ][21 :27 ]
166
176
if self ._get_check_digit (mrz_code_dict ["expiry_date" ]) != mrz_lines [1 ][27 ]:
167
- return {"status" : "FAILURE" , "message" : "date of expiry checksum is not matching" }
177
+ return {"status" : "FAILURE" , "message" : "Date of expiry checksum is not matching" }
168
178
mrz_code_dict ["expiry_date" ] = self ._format_date (mrz_code_dict ["expiry_date" ])
169
179
mrz_code_dict ["birth_date" ] = self ._get_birth_date (mrz_code_dict ["birth_date" ], mrz_code_dict ["expiry_date" ])
170
180
@@ -179,7 +189,7 @@ def _parse_mrz(self, mrz_text):
179
189
180
190
if (mrz_lines [1 ][- 1 ] != self ._get_final_check_digit (mrz_lines [1 ], mrz_code_dict ["mrz_type" ])
181
191
and mrz_code_dict ["mrz_type" ] not in ("MRVA" , "MRVB" )):
182
- return {"status" : "FAILURE" , "message" : "final checksum is not matching" }
192
+ return {"status" : "FAILURE" , "message" : "Final checksum is not matching" }
183
193
else :
184
194
mrz_code_dict ["mrz_type" ] = "TD1"
185
195
@@ -193,21 +203,21 @@ def _parse_mrz(self, mrz_text):
193
203
mrz_code_dict ["document_number" ] = mrz_lines [0 ][5 :14 ]
194
204
mrz_code_dict ["document_number_checkdigit" ] = self ._get_check_digit (mrz_code_dict ["document_number" ])
195
205
if mrz_code_dict ["document_number_checkdigit" ] != mrz_lines [0 ][14 ]:
196
- return {"status" : "FAILURE" , "message" : "document number checksum is not matching" }
206
+ return {"status" : "FAILURE" , "message" : "Document number checksum is not matching" }
197
207
198
208
mrz_code_dict ["optional_data_1" ] = mrz_lines [0 ][15 :].strip ("<" )
199
209
200
210
# Line 2
201
211
mrz_code_dict ["birth_date" ] = mrz_lines [1 ][:6 ]
202
212
if self ._get_check_digit (mrz_code_dict ["birth_date" ]) != mrz_lines [1 ][6 ]:
203
- return {"status" : "FAILURE" , "message" : "date of birth checksum is not matching" }
213
+ return {"status" : "FAILURE" , "message" : "Date of birth checksum is not matching" }
204
214
mrz_code_dict ["birth_date" ] = self ._format_date (mrz_code_dict ["birth_date" ])
205
215
206
216
mrz_code_dict ["sex" ] = mrz_lines [1 ][7 ]
207
217
208
218
mrz_code_dict ["expiry_date" ] = mrz_lines [1 ][8 :14 ]
209
219
if self ._get_check_digit (mrz_code_dict ["expiry_date" ]) != mrz_lines [1 ][14 ]:
210
- return {"status" : "FAILURE" , "message" : "date of expiry checksum is not matching" }
220
+ return {"status" : "FAILURE" , "message" : "Date of expiry checksum is not matching" }
211
221
mrz_code_dict ["expiry_date" ] = self ._format_date (mrz_code_dict ["expiry_date" ])
212
222
213
223
mrz_code_dict ["birth_date" ] = self ._get_birth_date (mrz_code_dict ["birth_date" ], mrz_code_dict ["expiry_date" ])
@@ -218,7 +228,7 @@ def _parse_mrz(self, mrz_text):
218
228
219
229
mrz_code_dict ["optional_data_2" ] = mrz_lines [0 ][18 :29 ].strip ("<" )
220
230
if mrz_lines [1 ][- 1 ] != self ._get_final_check_digit (mrz_lines , mrz_code_dict ["mrz_type" ]):
221
- return {"status" : "FAILURE" , "message" : "final checksum is not matching" }
231
+ return {"status" : "FAILURE" , "message" : "Final checksum is not matching" }
222
232
223
233
# Line 3
224
234
names = mrz_lines [2 ].split ("<<" )
@@ -232,7 +242,40 @@ def _parse_mrz(self, mrz_text):
232
242
233
243
return mrz_code_dict
234
244
235
- def get_details_mrz (self , mrz_text ):
245
def validate_mrz(self, mrz_text):
    """Report whether an MRZ text parses successfully.

    The text is passed through ``_cleanse_roi`` and then ``_parse_mrz``.

    Args:
        mrz_text: MRZ text to check.

    Returns:
        dict: ``{"is_valid": True, "message": ...}`` when the parse result
        has status ``"SUCCESS"``; otherwise ``is_valid`` is ``False`` and
        ``message`` is the message taken from the parse result.
    """
    parsed = self._parse_mrz(self._cleanse_roi(mrz_text))

    if parsed.get("status") != "SUCCESS":
        return {"is_valid": False, "message": parsed.get("message")}

    return {"is_valid": True, "message": "The given mrz is valid"}
253
+
254
def get_details(self, input_data, input_type="imagepath", ignore_parse=False, include_checkdigit=True):
    """Extract MRZ data from one of several input representations.

    Args:
        input_data: The input, interpreted according to ``input_type``.
        input_type: One of
            ``"imagepath"`` - input checked with ``_is_valid`` and read
            with ``_get_mrz``;
            ``"numpy"`` - a ``numpy.ndarray`` image;
            ``"base64"`` - a base64-encoded image;
            ``"pdf"`` - not implemented yet;
            ``"text"`` - MRZ text that only needs cleansing and parsing.
        ignore_parse: When true, return the MRZ text without parsing it.
        include_checkdigit: Accepted for signature compatibility; this
            method does not read it.

    Returns:
        The MRZ text when ``ignore_parse`` is true, otherwise the result
        of ``_parse_mrz``. A ``{"status": "FAILURE", ...}`` dict is
        returned for an invalid image path, an undecodable base64 image,
        or the unimplemented ``"pdf"`` type.

    Raises:
        ValueError: If ``input_type`` is ``"numpy"`` but ``input_data`` is
            not an ndarray, or if ``input_type`` is not recognised.
    """
    if input_type == "imagepath":
        if not self._is_valid(input_data):
            return {"status": "FAILURE", "message": "Invalid input image"}
        mrz_text = self._get_mrz(input_data)
    elif input_type == "numpy":
        if not isinstance(input_data, np.ndarray):
            raise ValueError("Input is not a valid NumPy array.")
        # Arrays go straight to _get_mrz, the same way the decoded base64
        # image does below.
        mrz_text = self._get_mrz(input_data)
    elif input_type == "base64":
        try:
            image_array = self._base64_to_image_array(input_data)
        except (ValueError, OSError):
            # binascii.Error (bad base64) derives from ValueError and PIL's
            # UnidentifiedImageError derives from OSError; report both the
            # same way an unreadable image path is reported.
            return {"status": "FAILURE", "message": "Invalid input image"}
        mrz_text = self._get_mrz(image_array)
    elif input_type == "pdf":
        # Explicit failure instead of silently returning None until PDF
        # handling exists.
        return {"status": "FAILURE", "message": "PDF input is not supported yet"}
    elif input_type == "text":
        mrz_text = self._cleanse_roi(input_data)
    else:
        raise ValueError(f"Unsupported input_type: {input_type}")

    return mrz_text if ignore_parse else self._parse_mrz(mrz_text)
0 commit comments