-
Notifications
You must be signed in to change notification settings - Fork 518
/
Copy pathOCR.py
36 lines (27 loc) · 1.31 KB
/
OCR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from PIL import Image
import pytesseract
from IPython.display import display
from io import BytesIO
from ipywidgets import FileUpload
# Set the path to the Tesseract executable if necessary
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe' # Update this if needed
# Function to handle file upload and extract text
def upload_and_extract_text():
uploader = FileUpload(accept='image/*', multiple=False)
display(uploader)
def on_upload_change(change):
if uploader.value:
# Get the uploaded file (the first file in the dictionary)
file_info = next(iter(uploader.value.values())) # Access the first uploaded file
# Use BytesIO to open the image from the content
img = Image.open(BytesIO(file_info['content'])) # Convert bytes to an image
# Display the uploaded image
display(img) # Display the image directly
# Use pytesseract to extract text
extracted_text = pytesseract.image_to_string(img)
# Debugging output
print("Extracted Text:")
print(extracted_text if extracted_text else "No text found.")
uploader.observe(on_upload_change, names='value')
# Run the function
upload_and_extract_text()