-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocr.py
59 lines (51 loc) · 1.88 KB
/
ocr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python3
from os import system
from time import sleep
import cv2
import numpy as np
from behave import step
from PIL import Image
from pytesseract import image_to_string
# rescale code inspired by
# https://stackoverflow.com/questions/48311273/ocr-small-image-with-python
def rescale_image(filename, basewidth=3200):
    """
    Resize the image stored at ``filename`` to ``basewidth`` pixels wide,
    keeping its aspect ratio, and overwrite the file with the result.

    :param filename: path of the image to rescale; the file is modified in place.
    :param basewidth: target width in pixels (defaults to 3200).
    """
    # The context manager closes the source file handle before the very same
    # path is overwritten by save() below.
    with Image.open(filename) as img:
        width, height = img.size
        scale = basewidth / float(width)
        # Keep at least one row so resize() never gets a zero-height target
        # for very wide, flat images.
        hsize = max(1, int(float(height) * scale))
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the
        # same resampling filter under its current name.
        resized = img.resize((basewidth, hsize), Image.LANCZOS)
    resized.save(filename)
# Tesseract's params https://github.com/tesseract-ocr/tesseract/blob/master/doc/tesseract.1.asc
# psm 12 = Sparse text with OSD (the default used below); psm 4 = Assume a single column of text of variable sizes.
# oem 3 = Default, based on what is available.
def get_ocr_text(filename, config=r'--oem 3 --psm 12', rescale=False):
    """
    Run OCR on an image file and return the recognised text.

    The image is converted to greyscale and thresholded to black/white.
    Tesseract is then run twice: once on the thresholded image and once on
    an inverted, sharpened copy of it. Both results are returned, joined by
    a newline.

    :param filename: path of the image to process.
    :param config: command-line options passed to Tesseract.
    :param rescale: when true, the file is first resized in place via
        ``rescale_image`` (this overwrites ``filename``).
    :return: text from the thresholded image, a newline, then text from the
        inverted image.
    :raises cv2.error: if the image cannot be read.
    """
    if rescale:
        print('Rescaling')
        rescale_image(filename)

    # cv2.imread() signals a missing/unreadable file by returning None rather
    # than raising; fail here with a readable message instead of a cryptic
    # assertion inside cvtColor().
    source = cv2.imread(filename)
    if source is None:
        raise cv2.error(f'Unable to read image for OCR: {filename}')

    # greyscale conversion
    gray_image = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
    # thresholding: pixels above 127 become 255, everything else 0
    _, img = cv2.threshold(gray_image, 127, 255, cv2.THRESH_BINARY)

    # 3x3 kernel with centre weight 9 and -1 neighbours (a sharpening filter),
    # applied to the colour-inverted image.
    kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
    inverted = cv2.filter2D(cv2.bitwise_not(img), -1, kernel)

    original_text = image_to_string(img, lang='eng', config=config)
    inverted_text = image_to_string(inverted, lang='eng', config=config)
    return f'{original_text}\n{inverted_text}'
def get_screen_text():
    """
    Take a screenshot and try to find text on the screen.

    The screenshot is written to a fixed temporary path with
    ``gnome-screenshot``, passed to ``get_ocr_text`` and removed afterwards,
    even when OCR fails.

    :return: the text returned by ``get_ocr_text`` for the screenshot.
    """
    IMG_LOCATION = '/tmp/ocr.png'
    # screenshot
    # NOTE(review): the pauses presumably let the screen settle and the file
    # get written before it is read - confirm.
    sleep(0.5)
    system(f'gnome-screenshot -f {IMG_LOCATION}')
    sleep(0.5)
    try:
        # TODO make image attachable to result log
        return get_ocr_text(IMG_LOCATION)
    finally:
        # Always clean up so a failed OCR run does not leave a stale
        # screenshot behind for the next call to pick up.
        system(f'rm -f {IMG_LOCATION}')