Skip to content

Commit f35bb30

Browse files
authored
Merge pull request #96 from Marszzz1116/master
1、修复 assert_ocr_not_exist 断言 Bug;2、扩展 assert_ocr_exist 与 assert_ocr_not_exist 断言;3、优化 OCR 识别范围筛选方法
2 parents 9b96e97 + 6835de3 commit f35bb30

File tree

2 files changed

+173
-54
lines changed

2 files changed

+173
-54
lines changed

src/assert_common.py

+121-15
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from src.ocr_utils import OCRUtils as OCR
1515
from src.image_utils import ImageUtils
16+
from src.mouse_key import MouseKey
1617
from src.filectl import FileCtl
1718
from src.dogtail_utils import DogtailUtils
1819
from src.custom_exception import TemplateElementNotFound
@@ -398,12 +399,11 @@ def assert_ocr_exist(
398399
timeout: [int, float] = None,
399400
max_match_number: int = None,
400401
mode: str = "all",
402+
bbox: dict = None,
401403
):
402404
"""
403405
断言文案存在
404-
:param args:
405-
目标字符,识别一个字符串或多个字符串,并返回其在图片中的坐标;
406-
如果不传参,返回图片中识别到的所有字符串。
406+
:param args: 目标字符,识别一个字符串或多个字符串。
407407
:param picture_abspath: 要识别的图片路径,如果不传默认截取全屏识别。
408408
:param similarity: 匹配度。
409409
:param return_first: 只返回第一个,默认为 False,返回识别到的所有数据。
@@ -413,10 +413,54 @@ def assert_ocr_exist(
413413
:param timeout: 最大匹配超时,单位秒
414414
:param max_match_number: 最大匹配次数
415415
:param mode: "all" or "any",all 表示识别所有目标字符,any 表示识别任意一个目标字符,默认值为 all
416+
:param bbox:
417+
接收一个字典,包含一个区域,在区域内进行识别,用于干扰较大时提升OCR识别精准度
418+
字典字段:
419+
start_x: 开始 x 坐标(左上角)
420+
start_y: 开始 y 坐标(左上角)
421+
w: 宽度
422+
h: 高度
423+
end_x: 结束 x 坐标(右下角)
424+
end_y: 结束 y 坐标(右下角)
425+
注意 : end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组
426+
示例:
427+
{"start_x": 0, "start_y": 0, "w": 100, "h": 100}
428+
{"start_x": 0, "start_y": 0, "end_x": 100, "end_y": 100}
416429
"""
430+
431+
if len(args) == 0:
432+
raise ValueError("缺少 ocr 断言关键字")
433+
417434
pic = None
418435
if picture_abspath is not None:
419436
pic = picture_abspath + ".png"
437+
438+
resolution = MouseKey.screen_size()
439+
if bbox is not None:
440+
start_x = bbox.get("start_x") if bbox.get("start_x") is not None else None
441+
start_y = bbox.get("start_y") if bbox.get("start_y") is not None else None
442+
w = bbox.get("w") if bbox.get("w") is not None else None
443+
h = bbox.get("h") if bbox.get("h") is not None else None
444+
end_x = bbox.get("end_x") if bbox.get("end_x") is not None else None
445+
end_y = bbox.get("end_y") if bbox.get("end_y") is not None else None
446+
447+
if start_x is None or start_y is None:
448+
raise ValueError("缺失 start_x 或 start_y 坐标")
449+
450+
wh_provided = w is not None and h is not None
451+
end_xy_provided = end_x is not None and end_y is not None
452+
453+
if not (wh_provided ^ end_xy_provided):
454+
raise ValueError("end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组")
455+
456+
if end_xy_provided:
457+
w = end_x - start_x
458+
h = end_y - start_y
459+
picture_abspath = ImageUtils.save_temporary_picture(start_x, start_y, w, h)
460+
pic = picture_abspath + ".png"
461+
462+
resolution = f"{start_x, start_y} -> {w, h}"
463+
420464
res = OCR.ocr(
421465
*args,
422466
picture_abspath=pic,
@@ -430,7 +474,10 @@ def assert_ocr_exist(
430474
)
431475
if res is False:
432476
raise AssertionError(
433-
(f"通过OCR未识别到:{args}", f"{pic if pic else GlobalConfig.SCREEN_CACHE}")
477+
(
478+
f"通过OCR在范围[{resolution}]未识别到:{args}",
479+
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
480+
)
434481
)
435482
if isinstance(res, tuple):
436483
pass
@@ -440,14 +487,14 @@ def assert_ocr_exist(
440487
res = filter(lambda x: x[1] is False, res.items())
441488
raise AssertionError(
442489
(
443-
f"通过OCR未识别到{dict(res)}",
490+
f"通过OCR在范围[{resolution}]未识别到{dict(res)}",
444491
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
445492
)
446493
)
447494
elif mode == "any" and len(res) == list(res.values()).count(False):
448495
raise AssertionError(
449496
(
450-
f"通过OCR未识别到{args}中的任意一个",
497+
f"通过OCR在范围[{resolution}]未识别到{args}中的任意一个",
451498
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
452499
)
453500
)
@@ -463,11 +510,67 @@ def assert_ocr_not_exist(
463510
pause: [int, float] = None,
464511
timeout: [int, float] = None,
465512
max_match_number: int = None,
513+
bbox: dict = None,
466514
):
467-
"""断言文案不存在"""
515+
"""
516+
断言文案不存在
517+
:param args: 目标字符,识别一个字符串或多个字符串。
518+
:param picture_abspath: 要识别的图片路径,如果不传默认截取全屏识别。
519+
:param similarity: 匹配度。
520+
:param return_first: 只返回第一个,默认为 False,返回识别到的所有数据。
521+
:param lang: `ch`, `en`, `fr`, `german`, `korean`, `japan`
522+
:param network_retry: 连接服务器重试次数
523+
:param pause: 重试间隔时间,单位秒
524+
:param timeout: 最大匹配超时,单位秒
525+
:param max_match_number: 最大匹配次数
526+
:param bbox:
527+
接收一个字典,包含一个区域,在区域内进行识别,用于干扰较大时提升OCR识别精准度
528+
字典字段:
529+
start_x: 开始 x 坐标(左上角)
530+
start_y: 开始 y 坐标(左上角)
531+
w: 宽度
532+
h: 高度
533+
end_x: 结束 x 坐标(右下角)
534+
end_y: 结束 y 坐标(右下角)
535+
注意 : end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组
536+
示例:
537+
{"start_x": 0, "start_y": 0, "w": 100, "h": 100}
538+
{"start_x": 0, "start_y": 0, "end_x": 100, "end_y": 100}
539+
"""
540+
541+
if len(args) == 0:
542+
raise ValueError("缺少 ocr 断言关键字")
543+
468544
pic = None
469545
if picture_abspath is not None:
470546
pic = picture_abspath + ".png"
547+
548+
resolution = MouseKey.screen_size()
549+
if bbox is not None:
550+
start_x = bbox.get("start_x") if bbox.get("start_x") is not None else None
551+
start_y = bbox.get("start_y") if bbox.get("start_y") is not None else None
552+
w = bbox.get("w") if bbox.get("w") is not None else None
553+
h = bbox.get("h") if bbox.get("h") is not None else None
554+
end_x = bbox.get("end_x") if bbox.get("end_x") is not None else None
555+
end_y = bbox.get("end_y") if bbox.get("end_y") is not None else None
556+
557+
if start_x is None or start_y is None:
558+
raise ValueError("缺失 start_x 或 start_y 坐标")
559+
560+
wh_provided = w is not None and h is not None
561+
end_xy_provided = end_x is not None and end_y is not None
562+
563+
if not (wh_provided ^ end_xy_provided):
564+
raise ValueError("end_x + end_y 与 w + h 为互斥关系, 必须且只能传入其中一组")
565+
566+
if end_xy_provided:
567+
w = end_x - start_x
568+
h = end_y - start_y
569+
picture_abspath = ImageUtils.save_temporary_picture(start_x, start_y, w, h)
570+
pic = picture_abspath + ".png"
571+
572+
resolution = f"{start_x, start_y} -> {w, h}"
573+
471574
res = OCR.ocr(
472575
*args,
473576
picture_abspath=pic,
@@ -484,15 +587,18 @@ def assert_ocr_not_exist(
484587
elif isinstance(res, tuple):
485588
raise AssertionError(
486589
(
487-
f"通过ocr识别到不应存在的文案 {res}",
590+
f"通过ocr在范围[{resolution}]识别到不应存在的文案 {res}",
488591
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
489592
)
490593
)
491-
elif isinstance(res, dict) and True in res.values():
492-
res = filter(lambda x: x[1] is not False, res.items())
493-
raise AssertionError(
494-
(
495-
f"通过OCR识别到不应存在的文案:{dict(res)}",
496-
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
594+
elif isinstance(res, dict):
595+
if all(value is False for value in res.values()):
596+
pass
597+
else:
598+
res = filter(lambda x: x[1] is not False, res.items())
599+
raise AssertionError(
600+
(
601+
f"通过OCR在范围[{resolution}]识别到不应存在的文案:{dict(res)}",
602+
f"{pic if pic else GlobalConfig.SCREEN_CACHE}",
603+
)
497604
)
498-
)

src/ocr_utils.py

+52-39
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,17 @@ def ocr(cls, *args, **kwargs):
4141

4242
@classmethod
4343
def ocr_remote(
44-
cls,
45-
target_strings: tuple = None,
46-
picture_abspath: str = None,
47-
similarity: [int, float] = 0.6,
48-
return_default: bool = False,
49-
return_first: bool = False,
50-
lang: str = "ch",
51-
network_retry: int = None,
52-
pause: [int, float] = None,
53-
timeout: [int, float] = None,
54-
max_match_number: int = None,
44+
cls,
45+
target_strings: tuple = None,
46+
picture_abspath: str = None,
47+
similarity: [int, float] = 0.6,
48+
return_default: bool = False,
49+
return_first: bool = False,
50+
lang: str = "ch",
51+
network_retry: int = None,
52+
pause: [int, float] = None,
53+
timeout: [int, float] = None,
54+
max_match_number: int = None,
5555
):
5656
servers = cls.ocr_servers
5757
while servers:
@@ -96,20 +96,23 @@ def _check_xy(cls):
9696
@classmethod
9797
def click(cls):
9898
from src.mouse_key import MouseKey
99+
99100
cls._check_xy()
100101
MouseKey.click(cls.x, cls.y)
101102
return cls
102103

103104
@classmethod
104105
def right_click(cls):
105106
from src.mouse_key import MouseKey
107+
106108
cls._check_xy()
107109
MouseKey.right_click(cls.x, cls.y)
108110
return cls
109111

110112
@classmethod
111113
def double_click(cls):
112114
from src.mouse_key import MouseKey
115+
113116
cls._check_xy()
114117
MouseKey.double_click(cls.x, cls.y)
115118
return cls
@@ -126,41 +129,51 @@ def all_result(cls):
126129
@classmethod
127130
def ocr_find_by_range(cls, text, x1=None, x2=None, y1=None, y2=None):
128131
"""
129-
OCR在界面中识别到多个关键词时,通过区域筛选出对应关键词并返回坐标
130-
:param text: 页面查找关键词
131-
:param x1: x坐标开始范围
132-
:param x2: x坐标结束范围
133-
:param y1: y坐标开始范围
134-
:param y2: y坐标结束范围
132+
OCR在当前界面中识别到多个关键词时,通过区域筛选出对应关键词并返回坐标
133+
:param text: 页面范围内查找关键词,可自由使用以下参数划定查找区域
134+
:param x1: x坐标开始范围,有效区域为大于 x1 区域
135+
:param x2: x坐标结束范围,有效区域为小于 x2 区域
136+
:param y1: y坐标开始范围,有效区域为大于 y1 区域
137+
:param y2: y坐标结束范围,有效区域为小于 y2 区域
135138
:return: 坐标元组 (x, y)
136139
137-
注意:需要特定区域内只有一组OCR关键词,若任有多组请增加精度,否则默认返回第一组符合条件的关键词坐标
140+
注意:该方法设计是为了筛选出唯一坐标,所以需要特定区域内只有一组OCR关键词,若仍有多组数据会直接报错,请增加精度
138141
139142
以默认分辨率 1920*1080 为例,多种示例情况如下:
140-
示例1(识别左半屏幕关键字):ocr_find_by_range(x1=960)
143+
示例1(识别左半屏幕关键字):ocr_find_by_range(x2=960)
141144
示例2(识别下半屏幕关键字):ocr_find_by_range(y1=540)
142-
示例3(识别左半屏幕-上半屏关键字):ocr_find_by_range(x1=960, y1=540)
143-
示例4(识别特定区域 :100*900-200*950 内关键字):ocr_find_by_range(x1=100, x2=200, y1=900, y2=950)
145+
示例3(识别右下半屏幕关键字):ocr_find_by_range(x1=960, y1=540)
146+
示例4(识别特定区域 :100*900-200*950 内关键字):ocr_find_by_range(x1=100, y1=900, x2=200, y2=950)
144147
"""
145-
defaults = {
146-
'x1': 0,
147-
'x2': 1920,
148-
'y1': 0,
149-
'y2': 1080
150-
}
151-
152-
x1 = x1 if x1 is not None else defaults['x1']
153-
x2 = x2 if x2 is not None else defaults['x2']
154-
y1 = y1 if y1 is not None else defaults['y1']
155-
y2 = y2 if y2 is not None else defaults['y2']
148+
defaults = {"x1": 0, "x2": 1920, "y1": 0, "y2": 1080}
149+
150+
x1 = x1 if x1 is not None else defaults["x1"]
151+
x2 = x2 if x2 is not None else defaults["x2"]
152+
y1 = y1 if y1 is not None else defaults["y1"]
153+
y2 = y2 if y2 is not None else defaults["y2"]
156154

155+
if x1 > x2 or y1 > y2:
156+
raise ValueError("x1 > x2 or y1 > y2")
157+
158+
results = []
157159
ocr_return = cls.ocr(text)
158160
if isinstance(ocr_return, dict):
159-
for key, value in ocr_return.items():
161+
for _, value in ocr_return.items():
160162
if x1 <= value[0] <= x2 and y1 <= value[1] <= y2:
161-
return value
162-
return ocr_return
163-
164-
165-
if __name__ == '__main__':
166-
OCRUtils.ocrx().click()
163+
results.append(value)
164+
if len(results) == 0:
165+
raise ValueError(
166+
f"范围内[{x1, y1} - {x2, y2}]未识别到关键词[{text}],请增加识别精度,识别结果为:{results}"
167+
)
168+
elif len(results) == 1:
169+
return results[0]
170+
else:
171+
raise ValueError(
172+
f"范围内[{x1, y1} - {x2, y2}]识别到多组关键词[{text}],请增加识别精度,识别结果为:{results}"
173+
)
174+
if x1 <= ocr_return[0] <= x2 and y1 <= ocr_return[1] <= y2:
175+
return ocr_return
176+
else:
177+
raise ValueError(
178+
f"范围内[{x1, y1} - {x2, y2}]识别不到关键词,请增加识别精度,识别结果为:{ocr_return}"
179+
)

0 commit comments

Comments
 (0)