Skip to content

Commit 01fbaed

Browse files
sebras, julian-smith-artifex-com
authored and committed
Adapt the PyMuPDF test suite to the text extraction behaviour of the upcoming MuPDF 1.27.0.
1 parent bc65105 commit 01fbaed

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed

tests/test_tables.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -184,7 +184,12 @@ def test_2979():
184184
), f"{pymupdf.TOOLS.set_small_glyph_heights()=}"
185185

186186
wt = pymupdf.TOOLS.mupdf_warnings()
187-
if pymupdf.mupdf_version_tuple >= (1, 26, 0):
187+
if pymupdf.mupdf_version_tuple >= (1, 27, 0):
188+
assert (
189+
wt
190+
== "bogus font ascent/descent values (3117 / -2463)\n... repeated 2 times...\nActualtext with no position. Text may be lost or mispositioned.\n... repeated 96 times..."
191+
)
192+
elif pymupdf.mupdf_version_tuple >= (1, 26, 0):
188193
assert (
189194
wt
190195
== "bogus font ascent/descent values (3117 / -2463)\n... repeated 2 times..."

tests/test_textextract.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -338,6 +338,11 @@ def test_3594():
338338
for line in text.split('\n'):
339339
print(f' {line!r}')
340340
print('='*40)
341+
wt = pymupdf.TOOLS.mupdf_warnings()
342+
if pymupdf.mupdf_version_tuple < (1, 27):
343+
assert not wt
344+
else:
345+
assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 2 times...'
341346

342347

343348
def test_3687():
@@ -885,6 +890,9 @@ def test_4546():
885890

886891
# This output is different from expected_1_23_5.
887892
expected_mupdf_1_26_1 = b'JOB No.: Shipper (complete name and address) \xe5\x8f\x91\xe8\xb4\xa7\xe4\xba\xba(\xe5\x90\x8d\xe7\xa7\xb0\xe5\x8f\x8a\xe5\x9c\xb0\xe5\x9d\x80) Tel: Fax: \n \nS/O No. \xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95\xe5\x8f\xb7\xe7\xa0\x81 \nSINORICH TRANSPORT LIMITED \nSHIPPING ORDER \n\xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95 \n \xe5\xb8\x82\xe5\x9c\xba\xe9\x83\xa8: \n88570009 \n88577019 \n88'.decode()
893+
894+
# This output is different from either of the two expected strings.
895+
expected_mupdf_1_27_0 = b'JOB No.: \n \nS/O No. \xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95\xe5\x8f\xb7\xe7\xa0\x81 \nSINORICH TRANSPORT LIMITED \nSHIPPING ORDER \n\xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95 \n \xe5\xb8\x82\xe5\x9c\xba\xe9\x83\xa8: \n88570009 \n88577019 \n88572702 \n \xe6\x93\x8d\xe4\xbd\x9c\xe9\x83\xa8: \n88570008 \n88570004 \n \xe6\x96\x87\xe4\xbb\xb6\xe9\x83\xa8: \n88570003\n \nNotify Party(complete name and address, '.decode()
888896

889897
print(f'expected_1_23_5\n{textwrap.indent(expected_1_23_5, " ")}')
890898
print(f'expected_mupdf_1_26_1\n{textwrap.indent(expected_mupdf_1_26_1, " ")}')
@@ -894,10 +902,16 @@ def test_4546():
894902
print(f'{text=}')
895903
print(f'{text.encode()=}')
896904

897-
if pymupdf.mupdf_version_tuple >= (1, 26, 1):
905+
wt = pymupdf.TOOLS.mupdf_warnings()
906+
if pymupdf.mupdf_version_tuple >= (1, 27, 0):
907+
assert text == expected_mupdf_1_27_0
908+
assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 120 times...'
909+
elif pymupdf.mupdf_version_tuple >= (1, 26, 1):
898910
assert text == expected_mupdf_1_26_1
911+
assert not wt
899912
else:
900913
print(f'No expected output for {pymupdf.mupdf_version_tuple=}')
914+
assert not wt
901915

902916

903917
def test_4503():

0 commit comments

Comments
 (0)