Patch summary (free text ahead of the first "diff --git" header is skipped by git apply / patch):
  * pygexml/page.py: declare the ID alias as "ID: TypeAlias = str" instead of the
    "type ID = str" statement, and import TypeAlias from typing.
  * test/test_page.py: add to_dict()/from_dict() round-trip tests for TextLine,
    TextRegion and Page.
NOTE(review): the alias change and the new round-trip tests presumably belong together
(serialization of ID-typed fields) - confirm against the commit message.
NOTE(review): line breaks, blank context lines and indentation were reconstructed from the
hunk headers' line counts - verify against blobs e95c57b..116b1ce / c5f4fd9..23745e0.

diff --git a/pygexml/page.py b/pygexml/page.py
index e95c57b..116b1ce 100644
--- a/pygexml/page.py
+++ b/pygexml/page.py
@@ -3,7 +3,7 @@
 from warnings import warn
 from dataclasses import dataclass
 from dataclasses_json import DataClassJsonMixin
-from typing import ClassVar
+from typing import ClassVar, TypeAlias
 from collections.abc import Iterable
 from lxml import etree
 from lxml.etree import _Element as Element, QName
@@ -83,7 +83,7 @@ def __str__(self) -> str:
         return " ".join(str(p) for p in self.polygon.points)
 
 
-type ID = str
+ID: TypeAlias = str
 
 
 @dataclass
diff --git a/test/test_page.py b/test/test_page.py
index c5f4fd9..23745e0 100644
--- a/test/test_page.py
+++ b/test/test_page.py
@@ -208,6 +208,11 @@ def test_textline_words(tl: TextLine) -> None:
     assert tl.words() == tl.text.split()
 
 
+def test_textline_serialization_roundtrip() -> None:
+    tl = TextLine(id="tl-id", coords=Coords.parse("1,2 3,4"), text="foo bar")
+    assert TextLine.from_dict(tl.to_dict()) == tl
+
+
 ####### Tests for TextRegion ###############
 
 
@@ -354,6 +359,17 @@ def test_textregion_all_arbitrary_text_and_words(region: TextRegion) -> None:
     ]
 
 
+def test_textregion_serialization_roundtrip() -> None:
+    tr = TextRegion(
+        id="tr-id",
+        coords=Coords.parse("1,2 3,4"),
+        textlines={
+            "tl-1": TextLine(id="tl-1", coords=Coords.parse("1,2 3,4"), text="foo")
+        },
+    )
+    assert TextRegion.from_dict(tr.to_dict()) == tr
+
+
 ############### Tests for Page ####################
 
 
@@ -787,3 +803,21 @@ def test_page_all_arbitrary_text_and_words(page: Page) -> None:
     assert list(page.all_words()) == [
         w for r in page.regions.values() for w in r.all_words()
     ]
+
+
+def test_page_serialization_roundtrip() -> None:
+    pa = Page(
+        image_filename="a.jpg",
+        regions={
+            "tr-1": TextRegion(
+                id="tr-1",
+                coords=Coords.parse("1,2 3,4"),
+                textlines={
+                    "tl-1": TextLine(
+                        id="tl-1", coords=Coords.parse("1,2 3,4"), text="foo"
+                    )
+                },
+            )
+        },
+    )
+    assert Page.from_dict(pa.to_dict()) == pa