Skip to content

Commit

Permalink
Merge pull request #21 from manga109/for_custom_tag
Browse files: browse the repository at this point in the history
add try & except for custom tag
  • Loading branch information
ku21fan authored Oct 14, 2020
2 parents 0427aad + 11bc24b commit bf6a5ce
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 25 deletions.
17 changes: 11 additions & 6 deletions manga109api/manga109api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def __init__(self, root_dir):
with (self.root_dir / "books.txt").open("rt", encoding='utf-8') as f:
self.books = [line.rstrip() for line in f]


def get_annotation(self, book, annotation_type="annotations", separate_by_tag=True):
"""
Given a book title, return its annotations as a dict.
Expand Down Expand Up @@ -62,15 +61,18 @@ def formatted_dict(d):
input: {"index": "5", "title": "a"}
output: {"@index": 5, "@title": "a"}
"""
return dict([("@"+k, int_literals_to_int(v)) for k, v in d.items()])
return dict([("@" + k, int_literals_to_int(v)) for k, v in d.items()])

with (self.root_dir / annotation_type / (book + ".xml")).open("rt", encoding='utf-8') as f:
xml = ET.parse(f).getroot()
annotation = {"title" : xml.attrib["title"]}
annotation = {"title": xml.attrib["title"]}

characters = []
for t in xml.find("characters"):
characters.append(formatted_dict(t.attrib))
try:
for t in xml.find("characters"):
characters.append(formatted_dict(t.attrib))
except:
pass
annotation["character"] = characters

pages = []
Expand All @@ -90,7 +92,10 @@ def formatted_dict(d):
d["type"] = bb_xml.tag

if separate_by_tag:
page[bb_xml.tag].append(d)
try:
page[bb_xml.tag].append(d)
except:
page[bb_xml.tag] = [d]
else:
page["contents"].append(d)

Expand Down
20 changes: 20 additions & 0 deletions tests/data_dummy/annotations/TitleC_with_custom_tag.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<!-- Dummy annotation fixture: a book whose first page combines a regular body element with custom elements. -->
<book title="TitleC">
<characters>
<character id="00000017" name="山田太郎"/>
<character id="00000018" name="田中花子"/>
</characters>
<pages>
<page index="0" width="1654" height="1170">
<body id="00000018" xmin="848" ymin="236" xmax="985" ymax="614" character="00000017"/>
<!-- Custom elements with a character attribute. attr_num holds integer literals, attr_str holds letters only, attr_mix mixes digits with letters; attr_unk and attr_unk2 vary between numeric and non-numeric values. -->
<custom_tag id="00000019" attr_num="0" attr_str="aaa" attr_mix="123a" attr_unk='111' character="00000017"/>
<custom_tag id="00000020" attr_num="12" attr_str="bbb" attr_mix="456b" attr_unk='asdf' character="00000017"/>
<!-- The custom_tag2 elements additionally carry text content. -->
<custom_tag2 id="00000021" attr_num="345" attr_str="ccc" attr_mix="7cd" attr_unk='1a' attr_unk2='2b' character="00000018">dummy_text</custom_tag2>
<custom_tag2 id="00000022" attr_num="6789" attr_str="ddd" attr_mix="8ef" attr_unk='b' attr_unk2='33' character="00000018">dummy_text2</custom_tag2>
<!-- Custom element without a character attribute. -->
<custom_tag3 id="00000023" attr_num="234" attr_str="xxx" attr_mix="12fsd3a" attr_unk='13411'/>
</page>
<!-- Pages 1 and 2 are empty: they have no child elements. -->
<page index="1" width="1654" height="1170"/>
<page index="2" width="1654" height="1170"/>
</pages>
</book>
1 change: 1 addition & 0 deletions tests/data_dummy/books.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
TitleA
TitleB
TitleC_with_custom_tag
70 changes: 51 additions & 19 deletions tests/test_data_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,31 @@ def test_data_type():

assert isinstance(page["contents"], list)
for obj in page["contents"]:
assert isinstance(obj["@id"], str)
assert isinstance(obj["@xmin"], int)
assert isinstance(obj["@xmax"], int)
assert isinstance(obj["@ymin"], int)
assert isinstance(obj["@ymax"], int)
assert isinstance(obj["type"], str)
if obj["type"] in {"body", "face", "frame", "text"}:
assert isinstance(obj["@id"], str)
assert isinstance(obj["@xmin"], int)
assert isinstance(obj["@xmax"], int)
assert isinstance(obj["@ymin"], int)
assert isinstance(obj["@ymax"], int)
assert isinstance(obj["type"], str)

if obj["type"] == "text":
assert isinstance(obj["#text"], str)

# custom tag test
else:
assert isinstance(obj["@id"], str)
assert isinstance(obj["@attr_num"], int)
assert isinstance(obj["@attr_str"], str)
assert isinstance(obj["@attr_mix"], str)
assert isinstance(obj["type"], str)

for key in (obj.keys() - {"@id", "@attr_num", "@attr_str", "@attr_mix", "type"}):
assert isinstance(obj[key], (int, str))

if "#text" in obj.keys():
assert isinstance(obj["#text"], str)

if obj["type"] == "text":
assert isinstance(obj["#text"], str)

def test_data_type_separated():
manga109_root_dir = "tests/data_dummy/"
Expand All @@ -59,15 +75,31 @@ def test_data_type_separated():
assert isinstance(page["@width"], int)
assert isinstance(page["@height"], int)

for obj_type in {"body", "face", "frame", "text"}:
assert isinstance(page[obj_type], list)
for obj in page[obj_type]:
assert isinstance(obj["@id"], str)
assert isinstance(obj["@xmin"], int)
assert isinstance(obj["@xmax"], int)
assert isinstance(obj["@ymin"], int)
assert isinstance(obj["@ymax"], int)
assert obj["type"] == obj_type
for obj_type in page.keys():
if obj_type in {"body", "face", "frame", "text"}:
assert isinstance(page[obj_type], list)
for obj in page[obj_type]:
assert isinstance(obj["@id"], str)
assert isinstance(obj["@xmin"], int)
assert isinstance(obj["@xmax"], int)
assert isinstance(obj["@ymin"], int)
assert isinstance(obj["@ymax"], int)
assert obj["type"] == obj_type

if obj_type == "text":
assert isinstance(obj["#text"], str)
if obj_type == "text":
assert isinstance(obj["#text"], str)

# custom tag test
elif obj_type not in {"@index", "@width", "@height"}:
for obj in page[obj_type]:
assert isinstance(obj["@id"], str)
assert isinstance(obj["@attr_num"], int)
assert isinstance(obj["@attr_str"], str)
assert isinstance(obj["@attr_mix"], str)
assert obj["type"] == obj_type

for key in (obj.keys() - {"@id", "@attr_num", "@attr_str", "@attr_mix", "type"}):
assert isinstance(obj[key], (int, str))

if "#text" in obj.keys():
assert isinstance(obj["#text"], str)

0 comments on commit bf6a5ce

Please sign in to comment.