Skip to content

Commit a208b91

Browse files
committed
Improve internal ordering of KeyValue set elements
1 parent 707ba03 commit a208b91

File tree

3 files changed: 5 additions and 10 deletions

textractor/data/text_linearization_config.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -100,9 +100,7 @@ class TextLinearizationConfig:
100100

101101
text_suffix: str = "" #: Suffix for text layout elements
102102

103-
key_value_layout_prefix: str = (
104-
os.linesep * 2
105-
) #: Prefix for key_value layout elements (not for individual key-value elements)
103+
key_value_layout_prefix: str = "" #: Prefix for key_value layout elements (not for individual key-value elements)
106104

107105
key_value_layout_suffix: str = "" #: Suffix for key_value layout elements (not for individual key-value elements)
108106

textractor/parsers/response_parser.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -768,8 +768,6 @@ def _create_layout_objects(
768768
[line_by_id[line_id] for line_id in relationship["Ids"] if line_id in line_by_id]
769769
)
770770
else:
771-
if block["BlockType"] in (LAYOUT_KEY_VALUE):
772-
continue
773771
leaf_layouts.append(
774772
Layout(
775773
entity_id=block["Id"],

textractor/utils/text_utils.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -62,10 +62,7 @@ def should_group(line, group):
6262

6363
current_group = [sorted_elements[0]]
6464
for element in sorted_elements[1:]:
65-
if (
66-
"Table" in element.__class__.__name__
67-
or "KeyValue" in element.__class__.__name__
68-
):
65+
if "Table" in element.__class__.__name__:
6966
if current_group:
7067
grouped_elements.append(current_group)
7168
grouped_elements.append([element])
@@ -184,7 +181,7 @@ def part_of_same_paragraph(element1, element2, config=config):
184181
result += config.same_paragraph_separator + text_element
185182
words_output += words_element
186183
else:
187-
result += config.layout_element_separator + text_element
184+
result += config.same_layout_element_separator + text_element
188185
words_output += words_element
189186

190187
# FIXME: Seems like this would be mostly needed
@@ -193,6 +190,8 @@ def part_of_same_paragraph(element1, element2, config=config):
193190

194191
if is_layout_table:
195192
result += config.table_row_separator
193+
else:
194+
result += config.same_layout_element_separator
196195

197196
# We make a dummy line element with the bbox from the previous group
198197
prev_element = Line(

0 commit comments

Comments (0)