Skip to content

Commit 8443c20

Browse files
committed
Bug fix in assign_heading_levels
1 parent 8d8e513 commit 8443c20

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

text_extractor/parser/pdfact_parser.py

+5 -2
Original file line number | Diff line number | Diff line change
@@ -361,10 +361,13 @@ def assign_heading_levels(heading_styles: List[Dict[str, Any]]) -> List[Dict[str
361361
for i in range(len(existing_sizes) - 1):
362362
if existing_sizes[i + 1] > size > existing_sizes[i]:
363363
mid_point = (existing_sizes[i] + existing_sizes[i + 1]) / 2
364+
# Ensure we select a font size for which a level is already assigned
365+
larger_font = next(f for f in levels_assigned if f[1] == existing_sizes[i + 1])
366+
smaller_font = next(f for f in levels_assigned if f[1] == existing_sizes[i])
364367
if size >= mid_point:
365-
level = levels_assigned[(main_font, existing_sizes[i + 1])]
368+
level = levels_assigned[larger_font]
366369
else:
367-
level = levels_assigned[(main_font, existing_sizes[i])]
370+
level = levels_assigned[smaller_font]
368371
break
369372

370373
levels_assigned[(font['font_name'], font['font_size'])] = level

0 commit comments

Comments (0)