|
8 | 8 | from dedoc.structure_extractors.hierarchy_level_builders.law_builders.body_builder.abstract_body_hierarchy_level_builder import \
|
9 | 9 | AbstractBodyHierarchyLevelBuilder
|
10 | 10 | from dedoc.structure_extractors.hierarchy_level_builders.utils_reg import regexps_digits_with_dots
|
11 |
| -from dedoc.structure_extractors.patterns import BracketListPattern, BulletListPattern, DottedListPattern, LetterListPattern, TagHeaderPattern, TagListPattern |
| 11 | +from dedoc.structure_extractors.patterns import BracketListPattern, BulletListPattern, DottedListPattern, LetterListPattern, TagListPattern, TagPattern |
12 | 12 |
|
13 | 13 |
|
14 | 14 | class DiplomaBodyBuilder(AbstractHierarchyLevelBuilder):
|
15 | 15 | named_item_keywords = ("введение", "заключение", "библиографический список", "список литературы", "глава", "приложение", "приложения")
|
16 | 16 |
|
17 |
| - def __int__(self) -> None: |
| 17 | + def __init__(self) -> None: |
18 | 18 | super().__init__()
|
19 | 19 | self.digits_with_dots_regexp = regexps_digits_with_dots
|
20 | 20 | self.patterns = [
|
21 |
| - TagHeaderPattern(line_type=HierarchyLevel.header, level_1=1), |
22 |
| - TagListPattern(line_type=HierarchyLevel.list_item, level_1=2), |
23 |
| - DottedListPattern(line_type=HierarchyLevel.list_item, level_1=2), |
24 |
| - BracketListPattern(line_type=HierarchyLevel.list_item, level_1=3, level_2=1), |
25 |
| - LetterListPattern(line_type=HierarchyLevel.list_item, level_1=4, level_2=1), |
26 |
| - BulletListPattern(line_type=HierarchyLevel.list_item, level_1=5, level_2=1), |
| 21 | + TagListPattern(line_type=HierarchyLevel.list_item, level_1=2, can_be_multiline=False), |
| 22 | + DottedListPattern(line_type=HierarchyLevel.list_item, level_1=2, can_be_multiline=False), |
| 23 | + BracketListPattern(line_type=HierarchyLevel.list_item, level_1=3, level_2=1, can_be_multiline=False), |
| 24 | + LetterListPattern(line_type=HierarchyLevel.list_item, level_1=4, level_2=1, can_be_multiline=False), |
| 25 | + BulletListPattern(line_type=HierarchyLevel.list_item, level_1=5, level_2=1, can_be_multiline=False), |
| 26 | + TagPattern(line_type=HierarchyLevel.raw_text) |
27 | 27 | ]
|
28 | 28 |
|
29 | 29 | def get_lines_with_hierarchy(self, lines_with_labels: List[Tuple[LineWithMeta, str]], init_hl_depth: int) -> List[LineWithMeta]:
|
@@ -66,10 +66,10 @@ def __handle_named_item(self, init_hl_depth: int, line: LineWithMeta, prediction
|
66 | 66 | if text.startswith(self.named_item_keywords):
|
67 | 67 | hierarchy_level = HierarchyLevel(init_hl_depth, 0, True, prediction)
|
68 | 68 | elif item_depth == -1:
|
69 |
| - if previous_named_item_line and previous_named_item_line.metadata.hierarchy_level.line_type == "named_item": |
| 69 | + if previous_named_item_line: |
70 | 70 | hierarchy_level = previous_named_item_line.metadata.hierarchy_level
|
71 | 71 | else:
|
72 |
| - hierarchy_level = HierarchyLevel(init_hl_depth + 1, 0, True, prediction) |
| 72 | + hierarchy_level = HierarchyLevel(init_hl_depth, 0, True, prediction) |
73 | 73 | else:
|
74 | 74 | hierarchy_level = HierarchyLevel(init_hl_depth, item_depth - 1, True, prediction)
|
75 | 75 | line.metadata.hierarchy_level = hierarchy_level
|
|
0 commit comments