Skip to content

Commit

Permalink
TLDR-861 remove orient cell params
Browse files Browse the repository at this point in the history
  • Loading branch information
oksidgy committed Dec 13, 2024
1 parent ccf6d15 commit 2a3d0e6
Show file tree
Hide file tree
Showing 6 changed files with 4 additions and 37 deletions.
3 changes: 0 additions & 3 deletions dedoc/api/api_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ class QueryParameters:
# tables handling
need_pdf_table_analysis: str = Form("true", enum=["true", "false"], description="Enable table recognition for pdf")
table_type: str = Form("", description="Pipeline mode for table recognition")
orient_analysis_cells: str = Form("false", enum=["true", "false"], description="Enable analysis of rotated cells in table headers")
orient_cell_angle: str = Form("90", enum=["90", "270"],
description='Set cells orientation in table headers, "90" means 90 degrees counterclockwise cells rotation')

# pdf handling
pdf_with_text_layer: str = Form("auto_tabby", enum=["true", "false", "auto", "auto_tabby", "tabby"],
Expand Down
10 changes: 1 addition & 9 deletions dedoc/api/web/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -101,21 +101,13 @@ <h4>Attachments handling</h4>

<div class="parameters">
<h4>Tables handling </h4>
<details><summary>need_pdf_table_analysis, orient_analysis_cells, orient_cell_angle</summary>
<details><summary>need_pdf_table_analysis</summary>
<br>
<p>
<label>
<input type="hidden" name="need_pdf_table_analysis" value="false">
<input type="checkbox" name="need_pdf_table_analysis" value="true" checked> need_pdf_table_analysis</label>
</p>

<p>
<label><input name="orient_analysis_cells" type="checkbox" value="true"> orient_analysis_cells</label>
</p>

<p>
<label>orient_cell_angle <input name="orient_cell_angle" type="number" size="5" value="90"></label>
</p>
</details>
</div>

Expand Down
3 changes: 0 additions & 3 deletions dedoc/readers/pdf_reader/data_classes/tables/scantable.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ def check_on_cell_instance(self) -> bool:
return False
return True

def to_table(self) -> Table:
    """
    Return this object as a ``Table``.

    The previous body returned a bare ``super()`` proxy. Such a proxy only forwards attribute lookups
    to the parent class and is not itself an instance of ``Table``, so callers received an object
    that fails ``isinstance`` checks and cannot be handled like a regular table.

    NOTE(review): assumes the enclosing class derives from ``Table`` (the ``super()`` call and the
    return annotation suggest so) - confirm against the class definition.

    :return: the instance itself
    """
    return self

@staticmethod
def get_cells_text(cells: List[List[CellWithMeta]]) -> List[List[str]]:
    """
    Convert a matrix of cells into a matrix of their texts.

    :param cells: rows of cells; each cell must provide ``get_text()``
    :return: a list of rows with the same layout as ``cells``, holding the result of ``get_text()`` for every cell
    """
    text_rows = []
    for row in cells:
        # keep the row structure: one list of texts per input row
        text_rows.append([cell.get_text() for cell in row])
    return text_rows
Expand Down
4 changes: 2 additions & 2 deletions dedoc/readers/pdf_reader/pdf_base_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,12 @@ def read(self, file_path: str, parameters: Optional[dict] = None) -> Unstructure
)

lines, scan_tables, attachments, warnings, metadata = self._parse_document(file_path, params_for_parse)
tables = [scan_table.to_table() for scan_table in scan_tables]
# tables = [scan_table.to_table() for scan_table in scan_tables]

if params_for_parse.with_attachments and self.attachment_extractor.can_extract(file_path):
attachments += self.attachment_extractor.extract(file_path=file_path, parameters=parameters)

result = UnstructuredDocument(lines=lines, tables=tables, attachments=attachments, warnings=warnings, metadata=metadata)
result = UnstructuredDocument(lines=lines, tables=scan_tables, attachments=attachments, warnings=warnings, metadata=metadata)
return self._postprocess(result)

def _parse_document(self, path: str, parameters: ParametersForParseDoc) -> (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,7 @@ def __extract(self, path: str, parameters: dict, warnings: List[str], tmp_dir: s
mp_tables = self.table_recognizer.convert_to_multipages_tables(all_scan_tables, lines_with_meta=all_lines)
all_lines = self.linker.link_objects(lines=all_lines, tables=mp_tables, images=all_attached_images)

tables = [scan_table.to_table() for scan_table in mp_tables]

return all_lines, tables, all_attached_images, document_metadata
return all_lines, mp_tables, all_attached_images, document_metadata

def __save_gost_frame_boxes_to_json(self, first_page: Optional[int], last_page: Optional[int], page_count: int, path: str, tmp_dir: str) -> str:
from joblib import Parallel, delayed
Expand Down
17 changes: 0 additions & 17 deletions dedoc/utils/parameter_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,6 @@ def get_param_document_type(parameters: Optional[dict]) -> str:
return document_type


def get_param_orient_analysis_cells(parameters: Optional[dict]) -> bool:
    """
    Tell whether the "orient_analysis_cells" option is switched on.

    :param parameters: dictionary with parameters, None means that nothing was passed
    :return: True only if the string form of the stored value equals "true" (case-insensitive);
        False if ``parameters`` is None, the key is missing, or the value is anything else
    """
    if parameters is not None:
        raw_flag = parameters.get("orient_analysis_cells", "False")
        # str() lets both real booleans and their textual form ("true"/"True") be accepted
        return str(raw_flag).lower() == "true"
    return False


def get_param_with_attachments(parameters: Optional[dict]) -> bool:
if parameters is None:
return False
Expand Down Expand Up @@ -80,16 +73,6 @@ def get_param_need_binarization(parameters: Optional[dict]) -> bool:
return need_binarization


def get_param_orient_cell_angle(parameters: Optional[dict]) -> int:
    """
    Read the "orient_cell_angle" option as an integer number of degrees.

    The default of 90 is used when ``parameters`` is None, the key is missing, or the value is
    None, an empty string or a whitespace-only string.

    :param parameters: dictionary with parameters, None means that nothing was passed
    :return: the angle converted with ``int()``
    :raises ValueError: if a non-empty value cannot be parsed as an integer
    """
    default_angle = 90
    if parameters is None:
        return default_angle

    raw_angle = parameters.get("orient_cell_angle", default_angle)
    # an explicit None would otherwise become the string "None" and crash in int()
    if raw_angle is None:
        return default_angle

    # treat blank input the same way as the empty string: fall back to the default
    angle_text = str(raw_angle).strip()
    if not angle_text:
        return default_angle
    return int(angle_text)


def get_param_is_one_column_document(parameters: Optional[dict]) -> Optional[bool]:
if parameters is None:
return None
Expand Down

0 comments on commit 2a3d0e6

Please sign in to comment.