Skip to content

Commit c6572d6

Browse files
committed
Use live console in extract_sections
1 parent 8ee83f1 commit c6572d6

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

src/pdf2sqlite/extract_sections.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
from pypdf import PdfReader
22
from pypdf.generic._data_structures import Destination
3+
from rich.live import Live
34
from typing import Dict
45

5-
def extract_toc_and_sections(reader: PdfReader) -> Dict:
6+
def extract_toc_and_sections(reader: PdfReader, live: Live) -> Dict:
67
"""
78
Extract table of contents and corresponding sections from a single PDF.
89
If TOC is not available, fall back to heuristic section detection.
@@ -80,7 +81,7 @@ def process_outline(entries, level=1):
8081
if page_text:
8182
section_text += page_text + "\n\n"
8283
except Exception as e:
83-
print(f"Error extracting text from page {p}: {e}")
84+
live.console.print(f"Error extracting text from page {p}: {e}")
8485

8586
# Store the section
8687
section_id = f"{level}_{title.replace(' ', '_')[:30]}_{page_number}"
@@ -94,7 +95,7 @@ def process_outline(entries, level=1):
9495

9596
# If no TOC was found or no valid sections were extracted, use page-based sections
9697
if not result['has_toc'] or not result['sections']:
97-
print("No TOC found or no valid sections extracted. Using page-based sections.")
98+
live.console.print("No TOC found or no valid sections extracted. Using page-based sections.")
9899
result['has_toc'] = False
99100

100101
for page_num, page in enumerate(reader.pages):
@@ -110,6 +111,6 @@ def process_outline(entries, level=1):
110111
}
111112

112113
except Exception as e:
113-
print(f"Error extracting TOC: {e}")
114+
live.console.print(f"Error extracting TOC: {e}")
114115

115116
return result

0 commit comments

Comments
 (0)