-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path.env.example
More file actions
55 lines (48 loc) · 1.51 KB
/
.env.example
File metadata and controls
55 lines (48 loc) · 1.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# .env.example — extractor pipeline
# LiteLLM models
# Values below are written in "provider/model" form (here with the "openai/" prefix).
# Default model identifier; presumably used for general (text) LLM calls — confirm against the consumer.
LITELLM_DEFAULT_MODEL=openai/gpt-5
# Model identifier for the VLM setting; presumably the vision-language model applied to images — confirm.
LITELLM_VLM_MODEL=openai/gpt-4o
# Presumably an upper bound on simultaneous LLM requests — verify how the pipeline enforces it.
MAX_CONCURRENT_LLM_CALLS=3
# Stage 05 — Table extraction (Camelot/pandas)
# NOTE(review): the descriptions in this section are inferred from the variable names;
# confirm each one against the Stage 05 code before relying on it.
# Padding applied around a table region, expressed as ratios (presumably of the region's height/width).
TABLE_VERTICAL_PADDING_RATIO=0.30
TABLE_HORIZONTAL_PADDING_RATIO=0.07
# Presumably the resolution (dots per inch) used when rendering pages for table extraction.
TABLE_EXTRACTION_DPI=200
# Stitching: presumably the minimum horizontal overlap (IoU) required to join table fragments,
# and whether a fragment on the following page may be joined — TODO confirm.
TABLE_STITCH_MIN_HORIZONTAL_IOU=0.2
TABLE_STITCH_ALLOW_NEXT_PAGE=true
# Filtering: presumably tables below this cell density or row count are discarded — TODO confirm.
TABLE_FILTER_MIN_DENSITY=0.15
TABLE_FILTER_MIN_ROWS=3
# Header handling: match thresholds (0..1 in this template) for duplicate/repeated headers,
# plus a switch for header coalescing. Exact matching semantics are not visible here.
TABLE_HEADER_DUP_MIN_MATCH=0.5
TABLE_HEADER_COALESCE_ENABLED=true
TABLE_HEADER_REPEAT_MIN_MATCH=0.6
# Multi-page merge: on/off switch and a minimum IoU threshold.
# NOTE(review): how this relates to the TABLE_STITCH_* settings above is not evident from this file.
TABLE_MULTI_PAGE_MERGE_ENABLED=true
TABLE_MULTI_PAGE_MERGE_MIN_IOU=0.3
# Stage 06 — Figure extraction
# Vertical padding applied to figure regions.
# NOTE(review): unlike the TABLE_*_PADDING_RATIO settings this name has no _RATIO suffix,
# so the unit (ratio vs. absolute) is not evident from this file — confirm against Stage 06.
FIGURE_VERTICAL_PADDING=0.10
# Stage 07 — Reflow + hybrid search
# Presumably the number of top-ranked results kept per semantic annotation lookup — TODO confirm.
SEMANTIC_ANNOTATION_TOP_K=5
# Switch for the ArangoDB-backed hybrid search path (per the name; verify what is skipped when false).
ARANGO_ENABLE_HYBRID=true
# ArangoDB connection settings. The template points at a local instance.
ARANGO_HOST=localhost
ARANGO_PORT=8529
ARANGO_USER=root
# Placeholder value — replace it in your local .env; do not reuse "changeme" on a real server.
ARANGO_PASSWORD=changeme
ARANGO_DATABASE=pdf_knowledge_base
# Both collection names are left empty in this template.
# NOTE(review): whether the pipeline falls back to a default name or fails on an empty value
# is not visible here — confirm and document the expected values.
ARANGO_ANNOTATIONS_COLLECTION=
ARANGO_EDGES_COLLECTION=
# Stage 10/11 — Embeddings/graph
# Embedding model identifier; the value looks like a sentence-transformers model id.
# NOTE(review): presumably the vector dimension downstream depends on this model — confirm before changing it.
EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# Notes:
# - Copy this file to .env and customize the values for your environment.
# - Never commit your .env: it holds credentials such as ARANGO_PASSWORD.
# - Stage 05 requires camelot-py[cv] and Ghostscript to be installed.
# Stage 11 — Graph/FAISS relationship building
# NOTE(review): the descriptions in this section are inferred from the variable names;
# confirm each one against the Stage 11 code.
# Presumably the number of nearest neighbours considered per object and the minimum
# similarity score for creating a relationship.
GRAPH_K_NEIGHBORS=10
GRAPH_SIMILARITY_THRESHOLD=0.55
# Weights for the semantic and hierarchy components; they sum to 1.0 in this template.
# Whether the pipeline requires them to sum to 1.0 is not visible here.
GRAPH_SEMANTIC_WEIGHT=0.7
GRAPH_HIERARCHY_WEIGHT=0.3
# Names of the graph and of its edge and vertex collections.
GRAPH_EDGE_COLLECTION=pdf_relationships
GRAPH_NAME=pdf_knowledge_graph
GRAPH_VERTEX_COLLECTION=pdf_objects
# Rationale generation: on/off switch, model identifier, and concurrency / token limits.
# NOTE(review): GRAPH_RATIONALE_CONCURRENCY (8) is higher than MAX_CONCURRENT_LLM_CALLS (3)
# in this template; which limit applies to rationale calls is not evident — confirm.
GRAPH_ENABLE_RATIONALES=true
GRAPH_RATIONALE_MODEL=openai/gpt-5-mini
GRAPH_RATIONALE_CONCURRENCY=8
GRAPH_RATIONALE_MAX_TOKENS=256