-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathrun_analysis_once.py
More file actions
35 lines (26 loc) · 1.04 KB
/
Copy pathrun_analysis_once.py
File metadata and controls
35 lines (26 loc) · 1.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env python3
"""Initialize DeepGraph and process one paper end-to-end."""
from config import WORKSPACE_DIR, PDF_CACHE_DIR
from db.database import init_db
from db.evidence_graph import backfill_entity_resolutions
from db.taxonomy import seed_taxonomy, backfill_result_taxonomy
from orchestrator.pipeline import ingest_papers, process_single_paper
from db import database as db
def main() -> None:
    """Prepare the workspace and database, then run the pipeline on one paper.

    Steps, in order:

    1. Ensure ``WORKSPACE_DIR`` and ``PDF_CACHE_DIR`` exist.
    2. Call ``init_db``, ``seed_taxonomy``, ``backfill_result_taxonomy`` and
       ``backfill_entity_resolutions``.
    3. If the ``papers`` table has no rows, call ``ingest_papers`` with
       ``max_papers=1``.
    4. Pick the row with the greatest ``published_date`` and pass its ``id``
       to ``process_single_paper``, printing that call's return value.

    When no row can be selected, a notice is printed and nothing is processed.
    """
    # Both directories are created with the same options; an already-existing
    # directory is not an error because of ``exist_ok=True``.
    for directory in (WORKSPACE_DIR, PDF_CACHE_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    # Database setup calls; the original ordering is kept as-is.
    init_db()
    seed_taxonomy()
    backfill_result_taxonomy()
    backfill_entity_resolutions()

    # Only ingest when the papers table is empty.
    count_row = db.fetchone("SELECT COUNT(*) as c FROM papers")
    if count_row["c"] == 0:
        ingest_papers(max_papers=1)

    newest = db.fetchone(
        "SELECT id FROM papers ORDER BY published_date DESC LIMIT 1"
    )
    if newest:
        paper_id = newest["id"]
        print(f"processing {paper_id}", flush=True)
        outcome = process_single_paper(paper_id)
        print(outcome, flush=True)
    else:
        # The table is still empty even after the ingest attempt above.
        print("No papers available to process.", flush=True)
# Run the one-shot workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()