|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +from pathlib import Path |
| 4 | + |
| 5 | +import pytest |
| 6 | + |
| 7 | +from pdf2sqlite.mcp_server.db import Database |
| 8 | + |
| 9 | + |
| 10 | +TEST_DB = Path("tests/test.db").resolve() |
| 11 | + |
| 12 | + |
@pytest.mark.skipif(not TEST_DB.exists(), reason="tests/test.db missing")
def test_db_pdf_counts_pages_and_blobs():
    """Follow the chain pdf listing -> first page -> page blob.

    The first PDF returned by ``get_pdf_counts`` is used to look up a page
    via ``get_pdf_pages``; that page's blob is then fetched twice, once by
    (pdf id, page number) and once by page id, and both results must begin
    with the ``%PDF`` magic bytes.
    """
    database = Database(TEST_DB)

    # NOTE(review): the numeric arguments look like (limit, offset) -- confirm
    # against the Database API.
    pdf_rows = asyncio_run(database.get_pdf_counts(10, 0))
    assert isinstance(pdf_rows, list)
    assert pdf_rows, "expected at least one pdf in tests/test.db"

    pdf_id = int(pdf_rows[0]["id"])  # type: ignore[index]

    page_rows = asyncio_run(database.get_pdf_pages(pdf_id, 1, 0))
    assert page_rows, "expected at least one page for first pdf"

    first_page = page_rows[0]
    page_id = int(first_page["id"])  # type: ignore[index]
    page_number = int(first_page["page_number"])  # type: ignore[index]

    # Lookup keyed by (pdf id, page number).
    blob_by_number = asyncio_run(database.get_page_blob(pdf_id, page_number))
    assert isinstance(blob_by_number, (bytes, bytearray))
    assert blob_by_number.startswith(b"%PDF"), "page blob should be a PDF"

    # Lookup keyed by the page's own id.
    blob_by_id = asyncio_run(database.get_page_blob_by_id(page_id))
    assert blob_by_id.startswith(b"%PDF")
| 36 | + |
| 37 | + |
@pytest.mark.skipif(not TEST_DB.exists(), reason="tests/test.db missing")
def test_db_schema_nonempty():
    """Check that the reported schema has a CREATE TABLE and mentions ``pdfs``."""
    database = Database(TEST_DB)

    schema_statements = asyncio_run(database.get_schema())

    # At least one statement must define a table ...
    assert any("CREATE TABLE" in statement for statement in schema_statements)
    # ... and at least one statement must reference "pdfs".
    assert any("pdfs" in statement for statement in schema_statements)
| 45 | + |
| 46 | + |
@pytest.mark.skipif(not TEST_DB.exists(), reason="tests/test.db missing")
def test_db_page_assets_optional():
    """Exercise the figure/table accessors for the first page, if any exist.

    The test skips itself when the database holds no PDFs or the first PDF
    has no pages. Figures and tables are optional: their listings only have
    to be lists, and the blob accessors are checked only when a listing is
    non-empty.
    """
    database = Database(TEST_DB)

    pdf_rows = asyncio_run(database.get_pdf_counts(1, 0))
    if not pdf_rows:
        pytest.skip("no pdfs present in test db")
    first_pdf_id = int(pdf_rows[0]["id"])  # type: ignore[index]

    page_rows = asyncio_run(database.get_pdf_pages(first_pdf_id, 1, 0))
    if not page_rows:
        pytest.skip("no pages present in test db")
    first_page_id = int(page_rows[0]["id"])  # type: ignore[index]

    page_figures = asyncio_run(database.get_figures_for_page(first_page_id))
    page_tables = asyncio_run(database.get_tables_for_page(first_page_id))

    assert isinstance(page_figures, list)
    assert isinstance(page_tables, list)

    if page_figures:
        figure_id = int(page_figures[0]["id"])  # type: ignore[index]
        figure_blob, mime_type = asyncio_run(database.get_figure_blob(figure_id))
        assert isinstance(figure_blob, (bytes, bytearray))
        # The MIME type may be missing, but when present it must be a string.
        assert mime_type is None or isinstance(mime_type, str)

    if page_tables:
        table_id = int(page_tables[0]["id"])  # type: ignore[index]
        table_image = asyncio_run(database.get_table_image_blob(table_id))
        assert isinstance(table_image, (bytes, bytearray))
| 77 | + |
| 78 | + |
| 79 | +# helpers |
| 80 | +import asyncio |
| 81 | + |
| 82 | + |
def asyncio_run(awaitable):
    """Run *awaitable* to completion on a private event loop and return its result.

    ``asyncio.get_event_loop()`` is deliberately avoided: it raises
    ``RuntimeError`` when the thread has no current loop (for example after
    an earlier ``asyncio.run()`` call, and always on Python 3.14+), and it
    emits a ``DeprecationWarning`` on 3.12/3.13 when it has to create a loop
    implicitly.

    The helper owns one loop, created lazily and cached on the function
    object, so successive calls share a loop just as they did before.

    Args:
        awaitable: Any object accepted by ``loop.run_until_complete``
            (coroutine, Task or Future).

    Returns:
        Whatever the awaitable evaluates to.

    Raises:
        Any exception raised by the awaitable is propagated unchanged.
    """
    loop = getattr(asyncio_run, "_loop", None)
    # Create the loop on first use, and again if something closed it.
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio_run._loop = loop
    return loop.run_until_complete(awaitable)
0 commit comments