Skip to content

Commit 85c033f

Browse files
committed
Expand test suite
1 parent dbab44f commit 85c033f

File tree

7 files changed

+321
-8
lines changed

7 files changed

+321
-8
lines changed

tests/test.db

7.23 MB
Binary file not shown.

tests/test_mcp_config.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
from __future__ import annotations
2+
3+
import os
4+
from pathlib import Path
5+
6+
import pytest
7+
8+
from pdf2sqlite.mcp_server.config import ServerConfig
9+
10+
11+
def test_from_cli_reads_env_when_database_missing(tmp_path, monkeypatch):
    """``from_cli(database=None)`` takes the path from ``PDF2SQLITE_MCP_DATABASE``."""
    db_file = tmp_path / "t.db"
    # Stub file holding nothing but the SQLite header magic string.
    db_file.write_bytes(b"SQLite format 3\0")
    monkeypatch.setenv("PDF2SQLITE_MCP_DATABASE", str(db_file))

    config = ServerConfig.from_cli(database=None)

    # The stored path is compared against the resolved form of the stub path.
    assert config.database_path == db_file.resolve()
19+
20+
21+
def test_from_cli_validates_limits(tmp_path, monkeypatch):
    """Check limit validation in ``from_cli`` and in ``clamp_limit``."""
    db_file = tmp_path / "t.db"
    db_file.write_bytes(b"SQLite format 3\0")
    db_arg = str(db_file)

    # A consistent default/max pair is accepted; None clamps to the default.
    config = ServerConfig.from_cli(db_arg, default_limit=10, max_limit=20)
    assert config.clamp_limit(None) == 10

    # A default above the maximum is refused when the config is built.
    with pytest.raises(ValueError):
        ServerConfig.from_cli(db_arg, default_limit=30, max_limit=20)

    # Zero falls back to the default; a request beyond the maximum raises.
    assert config.clamp_limit(0) == 10
    with pytest.raises(ValueError):
        config.clamp_limit(9999)
37+
38+
39+
def test_from_cli_env_overrides(tmp_path, monkeypatch):
    """Numeric settings left off the call are read from the environment."""
    db_file = tmp_path / "t.db"
    db_file.write_bytes(b"SQLite format 3\0")

    env_overrides = {
        "PDF2SQLITE_MCP_MAX_BLOB_BYTES": "1024",
        "PDF2SQLITE_MCP_DEFAULT_LIMIT": "5",
        "PDF2SQLITE_MCP_MAX_LIMIT": "6",
    }
    for variable, raw_value in env_overrides.items():
        monkeypatch.setenv(variable, raw_value)

    config = ServerConfig.from_cli(str(db_file))

    # Each string value is expected back as an int on the config object.
    assert config.max_blob_bytes == 1024
    assert config.default_limit == 5
    assert config.max_limit == 6
52+
53+
54+
def test_from_cli_rejects_missing_database(monkeypatch):
    """With neither an argument nor the env var, ``from_cli`` raises ``ValueError``."""
    # raising=False: the variable may already be absent from the environment.
    monkeypatch.delenv("PDF2SQLITE_MCP_DATABASE", raising=False)

    with pytest.raises(ValueError):
        ServerConfig.from_cli(database=None)

tests/test_mcp_db.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
from __future__ import annotations
2+
3+
from pathlib import Path
4+
5+
import pytest
6+
7+
from pdf2sqlite.mcp_server.db import Database
8+
9+
10+
# Anchor the fixture database to this module's directory instead of the
# process working directory; a CWD-relative path makes every skipif guard in
# this module skip silently when pytest is started outside the repo root.
TEST_DB = (Path(__file__).parent / "test.db").resolve()
11+
12+
13+
@pytest.mark.skipif(not TEST_DB.exists(), reason="tests/test.db missing")
def test_db_pdf_counts_pages_and_blobs():
    """Walk from the PDF listing to one page and fetch its blob both ways."""
    database = Database(TEST_DB)

    # Arguments are presumably (limit, offset) -- confirm against Database.
    pdf_rows = asyncio_run(database.get_pdf_counts(10, 0))
    assert isinstance(pdf_rows, list)
    assert pdf_rows, "expected at least one pdf in tests/test.db"

    pdf_id = int(pdf_rows[0]["id"])  # type: ignore[index]

    page_rows = asyncio_run(database.get_pdf_pages(pdf_id, 1, 0))
    assert page_rows, "expected at least one page for first pdf"

    first_page = page_rows[0]
    page_id = int(first_page["id"])  # type: ignore[index]
    page_number = int(first_page["page_number"])  # type: ignore[index]

    # Lookup keyed by (pdf id, page number).
    blob_by_number = asyncio_run(database.get_page_blob(pdf_id, page_number))
    assert isinstance(blob_by_number, (bytes, bytearray))
    assert blob_by_number.startswith(b"%PDF"), "page blob should be a PDF"

    # Lookup keyed by the page row's own id.
    blob_by_id = asyncio_run(database.get_page_blob_by_id(page_id))
    assert blob_by_id.startswith(b"%PDF")
36+
37+
38+
@pytest.mark.skipif(not TEST_DB.exists(), reason="tests/test.db missing")
def test_db_schema_nonempty():
    """The schema dump holds a ``CREATE TABLE`` statement and mentions ``pdfs``."""
    database = Database(TEST_DB)

    statements = asyncio_run(database.get_schema())

    # Substring checks only: some statement is DDL, some statement names "pdfs".
    assert any("CREATE TABLE" in statement for statement in statements)
    assert any("pdfs" in statement for statement in statements)
45+
46+
47+
@pytest.mark.skipif(not TEST_DB.exists(), reason="tests/test.db missing")
def test_db_page_assets_optional():
    """Figures and tables are optional; verify whichever the first page has."""
    database = Database(TEST_DB)

    pdf_rows = asyncio_run(database.get_pdf_counts(1, 0))
    if not pdf_rows:
        pytest.skip("no pdfs present in test db")
    pdf_id = int(pdf_rows[0]["id"])  # type: ignore[index]

    page_rows = asyncio_run(database.get_pdf_pages(pdf_id, 1, 0))
    if not page_rows:
        pytest.skip("no pages present in test db")
    page_id = int(page_rows[0]["id"])  # type: ignore[index]

    figure_rows = asyncio_run(database.get_figures_for_page(page_id))
    table_rows = asyncio_run(database.get_tables_for_page(page_id))

    # Both lookups must return lists even when the page has no such assets.
    assert isinstance(figure_rows, list)
    assert isinstance(table_rows, list)

    if figure_rows:
        figure_id = int(figure_rows[0]["id"])  # type: ignore[index]
        figure_blob, mime_type = asyncio_run(database.get_figure_blob(figure_id))
        assert isinstance(figure_blob, (bytes, bytearray))
        # The MIME type may be missing; when present it must be a string.
        assert mime_type is None or isinstance(mime_type, str)

    if table_rows:
        table_id = int(table_rows[0]["id"])  # type: ignore[index]
        table_image = asyncio_run(database.get_table_image_blob(table_id))
        assert isinstance(table_image, (bytes, bytearray))
77+
78+
79+
# helpers
80+
import asyncio
81+
82+
83+
def asyncio_run(awaitable):
    """Run *awaitable* to completion and return its result.

    A private event loop is created on first use and reused by later calls,
    so consecutive calls in one test share a loop. The helper avoids
    ``asyncio.get_event_loop()``: outside a running loop that call raises
    ``RuntimeError`` once something in the process has used ``asyncio.run()``
    (which clears the current loop), and it is deprecated on recent Python
    versions when no loop has been set.
    """
    loop = getattr(asyncio_run, "_loop", None)
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        # Cached on the function object so the module needs no extra global.
        asyncio_run._loop = loop
    return loop.run_until_complete(awaitable)

tests/test_mcp_resources.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,9 @@
11
from __future__ import annotations
22

33
from pathlib import Path
4-
import sys
54

65
import pytest
76

8-
ROOT = Path(__file__).resolve().parents[1]
9-
SITE_PACKAGES = ROOT / ".venv" / "lib" / "python3.13" / "site-packages"
10-
if SITE_PACKAGES.exists():
11-
sys.path.append(str(SITE_PACKAGES))
12-
13-
pytest.importorskip("mcp.server.fastmcp")
14-
157
from pdf2sqlite.mcp_server.config import ServerConfig
168
from pdf2sqlite.mcp_server.server import build_server
179

tests/test_mcp_resources_unit.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
from __future__ import annotations
2+
3+
from pathlib import Path
4+
5+
import pytest
6+
7+
from pdf2sqlite.mcp_server.config import ServerConfig
8+
from pdf2sqlite.mcp_server.db import Database
9+
from pdf2sqlite.mcp_server.resources import (
10+
ResourceService,
11+
ResourceTooLargeError,
12+
build_page_payload,
13+
)
14+
from mcp.server.fastmcp.utilities.types import Image as MCPImage
15+
from pdf2sqlite.mcp_server.uri import PdfResource
16+
17+
18+
# Anchor the fixture database to this module's directory instead of the
# process working directory; a CWD-relative path makes the skipif guard in
# this module skip silently when pytest is started outside the repo root.
TEST_DB = (Path(__file__).parent / "test.db").resolve()
19+
20+
21+
@pytest.mark.skipif(not TEST_DB.exists(), reason="tests/test.db missing")
def test_resource_service_pdf_blobs_and_limits():
    """Exercise the blob size limit, PDF embedding and image wrapping.

    Skips (rather than erroring) when the fixture database has no PDFs or the
    first PDF has no pages.
    """
    # tiny limit to force overflow on any real PDF blob
    cfg = ServerConfig(
        database_path=TEST_DB,
        max_blob_bytes=1,
        default_limit=10,
        max_limit=10,
    )
    db = Database(TEST_DB)
    svc = ResourceService(database=db, config=cfg)

    # pick first pdf id
    pdfs = asyncio_run(db.get_pdf_counts(1, 0))
    if not pdfs:
        pytest.skip("no pdfs present in test db")
    pdf_id = int(pdfs[0]["id"])  # type: ignore[index]

    # Guard the page lookup the same way as the pdf lookup: an empty result
    # should skip the test, not fail with IndexError on ``[0]``.
    pages = asyncio_run(db.get_pdf_pages(pdf_id, 1, 0))
    if not pages:
        pytest.skip("no pages present in test db")
    page_num = int(pages[0]["page_number"])  # type: ignore[index]

    # page blob should exceed limit and raise
    with pytest.raises(ResourceTooLargeError):
        asyncio_run(svc.load_pdf_blob(PdfResource(pdf_id, page_num)))

    # bump limit so full-pdf works and returns a valid PDF
    # NOTE(review): assumes ServerConfig is mutable and that the service reads
    # the limit from this same object at call time -- confirm.
    cfg.max_blob_bytes = 10_000_000
    data = asyncio_run(svc.load_pdf_blob(PdfResource(pdf_id)))
    assert data.startswith(b"%PDF")

    embed = asyncio_run(svc.make_embedded_pdf(
        f"pdf2sqlite://pdf/{pdf_id}", data
    ))
    assert embed.resource.mimeType == "application/pdf"
    assert embed.resource.meta["size"] == len(data)

    # as_image returns an MCP Image object
    img = svc.as_image(b"x", "image/png")
    assert isinstance(img, MCPImage)
60+
61+
62+
def test_build_page_payload_validates_fields():
    """``build_page_payload`` raises ``ValueError`` for malformed page rows."""
    malformed_rows = (
        # the "id" key is absent
        {"pdf_id": 1, "page_number": 1},
        # page_number is not an integer
        {"id": 1, "pdf_id": 1, "page_number": "x"},
    )
    for row in malformed_rows:
        with pytest.raises(ValueError):
            build_page_payload(row)
69+
70+
71+
# helpers
72+
import asyncio
73+
74+
75+
def asyncio_run(awaitable):
    """Run *awaitable* to completion and return its result.

    A private event loop is created on first use and reused by later calls,
    so consecutive calls in one test share a loop. The helper avoids
    ``asyncio.get_event_loop()``: outside a running loop that call raises
    ``RuntimeError`` once something in the process has used ``asyncio.run()``
    (which clears the current loop), and it is deprecated on recent Python
    versions when no loop has been set.
    """
    loop = getattr(asyncio_run, "_loop", None)
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        # Cached on the function object so the module needs no extra global.
        asyncio_run._loop = loop
    return loop.run_until_complete(awaitable)

tests/test_mcp_tools_unit.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from __future__ import annotations
2+
3+
import pytest
4+
5+
from pdf2sqlite.mcp_server.tools import _pdf_summary_block
6+
from pdf2sqlite.mcp_server.uri import PdfResource
7+
8+
9+
def test_pdf_summary_block_for_full_pdf():
    """The summary for a whole-document resource names the PDF and its size."""
    resource = PdfResource(1)
    summary = _pdf_summary_block("pdf2sqlite://pdf/1", 1234, resource)

    assert "PDF 1" in summary.text
    assert "Bytes: 1234" in summary.text
17+
18+
19+
def test_pdf_summary_block_for_single_page():
    """The summary for a single-page resource names the PDF, page and size."""
    resource = PdfResource(2, 5)
    summary = _pdf_summary_block("pdf2sqlite://pdf/2/page/5", 77, resource)

    assert "PDF 2 page 5" in summary.text
    assert "Bytes: 77" in summary.text

tests/test_mcp_uri.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
from __future__ import annotations
2+
3+
import pytest
4+
5+
from pdf2sqlite.mcp_server.uri import (
6+
PdfResource,
7+
FigureResource,
8+
TableImageResource,
9+
build_pdf_uri,
10+
build_pdf_page_uri,
11+
build_figure_uri,
12+
build_table_image_uri,
13+
parse_resource_uri,
14+
)
15+
16+
17+
def test_parse_pdf_uri_full_document():
    """A ``pdf/<id>`` URI parses to a page-less ``PdfResource`` and round-trips."""
    uri = "pdf2sqlite://pdf/123"

    parsed = parse_resource_uri(uri)

    assert isinstance(parsed, PdfResource)
    assert parsed.pdf_id == 123
    # No page component in the URI, so no page number on the descriptor.
    assert parsed.page_number is None
    # The builder must produce the exact string that was parsed.
    assert build_pdf_uri(123) == uri
24+
25+
26+
def test_parse_pdf_uri_single_page():
    """A ``pdf/<id>/page/<n>`` URI parses to a ``PdfResource`` carrying the page."""
    uri = "pdf2sqlite://pdf/456/page/7"

    parsed = parse_resource_uri(uri)

    assert isinstance(parsed, PdfResource)
    assert parsed.pdf_id == 456
    assert parsed.page_number == 7
    # The builder must produce the exact string that was parsed.
    assert build_pdf_page_uri(456, 7) == uri
33+
34+
35+
def test_parse_figure_uri():
    """A ``figure/<id>`` URI parses to a ``FigureResource`` and round-trips."""
    uri = "pdf2sqlite://figure/999"

    parsed = parse_resource_uri(uri)

    assert isinstance(parsed, FigureResource)
    assert parsed.figure_id == 999
    assert build_figure_uri(999) == uri
41+
42+
43+
def test_parse_table_image_uri():
    """A ``table-image/<id>`` URI parses to a ``TableImageResource`` and round-trips."""
    uri = "pdf2sqlite://table-image/42"

    parsed = parse_resource_uri(uri)

    assert isinstance(parsed, TableImageResource)
    assert parsed.table_id == 42
    assert build_table_image_uri(42) == uri
49+
50+
51+
def test_parse_uri_rejects_bad_scheme():
    """A URI whose scheme is not ``pdf2sqlite`` raises ``ValueError``."""
    foreign_scheme_uri = "http://pdf/1"

    with pytest.raises(ValueError):
        parse_resource_uri(foreign_scheme_uri)
54+
55+
56+
def test_parse_uri_rejects_missing_target():
    """A URI with the right scheme but nothing after it raises ``ValueError``."""
    scheme_only_uri = "pdf2sqlite://"

    with pytest.raises(ValueError):
        parse_resource_uri(scheme_only_uri)
59+
60+
61+
def test_parse_uri_rejects_malformed_pdf_paths():
    """Structurally broken ``pdf`` URIs each raise ``ValueError``."""
    malformed_uris = (
        "pdf2sqlite://pdf/",  # no pdf id
        "pdf2sqlite://pdf/123/page/",  # no page number
        "pdf2sqlite://pdf/123/x/1",  # segment other than "page"
    )
    for uri in malformed_uris:
        with pytest.raises(ValueError):
            parse_resource_uri(uri)
68+
69+
70+
def test_parse_uri_rejects_non_int_identifiers():
    """Non-numeric ids raise ``ValueError`` for every resource kind."""
    non_numeric_uris = (
        "pdf2sqlite://pdf/abc",
        "pdf2sqlite://pdf/1/page/two",
        "pdf2sqlite://figure/notanint",
        "pdf2sqlite://table-image/notanint",
    )
    for uri in non_numeric_uris:
        with pytest.raises(ValueError):
            parse_resource_uri(uri)

0 commit comments

Comments
 (0)