Jelly-RDF · Ja-Gk-00 · Nov 19, 2025 · Nov 19, 2025 · Nov 19, 2025 · Nov 20, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -85,6 +85,10 @@ docs = [
     "markupsafe<4.0.0",
     "linkchecker~=10.6.0",
 ]
+bench = [
+    "pytest-benchmark>=5.2.1",
+    "rdflib>=7.1.4",
+]
 
 [tool.uv]
 required-version = "~=0.9.0"
@@ -110,7 +114,14 @@ extra-standard-library = ["typing_extensions"]
 
 [tool.pytest.ini_options]
 pythonpath = "."
-addopts = ["--import-mode=importlib", "--doctest-modules", "--ignore-glob=docs/examples/**", "--ignore-glob=examples/**"]
+addopts = ["--import-mode=importlib", 
+          "--doctest-modules", 
+          "--ignore-glob=docs/examples/**", 
+          "--ignore-glob=examples/**", 
+          "--ignore-glob=tests/utils/benchmark_*",
+          "--ignore=tests/benchmark_tests",
+          "-m", "not benchmark"
+        ]
 
 [tool.ruff]
 extend-exclude = ["*{_pb2,_pb2_grpc}.{py,pyi}"]

diff --git a/tests/benchmark_tests/__init__.py b/tests/benchmark_tests/__init__.py
diff --git a/tests/benchmark_tests/conftest.py b/tests/benchmark_tests/conftest.py
@@ -0,0 +1,167 @@
+from __future__ import annotations
+
+import io
+from pathlib import Path
+
+import pytest
+from rdflib import Dataset, Graph
+
+
+def pytest_addoption(parser: pytest.Parser) -> None:
+    """Register the benchmark command-line options.
+
+    All options are added to the ``benchmark`` option group: the input file
+    paths, the statement limit, and the warmup/rounds/iterations settings that
+    the fixtures in this module read back through ``config.getoption``.
+    """
+    group = parser.getgroup("benchmark")
+    add_option = group.addoption
+
+    # Input files. The Jelly inputs are optional.
+    add_option("--in-nt", type=str, help="path to N-Triples file.")
+    add_option("--in-nq", type=str, help="path to N-Quads file.")
+    add_option(
+        "--in-jelly-triples",
+        type=str,
+        default=None,
+        help="optional Jelly triples file; if none, generated in-memory from nt file.",
+    )
+    add_option(
+        "--in-jelly-quads",
+        type=str,
+        default=None,
+        help="optional Jelly quads file; if none, generated in-memory from nq slice.",
+    )
+
+    # Workload size.
+    add_option(
+        "--limit-statements",
+        type=int,
+        default=5_000_000,
+        help="first N statements from input.",
+    )
+
+    # Measurement settings, collected by the ``pedantic_cfg`` fixture.
+    add_option(
+        "--warmup-rounds",
+        type=int,
+        default=5,
+        help="warmup rounds, not counted to evaluation.",
+    )
+    add_option("--rounds", type=int, default=10, help="measured rounds.")
+    add_option("--iterations", type=int, default=1, help="iterations per round.")
+
+
+def _slice_lines_to_bytes(path: Path, limit: int) -> bytes:
+    """Return the first ``limit`` lines of ``path`` as a single bytes object.
+
+    The file is read in binary mode line by line, so at most ``limit`` lines
+    are copied and the rest of the file is never read.
+
+    Args:
+        path: File to read.
+        limit: Maximum number of lines to keep. Zero or a negative value
+            yields ``b""``.
+
+    Returns:
+        The concatenated raw lines, line terminators included.
+    """
+    buf = io.BytesIO()
+    with path.open("rb") as f:
+        for i, line in enumerate(f):
+            # Test the limit *before* writing so that limit <= 0 copies nothing
+            # instead of leaking the first line.
+            if i >= limit:
+                break
+            buf.write(line)
+    return buf.getvalue()
+
+
+@pytest.fixture(scope="session")
+def limit_statements(request: pytest.FixtureRequest) -> int:
+    """Session-scoped value of the ``--limit-statements`` option as an int."""
+    raw_limit = request.config.getoption("--limit-statements")
+    return int(raw_limit)
+
+
+@pytest.fixture(scope="session")
+def pedantic_cfg(request: pytest.FixtureRequest) -> dict[str, int]:
+    """Collect the round settings from the command line into one dict.
+
+    The keys (``warmup_rounds``, ``rounds``, ``iterations``) are chosen so the
+    tests can splat the dict into ``benchmark.pedantic(...)``.
+    """
+    get_option = request.config.getoption
+    # (result key, command-line flag) pairs, in the order they are emitted.
+    key_to_flag = (
+        ("warmup_rounds", "--warmup-rounds"),
+        ("rounds", "--rounds"),
+        ("iterations", "--iterations"),
+    )
+    return {key: int(get_option(flag)) for key, flag in key_to_flag}
+
+
+@pytest.fixture(scope="session")
+def nt_path(request: pytest.FixtureRequest) -> Path:
+    """Return the N-Triples input path given by ``--in-nt``.
+
+    Raises:
+        pytest.UsageError: If ``--in-nt`` was not supplied or points to a path
+            that does not exist.
+    """
+    opt = request.config.getoption("--in-nt")
+    # Raise explicitly rather than ``assert``: assert statements are removed
+    # when Python runs with -O, which would let a bad option slip through.
+    if not opt:
+        raise pytest.UsageError("--in-nt is required")
+    p = Path(opt)
+    if not p.exists():
+        raise pytest.UsageError(f"--in-nt not found: {p}")
+    return p
+
+
+@pytest.fixture(scope="session")
+def nq_path(request: pytest.FixtureRequest) -> Path:
+    """Return the N-Quads input path given by ``--in-nq``.
+
+    Raises:
+        pytest.UsageError: If ``--in-nq`` was not supplied or points to a path
+            that does not exist.
+    """
+    opt = request.config.getoption("--in-nq")
+    # Raise explicitly rather than ``assert``: assert statements are removed
+    # when Python runs with -O, which would let a bad option slip through.
+    if not opt:
+        raise pytest.UsageError("--in-nq is required")
+    p = Path(opt)
+    if not p.exists():
+        raise pytest.UsageError(f"--in-nq not found: {p}")
+    return p
+
+
+@pytest.fixture(scope="session")
+def jelly_triples_path(request: pytest.FixtureRequest) -> Path | None:
+    """Path from ``--in-jelly-triples``, or ``None`` when the option is unset."""
+    opt = request.config.getoption("--in-jelly-triples")
+    if not opt:
+        return None
+    return Path(opt)
+
+
+@pytest.fixture(scope="session")
+def jelly_quads_path(request: pytest.FixtureRequest) -> Path | None:
+    """Path from ``--in-jelly-quads``, or ``None`` when the option is unset."""
+    opt = request.config.getoption("--in-jelly-quads")
+    if not opt:
+        return None
+    return Path(opt)
+
+
+@pytest.fixture(scope="session")
+def nt_bytes_sliced(nt_path: Path, limit_statements: int) -> bytes:
+    """Raw bytes of the first ``limit_statements`` lines of the N-Triples file."""
+    sliced = _slice_lines_to_bytes(path=nt_path, limit=limit_statements)
+    return sliced
+
+
+@pytest.fixture(scope="session")
+def nq_bytes_sliced(nq_path: Path, limit_statements: int) -> bytes:
+    """Raw bytes of the first ``limit_statements`` lines of the N-Quads file."""
+    sliced = _slice_lines_to_bytes(path=nq_path, limit=limit_statements)
+    return sliced
+
+
+@pytest.fixture(scope="session")
+def nt_graph(nt_bytes_sliced: bytes) -> Graph:
+    """Session-scoped ``Graph`` parsed from the sliced N-Triples bytes."""
+    graph = Graph()
+    graph.parse(data=nt_bytes_sliced, format="nt")
+    return graph
+
+
+@pytest.fixture(scope="session")
+def nq_dataset(nq_bytes_sliced: bytes) -> Dataset:
+    """Session-scoped ``Dataset`` parsed from the sliced N-Quads bytes."""
+    dataset = Dataset()
+    dataset.parse(data=nq_bytes_sliced, format="nquads")
+    return dataset
+
+
+@pytest.fixture(scope="session")
+def jelly_triples_bytes(jelly_triples_path: Path | None, nt_graph: Graph) -> bytes:
+    """Jelly-encoded triples used as input for the parse benchmarks.
+
+    Reads ``jelly_triples_path`` when it is set and exists; in every other
+    case (including a path that does not exist) the bytes are produced by
+    serializing ``nt_graph`` to Jelly in memory.
+    """
+    if jelly_triples_path is None or not jelly_triples_path.exists():
+        return nt_graph.serialize(
+            destination=None, format="jelly", encoding="utf-8"
+        )
+    return jelly_triples_path.read_bytes()
+
+
+@pytest.fixture(scope="session")
+def jelly_quads_bytes(jelly_quads_path: Path | None, nq_dataset: Dataset) -> bytes:
+    """Jelly-encoded quads used as input for the parse benchmarks.
+
+    Reads ``jelly_quads_path`` when it is set and exists; in every other case
+    (including a path that does not exist) the bytes are produced by
+    serializing ``nq_dataset`` to Jelly in memory.
+    """
+    if jelly_quads_path is None or not jelly_quads_path.exists():
+        return nq_dataset.serialize(
+            destination=None, format="jelly", encoding="utf-8"
+        )
+    return jelly_quads_path.read_bytes()
+
+
+def pytest_configure(config: pytest.Config) -> None:
+    """Register the ``benchmark``, ``triples`` and ``quads`` markers."""
+    marker_lines = (
+        "benchmark: flat ser/des benchmarks",
+        "triples: triples-only benchmarks (NT/Jelly-triples)",
+        "quads: quads-only benchmarks (NQ/Jelly-quads)",
+    )
+    for marker_line in marker_lines:
+        config.addinivalue_line("markers", marker_line)
+
+
+def pytest_collection_modifyitems(
+    config: pytest.Config, items: list[pytest.Item]
+) -> None:
+    """Deselect marked benchmarks whose input file option was not supplied.
+
+    Items marked ``triples`` are dropped when ``--in-nt`` is unset and items
+    marked ``quads`` are dropped when ``--in-nq`` is unset. Dropped items are
+    reported through the ``pytest_deselected`` hook and ``items`` is rewritten
+    in place; when nothing is dropped, ``items`` is left untouched.
+    """
+    # Marker name -> True when the input that marker depends on is missing.
+    input_missing = {
+        "triples": not config.getoption("--in-nt"),
+        "quads": not config.getoption("--in-nq"),
+    }
+
+    kept: list[pytest.Item] = []
+    dropped: list[pytest.Item] = []
+    for item in items:
+        lacks_input = any(
+            missing and item.get_closest_marker(marker) is not None
+            for marker, missing in input_missing.items()
+        )
+        (dropped if lacks_input else kept).append(item)
+
+    if not dropped:
+        return
+    config.hook.pytest_deselected(items=dropped)
+    items[:] = kept
diff --git a/tests/benchmark_tests/jelly_rdflib.py b/tests/benchmark_tests/jelly_rdflib.py
@@ -0,0 +1,72 @@
+from __future__ import annotations
+
+import io
+from contextlib import suppress
+
+from rdflib import Dataset, Graph
+
+from tests.utils.benchmark_io_utils import NullCounter
+
+
+def parse_nt_bytes(nt_bytes: bytes) -> Graph:
+    """Parse N-Triples bytes into a fresh ``Graph`` and return it."""
+    graph = Graph()
+    graph.parse(data=nt_bytes, format="nt")
+    return graph
+
+
+def parse_jelly_triples_bytes(jelly_bytes: bytes) -> Graph:
+    """Parse Jelly bytes into a fresh ``Graph`` and return it."""
+    graph = Graph()
+    graph.parse(data=jelly_bytes, format="jelly")
+    return graph
+
+
+def parse_nq_bytes(nq_bytes: bytes) -> Dataset:
+    """Parse N-Quads bytes into a fresh ``Dataset`` and return it."""
+    dataset = Dataset()
+    dataset.parse(data=nq_bytes, format="nquads")
+    return dataset
+
+
+def parse_jelly_quads_bytes(jelly_bytes: bytes) -> Dataset:
+    """Parse Jelly bytes into a fresh ``Dataset`` and return it."""
+    dataset = Dataset()
+    dataset.parse(data=jelly_bytes, format="jelly")
+    return dataset
+
+
+def serialize_nt_stream(g: Graph) -> int:
+    """Serialize ``g`` as N-Triples into a discarding sink.
+
+    The output is written through a ``BufferedWriter`` wrapped around a
+    ``NullCounter``, so nothing is kept in memory or on disk.
+
+    Args:
+        g: Graph to serialize.
+
+    Returns:
+        The sink's ``n`` attribute after the final flush (presumably the number
+        of bytes written; see ``NullCounter`` to confirm).
+    """
+    sink = NullCounter()
+    buf = io.BufferedWriter(sink)
+    # Use the N-Triples serializer ("nt"), matching parse_nt_bytes. The
+    # previous "nquads" value made this benchmark exercise the N-Quads writer.
+    g.serialize(destination=buf, format="nt", encoding="utf-8")
+    buf.flush()
+    # Best-effort separation of the wrapper from the sink once flushed.
+    with suppress(io.UnsupportedOperation, ValueError):
+        buf.detach()
+    return sink.n
+
+
+def serialize_jelly_triples_stream(g: Graph) -> int:
+    """Serialize ``g`` as Jelly into a discarding sink and return ``sink.n``.
+
+    Output goes through a ``BufferedWriter`` around a ``NullCounter``; the
+    returned value is the counter's ``n`` attribute after the final flush.
+    """
+    counter = NullCounter()
+    writer = io.BufferedWriter(counter)
+
+    g.serialize(destination=writer, format="jelly", encoding="utf-8")
+    writer.flush()
+
+    # Best-effort separation of the wrapper from the sink once flushed.
+    with suppress(io.UnsupportedOperation, ValueError):
+        writer.detach()
+
+    return counter.n
+
+
+def serialize_nq_stream(ds: Dataset) -> int:
+    """Serialize ``ds`` as N-Quads into a discarding sink and return ``sink.n``.
+
+    Output goes through a ``BufferedWriter`` around a ``NullCounter``; the
+    returned value is the counter's ``n`` attribute after the final flush.
+    """
+    counter = NullCounter()
+    writer = io.BufferedWriter(counter)
+
+    ds.serialize(destination=writer, format="nquads", encoding="utf-8")
+    writer.flush()
+
+    # Best-effort separation of the wrapper from the sink once flushed.
+    with suppress(io.UnsupportedOperation, ValueError):
+        writer.detach()
+
+    return counter.n
+
+
+def serialize_jelly_quads_stream(ds: Dataset) -> int:
+    """Serialize ``ds`` as Jelly into a discarding sink and return ``sink.n``.
+
+    Output goes through a ``BufferedWriter`` around a ``NullCounter``; the
+    returned value is the counter's ``n`` attribute after the final flush.
+    """
+    counter = NullCounter()
+    writer = io.BufferedWriter(counter)
+
+    ds.serialize(destination=writer, format="jelly", encoding="utf-8")
+    writer.flush()
+
+    # Best-effort separation of the wrapper from the sink once flushed.
+    with suppress(io.UnsupportedOperation, ValueError):
+        writer.detach()
+
+    return counter.n
diff --git a/tests/benchmark_tests/test_flat_deserialize.py b/tests/benchmark_tests/test_flat_deserialize.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+from tests.benchmark_tests.jelly_rdflib import (
+    parse_jelly_quads_bytes,
+    parse_jelly_triples_bytes,
+    parse_nq_bytes,
+    parse_nt_bytes,
+)
+from tests.utils.benchmark_throughput import print_throughput
+
+if TYPE_CHECKING:
+    # Annotation-only import. Importing it unconditionally would raise
+    # ImportError when pytest-benchmark is absent, before the importorskip
+    # call below gets a chance to skip the module. Annotations are lazy here
+    # (``from __future__ import annotations``), so no runtime import is needed.
+    from pytest_benchmark.fixture import (  # type: ignore[import-not-found]
+        BenchmarkFixture,
+    )
+
+# Skip the whole module when pytest-benchmark is not installed.
+# NOTE(review): the project imports above still run first; if they pull in
+# optional benchmark dependencies themselves, collection can still fail.
+pytest.importorskip(
+    "pytest_benchmark",
+    reason="Install bench dependency group and run with -m benchmark",
+)
+
+# Every test in this module carries the ``benchmark`` marker.
+pytestmark = pytest.mark.benchmark
+
+
+@pytest.mark.triples
+def test_flat_triples_deserialize_nt(
+    benchmark: BenchmarkFixture,
+    nt_bytes_sliced: bytes,
+    pedantic_cfg: dict[str, int],
+    limit_statements: int,
+) -> None:
+    """Benchmark ``parse_nt_bytes`` on the sliced N-Triples input.
+
+    Runs the parser through ``benchmark.pedantic`` with the configured rounds,
+    then passes the benchmark, the statement limit and a label to
+    ``print_throughput``.
+    """
+    parser_args = (nt_bytes_sliced,)
+    benchmark.pedantic(parse_nt_bytes, args=parser_args, **pedantic_cfg)
+    print_throughput(benchmark, limit_statements, "triples: parse NT")
+
+
+@pytest.mark.triples
+def test_flat_triples_deserialize_jelly(
+    benchmark: BenchmarkFixture,
+    jelly_triples_bytes: bytes,
+    pedantic_cfg: dict[str, int],
+    limit_statements: int,
+) -> None:
+    """Benchmark ``parse_jelly_triples_bytes`` on the Jelly triples input.
+
+    Runs the parser through ``benchmark.pedantic`` with the configured rounds,
+    then passes the benchmark, the statement limit and a label to
+    ``print_throughput``.
+    """
+    parser_args = (jelly_triples_bytes,)
+    benchmark.pedantic(parse_jelly_triples_bytes, args=parser_args, **pedantic_cfg)
+    print_throughput(benchmark, limit_statements, "triples: parse Jelly")
+
+
+@pytest.mark.quads
+def test_flat_quads_deserialize_nq(
+    benchmark: BenchmarkFixture,
+    nq_bytes_sliced: bytes,
+    pedantic_cfg: dict[str, int],
+    limit_statements: int,
+) -> None:
+    """Benchmark ``parse_nq_bytes`` on the sliced N-Quads input.
+
+    Runs the parser through ``benchmark.pedantic`` with the configured rounds,
+    then passes the benchmark, the statement limit and a label to
+    ``print_throughput``.
+    """
+    parser_args = (nq_bytes_sliced,)
+    benchmark.pedantic(parse_nq_bytes, args=parser_args, **pedantic_cfg)
+    print_throughput(benchmark, limit_statements, "quads: parse NQ")
+
+
+@pytest.mark.quads
+def test_flat_quads_deserialize_jelly(
+    benchmark: BenchmarkFixture,
+    jelly_quads_bytes: bytes,
+    pedantic_cfg: dict[str, int],
+    limit_statements: int,
+) -> None:
+    """Benchmark ``parse_jelly_quads_bytes`` on the Jelly quads input.
+
+    Runs the parser through ``benchmark.pedantic`` with the configured rounds,
+    then passes the benchmark, the statement limit and a label to
+    ``print_throughput``.
+    """
+    parser_args = (jelly_quads_bytes,)
+    benchmark.pedantic(parse_jelly_quads_bytes, args=parser_args, **pedantic_cfg)
+    print_throughput(benchmark, limit_statements, "quads: parse Jelly")