Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,9 @@ With CCE: context_search "payment flow" = 800 tokens

We benchmarked CCE against [FastAPI](https://github.com/fastapi/fastapi) (53 source files, 180K tokens) with 20 real coding questions. No cherry-picking, no synthetic queries.

**Methodology:** For each query, "without CCE" means reading the full content of every file the query touches. "With CCE" means the relevant chunks after compression. This is conservative (agents often read more files than needed).
**Methodology:** For each query, "without CCE" means reading the full content of every file the query touches. "With CCE" means the relevant chunks after compression.

**Important baseline note:** The 94% number is measured against full-file reads, not against what Claude Code actually does. In practice, Claude Code already uses grep, partial file reads, and targeted tools, so the real-world savings compared to normal Claude Code behavior will be lower than 94%. We use full-file reads as the baseline because they are reproducible and deterministic (no agent behavior variability). The benchmark measures CCE's retrieval efficiency, not a head-to-head comparison with Claude Code's built-in exploration.

| Metric | Result |
|--------|--------|
Expand Down
3 changes: 2 additions & 1 deletion src/context_engine/indexer/embedding_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ def prune_orphans(self, known_hashes: set[str]) -> int:
for i in range(0, len(orphan_list), 500):
batch = orphan_list[i : i + 500]
placeholders = ",".join("?" * len(batch))
self._conn.execute(
# Safe: placeholders is only "?" chars; values are parameterized.
self._conn.execute( # nosemgrep: sqlalchemy-execute-raw-query
f"DELETE FROM embedding_cache WHERE content_hash IN ({placeholders})",
batch,
)
Expand Down
10 changes: 6 additions & 4 deletions src/context_engine/memory/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,9 +403,10 @@ def _write_vec_row(conn, table: str, rowid: int, vec) -> None:
doesn't break inserts on the source table — the failed row simply won't
be semantically searchable until the vec tables are rebuilt.
"""
# Safe: table name is an internal constant, never from user input.
try:
conn.execute(f"DELETE FROM {table} WHERE rowid = ?", (rowid,))
conn.execute(
conn.execute(f"DELETE FROM {table} WHERE rowid = ?", (rowid,)) # nosemgrep: sqlalchemy-execute-raw-query
conn.execute( # nosemgrep: sqlalchemy-execute-raw-query
f"INSERT INTO {table}(rowid, embedding) VALUES (?, ?)",
(rowid, _serialize_vec(vec)),
)
Expand Down Expand Up @@ -786,17 +787,18 @@ def prune_old_rows(

archived: dict[str, list[dict]] = {}

# Safe: table and columns (joined into col_list) are internal constants, never from user input.
def _harvest_and_delete(table: str, columns: list[str], cutoff: int) -> int:
col_list = ", ".join(columns)
rows = conn.execute(
rows = conn.execute( # nosemgrep: sqlalchemy-execute-raw-query
f"SELECT {col_list} FROM {table} WHERE created_at_epoch < ?",
(cutoff,),
).fetchall()
if not rows:
return 0
if archive:
archived[table] = [dict(r) for r in rows]
conn.execute(
conn.execute( # nosemgrep: sqlalchemy-execute-raw-query
f"DELETE FROM {table} WHERE created_at_epoch < ?",
(cutoff,),
)
Expand Down
3 changes: 2 additions & 1 deletion src/context_engine/storage/fts_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ def _delete_files_sync(self, file_paths: list[str]) -> None:
with self._lock:
for batch in batched_params(file_paths):
placeholders = ",".join("?" * len(batch))
self._conn.execute(
# Safe: placeholders is only "?" chars; values are parameterized.
self._conn.execute( # nosemgrep: sqlalchemy-execute-raw-query
f"DELETE FROM chunks_fts WHERE file_path IN ({placeholders})",
batch,
)
Expand Down
7 changes: 4 additions & 3 deletions src/context_engine/storage/graph_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,22 +162,23 @@ def _sync_delete_by_files(self, file_paths: list[str]) -> None:
with self._lock:
cur = self._conn.cursor()
# Collect node IDs in batches to respect SQLite param limits.
# Safe: ph is only "?" chars; values are parameterized.
node_ids: list[str] = []
for batch in batched_params(file_paths):
ph = ",".join("?" * len(batch))
cur.execute(
cur.execute( # nosemgrep: sqlalchemy-execute-raw-query
f"SELECT id FROM nodes WHERE file_path IN ({ph})", batch
)
node_ids.extend(row[0] for row in cur.fetchall())
# Delete edges and nodes in batches.
for batch in batched_params(node_ids):
ph = ",".join("?" * len(batch))
cur.execute(
cur.execute( # nosemgrep: sqlalchemy-execute-raw-query
f"DELETE FROM edges WHERE source_id IN ({ph}) "
f"OR target_id IN ({ph})",
batch + batch,
)
cur.execute(f"DELETE FROM nodes WHERE id IN ({ph})", batch)
cur.execute(f"DELETE FROM nodes WHERE id IN ({ph})", batch) # nosemgrep: sqlalchemy-execute-raw-query
self._conn.commit()

# ------------------------------------------------------------------
Expand Down
16 changes: 9 additions & 7 deletions src/context_engine/storage/vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,11 @@ def _ensure_vec_table(self, dim: int) -> None:
self._conn.execute("DROP TABLE IF EXISTS chunks_vec")
self._conn.execute("DELETE FROM chunks")
self._conn.execute("DELETE FROM chunk_compressions")
self._conn.execute(f"""
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_vec
USING vec0(embedding float[{dim}])
""")
# Safe: dim is a validated integer, never from user input.
self._conn.execute( # nosemgrep: sqlalchemy-execute-raw-query
f"CREATE VIRTUAL TABLE IF NOT EXISTS chunks_vec "
f"USING vec0(embedding float[{dim}])"
)
self._dim = dim
self._conn.commit()

Expand Down Expand Up @@ -242,20 +243,21 @@ async def delete_by_files(self, file_paths: list[str]) -> None:
from context_engine.utils import batched_params

with self._lock:
# Safe: placeholders is only "?" chars; values are parameterized.
for batch in batched_params(file_paths):
placeholders = ",".join("?" * len(batch))
if self._dim is not None:
self._conn.execute(
self._conn.execute( # nosemgrep: sqlalchemy-execute-raw-query
f"DELETE FROM chunks_vec "
f"WHERE rowid IN (SELECT rowid FROM chunks WHERE file_path IN ({placeholders}))",
batch,
)
self._conn.execute(
self._conn.execute( # nosemgrep: sqlalchemy-execute-raw-query
f"DELETE FROM chunk_compressions "
f"WHERE chunk_id IN (SELECT id FROM chunks WHERE file_path IN ({placeholders}))",
batch,
)
self._conn.execute(
self._conn.execute( # nosemgrep: sqlalchemy-execute-raw-query
f"DELETE FROM chunks WHERE file_path IN ({placeholders})",
batch,
)
Expand Down
Loading