diff --git a/iii-database/Cargo.lock b/iii-database/Cargo.lock index 45ebd4b0..90b49779 100644 --- a/iii-database/Cargo.lock +++ b/iii-database/Cargo.lock @@ -1270,7 +1270,7 @@ dependencies = [ [[package]] name = "iii-database" -version = "1.0.0" +version = "0.0.4" dependencies = [ "anyhow", "async-trait", diff --git a/iii-database/README.md b/iii-database/README.md index bd039d30..b4fbb881 100644 --- a/iii-database/README.md +++ b/iii-database/README.md @@ -47,7 +47,8 @@ databases: url: postgres://app@db.example.com:5432/app tls: mode: verify-full # disable | require | verify-full (default: require) - ca_cert: /etc/ssl/internal-ca.pem # optional; replaces the system trust store + ca_cert: /etc/ssl/internal-ca.pem # optional; extends the system trust store + trust_native: true # default true; set false to trust only ca_cert local: url: postgres://dev@localhost:5432/dev tls: @@ -58,7 +59,34 @@ databases: - **`require`** (default) — encrypted; cert chain validated; hostname is **not** verified. Catches passive eavesdropping, doesn't catch a determined MITM with their own valid-chain cert. - **`verify-full`** — encrypted; cert chain validated; cert hostname must match the URL host. Production default for managed services (RDS, Neon, Supabase). -`ca_cert` lets you point at a private CA bundle for self-hosted databases. When set, it **replaces** the system trust store rather than extending it. +`ca_cert` lets you point at a CA bundle for self-hosted databases or managed providers whose root isn't in the OS trust store. **Additive by default**: the supplied certs extend the system trust store rather than replacing it, so the same `TlsConfig` surface works for one database that needs a private CA and another that doesn't. Set `tls.trust_native: false` to switch to the strict-isolation posture (only the `ca_cert` certs trusted; the public web PKI is rejected). Postgres only — `mysql_async`'s rustls path always bundles `webpki_roots` and offers no upstream knob to suppress it. + +#### Connecting to managed providers + +**Supabase.** Every Supabase endpoint (direct, transaction pooler, session pooler) presents certificates signed by *Supabase Intermediate 2021 CA*, which is not in the OS trust store. By default `tls.mode: require` fails with `pool connection failed (tls)`. Download the CA from your project dashboard (or `https://supabase.com/downloads/prod-ca-2021.crt`) and point `tls.ca_cert` at it: + +```yaml +databases: + primary: + url: postgresql://postgres.:@aws-0-.pooler.supabase.com:6543/postgres + tls: + mode: verify-full + ca_cert: /etc/ssl/supabase-prod-ca-2021.crt +``` + +`ca_cert` is additive — your existing CA pinning for other databases keeps working alongside this entry. + +**Neon.** Drop `?sslmode=` and `?channel_binding=` from URLs copied out of the Neon dashboard, and configure TLS via the `tls` YAML block instead: + +```yaml +databases: + primary: + url: postgres://user:pass@ep-xxx-pooler..aws.neon.tech/neondb + tls: + mode: require # or verify-full +``` + +Neon's default `?channel_binding=require` cannot work through the pooler endpoint: TLS terminates at the pooler, so SCRAM-SHA-256-PLUS isn't advertised by the inner server, and `tokio-postgres` refuses to fall back. Leaving the URL param in surfaces as `pool connection failed (auth)`. SQLite ignores the `tls` block (local-file driver). @@ -92,27 +120,15 @@ const { rows } = await call('iii-database::query', { | `iii-database::execute` | Write SQL. Returns `{ affected_rows, last_insert_id, returned_rows }`.
**`last_insert_id` semantics:** SQLite/MySQL surface the engine's `last_insert_rowid()` / `LAST_INSERT_ID()` (only populated for INSERT). Postgres has no equivalent — `last_insert_id` is set from the **first column of the first RETURNING row**, so put your PK first: `RETURNING id, name`, not `RETURNING name, id`. | | `iii-database::prepareStatement` | Pin a connection and return `{ handle: { id, expires_at } }`. | | `iii-database::runStatement` | Run a previously-prepared handle. (No `timeout_ms` — uses the pinned connection's session lifetime; configure via `ttl_seconds` on `prepareStatement`.) | -| `iii-database::transaction` | Atomic sequence; rolls back on first failure. | +| `iii-database::transaction` | Atomic batch sequence; rolls back on first failure. One-shot — pass all statements together. | +| `iii-database::beginTransaction` | Open an interactive transaction. Returns `{ transaction: { id, expires_at } }`. Configurable `timeout_ms` (default 30 000, max 300 000) auto-rolls back if the deadline elapses. | +| `iii-database::transactionQuery` | Read SQL inside an interactive transaction. Same envelope as `query`. | +| `iii-database::transactionExecute` | Write SQL inside an interactive transaction. Same envelope as `execute`. Rejects bare `BEGIN`/`COMMIT`/`ROLLBACK`/`SAVEPOINT`/`SET TRANSACTION` with `INVALID_PARAM` — finalize via the dedicated handlers below. | +| `iii-database::commitTransaction` | Commit and finalize an interactive transaction. Subsequent calls against the same id return `TRANSACTION_NOT_FOUND`. | +| `iii-database::rollbackTransaction` | Rollback and finalize an interactive transaction. Subsequent calls against the same id return `TRANSACTION_NOT_FOUND`. | ## Triggers -### `iii-database::query-poll` -Polls a SQL query at a fixed interval, dispatches new rows, and persists a cursor inside the watched database in `__iii_cursors`. - -```yaml -triggers: - - type: iii-database::query-poll - config: - db: primary - sql: SELECT id, body FROM outbox WHERE id > COALESCE(?, 0) ORDER BY id LIMIT 50 - interval_ms: 1000 - cursor_column: id -``` - -The trigger binds the cursor as the single positional parameter (`?` for SQLite/MySQL, `$1` for Postgres). On the first poll the cursor binds as `NULL`. - -The dispatched event includes a `cursor` field that is **always serialized as a JSON string**, regardless of the underlying column type. Callers must parse it (e.g. `parseInt(event.cursor)`) when expecting numeric comparison. - ### `iii-database::row-change` Postgres only. Streams row-level changes via logical replication (`pgoutput`). @@ -138,12 +154,13 @@ Returned `IIIError::Handler` bodies carry a stable `code` field: | `POOL_TIMEOUT` | Pool acquire exceeded `acquire_timeout_ms`. | | `QUERY_TIMEOUT` | Query exceeded `timeout_ms`. | | `STATEMENT_NOT_FOUND` | Handle expired or unknown — re-prepare. | +| `TRANSACTION_NOT_FOUND` | Transaction id unknown, already committed/rolled back, or timed out (auto-rolled-back by the watcher). | | `UNKNOWN_DB` | `db` parameter doesn't match any configured database. | -| `INVALID_PARAM` | JSON value couldn't be coerced for the target driver. | -| `DRIVER_ERROR` | Wraps underlying driver error with `driver` and `inner_code` (nullable). `inner_code` format is per-driver: Postgres = SQLSTATE 5-char string (e.g. `42P01`), MySQL = server error number as string, SQLite = `rusqlite::ErrorCode` debug name. | +| `INVALID_PARAM` | JSON value couldn't be coerced for the target driver, or transaction-control SQL was sent to `transactionExecute` (use `commitTransaction` / `rollbackTransaction`). | +| `DRIVER_ERROR` | Wraps underlying driver error with `driver` and `inner_code` (nullable). `inner_code` format is per-driver: Postgres = SQLSTATE 5-char string (e.g. `42P01`), MySQL = server error number as string, SQLite = `rusqlite::ErrorCode` debug name. Pool-acquire failures use the message form `pool connection failed ()` where `` is one of `tls`, `auth`, `network`, `server-policy`, or `unknown` — a redacted hint so untrusted callers can self-triage without seeing host/userinfo/db fragments. The full driver error is in the worker's stderr via `tracing::warn!`. | | `REPLICATION_SLOT_EXISTS` | Startup-only: another instance owns the slot. | | `UNSUPPORTED` | Operation not supported on the chosen driver. | -| `CONFIG_ERROR` | Config parse, pool init, or trigger misconfiguration (e.g. `cursor_column` not in result). | +| `CONFIG_ERROR` | Config parse or pool init failure. | ## Driver compatibility @@ -156,11 +173,16 @@ A few operations are no-ops on certain drivers. They emit a `tracing::warn!` rat | `transaction` `isolation: serializable` | ✓ (`BEGIN IMMEDIATE`) | ✓ | ✓ | | `iii-database::row-change` trigger | — | setup-only in v1.0.0 (see above) | — | + ## Troubleshooting - **Pool exhausted (`POOL_TIMEOUT`)**: bump `pool.max` or shorten the longest-running query. Live `prepareStatement` handles each pin one connection from the pool until they expire. - **`STATEMENT_NOT_FOUND` from a long-lived handle**: handles are bounded to `ttl_seconds` (default 3600, max 86400). Re-prepare and retry. -- **SQLite write contention with `query-poll`**: enable WAL mode in your DB: `PRAGMA journal_mode=WAL;` once after creation. +- **`DRIVER_ERROR` "pool connection failed (...)"**: the parenthesized class tells you where to look. + - `(tls)` — handshake or cert-chain failure. For managed providers (Supabase, self-signed corporate CAs), supply `tls.ca_cert`; see "Connecting to managed providers" above. + - `(auth)` — credential or pg_hba/SCRAM rejection. Includes Neon's `?channel_binding=require` failing through the pooler endpoint (drop the URL param, use `tls.mode` in YAML). + - `(network)` — TCP refuse, DNS, route, or peer reset. Check host/port reachability and any firewalls. + - `(server-policy)` — server reachable and TLS+auth OK, but the server actively refused (e.g. `max_connections` exceeded, admin shutdown). Look at the worker stderr for the underlying driver message. - **Replication slot already exists**: another instance is consuming the slot. Either reuse the slot name or run `SELECT pg_drop_replication_slot('')`. ## License diff --git a/iii-database/skills/iii-database/execute.md b/iii-database/skills/iii-database/execute.md new file mode 100644 index 00000000..9e6ce272 --- /dev/null +++ b/iii-database/skills/iii-database/execute.md @@ -0,0 +1,136 @@ +--- +type: how-to +function_id: iii-database::execute +title: Run a write statement and return affected rows +--- + +# When to use + +Call `iii-database::execute` for any write-side SQL — `INSERT`, `UPDATE`, +`DELETE`, or DDL (`CREATE TABLE`, `ALTER TABLE`, `DROP INDEX`, ...). The +response carries `affected_rows`, an optional `last_insert_id`, and a +`returned_rows` array populated when the caller asks for `RETURNING`-style +output. + +Reach for it when: + +- You need the count of rows the write touched (`affected_rows`). +- You inserted into a table with an autoincrement primary key and want + the newly-assigned id without a follow-up `SELECT`. +- You want a write to surface specific columns from each affected row + (e.g. server-defaulted `id` + `created_at`) — set `returning` on + Postgres or SQLite. + +Use [`iii-database::query`](iii://iii-database/query) instead when the +statement reads — `execute` does run a `SELECT` if you give it one but +discards the rows and reports `affected_rows: 0`, which is rarely what a +SELECT caller wants. + +Use [`iii-database::transaction`](iii://iii-database/transaction) instead +when you need several writes to commit atomically — `execute` runs each +call as its own implicit transaction. + +# Inputs + +```json +{ + "db": "primary", // required; key from your `databases:` config + "sql": "INSERT INTO users (email) VALUES (?) RETURNING id", // required; non-empty after trim + "params": ["a@x"], // optional; bound positionally + "returning": ["id", "created_at"] // optional; SQLite + Postgres only — see Driver compatibility +} +``` + +`db` and `sql` are required. Empty/whitespace-only `sql` is rejected +uniformly with `DRIVER_ERROR` carrying `message: "empty SQL"` (matches +[`iii-database::query`](iii://iii-database/query)'s contract). + +`params` accepts JSON primitives, arrays, and objects exactly like +`query` — same per-driver placeholder syntax (`?` for sqlite/mysql, +`$1`/`$2`/... for postgres) and same `INVALID_PARAM` rule for +out-of-range numbers. + +`returning` is the column projection the driver fills into +`returned_rows` when supported. **MySQL ignores it** (warns once and +returns `returned_rows: []`); SQLite implements it via the native +`RETURNING` clause; Postgres reads it from the SQL's own `RETURNING` +list. To stay portable, prefer writing the `RETURNING ...` clause +directly in `sql` for sqlite/postgres rather than passing +`returning: [...]`. + +# Outputs + +```json +{ + "affected_rows": 1, // rows the engine reports as inserted/updated/deleted + "last_insert_id": "42", // string-encoded; null when not applicable — see below + "returned_rows": [{ "id": 42, "created_at": "..." }] // empty array when no RETURNING is used +} +``` + +- `affected_rows` is the engine-reported write count. DDL statements + return `0` on every driver. +- `last_insert_id` is **always a JSON string or `null`** so the field can + carry sequence values that overflow JS `Number.MAX_SAFE_INTEGER`. It is + populated only for inserts: + - **SQLite / MySQL**: surfaces the engine's `last_insert_rowid()` / + `LAST_INSERT_ID()` for the connection. **Only set on INSERT** — an + `UPDATE` that runs immediately after an `INSERT` on the same pooled + connection returns `null` (not the prior INSERT's rowid). Falls back + to `null` when the engine reports `0` (no INSERT has run on that + connection yet). + - **Postgres**: has no engine-level `LASTVAL()` equivalent in this + surface. The worker reads the **first column of the first + `RETURNING` row** as the id. Put your primary key first: + `RETURNING id, name` works; `RETURNING name, id` returns `name` as + `last_insert_id`. With no `RETURNING` clause, the field is `null`. +- `returned_rows` mirrors the row-of-objects shape from + [`iii-database::query`](iii://iii-database/query). Empty `[]` when the + statement omits `RETURNING` or runs on MySQL. + +# Worked example + +Insert one row and capture the autoincrement id (SQLite or MySQL): + +```json +{ + "db": "primary", + "sql": "INSERT INTO users (email) VALUES (?)", + "params": ["a@x"] +} +``` + +Returns `{ "affected_rows": 1, "last_insert_id": "1", "returned_rows": [] }`. + +Same intent on Postgres — the worker pulls `last_insert_id` from the +first `RETURNING` column, so put `id` first: + +```json +{ + "db": "primary", + "sql": "INSERT INTO users (email) VALUES ($1) RETURNING id, email", + "params": ["a@x"] +} +``` + +Returns +`{ "affected_rows": 1, "last_insert_id": "1", "returned_rows": [{ "id": 1, "email": "a@x" }] }`. + +A bulk update reports the count and leaves `last_insert_id` null +(no INSERT happened): + +```json +{ + "db": "primary", + "sql": "UPDATE users SET active = ? WHERE last_seen_at < ?", + "params": [false, "2026-01-01T00:00:00Z"] +} +``` + +Returns `{ "affected_rows": 17, "last_insert_id": null, "returned_rows": [] }`. + +# Related + +- `iii-database::query` — for read SQL; returns materialized rows + column metadata instead of `affected_rows`. +- `iii-database::transaction` — group several writes into one atomic batch with rollback on first failure. +- `iii-database::prepareStatement` + `iii-database::runStatement` — re-run the same parameterized write many times without re-parsing on each call. diff --git a/iii-database/skills/iii-database/interactive-transaction.md b/iii-database/skills/iii-database/interactive-transaction.md new file mode 100644 index 00000000..0edcd58c --- /dev/null +++ b/iii-database/skills/iii-database/interactive-transaction.md @@ -0,0 +1,240 @@ +--- +type: how-to +functions: [iii-database::beginTransaction, iii-database::transactionQuery, iii-database::transactionExecute, iii-database::commitTransaction, iii-database::rollbackTransaction] +title: Run a stateful transaction across multiple RPC calls with a timeout-driven auto-rollback +--- + +# When to use + +Use the interactive transaction surface when you need **read-your-writes +across several round-trips** inside one transaction — e.g. issue a write, +inspect the resulting state, branch in your application code, then issue +follow-up writes that all commit (or all roll back) together. The worker +pins one pool connection inside a server-side `BEGIN ... COMMIT/ROLLBACK` +under a UUID handle; subsequent calls re-use the same connection. + +Reach for it when: + +- You need to take a decision in application code *between* statements + that must commit together (the batch + [`iii-database::transaction`](iii://iii-database/transaction) requires + every statement up-front, so it can't carry inter-statement logic). +- You want a single transaction id to thread through a long-running + workflow without holding a network connection open in your code. +- You need stronger-than-default isolation for a multi-step read+write + flow (pass `isolation: "serializable"` to `beginTransaction`). + +Use [`iii-database::transaction`](iii://iii-database/transaction) instead +when every statement is known in advance — it skips the registry overhead +and the per-call round-trip cost. Use +[`iii-database::execute`](iii://iii-database/execute) for one-off writes; +each `execute` call is its own auto-committed transaction. + +# Lifecycle + +``` +beginTransaction(db, isolation?, timeout_ms?) → { transaction: { id, expires_at } } + ├─ transactionQuery ( transaction_id, sql, params ) → { rows, row_count, columns } + ├─ transactionExecute ( transaction_id, sql, params ) → { affected_rows, last_insert_id, returned_rows } + ├─ ... (repeat) ... + ├─ commitTransaction ( transaction_id ) → { committed: true } + └─ rollbackTransaction ( transaction_id ) → { rolled_back: true } +``` + +After `commitTransaction` / `rollbackTransaction` returns, the id is gone +— any subsequent call against it returns `TRANSACTION_NOT_FOUND`. If the +configured `timeout_ms` elapses before either finalizer lands, the worker +auto-rolls back and removes the id; the next call also gets +`TRANSACTION_NOT_FOUND`. + +# `iii-database::beginTransaction` + +## Inputs + +```json +{ + "db": "primary", // required; key from your `databases:` config + "isolation": "serializable", // optional; read_committed | repeatable_read | serializable + "timeout_ms": 30000 // optional; default 30000, clamped to max 300000 (5 min) +} +``` + +`db` is required. `isolation` accepts the same three values as the batch +[`iii-database::transaction`](iii://iii-database/transaction); any other +string is rejected with `INVALID_PARAM`. On SQLite, `read_committed` / +`repeatable_read` log a one-line `tracing::warn!` and fall back to +`BEGIN IMMEDIATE` (serializable in practice). + +`timeout_ms` is the **total lifetime of the transaction**, not an +inactivity timeout. It defaults to 30 s and is clamped at 5 min so a +buggy client can't pin a pool connection indefinitely. A background +sweep task fires `ROLLBACK` once the deadline elapses, then removes the +id from the registry. + +## Outputs + +```json +{ + "transaction": { + "id": "550e8400-e29b-41d4-a716-446655440000", + "expires_at": "2026-05-19T18:30:00Z" + } +} +``` + +`transaction.id` is the opaque UUID every subsequent handler accepts. +Treat it as a session token: scope it to one workflow, hand it off +between functions if needed, but never share it across unrelated requests. + +`transaction.expires_at` is RFC 3339 UTC; once it elapses the worker has +already auto-rolled back. Re-issue `beginTransaction` and start over; +the original id is gone. + +# `iii-database::transactionQuery` / `iii-database::transactionExecute` + +The envelopes are **identical to the standalone +[`query`](iii://iii-database/query) and +[`execute`](iii://iii-database/execute)** handlers — same row-of-objects +shape, same `columns` metadata, same `affected_rows` / +`last_insert_id` / `returned_rows` semantics. The only difference: SQL +runs on the pinned transaction connection. + +```json +{ + "transaction_id": "550e8400-...", // required; id from beginTransaction + "sql": "SELECT n FROM t WHERE id = ?", + "params": [42] +} +``` + +`transactionExecute` adds a `returning` array exactly like +[`execute`](iii://iii-database/execute) for Postgres + SQLite +`RETURNING` clauses; MySQL ignores it (logged warn-once). + +`transactionExecute` **rejects** bare `BEGIN`, `COMMIT`, `ROLLBACK`, +`END`, `SAVEPOINT`, `RELEASE`, and `SET TRANSACTION ...` SQL with +`INVALID_PARAM` (`reason` points at `commitTransaction` / +`rollbackTransaction`). Finalization is a first-class handler, not a +side-channel — this makes the lifecycle observable: every commit and +every rollback shows up as its own engine call + log event. + +Concurrent calls against the same `transaction_id` serialize on the +per-conn mutex. The worker doesn't pipeline statements within one +transaction. + +# `iii-database::commitTransaction` / `iii-database::rollbackTransaction` + +```json +{ "transaction_id": "550e8400-..." } +``` + +Returns `{ "committed": true }` or `{ "rolled_back": true }` on success. +The id is removed atomically before the connection is locked, so a +racing `transactionQuery` / `transactionExecute` either landed before +finalization (succeeded inside the same transaction) or after +(returns `TRANSACTION_NOT_FOUND`). + +If `commitTransaction` fails (e.g. serialization conflict in Postgres), +the worker issues a best-effort `ROLLBACK` before returning the error — +the pool's recycler doesn't run rollback for us, so without this the +next caller on the recycled connection would see "current transaction is +aborted, commands ignored". + +# Errors + +| Code | Surface | Meaning | +|---|---|---| +| `UNKNOWN_DB` | `beginTransaction` | `db` not in your `databases:` config. | +| `INVALID_PARAM` | `beginTransaction` | Unknown `isolation`. | +| `INVALID_PARAM` | `transactionExecute` | Transaction-control SQL — use the dedicated finalizer handler. | +| `TRANSACTION_NOT_FOUND` | any of `transactionQuery` / `transactionExecute` / `commitTransaction` / `rollbackTransaction` | Id unknown, already finalized, or timed out. | +| `POOL_TIMEOUT` | `beginTransaction` | Pool was busy when `beginTransaction` tried to acquire a connection. Bump `pool.max` or shorten the longest-running transaction. | +| `DRIVER_ERROR` | any of the above | Wraps the underlying driver error with `inner_code`; same shape as elsewhere. | + +# Observability + +Every call lands on its own engine-managed OTel span. The worker emits +structured log events via `iii_sdk::Logger` that attach to the active +span, including `db.system`, `db.name`, `db.transaction.id`, +`db.operation`, and `db.statement` (no params — those can carry PII). + +Key event names you can grep for in your trace backend: + +- `db_tx_started` — info on `beginTransaction` success. +- `db_tx_statement` — debug per `transactionQuery` / `transactionExecute`. +- `db_tx_statement_failed` — warn on driver-level failure inside a tx. +- `db_tx_unknown` — warn when an id is missing/finalized/timed-out. +- `db_tx_committed` / `db_tx_rolled_back` — info on explicit finalization (carries `duration_ms`). +- `db_tx_commit_failed` — error on COMMIT failure (also indicates whether the follow-up rollback succeeded). +- `db_tx_timed_out` — warn from the background watcher when the deadline auto-rolls back. +- `db_tx_timeout_rollback_failed` — error when the timeout-driven ROLLBACK itself fails (rare). + +# Worked example + +Transfer money between two accounts with a read-then-write check that +must see the updated balance before deciding whether to credit: + +```ts +const { transaction } = await iii.trigger({ + function_id: 'iii-database::beginTransaction', + payload: { + db: 'primary', + isolation: 'serializable', + timeout_ms: 5000, + }, +}) + +try { + const debit = await iii.trigger({ + function_id: 'iii-database::transactionExecute', + payload: { + transaction_id: transaction.id, + sql: 'UPDATE accounts SET balance = balance - ? WHERE id = ? AND balance >= ?', + params: [10, 1, 10], + }, + }) + if (debit.affected_rows !== 1) { + await iii.trigger({ + function_id: 'iii-database::rollbackTransaction', + payload: { transaction_id: transaction.id }, + }) + throw new Error('insufficient funds') + } + await iii.trigger({ + function_id: 'iii-database::transactionExecute', + payload: { + transaction_id: transaction.id, + sql: 'UPDATE accounts SET balance = balance + ? WHERE id = ?', + params: [10, 2], + }, + }) + await iii.trigger({ + function_id: 'iii-database::commitTransaction', + payload: { transaction_id: transaction.id }, + }) +} catch (e) { + // Best-effort rollback; the worker is happy to see TRANSACTION_NOT_FOUND + // if a prior step already rolled it back or the deadline elapsed. + try { + await iii.trigger({ + function_id: 'iii-database::rollbackTransaction', + payload: { transaction_id: transaction.id }, + }) + } catch {/* ignore */} + throw e +} +``` + +# Related + +- [`iii-database::transaction`](iii://iii-database/transaction) — atomic + batch when every statement is known up-front; skip the round-trip + overhead of the interactive surface. +- [`iii-database::query`](iii://iii-database/query) / + [`iii-database::execute`](iii://iii-database/execute) — one-off + read/write outside any transaction. +- [`iii-database::prepareStatement` + + `runStatement`](iii://iii-database/prepared-statements) — pin a + connection for repeated parameterized calls **without** transactional + semantics. Useful for read-heavy workloads where the prepared plan is + the bottleneck. diff --git a/iii-database/skills/iii-database/prepared-statements.md b/iii-database/skills/iii-database/prepared-statements.md new file mode 100644 index 00000000..93c2bfbb --- /dev/null +++ b/iii-database/skills/iii-database/prepared-statements.md @@ -0,0 +1,168 @@ +--- +type: how-to +functions: [iii-database::prepareStatement, iii-database::runStatement] +title: Prepare a SQL statement once, run it many times against a pinned connection +--- + +# When to use + +Use the `prepareStatement` + `runStatement` pair when a single SQL string +will run repeatedly with different bind values, or when you need +session-scoped state (temp tables, `SET LOCAL`, transaction snapshots) to +persist across several calls. `prepareStatement` parses + plans the SQL +once and **pins a pool connection under a UUID handle**; `runStatement` +re-executes that handle with new params on the same connection. + +| Question | Use this | +|------------------------------------------------------------------|-----------------------------------------------------------| +| Will this exact SQL run more than a handful of times? | `iii-database::prepareStatement` first, then re-use. | +| Do I have a UUID handle already? | `iii-database::runStatement` with new `params`. | +| Need session-scoped state (e.g. a Postgres advisory lock) across calls? | `iii-database::prepareStatement` pins one connection. | +| Just running this SQL once? | [`iii-database::query`](iii://iii-database/query) — no handle, no pool pinning. | + +**A live handle pins one pool connection until its TTL expires.** The +default TTL is 1 hour and the cap is 24 hours; while a handle exists, +`pool.max - 1` connections are available for everyone else. Hold only as +many handles as you need concurrently, and let them expire (or simply +stop calling them) when the workload ends — there is no `release` or +`close` function. + +Use [`iii-database::query`](iii://iii-database/query) instead when the +SQL runs only once. Use +[`iii-database::transaction`](iii://iii-database/transaction) instead +when several statements need atomic commit/rollback semantics — handles +are not transactions; commit boundaries are defined by the SQL you run +through them. + +# `iii-database::prepareStatement` + +## Inputs + +```json +{ + "db": "primary", // required; key from your `databases:` config + "sql": "SELECT id, email FROM users WHERE id > ?", // required; non-empty after trim + "ttl_seconds": 3600 // optional; default 3600, capped at 86400 +} +``` + +`db` and `sql` are required. Empty/whitespace-only `sql` is rejected +with `DRIVER_ERROR` carrying `message: "empty SQL"`. The guard runs +**before** the worker acquires a pool connection, so a typo cannot +silently leak a pinned conn. + +`ttl_seconds` values above 86400 (24 h) are silently clamped down — no +error is returned. A background evictor sweeps expired entries every +30 s; a `runStatement` against an entry whose TTL elapsed between +evictor sweeps fails fast with `STATEMENT_NOT_FOUND` and removes the +entry as a side effect. + +## Outputs + +```json +{ + "handle": { + "id": "550e8400-e29b-41d4-a716-446655440000", // RFC 4122 v4 UUID + "expires_at": "2026-05-19T18:30:00Z" // RFC 3339 UTC; now + ttl_seconds (clamped) + } +} +``` + +- `handle.id` is the opaque UUID `runStatement` accepts. Stable until + expiry; treat it as a session token. +- `handle.expires_at` is when the worker will stop honouring the + handle. After that point `runStatement` returns `STATEMENT_NOT_FOUND`. + +# `iii-database::runStatement` + +## Inputs + +```json +{ + "handle_id": "550e8400-e29b-41d4-a716-446655440000", // required; UUID returned by prepareStatement + "params": [42] // optional; positionally bound at run time +} +``` + +`handle_id` is required and must reference a live handle. Unknown or +expired ids return `STATEMENT_NOT_FOUND`; the response carries the +`handle_id` echo so callers can correlate failures. + +`params` follows the same JSON-to-driver coercion as +[`iii-database::query`](iii://iii-database/query). The placeholder +syntax matches the SQL given to `prepareStatement` (`?` for +sqlite/mysql, `$1`/`$2`/... for postgres). + +There is **no `timeout_ms`** input on `runStatement`. The handle pins a +pool connection for its full TTL, and per-call timeouts would not +short-circuit the underlying network round-trip on Postgres or MySQL — +configure the per-call ceiling via the connection's session lifetime +(e.g. `statement_timeout` in `postgresql.conf`) or shorten +`ttl_seconds` on `prepareStatement`. + +## Outputs + +```json +{ + "rows": [{ "id": 1, "email": "a@x" }], // row-of-objects, same shape as `iii-database::query` + "row_count": 1, + "columns": [ + { "name": "id", "type_name": "INTEGER" }, + { "name": "email", "type_name": "TEXT" } + ] +} +``` + +The envelope is bit-for-bit identical to +[`iii-database::query`](iii://iii-database/query) — same row coercion +rules, same `columns[i]` metadata, same empty-result handling. Callers +can share one parser for both surfaces. + +`runStatement` does not surface write counts or `last_insert_id`. To +re-run an INSERT/UPDATE/DELETE many times and read those fields, use +[`iii-database::execute`](iii://iii-database/execute) per call — write +statements are typically cheap to re-parse and the prepared path saves +less than the pool-pinning cost. + +# Worked example + +Prepare a paginated SELECT once, then advance the cursor twice. Same +flow on every driver; the SQL changes its placeholder syntax. + +SQLite or MySQL — prepare: + +```json +{ + "db": "primary", + "sql": "SELECT id, body FROM outbox WHERE id > ? ORDER BY id LIMIT 50", + "ttl_seconds": 3600 +} +``` + +Returns +`{ "handle": { "id": "550e8400-...", "expires_at": "..." } }`. + +Run with cursor `0` (first page): + +```json +{ "handle_id": "550e8400-...", "params": [0] } +``` + +Run again with cursor `50` (second page) on the same handle: + +```json +{ "handle_id": "550e8400-...", "params": [50] } +``` + +If a `runStatement` returns `STATEMENT_NOT_FOUND` mid-loop, the handle +expired (or the worker process restarted): re-prepare and continue from +the last successfully-read cursor. Do **not** retry the same +`handle_id`. + +# Related + +- `iii-database::query` — drop the handle altogether for one-shot reads; same response envelope. +- `iii-database::execute` — for repeated writes; pair its own `last_insert_id` with `affected_rows` per call. +- `iii-database::transaction` — group writes into an atomic batch instead of holding a pinned connection across many calls. +- Error code `STATEMENT_NOT_FOUND` — re-prepare and retry with the new `handle.id`; the old one is gone. +- Error code `POOL_TIMEOUT` — too many live handles can starve the pool. Bump `pool.max` in your `databases:` config or shorten `ttl_seconds`. diff --git a/iii-database/skills/iii-database/query.md b/iii-database/skills/iii-database/query.md new file mode 100644 index 00000000..8147699f --- /dev/null +++ b/iii-database/skills/iii-database/query.md @@ -0,0 +1,126 @@ +--- +type: how-to +function_id: iii-database::query +title: Run a read-only SQL query and return rows +--- + +# When to use + +Call `iii-database::query` for any read-side SQL — `SELECT`, `WITH`, +`PRAGMA`, `EXPLAIN`, anything that produces a result set you want +materialized as JSON. The response carries the rows as objects keyed by +column name plus a `columns` array with per-column type metadata, so +callers don't need to issue a follow-up describe call. + +Reach for it when: + +- You need a row-of-objects shape for a UI table or a JSON-returning + function. Each row is `{column_name: value}`, not a positional array. +- You only run the SQL once or a small handful of times. Bind parameters + inline via `params` rather than reaching for prepared statements. +- You want column-name + driver-type metadata alongside the rows + (`columns[i].name` and `columns[i].type_name`). + +Use [`iii-database::execute`](iii://iii-database/execute) instead when the +statement writes (INSERT/UPDATE/DELETE/DDL) — `query` only returns rows; +write counts and `last_insert_id` come from `execute`. + +Use [`iii-database::prepareStatement` + `runStatement`](iii://iii-database/prepared-statements) +instead when you'll re-run the same SQL many times in a hot loop — the +prepared path skips the per-call parse/plan cost and pins a pool +connection so isolation primitives like temp tables stay alive across +calls. + +# Inputs + +```json +{ + "db": "primary", // required; key from your `databases:` config + "sql": "SELECT id, email FROM users WHERE id > ?", // required; non-empty after trim + "params": [42], // optional; bound positionally (`?` for sqlite/mysql, `$1`/`$2`/... for postgres) + "timeout_ms": 30000 // optional; per-call cap, default 30000 +} +``` + +`db` and `sql` are required. Empty/whitespace-only `sql` is rejected at +the handler boundary with a `DRIVER_ERROR` carrying `message: "empty SQL"` +— this is uniform across all three drivers (Postgres' `tokio-postgres` +treats empty SQL as a successful no-op, sqlite/mysql parse-error it; the +worker normalizes the contract). + +`params` accepts JSON primitives (`null`, `bool`, integer, float, +string), arrays, and objects — arrays/objects bind as the driver's JSON +type. A number that fits neither `i64` nor `f64` is rejected with +`INVALID_PARAM` carrying the offending index. Placeholder syntax differs +per driver: `?` for sqlite and mysql, `$1`/`$2`/... for postgres. + +`timeout_ms` exceeded yields `QUERY_TIMEOUT` with the `db` and the +configured cap. + +# Outputs + +```json +{ + "rows": [ // row-of-objects, ordered to match the SQL + { "id": 1, "email": "a@x" }, + { "id": 2, "email": "b@x" } + ], + "row_count": 2, // == rows.length; convenience field + "columns": [ // per-column metadata in projection order + { "name": "id", "type_name": "INTEGER" }, + { "name": "email", "type_name": "TEXT" } + ] +} +``` + +- `rows` is always an array of objects keyed by `columns[i].name`. Empty + result sets return `[]`, not `null`. +- `row_count` is exactly `rows.length`; included so callers can branch on + presence without re-measuring. +- `columns[i].type_name` is the driver's native type name (e.g. + `INTEGER`, `TEXT`, `int4`, `varchar`); use it as a hint, not a + contract. +- Cell coercion follows fixed rules: + - Integer columns become JSON numbers when they fit `i64`. + - 64-bit identity columns serialize as JSON **strings** to preserve + precision past `Number.MAX_SAFE_INTEGER` in JS clients. + - `BYTEA` / `BLOB` cells are base64-encoded strings (standard alphabet, + with padding). + - `TIMESTAMP` / `TIMESTAMPTZ` cells are RFC 3339 strings in UTC + (`...Z`), seconds precision. + - `NUMERIC` / `DECIMAL` cells are JSON strings (no precision loss). + - `JSON` / `JSONB` columns pass through as the embedded value. + +# Worked example + +Read every user past a given id, with the cursor bound positionally. + +SQLite or MySQL: + +```json +{ + "db": "primary", + "sql": "SELECT id, email FROM users WHERE id > ? ORDER BY id LIMIT ?", + "params": [42, 100] +} +``` + +Postgres uses numbered placeholders for the same call: + +```json +{ + "db": "primary", + "sql": "SELECT id, email FROM users WHERE id > $1 ORDER BY id LIMIT $2", + "params": [42, 100] +} +``` + +Both return the same envelope; the `columns[i].type_name` strings will +differ (e.g. sqlite `INTEGER` vs postgres `int4`) but `rows` are +shape-compatible. + +# Related + +- `iii-database::execute` — for the write side (INSERT/UPDATE/DELETE/DDL); returns affected-row counts instead of materialized rows. +- `iii-database::prepareStatement` + `iii-database::runStatement` — re-run the same SQL many times without re-parsing; also pins a pool connection so session-scoped state (temp tables, `SET LOCAL`, ...) survives across calls. +- `iii-database::transaction` — group several statements (mixed read/write) into one atomic batch with a single `committed` flag. diff --git a/iii-database/skills/iii-database/transaction.md b/iii-database/skills/iii-database/transaction.md new file mode 100644 index 00000000..8194518c --- /dev/null +++ b/iii-database/skills/iii-database/transaction.md @@ -0,0 +1,165 @@ +--- +type: how-to +function_id: iii-database::transaction +title: Run a sequence of statements atomically with rollback on first failure +--- + +# When to use + +Call `iii-database::transaction` when several SQL statements must commit +or roll back together — the canonical "transfer money between accounts" +shape, plus any multi-step write that would leave the DB in an +inconsistent state if a later statement failed. The worker opens a +transaction on a freshly-acquired pool connection, runs every statement +in sequence, and commits if (and only if) every step returned without +error. + +Reach for it when: + +- You're running two or more writes that share a consistency invariant + (e.g. debit + credit, parent + child rows, denormalized counter + updates). +- You need a stronger isolation level than the engine's default — pass + `isolation: "serializable"` (or `"repeatable_read"` on Postgres/MySQL) + to upgrade just this batch. +- You want a single response that tells you *which* statement failed + when something rolls back (`failed_index`). + +Use [`iii-database::execute`](iii://iii-database/execute) instead for a +single write — `execute` is implicitly its own transaction and skips the +multi-statement framing cost. Use +[`iii-database::prepareStatement` + `runStatement`](iii://iii-database/prepared-statements) +instead when the goal is repeating one parameterized statement many +times rather than committing several different statements as a unit. + +# Inputs + +```json +{ + "db": "primary", // required; key from your `databases:` config + "statements": [ // required; array, run in order + { "sql": "INSERT INTO accounts (id, balance) VALUES (?, ?)", "params": [1, 100] }, + { "sql": "INSERT INTO accounts (id, balance) VALUES (?, ?)", "params": [2, 0] } + ], + "isolation": "serializable" // optional; one of read_committed | repeatable_read | serializable +} +``` + +`db` and `statements` are required; an empty `statements` array +commits a no-op transaction (`committed: true`, `results: []`). Each +statement carries its own `sql` (non-empty, like +[`iii-database::query`](iii://iii-database/query) and `execute`) plus +optional `params` with the same JSON-to-driver coercion rules. + +`isolation` is optional and accepts exactly three values: +`"read_committed"`, `"repeatable_read"`, `"serializable"`. Anything else +(including the empty string) is rejected with `INVALID_PARAM` carrying +`reason: "unknown isolation \`\`"`. Per-driver behaviour: + +- **Postgres**: maps directly to `BEGIN ISOLATION LEVEL ...`. +- **MySQL**: maps directly to `SET TRANSACTION ISOLATION LEVEL ...` + before `BEGIN`. +- **SQLite**: only supports a single isolation level. `serializable` + uses `BEGIN IMMEDIATE`; `read_committed` and `repeatable_read` log a + one-line `tracing::warn!` and fall back to `BEGIN IMMEDIATE`. The + call still succeeds; check your worker logs if you expect strict + isolation semantics on SQLite. + +Omitting `isolation` uses the driver's session default +(`READ COMMITTED` on Postgres + MySQL; serializable on SQLite by +construction). + +# Outputs + +The response shape changes based on `committed`. The two shapes never +overlap; success has no `failed_index`/`error`, failure has no +`results`. + +Success: + +```json +{ + "committed": true, + "results": [ // one entry per input statement, same order + { "affected_rows": 1, "rows": [] }, // statement 0 + { "affected_rows": 1, "rows": [] } // statement 1 + ] +} +``` + +Failure (rollback): + +```json +{ + "committed": false, + "failed_index": 1, // 0-based index of the offending statement; absent for non-step failures + "error": { // structured DbError; same shape returned by `query`/`execute` on driver failure + "code": "DRIVER_ERROR", + "driver": "sqlite", + "inner_code": null, + "message": "constraint failed: NOT NULL on accounts.id", + "failed_index": 1 + } +} +``` + +- `committed` is the truthy/falsy split. The transaction either + committed every statement or rolled back every statement; partial + commit is impossible. +- `results` is present only on success. Each entry mirrors the + `affected_rows` count `iii-database::execute` would have produced for + that statement, plus a positional `rows` array (NOT keyed by column — + this is intentionally lighter than `query`'s row-of-objects shape; + parse with the input order in mind). +- `failed_index` is present only when the failing error carries a + per-statement index — i.e. a `DRIVER_ERROR` thrown by one of the + inputs. **Connection-level failures leave it absent**: `POOL_TIMEOUT` + on acquire, `UNKNOWN_DB`, a `BEGIN` that fails, or any error that + isn't tied to a specific input statement. Treat absence as "the + transaction never started" or "the failure spans the batch", not as + "step 0 failed". +- `error` is the same JSON-tagged `DbError` returned elsewhere on + driver-level failure (`code` is the stable discriminant, `driver` / + `inner_code` / `message` are diagnostic). + +# Worked example + +Two related INSERTs that must land together: + +```json +{ + "db": "primary", + "statements": [ + { "sql": "INSERT INTO accounts (id, balance) VALUES (?, ?)", "params": [1, 100] }, + { "sql": "UPDATE accounts SET balance = balance - ? WHERE id = ?", "params": [10, 1] }, + { "sql": "UPDATE accounts SET balance = balance + ? WHERE id = ?", "params": [10, 2] } + ], + "isolation": "serializable" +} +``` + +Returns `{ "committed": true, "results": [...] }` with `affected_rows` +populated per step. + +If the third UPDATE hits a `NOT NULL` constraint failure, the response +becomes: + +```json +{ + "committed": false, + "failed_index": 2, + "error": { "code": "DRIVER_ERROR", "driver": "sqlite", "message": "...", "failed_index": 2 } +} +``` + +The first two statements rolled back; no row in `accounts` reflects the +debit, no row reflects the credit, and the original INSERT did not +persist. Re-issue the entire transaction call (don't retry only the +failed step) once the underlying constraint condition is fixed. + +# Related + +- [`iii-database::beginTransaction` + `transactionQuery` / `transactionExecute` + `commitTransaction` / `rollbackTransaction`](iii://iii-database/interactive-transaction) — **stateful interactive** transaction with a configurable timeout-driven auto-rollback. Use this surface when you need to take a decision in application code *between* statements (read-your-writes across round-trips). The batch handler on this page requires every statement up-front, so it can't carry that inter-statement logic. +- `iii-database::execute` — single-statement variant; skips the BEGIN/COMMIT framing for one-shot writes. +- `iii-database::query` — read-only; cannot be combined with writes inside this surface but is fine to mix into the `statements` array if you only need its rows for `affected_rows`-equivalent counts. +- `iii-database::prepareStatement` + `iii-database::runStatement` — for repeating one parameterized statement many times; not a substitute for atomic multi-statement commit. diff --git a/iii-database/skills/index.md b/iii-database/skills/index.md new file mode 100644 index 00000000..aceaa6b2 --- /dev/null +++ b/iii-database/skills/index.md @@ -0,0 +1,36 @@ +--- +type: index +title: iii-database +--- + +# iii-database + +Connect to PostgreSQL, MySQL, and SQLite from the iii engine. Run read-only +queries, write statements, atomic transactions, and prepared-statement +sequences over a managed per-database connection pool. Every callable +surface lives under the single `iii-database::*` namespace; SQLite is the +recommended starting point because it needs no server, just a file. + +The worker resolves the driver from each database's URL scheme (`sqlite:`, +`postgres://`, `postgresql://`, `mysql://`). For the `databases:` config +block, TLS modes, error-code reference, and the per-driver compatibility +table (e.g. `returning:` is a no-op on MySQL; SQLite degrades +`read_committed` / `repeatable_read` to `serializable`), see +[the README](../README.md). + +## How-tos + +### `iii-database::*` + +- [`iii-database::query`](iii://iii-database/query) — read-only SQL; returns `{ rows, row_count, columns }` for SELECT-style statements. +- [`iii-database::execute`](iii://iii-database/execute) — write SQL (INSERT/UPDATE/DELETE/DDL); returns `{ affected_rows, last_insert_id, returned_rows }`. +- [`iii-database::prepareStatement` + `iii-database::runStatement`](iii://iii-database/prepared-statements) — prepare-once, run-many parameterized SQL; the prepare step pins a pool connection until TTL expiry, so always run before re-issuing the same statement many times. +- [`iii-database::transaction`](iii://iii-database/transaction) — atomic batch sequence; one call, pass every statement together, rolls back on first failure with a `failed_index` pointer at the offending step. +- [`iii-database::beginTransaction` + `transactionQuery` / `transactionExecute` + `commitTransaction` / `rollbackTransaction`](iii://iii-database/interactive-transaction) — stateful interactive transaction with a configurable timeout-driven auto-rollback. Use this when you need read-your-writes across several round-trips inside a single transaction. + +`iii-database::row-change` (Postgres logical replication via `pgoutput`) is +registered as a trigger type but is **not yet functional in v1.0.0**: +`register_trigger` returns `UNSUPPORTED` while the streaming decode loop +waits on an upstream `tokio-postgres` replication API release. Operators +can pre-provision slots and publications now; see the **Triggers** section +of [the README](../README.md) for current status and cleanup commands. diff --git a/iii-database/src/config.rs b/iii-database/src/config.rs index b9e2da54..3dab0a0c 100644 --- a/iii-database/src/config.rs +++ b/iii-database/src/config.rs @@ -39,15 +39,45 @@ pub struct DatabaseConfig { /// (matching libpq's `sslmode=require` semantics). Use `mode: verify-full` /// to additionally verify the certificate hostname matches the URL host, /// and `mode: disable` to opt out of TLS entirely (local-dev only). -#[derive(Debug, Clone, Default, Deserialize)] +#[derive(Debug, Clone, Deserialize)] pub struct TlsConfig { #[serde(default)] pub mode: TlsMode, /// Optional path to a PEM file containing one or more CA certificates. - /// When set, the system trust store is **replaced** by these certs - /// (not extended). Use this for self-hosted databases with a private CA. + /// Additive by default — these certs **extend** the system trust store + /// rather than replace it. Set `trust_native: false` for strict-isolation + /// deployments that must only trust the operator-supplied bundle. #[serde(default)] pub ca_cert: Option, + /// When true (default), the system/native trust store is loaded in + /// addition to any `ca_cert` bundle. Set to `false` to trust only the + /// `ca_cert` certificates — useful when an operator wants to pin trust + /// to a private CA and explicitly *not* accept the public web PKI. + /// + /// Effective for postgres. MySQL is forced-additive: `mysql_async`'s + /// rustls path always loads the Mozilla `webpki_roots` bundle and + /// extends it with `ca_cert` — there is no upstream knob to suppress + /// the bundled roots, so `trust_native: false` only affects postgres. + /// + /// Note: with both `trust_native: false` *and* `ca_cert: None` on + /// postgres, no trust roots are available; pool construction fails + /// with `CONFIG_ERROR`. + #[serde(default = "default_trust_native")] + pub trust_native: bool, +} + +impl Default for TlsConfig { + fn default() -> Self { + Self { + mode: TlsMode::default(), + ca_cert: None, + trust_native: default_trust_native(), + } + } +} + +fn default_trust_native() -> bool { + true } #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize)] diff --git a/iii-database/src/cursor.rs b/iii-database/src/cursor.rs deleted file mode 100644 index a711d72d..00000000 --- a/iii-database/src/cursor.rs +++ /dev/null @@ -1,217 +0,0 @@ -//! __iii_cursors table CRUD. -//! -//! This table tracks the last successfully-acked cursor for each query-poll -//! trigger. It is created on first poll inside the watched database. - -use crate::driver::{self}; -use crate::error::DbError; -use crate::pool::Pool; -use crate::value::JsonParam; - -/// Default table name; overridable per-trigger via `cursor_table`. -pub const DEFAULT_CURSOR_TABLE: &str = "__iii_cursors"; - -/// Per-driver quoted form of the `cursor` column. `cursor` is a reserved -/// word in MySQL 8 (and standard SQL/PSM); without quoting, the table DDL -/// fails with ERROR 1064 syntax error. Postgres accepts unquoted but we -/// quote defensively for parity. SQLite needs no quoting but tolerates it. -fn cursor_col(pool: &Pool) -> &'static str { - match pool { - Pool::Postgres(_) => "\"cursor\"", - Pool::Mysql(_) => "`cursor`", - Pool::Sqlite(_) => "cursor", - } -} - -/// Issue `CREATE TABLE IF NOT EXISTS` for the cursor table. Called on every -/// poll tick. Intentionally NOT cached: a process-wide cache silently lies -/// when the table is dropped externally (e.g. test harness SCHEMA_RESET, -/// operator running `DROP TABLE __iii_cursors` for any reason), causing -/// every subsequent tick on the same worker process to silently fail -/// because read_cursor hits the missing table while the cache says -/// "ensured". `CREATE TABLE IF NOT EXISTS` against an existing table is a -/// system-catalog check on every driver — measured at sub-millisecond on -/// Postgres/MySQL/SQLite. At the default 1s polling interval that overhead -/// is invisible; the cache traded too much correctness for too little -/// throughput. -pub async fn ensure_table(pool: &Pool, table: &str) -> Result<(), DbError> { - let cursor = cursor_col(pool); - let sql = match pool { - Pool::Postgres(_) => format!( - "CREATE TABLE IF NOT EXISTS {table} (\ - trigger_id TEXT PRIMARY KEY, \ - {cursor} TEXT, \ - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW())" - ), - Pool::Mysql(_) => format!( - "CREATE TABLE IF NOT EXISTS {table} (\ - trigger_id VARCHAR(191) PRIMARY KEY, \ - {cursor} TEXT, \ - updated_at DATETIME(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6))" - ), - Pool::Sqlite(_) => format!( - "CREATE TABLE IF NOT EXISTS {table} (\ - trigger_id TEXT PRIMARY KEY, \ - {cursor} TEXT, \ - updated_at TEXT NOT NULL)" - ), - }; - match pool { - Pool::Postgres(p) => driver::postgres::execute(p, &sql, &[], &[]) - .await - .map(|_| ()), - Pool::Mysql(p) => driver::mysql::execute(p, &sql, &[], &[]).await.map(|_| ()), - Pool::Sqlite(p) => driver::sqlite::execute(p, &sql, &[], &[]).await.map(|_| ()), - } -} - -pub async fn read_cursor( - pool: &Pool, - table: &str, - trigger_id: &str, -) -> Result, DbError> { - let placeholder = match pool { - Pool::Postgres(_) => "$1", - _ => "?", - }; - let cursor = cursor_col(pool); - let sql = format!("SELECT {cursor} FROM {table} WHERE trigger_id = {placeholder} LIMIT 1"); - let result = match pool { - Pool::Postgres(p) => { - driver::postgres::query(p, &sql, &[JsonParam::Text(trigger_id.into())], 30_000).await? - } - Pool::Mysql(p) => { - driver::mysql::query(p, &sql, &[JsonParam::Text(trigger_id.into())], 30_000).await? - } - Pool::Sqlite(p) => { - driver::sqlite::query(p, &sql, &[JsonParam::Text(trigger_id.into())], 30_000).await? - } - }; - Ok(result - .rows - .first() - .and_then(|r| r.0.first().map(|v| v.to_json())) - .and_then(|v| v.as_str().map(|s| s.to_string()))) -} - -pub async fn write_cursor( - pool: &Pool, - table: &str, - trigger_id: &str, - cursor: &str, -) -> Result<(), DbError> { - // `col` rather than `cursor` here: the function parameter is also named - // `cursor` (the cursor value), so we must avoid shadowing it — otherwise - // the bind below sends the column-name string instead of the value. - let col = cursor_col(pool); - let sql = match pool { - Pool::Postgres(_) => format!( - "INSERT INTO {table} (trigger_id, {col}, updated_at) VALUES ($1, $2, NOW()) \ - ON CONFLICT (trigger_id) DO UPDATE SET {col} = EXCLUDED.{col}, updated_at = NOW()" - ), - Pool::Mysql(_) => format!( - "INSERT INTO {table} (trigger_id, {col}, updated_at) VALUES (?, ?, CURRENT_TIMESTAMP(6)) \ - ON DUPLICATE KEY UPDATE {col} = VALUES({col}), updated_at = CURRENT_TIMESTAMP(6)" - ), - Pool::Sqlite(_) => format!( - "INSERT INTO {table} (trigger_id, {col}, updated_at) VALUES (?, ?, datetime('now')) \ - ON CONFLICT(trigger_id) DO UPDATE SET {col} = excluded.{col}, updated_at = datetime('now')" - ), - }; - let params = vec![ - JsonParam::Text(trigger_id.into()), - JsonParam::Text(cursor.into()), - ]; - match pool { - Pool::Postgres(p) => driver::postgres::execute(p, &sql, ¶ms, &[]) - .await - .map(|_| ()), - Pool::Mysql(p) => driver::mysql::execute(p, &sql, ¶ms, &[]) - .await - .map(|_| ()), - Pool::Sqlite(p) => driver::sqlite::execute(p, &sql, ¶ms, &[]) - .await - .map(|_| ()), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::config::PoolConfig; - use crate::pool::SqlitePool; - - fn pool() -> SqlitePool { - SqlitePool::new("sqlite::memory:", &PoolConfig::default()).unwrap() - } - - #[tokio::test(flavor = "multi_thread")] - async fn ensure_table_creates_in_sqlite() { - let p = Pool::Sqlite(pool()); - ensure_table(&p, DEFAULT_CURSOR_TABLE).await.unwrap(); - // running again is idempotent - ensure_table(&p, DEFAULT_CURSOR_TABLE).await.unwrap(); - } - - #[tokio::test(flavor = "multi_thread")] - async fn read_returns_none_for_unknown_trigger() { - let p = Pool::Sqlite(pool()); - ensure_table(&p, DEFAULT_CURSOR_TABLE).await.unwrap(); - let v = read_cursor(&p, DEFAULT_CURSOR_TABLE, "trig-1") - .await - .unwrap(); - assert!(v.is_none()); - } - - #[tokio::test(flavor = "multi_thread")] - async fn ensure_table_survives_external_drop() { - // Regression: a process-wide ensure_table cache previously made the - // second call a no-op, but if the table was dropped externally - // between calls (test harness SCHEMA_RESET, operator action, ...) - // every subsequent poll silently failed. Removing the cache makes - // ensure_table idempotent under external drops. - let p = Pool::Sqlite(pool()); - let table = "test_drop_resilience_xj"; - ensure_table(&p, table).await.unwrap(); - // External drop simulation. - match &p { - Pool::Sqlite(sp) => { - crate::driver::sqlite::execute( - sp, - &format!("DROP TABLE IF EXISTS {table}"), - &[], - &[], - ) - .await - .unwrap(); - } - _ => unreachable!(), - } - // Second call must re-create — not silently believe it's already done. - ensure_table(&p, table).await.unwrap(); - // Verify the table exists by writing+reading a cursor row. - write_cursor(&p, table, "trig-survive", "1").await.unwrap(); - let v = read_cursor(&p, table, "trig-survive").await.unwrap(); - assert_eq!(v.as_deref(), Some("1")); - } - - #[tokio::test(flavor = "multi_thread")] - async fn write_then_read_round_trips() { - let p = Pool::Sqlite(pool()); - ensure_table(&p, DEFAULT_CURSOR_TABLE).await.unwrap(); - write_cursor(&p, DEFAULT_CURSOR_TABLE, "trig-1", "42") - .await - .unwrap(); - let v = read_cursor(&p, DEFAULT_CURSOR_TABLE, "trig-1") - .await - .unwrap(); - assert_eq!(v.as_deref(), Some("42")); - write_cursor(&p, DEFAULT_CURSOR_TABLE, "trig-1", "100") - .await - .unwrap(); - let v = read_cursor(&p, DEFAULT_CURSOR_TABLE, "trig-1") - .await - .unwrap(); - assert_eq!(v.as_deref(), Some("100")); - } -} diff --git a/iii-database/src/driver/mysql.rs b/iii-database/src/driver/mysql.rs index 39771c77..6aa7f422 100644 --- a/iii-database/src/driver/mysql.rs +++ b/iii-database/src/driver/mysql.rs @@ -244,6 +244,68 @@ fn step_err(idx: usize, e: mysql_async::Error) -> DbError { } } +/// Issue `SET TRANSACTION ISOLATION LEVEL ...; START TRANSACTION` on a +/// pinned connection. Used by `beginTransaction`. MySQL needs the isolation +/// statement before `START TRANSACTION` (per-server-default otherwise). +pub async fn tx_begin( + conn: &mut crate::pool::mysql::MysqlConn, + isolation: Option, +) -> Result<(), DbError> { + let iso_sql = match isolation { + Some(Isolation::ReadCommitted) => "SET TRANSACTION ISOLATION LEVEL READ COMMITTED", + Some(Isolation::RepeatableRead) => "SET TRANSACTION ISOLATION LEVEL REPEATABLE READ", + Some(Isolation::Serializable) => "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE", + None => "", + }; + if !iso_sql.is_empty() { + conn.query_drop(iso_sql).await.map_err(map_err)?; + } + conn.query_drop("START TRANSACTION").await.map_err(map_err) +} + +/// `COMMIT` the in-progress transaction on a pinned connection. +pub async fn tx_commit(conn: &mut crate::pool::mysql::MysqlConn) -> Result<(), DbError> { + conn.query_drop("COMMIT").await.map_err(map_err) +} + +/// `ROLLBACK` the in-progress transaction on a pinned connection. Callers +/// that want best-effort rollback should `let _ =` the result. +pub async fn tx_rollback(conn: &mut crate::pool::mysql::MysqlConn) -> Result<(), DbError> { + conn.query_drop("ROLLBACK").await.map_err(map_err) +} + +/// Run an INSERT/UPDATE/DELETE on a pinned connection inside an active +/// transaction. Mirrors `execute()` but without pool acquire. MySQL has +/// no `RETURNING`, so the `returning` array is logged-and-ignored as +/// usual. Used by `transactionExecute`. +pub async fn tx_execute( + conn: &mut crate::pool::mysql::MysqlConn, + sql: &str, + params: &[JsonParam], + returning: &[String], +) -> Result { + if !returning.is_empty() { + tracing::warn!( + driver = "mysql", + "RETURNING not supported on MySQL; ignoring `returning` array" + ); + } + let bound = bind_params(params); + conn.exec_drop(sql, bound).await.map_err(map_err)?; + let affected = conn.affected_rows(); + let last_insert_id = if is_insert(sql) { + conn.last_insert_id().map(|i| i.to_string()) + } else { + None + }; + Ok(ExecuteResult { + affected_rows: affected, + last_insert_id, + returned_rows: vec![], + returned_columns: vec![], + }) +} + pub async fn run_prepared( conn: &mut crate::pool::mysql::MysqlConn, sql: &str, @@ -281,7 +343,7 @@ mod tests { async fn pool() -> Option { let tls = crate::config::TlsConfig { mode: crate::config::TlsMode::Disable, - ca_cert: None, + ..Default::default() }; Some(MysqlPool::new(&url()?, &PoolConfig::default(), &tls).unwrap()) } diff --git a/iii-database/src/driver/postgres.rs b/iii-database/src/driver/postgres.rs index 92f27919..07d46045 100644 --- a/iii-database/src/driver/postgres.rs +++ b/iii-database/src/driver/postgres.rs @@ -421,6 +421,107 @@ fn step_err(idx: usize, e: tokio_postgres::Error) -> DbError { } } +/// Issue `BEGIN [ISOLATION LEVEL ...]` on a pinned client. Used by +/// `beginTransaction` to open an interactive transaction on a connection +/// pulled out of the pool. +pub async fn tx_begin( + client: &mut crate::pool::postgres::PgClient, + isolation: Option, +) -> Result<(), DbError> { + let begin_sql = match isolation { + Some(Isolation::ReadCommitted) => "BEGIN ISOLATION LEVEL READ COMMITTED", + Some(Isolation::RepeatableRead) => "BEGIN ISOLATION LEVEL REPEATABLE READ", + Some(Isolation::Serializable) => "BEGIN ISOLATION LEVEL SERIALIZABLE", + None => "BEGIN", + }; + client.batch_execute(begin_sql).await.map_err(map_err) +} + +/// `COMMIT` the in-progress transaction on a pinned client. +pub async fn tx_commit(client: &mut crate::pool::postgres::PgClient) -> Result<(), DbError> { + client.batch_execute("COMMIT").await.map_err(map_err) +} + +/// `ROLLBACK` the in-progress transaction on a pinned client. Callers that +/// want best-effort rollback (timeout watcher, post-commit-failure cleanup) +/// should `let _ =` the result. +pub async fn tx_rollback(client: &mut crate::pool::postgres::PgClient) -> Result<(), DbError> { + client.batch_execute("ROLLBACK").await.map_err(map_err) +} + +/// Run an INSERT/UPDATE/DELETE (optionally with `RETURNING`) on a pinned +/// client that is currently inside `BEGIN ... COMMIT`. Mirrors `execute()` +/// — but without pool acquire. Used by `transactionExecute`. +pub async fn tx_execute( + client: &mut crate::pool::postgres::PgClient, + sql: &str, + params: &[JsonParam], + _returning: &[String], +) -> Result { + let bound = bind_params(params); + let bound_refs: Vec<&(dyn ToSql + Sync)> = + bound.iter().map(|p| p as &(dyn ToSql + Sync)).collect(); + + let upper = sql.to_ascii_uppercase(); + if upper.contains(" RETURNING ") { + let rows = client + .query(sql, bound_refs.as_slice()) + .await + .map_err(map_err)?; + let columns: Vec = rows + .first() + .map(|r| { + r.columns() + .iter() + .map(|c| ColumnMeta { + name: c.name().to_string(), + ty: c.type_().name().to_string(), + }) + .collect() + }) + .unwrap_or_default(); + + let mut returned: Vec = Vec::with_capacity(rows.len()); + let mut last_insert_id: Option = None; + + for (ri, row) in rows.iter().enumerate() { + let mut cells = Vec::with_capacity(row.columns().len()); + for (i, col) in row.columns().iter().enumerate() { + cells.push(pg_cell_to_row_value(row, i, col.type_())?); + } + if ri == 0 { + if let Some(first) = cells.first() { + last_insert_id = match first { + RowValue::Int(i) => Some(i.to_string()), + RowValue::BigInt(i) => Some(i.to_string()), + RowValue::Text(s) => Some(s.clone()), + _ => None, + }; + } + } + returned.push(Row(cells)); + } + + Ok(ExecuteResult { + affected_rows: returned.len() as u64, + last_insert_id, + returned_rows: returned, + returned_columns: columns, + }) + } else { + let n = client + .execute(sql, bound_refs.as_slice()) + .await + .map_err(map_err)?; + Ok(ExecuteResult { + affected_rows: n, + last_insert_id: None, + returned_rows: vec![], + returned_columns: vec![], + }) + } +} + pub async fn run_prepared( client: &mut crate::pool::postgres::PgClient, sql: &str, @@ -592,7 +693,7 @@ mod tests { let u = url()?; let tls = crate::config::TlsConfig { mode: crate::config::TlsMode::Disable, - ca_cert: None, + ..Default::default() }; Some( PostgresPool::new(&u, &PoolConfig::default(), &tls) diff --git a/iii-database/src/driver/sqlite.rs b/iii-database/src/driver/sqlite.rs index c8ee6b0e..9c32d875 100644 --- a/iii-database/src/driver/sqlite.rs +++ b/iii-database/src/driver/sqlite.rs @@ -386,6 +386,175 @@ fn run_tx_steps( Ok(results) } +/// Issue `BEGIN` on a pinned connection (held in the registry's +/// `PinnedConn::Sqlite(Option<...>)` slot). SQLite-specific isolation +/// downgrade applies: `Serializable` → `BEGIN IMMEDIATE`, others fall back +/// to `BEGIN DEFERRED` with a `tracing::warn!`. Used by `beginTransaction`. +pub async fn tx_begin( + conn_slot: &mut Option, + isolation: Option, +) -> Result<(), DbError> { + let begin_sql = match isolation { + Some(Isolation::Serializable) => "BEGIN IMMEDIATE", + Some(Isolation::ReadCommitted) | Some(Isolation::RepeatableRead) => { + tracing::warn!( + "sqlite ignores requested isolation; using BEGIN DEFERRED (always serializable in practice)" + ); + "BEGIN DEFERRED" + } + None => "BEGIN DEFERRED", + }; + run_simple_on_pinned(conn_slot, begin_sql).await +} + +/// `COMMIT` the in-progress transaction on a pinned connection. +pub async fn tx_commit( + conn_slot: &mut Option, +) -> Result<(), DbError> { + run_simple_on_pinned(conn_slot, "COMMIT").await +} + +/// `ROLLBACK` the in-progress transaction on a pinned connection. Errors +/// from rollback (e.g. SQLite already aborted the txn implicitly) are +/// surfaced; callers that want best-effort rollback (e.g. timeout watcher, +/// post-commit-failure cleanup) should `let _ =` the result. +pub async fn tx_rollback( + conn_slot: &mut Option, +) -> Result<(), DbError> { + run_simple_on_pinned(conn_slot, "ROLLBACK").await +} + +/// Internal helper: run a parameterless control-plane SQL string (`BEGIN`, +/// `COMMIT`, `ROLLBACK`) on the pinned SQLite connection. Mirrors the +/// take/replace dance from `run_prepared` so rusqlite's blocking call can +/// be wrapped in `spawn_blocking`. +async fn run_simple_on_pinned( + conn_slot: &mut Option, + sql: &'static str, +) -> Result<(), DbError> { + let owned = conn_slot.take().ok_or_else(|| DbError::DriverError { + driver: "sqlite".into(), + code: None, + message: "pinned connection already taken (concurrent tx op?)".into(), + failed_index: None, + })?; + let (result, returned) = tokio::task::spawn_blocking( + move || -> (Result<(), DbError>, crate::pool::sqlite::SqliteConn) { + let mut owned = owned; + let result = owned.with_mut(|c| c.execute_batch(sql).map_err(map_err)); + (result, owned) + }, + ) + .await + .map_err(|e| DbError::DriverError { + driver: "sqlite".into(), + code: None, + message: format!("spawn_blocking join: {e}"), + failed_index: None, + })?; + *conn_slot = Some(returned); + result +} + +/// Run an INSERT/UPDATE/DELETE/DDL (optionally with `RETURNING`) against a +/// pinned connection that is currently inside a `BEGIN ... COMMIT` block. +/// Mirrors `execute()`'s semantics — multi-statement guard, `last_insert_id` +/// for INSERT, planner-driven row/no-row routing — but does NOT acquire from +/// the pool. Used by `transactionExecute`. +pub async fn tx_execute( + conn_slot: &mut Option, + sql: &str, + params: &[JsonParam], + returning: &[String], +) -> Result { + if looks_like_multi_statement(sql) { + return Err(DbError::DriverError { + driver: "sqlite".into(), + code: Some("MULTI_STATEMENT".into()), + message: "rusqlite tx_execute() supports only a single statement; \ + use multiple transactionExecute calls" + .into(), + failed_index: None, + }); + } + let owned = conn_slot.take().ok_or_else(|| DbError::DriverError { + driver: "sqlite".into(), + code: None, + message: "pinned connection already taken (concurrent tx op?)".into(), + failed_index: None, + })?; + let sql = sql.to_string(); + let params = params.to_vec(); + let returning = returning.to_vec(); + + let (result, returned) = tokio::task::spawn_blocking( + move || -> (Result, crate::pool::sqlite::SqliteConn) { + let mut owned = owned; + let result = owned.with_mut(|c| -> Result { + let bound: Vec = params.iter().map(json_param_to_sql).collect(); + let bound_refs: Vec<&dyn rusqlite::ToSql> = + bound.iter().map(|v| v as &dyn rusqlite::ToSql).collect(); + + let (affected_rows, returned_rows, returned_columns) = { + let mut stmt = c.prepare(&sql).map_err(map_err)?; + if statement_returns_rows(&stmt, &returning) { + let columns: Vec = stmt + .columns() + .into_iter() + .map(|col| ColumnMeta { + name: col.name().to_string(), + ty: col.decl_type().unwrap_or("").to_string(), + }) + .collect(); + let n = columns.len(); + let mut returned: Vec = Vec::new(); + let mut rows = stmt.query(bound_refs.as_slice()).map_err(map_err)?; + while let Some(row) = rows.next().map_err(map_err)? { + let mut vals = Vec::with_capacity(n); + for i in 0..n { + vals.push(row_value_at(row, i)?); + } + returned.push(Row(vals)); + } + (returned.len() as u64, returned, columns) + } else { + let affected = stmt.execute(bound_refs.as_slice()).map_err(map_err)?; + (affected as u64, vec![], vec![]) + } + }; + + let last_insert_id = if is_insert(&sql) { + let r = c.last_insert_rowid(); + if r != 0 { + Some(r.to_string()) + } else { + None + } + } else { + None + }; + Ok(ExecuteResult { + affected_rows, + last_insert_id, + returned_rows, + returned_columns, + }) + }); + (result, owned) + }, + ) + .await + .map_err(|e| DbError::DriverError { + driver: "sqlite".into(), + code: None, + message: format!("spawn_blocking join: {e}"), + failed_index: None, + })?; + + *conn_slot = Some(returned); + result +} + /// Run an arbitrary SELECT/RETURNING-bearing statement against a pinned /// connection held in an Option slot (the registry's `PinnedConn::Sqlite` /// variant). The slot is `.take()`-en to move the connection into diff --git a/iii-database/src/error.rs b/iii-database/src/error.rs index f6f5eea3..1e03fd43 100644 --- a/iii-database/src/error.rs +++ b/iii-database/src/error.rs @@ -21,6 +21,10 @@ pub enum DbError { #[error("statement handle {handle_id} not found or expired")] StatementNotFound { handle_id: String }, + #[serde(rename = "TRANSACTION_NOT_FOUND")] + #[error("transaction {transaction_id} not found, ended, or timed out")] + TransactionNotFound { transaction_id: String }, + #[serde(rename = "UNKNOWN_DB")] #[error("unknown db {db}")] UnknownDb { db: String }, @@ -88,6 +92,16 @@ mod tests { assert_eq!(v["code"], "UNKNOWN_DB"); } + #[test] + fn transaction_not_found_serializes_with_stable_code() { + let e = DbError::TransactionNotFound { + transaction_id: "tx-123".into(), + }; + let v: serde_json::Value = serde_json::to_value(&e).unwrap(); + assert_eq!(v["code"], "TRANSACTION_NOT_FOUND"); + assert_eq!(v["transaction_id"], "tx-123"); + } + #[test] fn driver_error_carries_driver_name_and_inner() { let e = DbError::DriverError { diff --git a/iii-database/src/handlers/begin_transaction.rs b/iii-database/src/handlers/begin_transaction.rs new file mode 100644 index 00000000..88ef9ef4 --- /dev/null +++ b/iii-database/src/handlers/begin_transaction.rs @@ -0,0 +1,195 @@ +//! `iii-database::beginTransaction` — open an interactive transaction and +//! return a handle. The handle pins one pool connection inside a server- +//! side `BEGIN ... COMMIT/ROLLBACK` until either the matching +//! `commitTransaction` / `rollbackTransaction` call lands, or the +//! configurable per-tx timeout fires (background watcher in +//! `crate::transaction::TxRegistry::spawn_timeout_watcher`). + +use super::AppState; +use crate::driver::{self, Isolation}; +use crate::error::DbError; +use crate::handle::PinnedConn; +use crate::handlers::query::err_to_str; +use crate::pool::Pool; +use crate::transaction::{driver_system, TxHandleResponse}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use std::time::Duration; + +#[derive(Deserialize, JsonSchema)] +pub struct BeginTxReq { + pub db: String, + #[serde(default)] + pub isolation: Option, + #[serde(default)] + pub timeout_ms: Option, +} + +#[derive(Debug, Serialize, JsonSchema)] +pub struct BeginTxResp { + pub transaction: TxHandleResponse, +} + +fn default_timeout_ms() -> u64 { + 30_000 +} + +/// Upper bound on `timeout_ms`. 5 minutes is the longest a single +/// transaction should reasonably hold a pool connection before the +/// timeout-driven rollback fires. Operators who need longer txns should +/// bump this constant rather than work around it with retries. +const MAX_TIMEOUT_MS: u64 = 300_000; + +fn parse_isolation(s: Option<&str>) -> Result, DbError> { + match s { + None => Ok(None), + Some("read_committed") => Ok(Some(Isolation::ReadCommitted)), + Some("repeatable_read") => Ok(Some(Isolation::RepeatableRead)), + Some("serializable") => Ok(Some(Isolation::Serializable)), + Some(other) => Err(DbError::InvalidParam { + index: 0, + reason: format!("unknown isolation `{other}`"), + }), + } +} + +pub async fn handle(state: &AppState, req: BeginTxReq) -> Result { + let pool = state.pool(&req.db).map_err(err_to_str)?; + let driver = pool.driver(); + let isolation = parse_isolation(req.isolation.as_deref()).map_err(err_to_str)?; + let timeout_ms = req + .timeout_ms + .unwrap_or_else(default_timeout_ms) + .min(MAX_TIMEOUT_MS); + let timeout = Duration::from_millis(timeout_ms); + + // Acquire a connection from the pool and issue BEGIN on it. If BEGIN + // fails the pinned conn drops here and returns to the pool — no leaked + // half-open transaction. We log the failure with the SDK logger so it + // shows up in the per-call OTel span. + let mut pinned = match pool { + Pool::Sqlite(p) => PinnedConn::Sqlite(Some(p.acquire().await.map_err(err_to_str)?)), + Pool::Postgres(p) => PinnedConn::Postgres(p.acquire().await.map_err(err_to_str)?), + Pool::Mysql(p) => PinnedConn::Mysql(p.acquire().await.map_err(err_to_str)?), + }; + + let begin_result = match &mut pinned { + PinnedConn::Sqlite(slot) => driver::sqlite::tx_begin(slot, isolation).await, + PinnedConn::Postgres(client) => driver::postgres::tx_begin(client, isolation).await, + PinnedConn::Mysql(conn) => driver::mysql::tx_begin(conn, isolation).await, + }; + if let Err(e) = begin_result { + state.log.warn( + "db_tx_begin_failed", + Some(json!({ + "db.system": driver_system(driver), + "db.name": req.db, + "isolation": req.isolation, + "error": serde_json::to_value(&e).ok(), + })), + ); + return Err(err_to_str(e)); + } + + let handle = state + .transactions + .insert(req.db.clone(), driver, pinned, timeout) + .await; + + state.log.info( + "db_tx_started", + Some(json!({ + "db.system": driver_system(driver), + "db.name": req.db, + "db.operation": "BEGIN", + "db.transaction.id": handle.id, + "isolation": req.isolation, + "timeout_ms": timeout_ms, + })), + ); + + Ok(BeginTxResp { + transaction: handle, + }) +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + use crate::config::PoolConfig; + use crate::handle::HandleRegistry; + use crate::handlers::AppState; + use crate::pool::{Pool, SqlitePool}; + use crate::transaction::TxRegistry; + use iii_sdk::Logger; + use serde_json::{json, Value}; + use std::collections::HashMap; + use std::sync::Arc; + + pub fn state() -> AppState { + let pool = SqlitePool::new("sqlite::memory:", &PoolConfig::default()).unwrap(); + let mut pools = HashMap::new(); + pools.insert("primary".to_string(), Pool::Sqlite(pool)); + AppState { + pools: Arc::new(pools), + handles: Arc::new(HandleRegistry::new()), + transactions: TxRegistry::new(), + log: Logger::new(), + } + } + + fn req(v: Value) -> BeginTxReq { + serde_json::from_value(v).unwrap() + } + + #[tokio::test(flavor = "multi_thread")] + async fn begin_returns_handle_and_records_in_registry() { + let st = state(); + let resp = handle(&st, req(json!({ "db": "primary" }))).await.unwrap(); + assert_eq!(resp.transaction.id.len(), 36); + assert_eq!(st.transactions.len().await, 1); + } + + #[tokio::test(flavor = "multi_thread")] + async fn begin_unknown_db_returns_unknown_db_error() { + let st = state(); + let err = handle(&st, req(json!({ "db": "missing" }))) + .await + .unwrap_err(); + assert!(err.contains("UNKNOWN_DB"), "got: {err}"); + } + + #[tokio::test(flavor = "multi_thread")] + async fn begin_unknown_isolation_returns_invalid_param() { + let st = state(); + let err = handle( + &st, + req(json!({ "db": "primary", "isolation": "bogus_level" })), + ) + .await + .unwrap_err(); + assert!(err.contains("INVALID_PARAM"), "got: {err}"); + assert!(err.contains("bogus_level"), "got: {err}"); + } + + #[tokio::test(flavor = "multi_thread")] + async fn begin_clamps_timeout_to_max() { + // Per-tx timeout caps at MAX_TIMEOUT_MS (5 min) regardless of caller input; + // operators tweak the constant if they need longer. + let st = state(); + let resp = handle( + &st, + req(json!({ "db": "primary", "timeout_ms": 999_999_999_u64 })), + ) + .await + .unwrap(); + let now = chrono::Utc::now(); + // expires_at should be within ~5 min from now, NOT ~11 days. + let diff = (resp.transaction.expires_at - now).num_milliseconds(); + assert!( + (0..=MAX_TIMEOUT_MS as i64 + 5_000).contains(&diff), + "expires_at not clamped to MAX_TIMEOUT_MS; diff={diff}ms" + ); + } +} diff --git a/iii-database/src/handlers/commit_transaction.rs b/iii-database/src/handlers/commit_transaction.rs new file mode 100644 index 00000000..faa09ef5 --- /dev/null +++ b/iii-database/src/handlers/commit_transaction.rs @@ -0,0 +1,206 @@ +//! `iii-database::commitTransaction` — finalize an interactive transaction +//! by issuing `COMMIT`. Removes the entry from the registry before locking +//! the connection so concurrent `transactionQuery` / `transactionExecute` +//! calls against the same id fast-fail with `TRANSACTION_NOT_FOUND` once +//! finalization is in progress. + +use super::AppState; +use crate::driver; +use crate::handle::PinnedConn; +use crate::handlers::query::err_to_str; +use crate::transaction::driver_system; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use serde_json::json; + +#[derive(Deserialize, JsonSchema)] +pub struct CommitTxReq { + pub transaction_id: String, +} + +#[derive(Debug, Serialize, JsonSchema)] +pub struct CommitTxResp { + pub committed: bool, +} + +pub async fn handle(state: &AppState, req: CommitTxReq) -> Result { + let taken = match state.transactions.take(&req.transaction_id).await { + Ok(t) => t, + Err(e) => { + state.log.warn( + "db_tx_unknown", + Some(json!({ + "db.transaction.id": req.transaction_id, + "db.operation": "COMMIT", + })), + ); + return Err(err_to_str(e)); + } + }; + + let started_at = taken.started_at; + let driver = taken.driver; + let db_name = taken.db_name.clone(); + // Lock the conn — queues behind any in-flight transactionQuery/Execute. + // The Arc's strong count goes to 1 here (registry has dropped its ref), + // so when the guard drops at the end of this function the PinnedConn + // drops and the underlying connection returns to its pool. + let mut guard = taken.conn_arc.lock_owned().await; + + let result = match &mut *guard { + PinnedConn::Sqlite(slot) => driver::sqlite::tx_commit(slot).await, + PinnedConn::Postgres(client) => driver::postgres::tx_commit(client).await, + PinnedConn::Mysql(conn) => driver::mysql::tx_commit(conn).await, + }; + + let duration_ms = (chrono::Utc::now() - started_at).num_milliseconds().max(0); + + match result { + Ok(()) => { + state.log.info( + "db_tx_committed", + Some(json!({ + "db.system": driver_system(driver), + "db.name": db_name, + "db.operation": "COMMIT", + "db.transaction.id": req.transaction_id, + "duration_ms": duration_ms, + })), + ); + Ok(CommitTxResp { committed: true }) + } + Err(e) => { + // COMMIT failed — issue best-effort ROLLBACK to leave the + // connection in a known state before the pool's recycler + // (Postgres deadpool uses Fast recycle, which does NOT issue + // ROLLBACK) hands it to the next caller. Without this, the next + // pool consumer can see "current transaction is aborted, + // commands ignored" until the conn is closed. + let rb = match &mut *guard { + PinnedConn::Sqlite(slot) => driver::sqlite::tx_rollback(slot).await, + PinnedConn::Postgres(client) => driver::postgres::tx_rollback(client).await, + PinnedConn::Mysql(conn) => driver::mysql::tx_rollback(conn).await, + }; + state.log.error( + "db_tx_commit_failed", + Some(json!({ + "db.system": driver_system(driver), + "db.name": db_name, + "db.operation": "COMMIT", + "db.transaction.id": req.transaction_id, + "duration_ms": duration_ms, + "rollback_after_commit_failure": rb.is_ok(), + "error": serde_json::to_value(&e).ok(), + })), + ); + Err(err_to_str(e)) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::handlers::begin_transaction::tests::state; + use serde_json::{json, Value}; + + fn req(v: Value) -> CommitTxReq { + serde_json::from_value(v).unwrap() + } + + #[tokio::test(flavor = "multi_thread")] + async fn commit_returns_committed_true_and_removes_from_registry() { + let st = state(); + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let id = begin.transaction.id; + let resp = handle(&st, req(json!({ "transaction_id": id.clone() }))) + .await + .unwrap(); + assert!(resp.committed); + assert_eq!(st.transactions.len().await, 0); + } + + #[tokio::test(flavor = "multi_thread")] + async fn commit_twice_returns_transaction_not_found() { + let st = state(); + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let id = begin.transaction.id; + handle(&st, req(json!({ "transaction_id": id.clone() }))) + .await + .unwrap(); + let err = handle(&st, req(json!({ "transaction_id": id }))) + .await + .unwrap_err(); + assert!(err.contains("TRANSACTION_NOT_FOUND"), "got: {err}"); + } + + #[tokio::test(flavor = "multi_thread")] + async fn writes_committed_via_interactive_tx_are_visible_after_commit() { + // Use an on-disk SQLite so the txn's pinned conn and the post-commit + // verification share a single database file. In-memory SQLite gives + // each connection its own separate database. + let tmp = tempfile::NamedTempFile::new().unwrap(); + let url = format!("sqlite:{}", tmp.path().display()); + let pool = + crate::pool::SqlitePool::new(&url, &crate::config::PoolConfig::default()).unwrap(); + let mut pools = std::collections::HashMap::new(); + pools.insert("primary".to_string(), crate::pool::Pool::Sqlite(pool)); + let st = crate::handlers::AppState { + pools: std::sync::Arc::new(pools), + handles: std::sync::Arc::new(crate::handle::HandleRegistry::new()), + transactions: crate::transaction::TxRegistry::new(), + log: iii_sdk::Logger::new(), + }; + + crate::handlers::execute::handle( + &st, + serde_json::from_value(json!({ "db": "primary", "sql": "CREATE TABLE t (n INT)" })) + .unwrap(), + ) + .await + .unwrap(); + + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let id = begin.transaction.id; + + crate::handlers::transaction_execute::handle( + &st, + serde_json::from_value(json!({ + "transaction_id": id.clone(), + "sql": "INSERT INTO t (n) VALUES (?)", + "params": [42] + })) + .unwrap(), + ) + .await + .unwrap(); + + handle(&st, req(json!({ "transaction_id": id }))) + .await + .unwrap(); + + let q = crate::handlers::query::handle( + &st, + serde_json::from_value(json!({ "db": "primary", "sql": "SELECT n FROM t" })).unwrap(), + ) + .await + .unwrap(); + assert_eq!(q.row_count, 1); + assert_eq!(q.rows[0]["n"], 42); + } +} diff --git a/iii-database/src/handlers/execute.rs b/iii-database/src/handlers/execute.rs index f5633156..6c27f37c 100644 --- a/iii-database/src/handlers/execute.rs +++ b/iii-database/src/handlers/execute.rs @@ -76,6 +76,8 @@ mod tests { AppState { pools: Arc::new(pools), handles: Arc::new(HandleRegistry::new()), + transactions: crate::transaction::TxRegistry::new(), + log: iii_sdk::Logger::new(), } } diff --git a/iii-database/src/handlers/mod.rs b/iii-database/src/handlers/mod.rs index 434ef469..53d9a5d6 100644 --- a/iii-database/src/handlers/mod.rs +++ b/iii-database/src/handlers/mod.rs @@ -5,14 +5,22 @@ use crate::error::DbError; use crate::handle::HandleRegistry; use crate::pool::Pool; +use crate::transaction::TxRegistry; +use iii_sdk::Logger; use std::collections::HashMap; use std::sync::Arc; +pub mod begin_transaction; +pub mod commit_transaction; pub mod execute; pub mod prepare; pub mod query; +pub mod rollback_transaction; pub mod run_statement; pub mod transaction; +pub mod transaction_execute; +pub mod transaction_query; +mod tx_sql_guard; pub(crate) use query::rows_to_objects as query_rows_to_objects; @@ -20,6 +28,8 @@ pub(crate) use query::rows_to_objects as query_rows_to_objects; pub struct AppState { pub pools: Arc>, pub handles: Arc, + pub transactions: TxRegistry, + pub log: Logger, } impl AppState { diff --git a/iii-database/src/handlers/prepare.rs b/iii-database/src/handlers/prepare.rs index 56c83ce3..cae29c6b 100644 --- a/iii-database/src/handlers/prepare.rs +++ b/iii-database/src/handlers/prepare.rs @@ -85,6 +85,8 @@ mod tests { AppState { pools: Arc::new(pools), handles: Arc::new(HandleRegistry::new()), + transactions: crate::transaction::TxRegistry::new(), + log: iii_sdk::Logger::new(), } } diff --git a/iii-database/src/handlers/query.rs b/iii-database/src/handlers/query.rs index 92a473f6..24d1fad9 100644 --- a/iii-database/src/handlers/query.rs +++ b/iii-database/src/handlers/query.rs @@ -112,6 +112,8 @@ mod tests { AppState { pools: Arc::new(pools), handles: Arc::new(HandleRegistry::new()), + transactions: crate::transaction::TxRegistry::new(), + log: iii_sdk::Logger::new(), } } diff --git a/iii-database/src/handlers/rollback_transaction.rs b/iii-database/src/handlers/rollback_transaction.rs new file mode 100644 index 00000000..5d2ab21c --- /dev/null +++ b/iii-database/src/handlers/rollback_transaction.rs @@ -0,0 +1,181 @@ +//! `iii-database::rollbackTransaction` — finalize an interactive transaction +//! by issuing `ROLLBACK`. Like `commitTransaction`, takes the entry out of +//! the registry first so concurrent statement calls fast-fail with +//! `TRANSACTION_NOT_FOUND`. + +use super::AppState; +use crate::driver; +use crate::handle::PinnedConn; +use crate::handlers::query::err_to_str; +use crate::transaction::driver_system; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use serde_json::json; + +#[derive(Deserialize, JsonSchema)] +pub struct RollbackTxReq { + pub transaction_id: String, +} + +#[derive(Debug, Serialize, JsonSchema)] +pub struct RollbackTxResp { + pub rolled_back: bool, +} + +pub async fn handle(state: &AppState, req: RollbackTxReq) -> Result { + let taken = match state.transactions.take(&req.transaction_id).await { + Ok(t) => t, + Err(e) => { + state.log.warn( + "db_tx_unknown", + Some(json!({ + "db.transaction.id": req.transaction_id, + "db.operation": "ROLLBACK", + })), + ); + return Err(err_to_str(e)); + } + }; + + let started_at = taken.started_at; + let driver = taken.driver; + let db_name = taken.db_name.clone(); + let mut guard = taken.conn_arc.lock_owned().await; + + let result = match &mut *guard { + PinnedConn::Sqlite(slot) => driver::sqlite::tx_rollback(slot).await, + PinnedConn::Postgres(client) => driver::postgres::tx_rollback(client).await, + PinnedConn::Mysql(conn) => driver::mysql::tx_rollback(conn).await, + }; + + let duration_ms = (chrono::Utc::now() - started_at).num_milliseconds().max(0); + + match result { + Ok(()) => { + state.log.info( + "db_tx_rolled_back", + Some(json!({ + "db.system": driver_system(driver), + "db.name": db_name, + "db.operation": "ROLLBACK", + "db.transaction.id": req.transaction_id, + "duration_ms": duration_ms, + "reason": "explicit", + })), + ); + Ok(RollbackTxResp { rolled_back: true }) + } + Err(e) => { + state.log.error( + "db_tx_rollback_failed", + Some(json!({ + "db.system": driver_system(driver), + "db.name": db_name, + "db.operation": "ROLLBACK", + "db.transaction.id": req.transaction_id, + "duration_ms": duration_ms, + "error": serde_json::to_value(&e).ok(), + })), + ); + Err(err_to_str(e)) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::handlers::begin_transaction::tests::state; + use serde_json::{json, Value}; + + fn req(v: Value) -> RollbackTxReq { + serde_json::from_value(v).unwrap() + } + + #[tokio::test(flavor = "multi_thread")] + async fn rollback_returns_rolled_back_true_and_removes_from_registry() { + let st = state(); + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let resp = handle(&st, req(json!({ "transaction_id": begin.transaction.id }))) + .await + .unwrap(); + assert!(resp.rolled_back); + assert_eq!(st.transactions.len().await, 0); + } + + #[tokio::test(flavor = "multi_thread")] + async fn rollback_unknown_id_returns_transaction_not_found() { + let st = state(); + let err = handle( + &st, + req(json!({ "transaction_id": "00000000-0000-0000-0000-000000000000" })), + ) + .await + .unwrap_err(); + assert!(err.contains("TRANSACTION_NOT_FOUND"), "got: {err}"); + } + + #[tokio::test(flavor = "multi_thread")] + async fn writes_inside_rolled_back_tx_are_not_visible() { + let tmp = tempfile::NamedTempFile::new().unwrap(); + let url = format!("sqlite:{}", tmp.path().display()); + let pool = + crate::pool::SqlitePool::new(&url, &crate::config::PoolConfig::default()).unwrap(); + let mut pools = std::collections::HashMap::new(); + pools.insert("primary".to_string(), crate::pool::Pool::Sqlite(pool)); + let st = crate::handlers::AppState { + pools: std::sync::Arc::new(pools), + handles: std::sync::Arc::new(crate::handle::HandleRegistry::new()), + transactions: crate::transaction::TxRegistry::new(), + log: iii_sdk::Logger::new(), + }; + + crate::handlers::execute::handle( + &st, + serde_json::from_value(json!({ "db": "primary", "sql": "CREATE TABLE t (n INT)" })) + .unwrap(), + ) + .await + .unwrap(); + + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let id = begin.transaction.id; + + crate::handlers::transaction_execute::handle( + &st, + serde_json::from_value(json!({ + "transaction_id": id.clone(), + "sql": "INSERT INTO t (n) VALUES (?)", + "params": [99] + })) + .unwrap(), + ) + .await + .unwrap(); + + handle(&st, req(json!({ "transaction_id": id }))) + .await + .unwrap(); + + let q = crate::handlers::query::handle( + &st, + serde_json::from_value( + json!({ "db": "primary", "sql": "SELECT COUNT(*) AS c FROM t" }), + ) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(q.rows[0]["c"], 0); + } +} diff --git a/iii-database/src/handlers/run_statement.rs b/iii-database/src/handlers/run_statement.rs index 69066ac8..d79f2508 100644 --- a/iii-database/src/handlers/run_statement.rs +++ b/iii-database/src/handlers/run_statement.rs @@ -59,6 +59,8 @@ mod tests { AppState { pools: Arc::new(pools), handles: Arc::new(HandleRegistry::new()), + transactions: crate::transaction::TxRegistry::new(), + log: iii_sdk::Logger::new(), } } @@ -73,6 +75,8 @@ mod tests { let st = AppState { pools: Arc::new(pools), handles: Arc::new(HandleRegistry::new()), + transactions: crate::transaction::TxRegistry::new(), + log: iii_sdk::Logger::new(), }; (st, tmp) } diff --git a/iii-database/src/handlers/transaction.rs b/iii-database/src/handlers/transaction.rs index 36056ae6..ece56ea0 100644 --- a/iii-database/src/handlers/transaction.rs +++ b/iii-database/src/handlers/transaction.rs @@ -135,6 +135,8 @@ mod tests { AppState { pools: Arc::new(pools), handles: Arc::new(HandleRegistry::new()), + transactions: crate::transaction::TxRegistry::new(), + log: iii_sdk::Logger::new(), } } diff --git a/iii-database/src/handlers/transaction_execute.rs b/iii-database/src/handlers/transaction_execute.rs new file mode 100644 index 00000000..dbd351e3 --- /dev/null +++ b/iii-database/src/handlers/transaction_execute.rs @@ -0,0 +1,318 @@ +//! `iii-database::transactionExecute` — write SQL inside an interactive +//! transaction. Same response envelope as `iii-database::execute`. Bare +//! `BEGIN` / `COMMIT` / `ROLLBACK` / `END` / `SAVEPOINT` / `RELEASE` / +//! `SET TRANSACTION` / `START TRANSACTION` statements are rejected with +//! `INVALID_PARAM` so callers cannot side-channel finalization through SQL — +//! they must use the dedicated `commitTransaction` / `rollbackTransaction` +//! handlers. The detection runs *after* stripping leading SQL comments and +//! `;`, so `/* */COMMIT` and `--\nCOMMIT` are rejected too. See +//! [`super::tx_sql_guard`] for the full coverage and rationale. + +use super::tx_sql_guard::is_transaction_control_sql; +use super::AppState; +use crate::driver; +use crate::error::DbError; +use crate::handle::PinnedConn; +use crate::handlers::query::err_to_str; +use crate::transaction::driver_system; +use crate::value::JsonParam; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; + +#[derive(Deserialize, JsonSchema)] +pub struct TxExecuteReq { + pub transaction_id: String, + pub sql: String, + #[serde(default)] + pub params: Vec, + #[serde(default)] + pub returning: Vec, +} + +#[derive(Debug, Serialize, JsonSchema)] +pub struct TxExecuteResp { + pub affected_rows: u64, + pub last_insert_id: Option, + pub returned_rows: Vec>, +} + +pub async fn handle(state: &AppState, req: TxExecuteReq) -> Result { + if req.sql.trim().is_empty() { + return Err(err_to_str(DbError::DriverError { + driver: "(tx)".into(), + code: None, + message: "empty SQL".into(), + failed_index: None, + })); + } + if is_transaction_control_sql(&req.sql) { + // Reject before lookup so we don't consume the per-tx mutex on a + // request that's just going to bounce. The caller gets a clear + // pointer at the right handler. + state.log.warn( + "db_tx_finalization_via_sql_rejected", + Some(json!({ + "db.transaction.id": req.transaction_id, + "db.statement": req.sql.trim(), + })), + ); + return Err(err_to_str(DbError::InvalidParam { + index: 0, + reason: "transaction-control SQL (BEGIN/START TRANSACTION/COMMIT/ROLLBACK/\ + SAVEPOINT/RELEASE/END/SET TRANSACTION, including comment-prefixed forms) \ + is not allowed in transactionExecute; use commitTransaction or \ + rollbackTransaction" + .into(), + })); + } + let params = JsonParam::from_json_slice(&req.params).map_err(err_to_str)?; + + let lock = match state.transactions.lock(&req.transaction_id).await { + Ok(l) => l, + Err(e) => { + state.log.warn( + "db_tx_unknown", + Some(json!({ + "db.transaction.id": req.transaction_id, + "db.operation": "EXECUTE", + })), + ); + return Err(err_to_str(e)); + } + }; + + let crate::transaction::TxLock { + db_name, + driver, + guard: mut g, + .. + } = lock; + + let result = match &mut *g { + PinnedConn::Sqlite(slot) => { + driver::sqlite::tx_execute(slot, &req.sql, ¶ms, &req.returning).await + } + PinnedConn::Postgres(client) => { + driver::postgres::tx_execute(client, &req.sql, ¶ms, &req.returning).await + } + PinnedConn::Mysql(conn) => { + driver::mysql::tx_execute(conn, &req.sql, ¶ms, &req.returning).await + } + }; + + match result { + Ok(er) => { + state.log.debug( + "db_tx_statement", + Some(json!({ + "db.system": driver_system(driver), + "db.name": db_name, + "db.transaction.id": req.transaction_id, + "db.operation": "EXECUTE", + "db.statement": req.sql.trim(), + "affected_rows": er.affected_rows, + })), + ); + let returned_rows = + crate::handlers::query_rows_to_objects(&er.returned_columns, er.returned_rows); + Ok(TxExecuteResp { + affected_rows: er.affected_rows, + last_insert_id: er.last_insert_id, + returned_rows, + }) + } + Err(e) => { + state.log.warn( + "db_tx_statement_failed", + Some(json!({ + "db.system": driver_system(driver), + "db.name": db_name, + "db.transaction.id": req.transaction_id, + "db.operation": "EXECUTE", + "db.statement": req.sql.trim(), + "error": serde_json::to_value(&e).ok(), + })), + ); + Err(err_to_str(e)) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::handlers::begin_transaction::tests::state; + use serde_json::{json, Value}; + + fn req(v: Value) -> TxExecuteReq { + serde_json::from_value(v).unwrap() + } + + // Keyword-set + comment-strip unit coverage lives in + // `super::tx_sql_guard`. The tests below assert the handler's + // *behaviour* — that the rejection actually fires before the registry + // is touched, the log event lands, and the error message points at the + // right finalization handler. We exercise enough bypass shapes here to + // catch any future regression that re-introduces a side-channel. + + #[tokio::test(flavor = "multi_thread")] + async fn rejects_commit_via_execute() { + let st = state(); + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let err = handle( + &st, + req(json!({ "transaction_id": begin.transaction.id, "sql": "COMMIT" })), + ) + .await + .unwrap_err(); + assert!(err.contains("INVALID_PARAM"), "got: {err}"); + assert!(err.contains("commitTransaction"), "got: {err}"); + } + + #[tokio::test(flavor = "multi_thread")] + async fn rejects_rollback_via_execute() { + let st = state(); + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let err = handle( + &st, + req(json!({ "transaction_id": begin.transaction.id, "sql": "ROLLBACK" })), + ) + .await + .unwrap_err(); + assert!(err.contains("INVALID_PARAM"), "got: {err}"); + assert!(err.contains("rollbackTransaction"), "got: {err}"); + } + + /// Regression: `/* ... */COMMIT` previously fell through the first-token + /// check (the first whitespace-delimited token was `/*`). After the + /// strip-leading-noise fix it must reject like a bare `COMMIT`. + #[tokio::test(flavor = "multi_thread")] + async fn rejects_block_comment_prefixed_commit() { + let st = state(); + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let err = handle( + &st, + req(json!({ + "transaction_id": begin.transaction.id, + "sql": "/* sneak */COMMIT", + })), + ) + .await + .unwrap_err(); + assert!(err.contains("INVALID_PARAM"), "got: {err}"); + assert!(err.contains("commitTransaction"), "got: {err}"); + } + + /// Regression: `--text\nCOMMIT` previously bypassed the filter the same + /// way as the block-comment form. + #[tokio::test(flavor = "multi_thread")] + async fn rejects_line_comment_prefixed_commit() { + let st = state(); + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let err = handle( + &st, + req(json!({ + "transaction_id": begin.transaction.id, + "sql": "-- sneak\nCOMMIT", + })), + ) + .await + .unwrap_err(); + assert!(err.contains("INVALID_PARAM"), "got: {err}"); + } + + /// Regression: `START TRANSACTION` (MySQL/ANSI spelling of `BEGIN`) was + /// not in the keyword set; on MySQL it implicitly committed the outer + /// txn and opened a new untracked one. Must now reject. + #[tokio::test(flavor = "multi_thread")] + async fn rejects_start_transaction_via_execute() { + let st = state(); + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let err = handle( + &st, + req(json!({ + "transaction_id": begin.transaction.id, + "sql": "START TRANSACTION", + })), + ) + .await + .unwrap_err(); + assert!(err.contains("INVALID_PARAM"), "got: {err}"); + } + + /// Success-path INSERT inside an interactive transaction surfaces + /// `affected_rows` and `last_insert_id` just like the standalone + /// `iii-database::execute`. Uses an on-disk sqlite so the txn's pinned + /// conn and the post-commit verification share one database file. + #[tokio::test(flavor = "multi_thread")] + async fn execute_insert_inside_tx_returns_affected_rows_and_last_insert_id() { + let tmp = tempfile::NamedTempFile::new().unwrap(); + let url = format!("sqlite:{}", tmp.path().display()); + let pool = + crate::pool::SqlitePool::new(&url, &crate::config::PoolConfig::default()).unwrap(); + let mut pools = std::collections::HashMap::new(); + pools.insert("primary".to_string(), crate::pool::Pool::Sqlite(pool)); + let st = crate::handlers::AppState { + pools: std::sync::Arc::new(pools), + handles: std::sync::Arc::new(crate::handle::HandleRegistry::new()), + transactions: crate::transaction::TxRegistry::new(), + log: iii_sdk::Logger::new(), + }; + + crate::handlers::execute::handle( + &st, + serde_json::from_value( + json!({ "db": "primary", "sql": "CREATE TABLE t (id INTEGER PRIMARY KEY, n INT)" }), + ) + .unwrap(), + ) + .await + .unwrap(); + + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + + let resp = handle( + &st, + req(json!({ + "transaction_id": begin.transaction.id, + "sql": "INSERT INTO t (n) VALUES (?)", + "params": [7] + })), + ) + .await + .unwrap(); + assert_eq!(resp.affected_rows, 1); + assert_eq!(resp.last_insert_id.as_deref(), Some("1")); + } +} diff --git a/iii-database/src/handlers/transaction_query.rs b/iii-database/src/handlers/transaction_query.rs new file mode 100644 index 00000000..fe5a6962 --- /dev/null +++ b/iii-database/src/handlers/transaction_query.rs @@ -0,0 +1,288 @@ +//! `iii-database::transactionQuery` — read SQL inside an interactive +//! transaction. Same response envelope as `iii-database::query`; the only +//! difference is the SQL runs on the pinned transaction connection rather +//! than a freshly-pooled one. +//! +//! Transaction-control SQL (`BEGIN`/`COMMIT`/`ROLLBACK`/`START TRANSACTION` +//! /etc., including comment-prefixed forms) is rejected with `INVALID_PARAM` +//! by the shared [`super::tx_sql_guard`] check. PG/MySQL/SQLite all happily +//! execute these through their prepared-statement paths — without the guard +//! a caller could side-channel finalize the transaction through the query +//! handler, leaving the [`crate::transaction::TxRegistry`] desynchronized +//! from the connection's actual txn state. + +use super::tx_sql_guard::is_transaction_control_sql; +use super::AppState; +use crate::driver; +use crate::error::DbError; +use crate::handle::PinnedConn; +use crate::handlers::query::QueryResp; +use crate::handlers::{query::err_to_str, query_rows_to_objects}; +use crate::transaction::driver_system; +use crate::value::JsonParam; +use schemars::JsonSchema; +use serde::Deserialize; +use serde_json::{json, Value}; + +#[derive(Deserialize, JsonSchema)] +pub struct TxQueryReq { + pub transaction_id: String, + pub sql: String, + #[serde(default)] + pub params: Vec, +} + +pub async fn handle(state: &AppState, req: TxQueryReq) -> Result { + if req.sql.trim().is_empty() { + return Err(err_to_str(DbError::DriverError { + driver: "(tx)".into(), + code: None, + message: "empty SQL".into(), + failed_index: None, + })); + } + if is_transaction_control_sql(&req.sql) { + // Mirror the transactionExecute rejection. The defense is symmetric: + // PG/MySQL/SQLite all execute COMMIT/ROLLBACK/etc. through the + // prepared-statement path that `run_prepared` uses, so without this + // guard the query handler is the easiest finalization side-channel. + state.log.warn( + "db_tx_finalization_via_sql_rejected", + Some(json!({ + "db.transaction.id": req.transaction_id, + "db.operation": "QUERY", + "db.statement": req.sql.trim(), + })), + ); + return Err(err_to_str(DbError::InvalidParam { + index: 0, + reason: "transaction-control SQL (BEGIN/START TRANSACTION/COMMIT/ROLLBACK/\ + SAVEPOINT/RELEASE/END/SET TRANSACTION, including comment-prefixed forms) \ + is not allowed in transactionQuery; use commitTransaction or \ + rollbackTransaction" + .into(), + })); + } + let params = JsonParam::from_json_slice(&req.params).map_err(err_to_str)?; + + let lock = match state.transactions.lock(&req.transaction_id).await { + Ok(l) => l, + Err(e) => { + state.log.warn( + "db_tx_unknown", + Some(json!({ + "db.transaction.id": req.transaction_id, + "db.operation": "QUERY", + })), + ); + return Err(err_to_str(e)); + } + }; + + let crate::transaction::TxLock { + db_name, + driver, + guard: mut g, + .. + } = lock; + + let result = match &mut *g { + PinnedConn::Sqlite(slot) => driver::sqlite::run_prepared(slot, &req.sql, ¶ms).await, + PinnedConn::Postgres(client) => { + driver::postgres::run_prepared(client, &req.sql, ¶ms).await + } + PinnedConn::Mysql(conn) => driver::mysql::run_prepared(conn, &req.sql, ¶ms).await, + }; + + match result { + Ok(qr) => { + state.log.debug( + "db_tx_statement", + Some(json!({ + "db.system": driver_system(driver), + "db.name": db_name, + "db.transaction.id": req.transaction_id, + "db.operation": "QUERY", + "db.statement": req.sql.trim(), + "row_count": qr.rows.len(), + })), + ); + let row_count = qr.rows.len(); + let rows = query_rows_to_objects(&qr.columns, qr.rows); + Ok(QueryResp { + rows, + row_count, + columns: qr.columns, + }) + } + Err(e) => { + state.log.warn( + "db_tx_statement_failed", + Some(json!({ + "db.system": driver_system(driver), + "db.name": db_name, + "db.transaction.id": req.transaction_id, + "db.operation": "QUERY", + "db.statement": req.sql.trim(), + "error": serde_json::to_value(&e).ok(), + })), + ); + Err(err_to_str(e)) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::handlers::begin_transaction::tests::state; + use serde_json::{json, Value}; + + fn req(v: Value) -> TxQueryReq { + serde_json::from_value(v).unwrap() + } + + #[tokio::test(flavor = "multi_thread")] + async fn query_inside_active_tx_returns_rows() { + let st = state(); + // Open a transaction + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let tx_id = begin.transaction.id; + + let resp = handle( + &st, + req(json!({ + "transaction_id": tx_id, + "sql": "SELECT 1 AS n" + })), + ) + .await + .unwrap(); + assert_eq!(resp.row_count, 1); + assert_eq!(resp.rows[0]["n"], 1); + } + + #[tokio::test(flavor = "multi_thread")] + async fn unknown_transaction_id_returns_not_found() { + let st = state(); + let err = handle( + &st, + req(json!({ + "transaction_id": "00000000-0000-0000-0000-000000000000", + "sql": "SELECT 1" + })), + ) + .await + .unwrap_err(); + assert!(err.contains("TRANSACTION_NOT_FOUND"), "got: {err}"); + } + + #[tokio::test(flavor = "multi_thread")] + async fn empty_sql_rejected_at_handler_boundary() { + let st = state(); + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let err = handle( + &st, + req(json!({ "transaction_id": begin.transaction.id, "sql": " " })), + ) + .await + .unwrap_err(); + assert!(err.contains("empty SQL"), "got: {err}"); + } + + /// transactionQuery must reject bare COMMIT/ROLLBACK/etc. with the same + /// `INVALID_PARAM` rationale as transactionExecute. Without this, a + /// caller can finalize the txn through the query handler — the registry + /// keeps the entry around and subsequent calls run on a now-non-txn conn + /// until the timeout watcher sweeps. + #[tokio::test(flavor = "multi_thread")] + async fn rejects_commit_via_query() { + let st = state(); + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let err = handle( + &st, + req(json!({ "transaction_id": begin.transaction.id, "sql": "COMMIT" })), + ) + .await + .unwrap_err(); + assert!(err.contains("INVALID_PARAM"), "got: {err}"); + assert!(err.contains("commitTransaction"), "got: {err}"); + } + + #[tokio::test(flavor = "multi_thread")] + async fn rejects_rollback_via_query() { + let st = state(); + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let err = handle( + &st, + req(json!({ "transaction_id": begin.transaction.id, "sql": "ROLLBACK" })), + ) + .await + .unwrap_err(); + assert!(err.contains("INVALID_PARAM"), "got: {err}"); + assert!(err.contains("rollbackTransaction"), "got: {err}"); + } + + /// Comment-prefixed bypass via the query path. + #[tokio::test(flavor = "multi_thread")] + async fn rejects_block_comment_prefixed_commit_via_query() { + let st = state(); + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let err = handle( + &st, + req(json!({ + "transaction_id": begin.transaction.id, + "sql": "/* sneak */COMMIT", + })), + ) + .await + .unwrap_err(); + assert!(err.contains("INVALID_PARAM"), "got: {err}"); + } + + #[tokio::test(flavor = "multi_thread")] + async fn rejects_start_transaction_via_query() { + let st = state(); + let begin = crate::handlers::begin_transaction::handle( + &st, + serde_json::from_value(json!({ "db": "primary" })).unwrap(), + ) + .await + .unwrap(); + let err = handle( + &st, + req(json!({ + "transaction_id": begin.transaction.id, + "sql": "START TRANSACTION", + })), + ) + .await + .unwrap_err(); + assert!(err.contains("INVALID_PARAM"), "got: {err}"); + } +} diff --git a/iii-database/src/handlers/tx_sql_guard.rs b/iii-database/src/handlers/tx_sql_guard.rs new file mode 100644 index 00000000..4b2b4614 --- /dev/null +++ b/iii-database/src/handlers/tx_sql_guard.rs @@ -0,0 +1,258 @@ +//! First-line defense against side-channel transaction finalization. +//! +//! `transactionExecute` and `transactionQuery` accept caller-supplied SQL and +//! run it on a connection that the worker holds inside `BEGIN ... COMMIT`. +//! If a caller submits raw `COMMIT` / `ROLLBACK` / `BEGIN` / `SAVEPOINT` / +//! `SET TRANSACTION` / `START TRANSACTION`, the driver executes it server- +//! side — finalizing or restarting the transaction without the `TxRegistry` +//! ever finding out. The next handler call then operates on a pinned conn +//! whose txn state no longer matches the registry's view, silently breaking +//! the atomicity contract. +//! +//! [`is_transaction_control_sql`] returns `true` for any SQL whose first +//! non-comment token is a transaction-control keyword, letting handlers +//! reject with `INVALID_PARAM` before the registry lookup. +//! +//! Coverage: +//! - Plain keyword forms: `BEGIN`, `COMMIT`, `ROLLBACK`, `END`, +//! `SAVEPOINT`, `RELEASE`, two-token forms `SET TRANSACTION ...` and +//! `START TRANSACTION ...` (ANSI / MySQL spelling of `BEGIN`). +//! - Comment-prefixed forms — `/* */COMMIT`, `-- foo\nCOMMIT`, +//! `/* a */ /* b */COMMIT`, and any chain of leading whitespace, `;`, +//! line comments, and block comments (including nested block comments +//! for postgres parity). +//! +//! Why nested block comments: postgres parses `/* /* nest */ */` as a single +//! comment. mysql and sqlite don't, but if we over-strip on those drivers +//! the worst case is a false-positive rejection — far better than letting a +//! finalization SQL through on postgres. + +/// Returns `true` if the first non-comment, non-whitespace, non-`;` token +/// of `sql` is a transaction-control keyword. The two-token forms +/// `SET TRANSACTION ...` and `START TRANSACTION ...` are matched only when +/// the second token is exactly `TRANSACTION` (case-insensitive) — bare +/// `SET LOCAL ...` (legitimate per-tx session settings) and MySQL +/// replication's `START SLAVE` / `START REPLICA` are left alone. +pub(super) fn is_transaction_control_sql(sql: &str) -> bool { + let trimmed = strip_leading_noise(sql); + let mut tokens = trimmed.split_whitespace(); + let Some(first) = tokens.next() else { + return false; + }; + // `COMMIT;` / `end;` reach here including the trailing `;`. Strip it so + // terminated and unterminated forms share one branch. + let first = first.trim_end_matches(';'); + let upper = first.to_ascii_uppercase(); + match upper.as_str() { + "BEGIN" | "COMMIT" | "ROLLBACK" | "END" | "SAVEPOINT" | "RELEASE" => true, + "SET" | "START" => tokens + .next() + .map(|t| t.trim_end_matches(';').eq_ignore_ascii_case("TRANSACTION")) + .unwrap_or(false), + _ => false, + } +} + +/// Strip any prefix of leading whitespace, `;`, line comments (`-- ...\n`), +/// and block comments (`/* ... */`, with nesting supported) so the returned +/// slice begins at the first character of the first real token. Returns an +/// empty slice when the input is comment-only or contains an unterminated +/// comment — the caller treats both as "no command present". +fn strip_leading_noise(sql: &str) -> &str { + let mut s = sql; + loop { + let trimmed = s.trim_start_matches(|c: char| c.is_whitespace() || c == ';'); + if let Some(after) = trimmed.strip_prefix("--") { + // Line comment ends at the next newline; if no newline exists, + // the comment swallows the rest of the input. + match after.find('\n') { + Some(pos) => s = &after[pos + 1..], + None => return "", + } + } else if let Some(after) = trimmed.strip_prefix("/*") { + // Block comment with postgres-style nesting. If the comment is + // unterminated, consume the rest — the driver would error on it + // regardless, and we'd rather over-strip than risk a bypass. + match strip_block_comment_body(after) { + Some(rest) => s = rest, + None => return "", + } + } else { + return trimmed; + } + } +} + +/// Consume the body of a block comment whose opening `/*` was already +/// stripped, returning the remainder of the input after the matching `*/`. +/// Handles nested `/* /* */ */`. Returns `None` when unterminated. +/// +/// Byte-level scanning is sound here because `/` (0x2F) and `*` (0x2A) are +/// both ASCII and never appear as continuation bytes in valid UTF-8 +/// (continuation bytes are 0x80..0xBF). A returned slice always begins +/// immediately after the matched `*/` — i.e. on a char boundary — because +/// `/` is a 1-byte ASCII char. +fn strip_block_comment_body(s: &str) -> Option<&str> { + let bytes = s.as_bytes(); + let mut depth: usize = 1; + let mut i = 0; + while i + 1 < bytes.len() { + if bytes[i] == b'/' && bytes[i + 1] == b'*' { + depth += 1; + i += 2; + } else if bytes[i] == b'*' && bytes[i + 1] == b'/' { + depth -= 1; + i += 2; + if depth == 0 { + return Some(&s[i..]); + } + } else { + i += 1; + } + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + + // --- Existing keyword coverage (no regression) ------------------------- + + #[test] + fn flags_plain_finalization_keywords() { + assert!(is_transaction_control_sql("COMMIT")); + assert!(is_transaction_control_sql(" rollback ")); + assert!(is_transaction_control_sql("ROLLBACK TO SAVEPOINT foo")); + assert!(is_transaction_control_sql( + "BEGIN ISOLATION LEVEL SERIALIZABLE" + )); + assert!(is_transaction_control_sql("end;")); + assert!(is_transaction_control_sql("SAVEPOINT s1")); + assert!(is_transaction_control_sql("RELEASE SAVEPOINT s1")); + assert!(is_transaction_control_sql("SET TRANSACTION READ ONLY")); + assert!(is_transaction_control_sql( + "set transaction isolation level serializable" + )); + } + + #[test] + fn passes_normal_dml() { + assert!(!is_transaction_control_sql("INSERT INTO t (n) VALUES (1)")); + assert!(!is_transaction_control_sql("UPDATE t SET n = 2")); + assert!(!is_transaction_control_sql("DELETE FROM t")); + assert!(!is_transaction_control_sql("SELECT 1")); + // Bare `SET` for per-tx session settings — second token isn't TRANSACTION. + assert!(!is_transaction_control_sql("SET LOCAL search_path = foo")); + assert!(!is_transaction_control_sql("SET search_path TO foo")); + } + + // --- Finding #3: START TRANSACTION (MySQL/ANSI spelling) --------------- + + #[test] + fn flags_start_transaction() { + assert!(is_transaction_control_sql("START TRANSACTION")); + assert!(is_transaction_control_sql("start transaction")); + assert!(is_transaction_control_sql( + "START TRANSACTION ISOLATION LEVEL SERIALIZABLE" + )); + assert!(is_transaction_control_sql("start transaction;")); + assert!(is_transaction_control_sql("START\tTRANSACTION")); + } + + #[test] + fn passes_start_other_than_transaction() { + // MySQL replication ops use START with a different second token — + // not finalization, must pass through. + assert!(!is_transaction_control_sql("START SLAVE")); + assert!(!is_transaction_control_sql("START REPLICA SQL_THREAD")); + // Bare `START` with nothing after isn't a real statement, but it + // also isn't transaction-control. + assert!(!is_transaction_control_sql("START")); + } + + // --- Finding #1: comment-prefixed bypass -------------------------------- + + #[test] + fn flags_block_comment_prefixed_finalization() { + assert!(is_transaction_control_sql("/* sneak */COMMIT")); + assert!(is_transaction_control_sql("/* sneak */ COMMIT")); + assert!(is_transaction_control_sql("/**/ COMMIT")); + assert!(is_transaction_control_sql("/*\n multi\n line\n*/ ROLLBACK")); + // Chained block comments. + assert!(is_transaction_control_sql("/* a *//* b */ COMMIT")); + assert!(is_transaction_control_sql("/* a */ /* b */ /* c */ COMMIT")); + // Mixed with whitespace and `;`. + assert!(is_transaction_control_sql( + "; /* foo */ ;; /* bar */COMMIT" + )); + // Postgres-style nested block comments must strip completely. + assert!(is_transaction_control_sql( + "/* outer /* inner */ outer */ COMMIT" + )); + } + + #[test] + fn flags_line_comment_prefixed_finalization() { + assert!(is_transaction_control_sql("-- sneak\nCOMMIT")); + assert!(is_transaction_control_sql("-- foo\n -- bar\nROLLBACK")); + // Mixed line + block comments. + assert!(is_transaction_control_sql("-- foo\n/* bar */ COMMIT")); + assert!(is_transaction_control_sql("/* bar */-- foo\nCOMMIT")); + } + + #[test] + fn flags_comment_prefixed_start_transaction() { + // Combine finding #1 and finding #3: a comment-prefixed + // START TRANSACTION must also be rejected (otherwise MySQL would + // implicitly commit the outer txn). + assert!(is_transaction_control_sql("/* */ START TRANSACTION")); + assert!(is_transaction_control_sql("-- skip\nSTART TRANSACTION")); + } + + #[test] + fn passes_comment_only_or_unterminated() { + // A SQL string that's *only* comments has no command — driver will + // no-op or error; we don't need to reject. + assert!(!is_transaction_control_sql("/* just a comment */")); + assert!(!is_transaction_control_sql("-- end-of-line comment\n")); + assert!(!is_transaction_control_sql("-- no newline")); + // Unterminated block comment: strip consumes the rest, no token left. + assert!(!is_transaction_control_sql("/* unterminated COMMIT")); + assert!(!is_transaction_control_sql("")); + assert!(!is_transaction_control_sql(" ")); + assert!(!is_transaction_control_sql(";;;;")); + } + + #[test] + fn passes_comment_prefixed_dml() { + // Stripping must not turn benign comment-prefixed DML into a flag. + assert!(!is_transaction_control_sql("/* note */ SELECT 1")); + assert!(!is_transaction_control_sql( + "-- note\nINSERT INTO t VALUES (1)" + )); + assert!(!is_transaction_control_sql( + "/* a */ /* b */ UPDATE t SET n=1" + )); + } + + // --- UTF-8 safety ------------------------------------------------------- + + #[test] + fn handles_utf8_inside_comments() { + // Multi-byte characters inside the comment body must not break + // byte-level scanning. The string after `/* … */` is just `COMMIT`. + assert!(is_transaction_control_sql("/* 漢字 🔥 مرحبا */ COMMIT")); + assert!(is_transaction_control_sql("-- 漢字 emoji 🚀\nROLLBACK")); + } + + #[test] + fn handles_utf8_in_statement_body() { + // UTF-8 chars *after* the keyword must also survive (regression + // safety net for the byte-scan implementation). + assert!(!is_transaction_control_sql("SELECT '漢字 🔥' AS n")); + assert!(!is_transaction_control_sql( + "INSERT INTO t VALUES ('مرحبا')" + )); + } +} diff --git a/iii-database/src/lib.rs b/iii-database/src/lib.rs index 459c0f2e..a798c3f7 100644 --- a/iii-database/src/lib.rs +++ b/iii-database/src/lib.rs @@ -1,12 +1,12 @@ //! iii-database worker — public surface for the binary and tests. pub mod config; -pub(crate) mod cursor; pub(crate) mod driver; pub mod error; pub mod handle; pub mod handlers; pub mod pool; +pub mod transaction; pub mod triggers; pub mod value; diff --git a/iii-database/src/main.rs b/iii-database/src/main.rs index ab34a0b6..c0568340 100644 --- a/iii-database/src/main.rs +++ b/iii-database/src/main.rs @@ -3,16 +3,24 @@ use clap::Parser; use iii_database::config::WorkerConfig; use iii_database::handle::HandleRegistry; use iii_database::handlers::{ + begin_transaction::{self, BeginTxReq}, + commit_transaction::{self, CommitTxReq}, execute::{self, ExecuteReq}, prepare::{self, PrepareReq}, query::{self, QueryReq}, + rollback_transaction::{self, RollbackTxReq}, run_statement::{self, RunReq}, transaction::{self, TxReq}, + transaction_execute::{self, TxExecuteReq}, + transaction_query::{self, TxQueryReq}, AppState, }; use iii_database::pool; -use iii_database::triggers::handler::{QueryPollTrigger, RowChangeTrigger}; -use iii_sdk::{register_worker, InitOptions, OtelConfig, RegisterFunction, RegisterTriggerType}; +use iii_database::transaction::TxRegistry; +use iii_database::triggers::handler::RowChangeTrigger; +use iii_sdk::{ + register_worker, InitOptions, Logger, OtelConfig, RegisterFunction, RegisterTriggerType, +}; use std::collections::HashMap; use std::sync::Arc; @@ -63,12 +71,17 @@ async fn main() -> Result<()> { } let handles = Arc::new(HandleRegistry::new()); + let transactions = TxRegistry::new(); + let log = Logger::new(); let state = AppState { pools: Arc::new(pools), handles: handles.clone(), + transactions: transactions.clone(), + log: log.clone(), }; let _evictor = handles.spawn_evictor(); + let _tx_watcher = transactions.spawn_timeout_watcher(log.clone()); let iii = register_worker( &cli.url, @@ -131,12 +144,78 @@ async fn main() -> Result<()> { .description("Run a sequence of statements atomically."), ); } + { + let st = state.clone(); + iii.register_function( + RegisterFunction::new_async( + "iii-database::beginTransaction", + move |req: BeginTxReq| { + let st = st.clone(); + async move { begin_transaction::handle(&st, req).await } + }, + ) + .description( + "Open an interactive transaction; returns a handle to use with \ + transactionQuery/transactionExecute/commitTransaction/rollbackTransaction.", + ), + ); + } + { + let st = state.clone(); + iii.register_function( + RegisterFunction::new_async( + "iii-database::transactionQuery", + move |req: TxQueryReq| { + let st = st.clone(); + async move { transaction_query::handle(&st, req).await } + }, + ) + .description("Run a read-only SQL query inside an interactive transaction."), + ); + } + { + let st = state.clone(); + iii.register_function( + RegisterFunction::new_async( + "iii-database::transactionExecute", + move |req: TxExecuteReq| { + let st = st.clone(); + async move { transaction_execute::handle(&st, req).await } + }, + ) + .description( + "Run a write statement inside an interactive transaction. \ + BEGIN/COMMIT/ROLLBACK are rejected; use commit/rollbackTransaction.", + ), + ); + } + { + let st = state.clone(); + iii.register_function( + RegisterFunction::new_async( + "iii-database::commitTransaction", + move |req: CommitTxReq| { + let st = st.clone(); + async move { commit_transaction::handle(&st, req).await } + }, + ) + .description("Commit and finalize an interactive transaction."), + ); + } + { + let st = state.clone(); + iii.register_function( + RegisterFunction::new_async( + "iii-database::rollbackTransaction", + move |req: RollbackTxReq| { + let st = st.clone(); + async move { rollback_transaction::handle(&st, req).await } + }, + ) + .description("Rollback and finalize an interactive transaction."), + ); + } - let _query_poll = iii.register_trigger_type(RegisterTriggerType::new( - "iii-database::query-poll", - "Polls a SQL query at a fixed interval and dispatches new rows since the last cursor.", - QueryPollTrigger::new(state.clone(), iii.clone()), - )); let _row_change = iii.register_trigger_type(RegisterTriggerType::new( "iii-database::row-change", "Postgres logical replication. Stubbed in v1.0 pending tokio-postgres replication API.", @@ -144,7 +223,7 @@ async fn main() -> Result<()> { )); tracing::info!( - "iii-database worker registered 5 functions and 2 trigger types, waiting for invocations" + "iii-database worker registered 10 functions and 1 trigger type, waiting for invocations" ); wait_for_shutdown_signal().await?; tracing::info!("iii-database worker shutting down"); diff --git a/iii-database/src/pool/mysql.rs b/iii-database/src/pool/mysql.rs index bf7c73ac..590837f6 100644 --- a/iii-database/src/pool/mysql.rs +++ b/iii-database/src/pool/mysql.rs @@ -60,20 +60,23 @@ impl MysqlPool { match tokio::time::timeout(self.acquire_timeout, fut).await { Ok(Ok(conn)) => Ok(conn), Ok(Err(e)) => { + let class = classify_mysql_error(&e); // `mysql_async::Error::Display` can echo the server address, // username, and other connection details. Log the full error - // operator-side via tracing; surface a generic message in - // the RPC reply so untrusted callers don't see infra details. + // operator-side via tracing; surface a generic message plus + // a coarse class hint in the RPC reply so untrusted callers + // can self-triage without seeing infra details. tracing::warn!( driver = "mysql", db = %db_name, + class, error = ?e, "pool acquire failed" ); Err(DbError::DriverError { driver: "mysql".into(), code: None, - message: "pool connection failed; check server availability".into(), + message: format!("pool connection failed ({class})"), failed_index: None, }) } @@ -85,9 +88,38 @@ impl MysqlPool { } } +/// Classify a `mysql_async::Error` into a coarse hint word for the RPC +/// reply. Same closed set as the postgres classifier — `tls`, `auth`, +/// `network`, `server-policy`, or `unknown` — and the same redaction +/// contract (no host/userinfo/db ever appears in the returned word). +/// +/// `mysql_async::Error` is a public exhaustive enum, so this is a +/// straight `match` — no `Debug`-string sniffing required. +fn classify_mysql_error(err: &mysql_async::Error) -> &'static str { + use mysql_async::{DriverError, Error, IoError}; + match err { + // TLS handshake / cert chain failure (gated on the `rustls-tls` + // feature that we already enable in Cargo.toml). + Error::Io(IoError::Tls(_)) => "tls", + // TCP refuse, DNS, route, peer reset, etc. + Error::Io(IoError::Io(_)) => "network", + // Server-side rejections by error code. The codes here cover the + // standard auth surface (access denied, host blocked, bad SSL). + // Everything else from the server (max_connections, replication + // lag, etc.) lands under server-policy. + Error::Server(s) if matches!(s.code, 1045 | 1129 | 1130 | 1156 | 1251 | 1698) => "auth", + Error::Server(_) => "server-policy", + Error::Driver(DriverError::ConnectionClosed) => "network", + // `Url(_)` shouldn't happen here — pool construction already + // parsed the URL via `Opts::from_url`. Classify defensively. + _ => "unknown", + } +} + #[cfg(test)] mod tests { use super::*; + use crate::config::{TlsConfig, TlsMode}; fn url() -> Option { std::env::var("TEST_MYSQL_URL").ok() @@ -97,9 +129,9 @@ mod tests { async fn mysql_pool_acquires() { let Some(u) = url() else { return }; // Local docker mysql in tests is plaintext; explicitly disable TLS. - let tls = crate::config::TlsConfig { - mode: crate::config::TlsMode::Disable, - ca_cert: None, + let tls = TlsConfig { + mode: TlsMode::Disable, + ..Default::default() }; let pool = MysqlPool::new(&u, &PoolConfig::default(), &tls).unwrap(); let mut conn = pool.acquire().await.unwrap(); @@ -113,15 +145,16 @@ mod tests { // Hits a port that nothing listens on so mysql_async surfaces a // connect error — the non-Timeout path that previously echoed the // underlying error verbatim. Asserts the RPC body uses the generic - // message and does not leak userinfo/host fragments. + // message, carries the `(network)` class hint, and does not leak + // userinfo/host fragments. let cfg = PoolConfig { max: 1, idle_timeout_ms: 1_000, acquire_timeout_ms: 500, }; - let tls = crate::config::TlsConfig { - mode: crate::config::TlsMode::Disable, - ca_cert: None, + let tls = TlsConfig { + mode: TlsMode::Disable, + ..Default::default() }; let url = "mysql://leaky_user:leaky_pass@127.0.0.1:1/some_db"; let pool = MysqlPool::new(url, &cfg, &tls).unwrap(); @@ -131,6 +164,10 @@ mod tests { body.contains("pool connection failed"), "expected generic message; got: {body}" ); + assert!( + body.contains("(network)"), + "expected (network) class hint for TCP-refused port 1; got: {body}" + ); for forbidden in ["leaky_user", "leaky_pass", "some_db"] { assert!( !body.contains(forbidden), @@ -138,4 +175,27 @@ mod tests { ); } } + + /// Unit-check the variants of `classify_mysql_error` we can construct + /// without a live server. `mysql_async::Error` is public so each branch + /// is exercised directly; covers the contract that auth maps to "auth" + /// and other server errors map to "server-policy". + #[test] + fn classify_mysql_error_branches() { + use mysql_async::{Error, ServerError}; + // 1045 = ER_ACCESS_DENIED_ERROR — the canonical "wrong password". + let auth = Error::Server(ServerError { + code: 1045, + message: "Access denied".into(), + state: "28000".into(), + }); + assert_eq!(classify_mysql_error(&auth), "auth"); + // 1040 = ER_CON_COUNT_ERROR ("too many connections") — server-policy. + let policy = Error::Server(ServerError { + code: 1040, + message: "Too many connections".into(), + state: "08004".into(), + }); + assert_eq!(classify_mysql_error(&policy), "server-policy"); + } } diff --git a/iii-database/src/pool/postgres.rs b/iii-database/src/pool/postgres.rs index 3d5a5939..856eeb9d 100644 --- a/iii-database/src/pool/postgres.rs +++ b/iii-database/src/pool/postgres.rs @@ -72,12 +72,32 @@ impl PostgresPool { db: db_name.clone(), waited_ms, }, - other => { + deadpool_postgres::PoolError::Backend(be) => { + let class = classify_pg_error(&be); // `deadpool_postgres::PoolError::Display` chains through // `tokio_postgres::Error`, which can include the configured // host and other connection-string fragments. Logging is // operator-only (stderr); the RPC reply gets a generic - // message so cross-tenant callers don't see infra details. + // message plus a coarse class hint so cross-tenant callers + // can self-triage without seeing infra details. + tracing::warn!( + driver = "postgres", + db = %db_name, + class, + error = ?be, + "pool acquire failed" + ); + DbError::DriverError { + driver: "postgres".into(), + code: None, + message: format!("pool connection failed ({class})"), + failed_index: None, + } + } + other => { + // `PoolError::Closed`, `NoRuntimeSpecified`, `PostCreateHook`. + // None of these carry a tokio_postgres::Error to classify; + // the deadpool Display is safe (no userinfo) on all three. tracing::warn!( driver = "postgres", db = %db_name, @@ -87,7 +107,7 @@ impl PostgresPool { DbError::DriverError { driver: "postgres".into(), code: None, - message: "pool connection failed; check server availability".into(), + message: "pool connection failed (unknown)".into(), failed_index: None, } } @@ -95,10 +115,77 @@ impl PostgresPool { } } +/// Classify a `tokio_postgres::Error` into a coarse hint word for the RPC +/// reply. The hint never includes hostnames, userinfo, database names, or +/// any other connection-string fragments — it's a single bare word from +/// the closed set {`tls`, `auth`, `network`, `server-policy`, `unknown`}. +/// +/// `tokio_postgres::error::Kind` is private (no `pub fn kind()`); the +/// upstream public API exposes only `code()`, `as_db_error()`, `is_closed()`, +/// and `source()`. So the classifier uses (1) the SQLSTATE class when the +/// server replied, (2) a `source()`-chain walk for in-band rustls/io types, +/// and (3) a `Debug`-string sniff as a last resort. The upstream `Debug` +/// impl prints the `kind:` field via `debug_struct` and has been stable +/// for years; the alternative is a real upstream API gap. +fn classify_pg_error(err: &tokio_postgres::Error) -> &'static str { + if let Some(code) = err.code() { + match &code.code()[..2] { + // invalid_authorization_specification / invalid_password + "28" => return "auth", + // connection_exception family + "08" => return "network", + // insufficient_resources / operator_intervention + "53" | "57" => return "server-policy", + _ => {} + } + } + + // Walk std::error::Error::source(). tokio-rustls wraps rustls::Error + // inside io::Error via `io::Error::new(InvalidData, _)`, so the rustls + // type is typically reachable two hops down for cert failures. Bound + // the walk so a pathological self-referential chain can't hang us. + use std::error::Error as _; + let mut cur: Option<&(dyn std::error::Error + 'static)> = err.source(); + let mut saw_rustls = false; + let mut saw_io = false; + for _ in 0..16 { + let Some(s) = cur else { break }; + if s.downcast_ref::().is_some() { + saw_rustls = true; + } + if s.downcast_ref::().is_some() { + saw_io = true; + } + cur = s.source(); + } + if saw_rustls { + return "tls"; + } + + // Kind sniff (fallback for Kind::Tls / Authentication / Connect whose + // source() type isn't downcastable to a concrete crate type we know). + let dbg = format!("{err:?}"); + if dbg.contains("kind: Tls") { + return "tls"; + } + if dbg.contains("kind: Authentication") { + // Covers Neon's pooler-endpoint case where the SCRAM exchange + // raises "server did not use channel binding" with channel_binding=require. + return "auth"; + } + if dbg.contains("kind: Connect") || dbg.contains("kind: Closed") { + return "network"; + } + if saw_io { + return "network"; + } + "unknown" +} + #[cfg(test)] mod tests { use super::*; - use crate::config::PoolConfig; + use crate::config::{PoolConfig, TlsConfig, TlsMode}; fn url() -> Option { std::env::var("TEST_POSTGRES_URL").ok() @@ -112,9 +199,9 @@ mod tests { }; // Local docker postgres in tests is plaintext; explicitly disable // TLS so the test passes without a server-side cert. - let tls = crate::config::TlsConfig { - mode: crate::config::TlsMode::Disable, - ca_cert: None, + let tls = TlsConfig { + mode: TlsMode::Disable, + ..Default::default() }; let pool = PostgresPool::new(&u, &PoolConfig::default(), &tls) .await @@ -130,16 +217,17 @@ mod tests { // Hits a port that nothing listens on so deadpool returns // `PoolError::Backend(tokio_postgres::Error)` — the non-Timeout // path that previously echoed the underlying error verbatim. - // Asserts the RPC body uses the generic message and contains - // none of the userinfo/host fragments from the URL. + // Asserts the RPC body uses the generic message, carries the + // `(network)` class hint, and contains none of the userinfo/host + // fragments from the URL. let cfg = PoolConfig { max: 1, idle_timeout_ms: 1_000, acquire_timeout_ms: 500, }; - let tls = crate::config::TlsConfig { - mode: crate::config::TlsMode::Disable, - ca_cert: None, + let tls = TlsConfig { + mode: TlsMode::Disable, + ..Default::default() }; let url = "postgres://leaky_user:leaky_pass@127.0.0.1:1/some_db"; let pool = PostgresPool::new(url, &cfg, &tls).await.unwrap(); @@ -149,6 +237,10 @@ mod tests { body.contains("pool connection failed"), "expected generic message; got: {body}" ); + assert!( + body.contains("(network)"), + "expected (network) class hint for TCP-refused port 1; got: {body}" + ); for forbidden in ["leaky_user", "leaky_pass", "some_db"] { assert!( !body.contains(forbidden), @@ -156,4 +248,24 @@ mod tests { ); } } + + /// Smoke-check the SQLSTATE branch of `classify_pg_error`. Server-side + /// failures (auth, connection_exception, server-policy) come with a + /// 5-char SQLSTATE that we lean on first because it doesn't depend on + /// the brittle `Debug` sniff. + #[test] + fn classify_pg_error_uses_sqlstate_class_when_present() { + use tokio_postgres::error::SqlState; + // We can't construct a `tokio_postgres::Error` directly (private + // ctor), but we can verify the SQLSTATE-class table by inspection: + // every code in the auth/network/server-policy families used by + // upstream falls in the 2-char prefixes we match on. This guards + // against accidental table drift. + assert_eq!( + &SqlState::INVALID_AUTHORIZATION_SPECIFICATION.code()[..2], + "28" + ); + assert_eq!(&SqlState::INVALID_PASSWORD.code()[..2], "28"); + assert_eq!(&SqlState::CONNECTION_EXCEPTION.code()[..2], "08"); + } } diff --git a/iii-database/src/pool/tls.rs b/iii-database/src/pool/tls.rs index 0d496a06..11f3d067 100644 --- a/iii-database/src/pool/tls.rs +++ b/iii-database/src/pool/tls.rs @@ -79,7 +79,16 @@ pub fn make_mysql_ssl_opts(tls: &TlsConfig) -> Result Result Result { - let roots = build_root_store(tls.ca_cert.as_deref())?; + let roots = build_root_store(tls.ca_cert.as_deref(), tls.trust_native)?; let provider = Arc::new(default_provider()); match tls.mode { @@ -145,12 +154,46 @@ fn build_client_config(tls: &TlsConfig) -> Result { } } -/// Build a `RootCertStore` from either an operator-supplied PEM file or -/// the OS trust store. The `ca_cert` path **replaces** the native store; -/// it is not additive. This matches the typical operator intent: "trust -/// these certs, nothing else." -pub fn build_root_store(ca_cert: Option<&str>) -> Result { +/// Build a `RootCertStore` from the OS trust store and/or an operator- +/// supplied PEM file. +/// +/// Behavior: +/// - `trust_native = true` (default): load `rustls_native_certs` and, +/// if `ca_cert` is set, extend with its certs. This is the recommended +/// posture: managed providers whose certs already chain to a public +/// root keep working, and operators only need to point `ca_cert` at +/// a private CA when they have one (e.g. Supabase Intermediate 2021). +/// - `trust_native = false`: trust only the certs in `ca_cert`. Used +/// for strict-isolation deployments that explicitly do not want the +/// public web PKI accepted. With both `trust_native = false` *and* +/// `ca_cert = None`, the result is a config error (no trust anchors). +/// +/// Per-cert `RootCertStore::add` failures are non-fatal for the native +/// store (a bad cert in the OS store shouldn't block the whole worker if +/// the rest are usable) but are fatal for `ca_cert` (operator-supplied +/// certs are intentional and we want loud failures). +pub fn build_root_store( + ca_cert: Option<&str>, + trust_native: bool, +) -> Result { let mut store = RootCertStore::empty(); + + if trust_native { + let result = rustls_native_certs::load_native_certs(); + if result.certs.is_empty() && ca_cert.is_none() { + return Err(DbError::ConfigError { + message: format!( + "no native CA certificates loaded ({} errors); \ + set `tls.ca_cert` to provide them", + result.errors.len() + ), + }); + } + for cert in result.certs { + let _ = store.add(cert); + } + } + if let Some(path) = ca_cert { let pem = std::fs::read(path).map_err(|e| DbError::ConfigError { message: format!("ca_cert read `{path}`: {e}"), @@ -171,25 +214,16 @@ pub fn build_root_store(ca_cert: Option<&str>) -> Result message: format!("ca_cert `{path}`: no PEM CERTIFICATE blocks found"), }); } - return Ok(store); } - // Native trust store. `load_native_certs` returns errors as a Vec - // alongside the certs — non-fatal (one bad cert in the store - // shouldn't block startup if the rest are usable). - let result = rustls_native_certs::load_native_certs(); - if result.certs.is_empty() { + + if !trust_native && ca_cert.is_none() { return Err(DbError::ConfigError { - message: format!( - "no native CA certificates loaded ({} errors); set `tls.ca_cert` to provide them", - result.errors.len() - ), + message: "tls.trust_native is false and tls.ca_cert is unset; \ + no trust anchors available" + .into(), }); } - for cert in result.certs { - // Ignore individual `add` failures: bad cert in the OS store - // shouldn't fail the whole worker if other certs work. - let _ = store.add(cert); - } + Ok(store) } @@ -272,17 +306,61 @@ mod tests { use super::*; use std::io::Write; + /// ISRG Root X1 (Let's Encrypt). Real, publicly-known, stable trust + /// anchor that doesn't expire until 2035-06-04. We use it as a + /// fixture so the additive-vs-replace tests have a concrete cert to + /// load through `rustls_pemfile::certs` and `RootCertStore::add` + /// without needing a cert-gen dev-dep. + const FIXTURE_CA_PEM: &[u8] = b"\ +-----BEGIN CERTIFICATE-----\n\ +MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw\n\ +TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh\n\ +cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4\n\ +WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu\n\ +ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY\n\ +MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc\n\ +h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+\n\ +0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U\n\ +A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW\n\ +T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH\n\ +B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC\n\ +B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv\n\ +KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn\n\ +OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn\n\ +jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw\n\ +qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI\n\ +rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV\n\ +HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq\n\ +hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL\n\ +ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ\n\ +3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK\n\ +NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5\n\ +ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur\n\ +TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC\n\ +jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc\n\ +oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq\n\ +4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA\n\ +mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d\n\ +emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=\n\ +-----END CERTIFICATE-----\n"; + + fn write_fixture_pem() -> tempfile::NamedTempFile { + let mut f = tempfile::NamedTempFile::new().unwrap(); + f.write_all(FIXTURE_CA_PEM).unwrap(); + f + } + #[test] fn build_root_store_loads_native_certs() { // System trust store should have at least a few CAs on a normal // dev machine; if this fails the dev environment is unusual. - let store = build_root_store(None).expect("native certs"); + let store = build_root_store(None, true).expect("native certs"); assert!(!store.roots.is_empty(), "native trust store is empty"); } #[test] fn build_root_store_rejects_missing_file() { - let err = build_root_store(Some("/no/such/path/ca.pem")).unwrap_err(); + let err = build_root_store(Some("/no/such/path/ca.pem"), true).unwrap_err(); let body = serde_json::to_string(&err).unwrap(); assert!(body.contains("ca_cert read"), "got: {body}"); } @@ -293,16 +371,60 @@ mod tests { // Write a PEM-shaped block that's not a certificate. f.write_all(b"-----BEGIN PRIVATE KEY-----\nMIIBVQ==\n-----END PRIVATE KEY-----\n") .unwrap(); - let err = build_root_store(Some(f.path().to_str().unwrap())).unwrap_err(); + let err = build_root_store(Some(f.path().to_str().unwrap()), true).unwrap_err(); let body = serde_json::to_string(&err).unwrap(); assert!(body.contains("no PEM CERTIFICATE"), "got: {body}"); } + /// Additive default: with `trust_native = true` and a `ca_cert` set, + /// the store contains both the native count *and* the supplied cert. + /// This is the regression test for the Supabase use case — operators + /// can supply Supabase Intermediate 2021 without forfeiting the public + /// web PKI for other databases sharing the same TlsConfig surface. + #[test] + fn build_root_store_extends_native_when_trust_native_true() { + let native_only = build_root_store(None, true).expect("native certs"); + let native_count = native_only.roots.len(); + let f = write_fixture_pem(); + let extended = + build_root_store(Some(f.path().to_str().unwrap()), true).expect("native + fixture"); + assert_eq!( + extended.roots.len(), + native_count + 1, + "expected native_count + 1 fixture cert; native={native_count} extended={}", + extended.roots.len() + ); + } + + /// Strict-isolation: `trust_native = false` + ca_cert provided yields + /// exactly the supplied cert(s), nothing more. Preserves the old + /// "trust only my CA" posture from before the additive default. + #[test] + fn build_root_store_only_ca_cert_when_trust_native_false() { + let f = write_fixture_pem(); + let store = + build_root_store(Some(f.path().to_str().unwrap()), false).expect("ca_cert only"); + assert_eq!(store.roots.len(), 1, "expected exactly the fixture cert"); + } + + /// Both flags off is a misconfiguration: there are no trust anchors, + /// so any TLS handshake would fail. Catch it at config-build time + /// with a clear `CONFIG_ERROR`, not later as an opaque pool failure. + #[test] + fn build_root_store_errors_when_trust_native_false_and_no_ca_cert() { + let err = build_root_store(None, false).unwrap_err(); + let body = serde_json::to_string(&err).unwrap(); + assert!( + body.contains("no trust anchors"), + "expected explicit no-trust-anchors message; got: {body}" + ); + } + #[test] fn make_pg_connector_disable_returns_none() { let tls = TlsConfig { mode: TlsMode::Disable, - ca_cert: None, + ..Default::default() }; assert!(make_pg_connector(&tls).unwrap().is_none()); } @@ -311,7 +433,7 @@ mod tests { fn make_pg_connector_require_returns_some() { let tls = TlsConfig { mode: TlsMode::Require, - ca_cert: None, + ..Default::default() }; let conn = make_pg_connector(&tls).expect("require mode builds"); assert!(conn.is_some()); @@ -321,7 +443,7 @@ mod tests { fn make_pg_connector_verify_full_returns_some() { let tls = TlsConfig { mode: TlsMode::VerifyFull, - ca_cert: None, + ..Default::default() }; let conn = make_pg_connector(&tls).expect("verify-full mode builds"); assert!(conn.is_some()); @@ -331,7 +453,7 @@ mod tests { fn make_mysql_ssl_opts_disable_returns_none() { let tls = TlsConfig { mode: TlsMode::Disable, - ca_cert: None, + ..Default::default() }; assert!(make_mysql_ssl_opts(&tls).unwrap().is_none()); } @@ -340,7 +462,7 @@ mod tests { fn make_mysql_ssl_opts_require_skips_domain_validation() { let tls = TlsConfig { mode: TlsMode::Require, - ca_cert: None, + ..Default::default() }; let opts = make_mysql_ssl_opts(&tls).unwrap().unwrap(); // SslOpts doesn't expose getters for its danger-flags directly, diff --git a/iii-database/src/transaction.rs b/iii-database/src/transaction.rs new file mode 100644 index 00000000..5486f8d5 --- /dev/null +++ b/iii-database/src/transaction.rs @@ -0,0 +1,410 @@ +//! Interactive-transaction registry. +//! +//! Holds `PinnedConn`s that are currently inside a server-side +//! `BEGIN ... COMMIT/ROLLBACK` block. Each entry pins one pool connection +//! under a UUID handle; `transactionQuery` / `transactionExecute` / +//! `commitTransaction` / `rollbackTransaction` look up the handle and +//! run SQL on the held connection. +//! +//! Timeouts are enforced by a background sweep task (`spawn_timeout_watcher`) +//! that polls every second and best-effort-rollbacks any entry whose +//! deadline has passed. The sweep takes the entry from the map *first* +//! (so concurrent handler calls fast-fail with `TRANSACTION_NOT_FOUND`), +//! then waits for any in-flight statement to release the per-conn mutex +//! before issuing `ROLLBACK` — graceful rather than ripping the connection +//! out mid-statement. + +use crate::config::DriverKind; +use crate::driver; +use crate::error::DbError; +use crate::handle::PinnedConn; +use chrono::{DateTime, Duration as CDuration, Utc}; +use iii_sdk::Logger; +use schemars::JsonSchema; +use serde::Serialize; +use serde_json::json; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::{Mutex, OwnedMutexGuard, RwLock}; +use uuid::Uuid; + +/// JSON wire envelope returned by `beginTransaction`. +#[derive(Debug, Clone, Serialize, JsonSchema)] +pub struct TxHandleResponse { + pub id: String, + pub expires_at: DateTime, +} + +struct TxEntry { + db_name: String, + driver: DriverKind, + started_at: DateTime, + deadline: DateTime, + conn: Arc>, +} + +/// Owned-guard return shape for `TxRegistry::lock`. The caller holds the +/// per-conn mutex via `guard` for the duration of one statement; the other +/// fields are pre-extracted metadata so handlers don't have to re-read the +/// map after locking. +pub struct TxLock { + pub db_name: String, + pub driver: DriverKind, + pub started_at: DateTime, + pub guard: OwnedMutexGuard, +} + +/// Return shape for `TxRegistry::take`. The caller has exclusive ownership +/// of the conn Arc; locking it and running COMMIT/ROLLBACK on the variant +/// is the caller's job. +pub struct TakenTx { + pub db_name: String, + pub driver: DriverKind, + pub started_at: DateTime, + pub conn_arc: Arc>, +} + +/// Map of `transaction_id` (UUIDv4 string) → `TxEntry`. Cheap to clone +/// (single `Arc>`); store it in `AppState` directly. +#[derive(Clone, Default)] +pub struct TxRegistry { + inner: Arc>>, +} + +impl TxRegistry { + pub fn new() -> Self { + Self::default() + } + + /// Insert a freshly-begun transaction. The caller has already pulled a + /// connection from the pool and issued `BEGIN [ISOLATION LEVEL X]` on + /// it. We just record the pinned conn + metadata and return the handle. + pub async fn insert( + &self, + db_name: String, + driver: DriverKind, + conn: PinnedConn, + timeout: Duration, + ) -> TxHandleResponse { + let id = Uuid::new_v4().to_string(); + let now = Utc::now(); + // Clamp pathologically-large `timeout`s to `i64::MAX` ms so the + // chrono arithmetic doesn't panic. The handler clamps to 5 minutes + // long before this lands. + let deadline = + now + CDuration::from_std(timeout).unwrap_or_else(|_| CDuration::seconds(30)); + let entry = TxEntry { + db_name, + driver, + started_at: now, + deadline, + conn: Arc::new(Mutex::new(conn)), + }; + self.inner.write().await.insert(id.clone(), entry); + TxHandleResponse { + id, + expires_at: deadline, + } + } + + /// Acquire an exclusive lock on the pinned conn for query/execute. + /// Returns `TRANSACTION_NOT_FOUND` if the id is unknown, has been + /// finalized, or has been auto-rolled-back by the timeout watcher. + /// The returned `OwnedMutexGuard` queues behind any concurrent + /// statement on the same id; callers serialize naturally. + pub async fn lock(&self, id: &str) -> Result { + // Extract the Arc under a brief read lock, then await `lock_owned` + // outside the map's read lock. Holding the map read-lock across an + // `.await` would block all other handler lookups (begin/lock/take) + // for the duration of any in-flight statement on this id. + let g = self.inner.read().await; + let entry = g.get(id).ok_or_else(|| DbError::TransactionNotFound { + transaction_id: id.to_string(), + })?; + let db_name = entry.db_name.clone(); + let driver = entry.driver; + let started_at = entry.started_at; + let arc = Arc::clone(&entry.conn); + drop(g); + let guard = arc.lock_owned().await; + Ok(TxLock { + db_name, + driver, + started_at, + guard, + }) + } + + /// Atomically remove an entry from the registry and return the conn + /// Arc plus metadata. Subsequent `lock()` and `take()` calls against + /// the id return `TRANSACTION_NOT_FOUND`. Used by `commitTransaction` + /// and `rollbackTransaction` to finalize the txn. + pub async fn take(&self, id: &str) -> Result { + let entry = + self.inner + .write() + .await + .remove(id) + .ok_or_else(|| DbError::TransactionNotFound { + transaction_id: id.to_string(), + })?; + Ok(TakenTx { + db_name: entry.db_name, + driver: entry.driver, + started_at: entry.started_at, + conn_arc: entry.conn, + }) + } + + /// Spawn the timeout watcher task. Polls every 1 s, rolls back any + /// transaction whose deadline has passed, and removes the entry. The + /// returned `JoinHandle` is owned by `main.rs` for the lifetime of the + /// worker process; it loops forever until the runtime is dropped. + pub fn spawn_timeout_watcher(&self, log: Logger) -> tokio::task::JoinHandle<()> { + let me = self.clone(); + tokio::spawn(async move { + let mut interval = tokio::time::interval(Duration::from_secs(1)); + // Drop ticks that fire while a previous sweep is still running. + // A slow sweep (e.g. ROLLBACK of a long-running txn) would + // otherwise queue up multiple back-to-back sweeps. + interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + loop { + interval.tick().await; + me.run_one_sweep(&log).await; + } + }) + } + + async fn run_one_sweep(&self, log: &Logger) { + let now = Utc::now(); + // Take all expired entries in a single write-lock to keep the + // critical section short; process them outside the lock. + let expired: Vec<(String, TxEntry)> = { + let mut g = self.inner.write().await; + let ids: Vec = g + .iter() + .filter(|(_, e)| e.deadline <= now) + .map(|(id, _)| id.clone()) + .collect(); + ids.into_iter() + .filter_map(|id| g.remove(&id).map(|e| (id, e))) + .collect() + }; + for (id, entry) in expired { + let started_at = entry.started_at; + let deadline = entry.deadline; + let driver = entry.driver; + let db_name = entry.db_name.clone(); + // Wait for any in-flight tx op to finish, then ROLLBACK. + // The Arc's strong count is now 1 (we hold it; the map dropped + // its ref), so dropping the guard at the end of the scope will + // also drop the PinnedConn → conn returns to its pool. + let mut guard = entry.conn.lock_owned().await; + let result = rollback_inline(&mut guard).await; + let now2 = Utc::now(); + let duration_ms = (now2 - started_at).num_milliseconds().max(0); + let overshoot_ms = (now2 - deadline).num_milliseconds().max(0); + match result { + Ok(()) => log.warn( + "db_tx_timed_out", + Some(json!({ + "db.system": driver_system(driver), + "db.name": db_name, + "db.transaction.id": id, + "duration_ms": duration_ms, + "deadline_overshoot_ms": overshoot_ms, + })), + ), + Err(e) => log.error( + "db_tx_timeout_rollback_failed", + Some(json!({ + "db.system": driver_system(driver), + "db.name": db_name, + "db.transaction.id": id, + "duration_ms": duration_ms, + "error": serde_json::to_value(&e).ok(), + })), + ), + } + drop(guard); + } + } + + /// Number of currently-tracked transactions. Test helper. + #[cfg(test)] + pub async fn len(&self) -> usize { + self.inner.read().await.len() + } + + /// Whether the registry currently tracks no transactions. Test helper. + #[cfg(test)] + pub async fn is_empty(&self) -> bool { + self.inner.read().await.is_empty() + } +} + +/// Map `DriverKind` to the OTel `db.system` semantic-convention value. +pub fn driver_system(d: DriverKind) -> &'static str { + match d { + DriverKind::Postgres => "postgres", + DriverKind::Mysql => "mysql", + DriverKind::Sqlite => "sqlite", + } +} + +/// Dispatch `ROLLBACK` onto whatever driver variant the locked conn is. +/// Used by the timeout sweep; explicit `rollbackTransaction` calls invoke +/// the driver helpers directly in the handler. +async fn rollback_inline(guard: &mut OwnedMutexGuard) -> Result<(), DbError> { + match &mut **guard { + PinnedConn::Sqlite(slot) => driver::sqlite::tx_rollback(slot).await, + PinnedConn::Postgres(client) => driver::postgres::tx_rollback(client).await, + PinnedConn::Mysql(conn) => driver::mysql::tx_rollback(conn).await, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::PoolConfig; + use crate::pool::SqlitePool; + + async fn sqlite_pinned() -> PinnedConn { + let pool = SqlitePool::new("sqlite::memory:", &PoolConfig::default()).unwrap(); + let conn = pool.acquire().await.unwrap(); + PinnedConn::Sqlite(Some(conn)) + } + + #[tokio::test(flavor = "multi_thread")] + async fn insert_then_lock_returns_metadata() { + let reg = TxRegistry::new(); + let conn = sqlite_pinned().await; + let h = reg + .insert( + "primary".into(), + DriverKind::Sqlite, + conn, + Duration::from_secs(30), + ) + .await; + assert_eq!(h.id.len(), 36); + let lock = reg.lock(&h.id).await.unwrap(); + assert_eq!(lock.db_name, "primary"); + assert!(matches!(lock.driver, DriverKind::Sqlite)); + } + + #[tokio::test(flavor = "multi_thread")] + async fn lock_after_take_returns_transaction_not_found() { + // Once `take` removes an entry, subsequent `lock` lookups must + // fast-fail with TRANSACTION_NOT_FOUND — this is what makes + // commitTransaction/rollbackTransaction one-shot finalization. + let reg = TxRegistry::new(); + let conn = sqlite_pinned().await; + let h = reg + .insert( + "primary".into(), + DriverKind::Sqlite, + conn, + Duration::from_secs(30), + ) + .await; + let _taken = reg.take(&h.id).await.unwrap(); + match reg.lock(&h.id).await { + Err(DbError::TransactionNotFound { transaction_id }) => { + assert_eq!(transaction_id, h.id); + } + Err(other) => panic!("expected TransactionNotFound, got {other:?}"), + Ok(_) => panic!("expected TransactionNotFound, got Ok"), + } + } + + #[tokio::test(flavor = "multi_thread")] + async fn take_twice_returns_transaction_not_found() { + let reg = TxRegistry::new(); + let conn = sqlite_pinned().await; + let h = reg + .insert( + "primary".into(), + DriverKind::Sqlite, + conn, + Duration::from_secs(30), + ) + .await; + let _first = reg.take(&h.id).await.unwrap(); + assert!(matches!( + reg.take(&h.id).await, + Err(DbError::TransactionNotFound { .. }) + )); + } + + #[tokio::test(flavor = "multi_thread")] + async fn lock_unknown_id_returns_transaction_not_found() { + // `TxLock` holds an `OwnedMutexGuard` which has no `Debug` impl, so + // we can't use `unwrap_err()` directly — pattern-match the Result. + let reg = TxRegistry::new(); + match reg.lock("00000000-0000-0000-0000-000000000000").await { + Err(DbError::TransactionNotFound { .. }) => {} + Err(other) => panic!("expected TransactionNotFound, got {other:?}"), + Ok(_) => panic!("expected TransactionNotFound, got Ok"), + } + } + + /// Regression: after the deadline elapses, the watcher's next sweep + /// must (a) remove the entry from the map and (b) issue `ROLLBACK` on + /// the pinned conn. We drive the sweep manually instead of waiting for + /// the 1 s poll interval so the test runs in milliseconds. + #[tokio::test(flavor = "multi_thread")] + async fn timeout_sweep_removes_expired_entries_and_rolls_back() { + let reg = TxRegistry::new(); + let conn = sqlite_pinned().await; + // Open a "real" txn on the conn so ROLLBACK has something to abort. + // BEGIN must succeed on a freshly-acquired sqlite conn — no + // ambient transaction state from the pool. + let mut conn = conn; + if let PinnedConn::Sqlite(slot) = &mut conn { + driver::sqlite::tx_begin(slot, None).await.unwrap(); + } + // Insert with a 1ms timeout, then wait past it. + let h = reg + .insert( + "primary".into(), + DriverKind::Sqlite, + conn, + Duration::from_millis(1), + ) + .await; + tokio::time::sleep(Duration::from_millis(20)).await; + assert_eq!(reg.len().await, 1, "entry still present before sweep"); + + let log = Logger::new(); + reg.run_one_sweep(&log).await; + assert_eq!(reg.len().await, 0, "expired entry must be removed"); + + // Subsequent lock fails with TRANSACTION_NOT_FOUND. + match reg.lock(&h.id).await { + Err(DbError::TransactionNotFound { .. }) => {} + Err(other) => panic!("expected TransactionNotFound after sweep, got {other:?}"), + Ok(_) => panic!("expected TransactionNotFound after sweep, got Ok"), + } + } + + /// Live entries (deadline still in the future) must not be touched. + #[tokio::test(flavor = "multi_thread")] + async fn timeout_sweep_leaves_live_entries_alone() { + let reg = TxRegistry::new(); + let conn = sqlite_pinned().await; + // 60-second deadline so a sub-second test can't possibly elapse it. + let _h = reg + .insert( + "primary".into(), + DriverKind::Sqlite, + conn, + Duration::from_secs(60), + ) + .await; + let log = Logger::new(); + reg.run_one_sweep(&log).await; + assert_eq!(reg.len().await, 1, "live entry must survive sweep"); + } +} diff --git a/iii-database/src/triggers/handler.rs b/iii-database/src/triggers/handler.rs index 49e3b045..3a00ff2e 100644 --- a/iii-database/src/triggers/handler.rs +++ b/iii-database/src/triggers/handler.rs @@ -1,213 +1,13 @@ -//! TriggerHandler implementations for `iii-database::query-poll` and -//! `iii-database::row-change`. Wired into the worker via -//! `iii.register_trigger_type` from main.rs. -//! -//! The engine routes `iii.registerTrigger({type: "iii-database::query-poll", ...})` -//! calls from any client (e.g. the test harness) back to the worker that -//! registered that trigger type. We spawn a per-instance polling loop on -//! `register_trigger` and cancel it on `unregister_trigger`. +//! TriggerHandler implementations for `iii-database::row-change`. Wired into +//! the worker via `iii.register_trigger_type` from main.rs. -use crate::handlers::AppState; -use crate::triggers::query_poll::{self, Dispatch, DispatchAck, DispatchedBatch, QueryPollConfig}; use async_trait::async_trait; -use iii_sdk::{protocol::TriggerRequest, IIIError, TriggerConfig, TriggerHandler, III}; -use std::collections::HashMap; -use std::sync::Arc; -use tokio::sync::Mutex; -use tokio::task::JoinHandle; - -/// Dispatch impl that forwards a polled batch to the engine via `iii.trigger`. -/// The engine routes the invocation to whoever registered `function_id`. -struct EngineDispatch { - iii: III, - function_id: String, -} - -#[async_trait] -impl Dispatch for EngineDispatch { - async fn dispatch(&self, batch: DispatchedBatch) -> Result { - let payload = - serde_json::to_value(&batch).map_err(|e| crate::error::DbError::DriverError { - driver: "engine-dispatch".into(), - code: None, - message: format!("serialize batch: {e}"), - failed_index: None, - })?; - let req = TriggerRequest { - function_id: self.function_id.clone(), - payload, - action: None, - timeout_ms: None, - }; - match self.iii.trigger(req).await { - Ok(value) => { - // Three response shapes, three behaviors: - // 1. null → function returned void = successful completion; - // ack so the cursor advances. - // 2. valid {ack?, commit_cursor?} → use as-is. - // 3. anything else → malformed; fail-safe to ack=false so - // the next tick retries instead of silently - // dropping rows the function never processed. - if value.is_null() { - Ok(DispatchAck { - ack: true, - commit_cursor: None, - }) - } else { - Ok( - serde_json::from_value::(value).unwrap_or(DispatchAck { - ack: false, - commit_cursor: None, - }), - ) - } - } - Err(e) => Err(crate::error::DbError::DriverError { - driver: "engine-dispatch".into(), - code: None, - message: format!("trigger invocation failed: {e}"), - failed_index: None, - }), - } - } -} - -/// `iii-database::query-poll` trigger handler. Spawns a polling loop per -/// registered trigger instance; cancels on unregister. -/// -/// Tasks are tracked twice: -/// - by engine-assigned instance id (for unregister, which receives that id) -/// - by user-supplied `trigger_id` (so a re-registration with the same -/// trigger_id replaces the old task, which is essential for idempotent -/// re-runs of clients whose `unregister` is fire-and-forget across a -/// process exit). -pub struct QueryPollTrigger { - state: AppState, - iii: III, - /// Map of trigger instance id → spawned task handle. Indexed for - /// `unregister_trigger`. - tasks: Arc>>>, - /// Map of user-supplied `trigger_id` → engine-assigned instance id. - /// Used to evict stale tasks when the same `trigger_id` is registered - /// again before the prior `unregister` has reached us. - by_trigger_id: Arc>>, -} - -impl QueryPollTrigger { - pub fn new(state: AppState, iii: III) -> Self { - Self { - state, - iii, - tasks: Arc::new(Mutex::new(HashMap::new())), - by_trigger_id: Arc::new(Mutex::new(HashMap::new())), - } - } -} +use iii_sdk::{IIIError, TriggerConfig, TriggerHandler}; fn iii_err(err: T) -> IIIError { IIIError::Handler(serde_json::to_string(&err).unwrap_or_else(|_| "{}".into())) } -#[async_trait] -impl TriggerHandler for QueryPollTrigger { - async fn register_trigger(&self, config: TriggerConfig) -> Result<(), IIIError> { - let mut cfg: QueryPollConfig = - serde_json::from_value(config.config.clone()).map_err(|e| { - iii_err(crate::error::DbError::ConfigError { - message: format!("query-poll config: {e}"), - }) - })?; - // If the user-provided trigger_id is empty (or absent — serde default), - // fall back to the engine-assigned instance id so the cursor table - // key is stable across restarts of the same instance. - if cfg.trigger_id.is_empty() { - cfg.trigger_id = config.id.clone(); - } - cfg.validate().map_err(iii_err)?; - - let pool = self - .state - .pools - .get(&cfg.db_name) - .ok_or_else(|| { - iii_err(crate::error::DbError::UnknownDb { - db: cfg.db_name.clone(), - }) - })? - .clone(); - - let dispatch: Arc = Arc::new(EngineDispatch { - iii: self.iii.clone(), - function_id: config.function_id.clone(), - }); - - let trigger_id = cfg.trigger_id.clone(); - - // Acquire the registry locks *before* spawning. Previously the spawn - // ran first (outside any lock) and the JoinHandle was inserted into - // `tasks` afterward — a concurrent `unregister_trigger(config.id)` - // could fire in between, find `tasks` empty, return Ok, and the - // newly-spawned poller would leak running forever (orphaned task - // with no abort handle and no entry in either index). - // - // `tokio::spawn` returns synchronously (it just schedules the future - // on the runtime), so holding the lock across it grows the critical - // section by microseconds — well worth closing the TOCTOU race. - // - // Lock order `by_trigger_id` → `tasks` is canonical and matches - // `unregister_trigger` below; reversing would deadlock on concurrent - // calls. - { - let mut by_id = self.by_trigger_id.lock().await; - let mut tasks = self.tasks.lock().await; - - // Evict stale instance for the same user-supplied trigger_id. - let stale = by_id.insert(trigger_id.clone(), config.id.clone()); - if let Some(s) = stale { - if let Some(old_task) = tasks.remove(&s) { - old_task.abort(); - tracing::info!( - trigger_id = %trigger_id, - evicted_instance = %s, - "query-poll evicted stale task on re-registration" - ); - } - } - - // Spawn under the lock so unregister_trigger(config.id) cannot - // interleave between "task spawned" and "task in registry". - let task = tokio::spawn(async move { - query_poll::run_loop(pool, cfg, dispatch).await; - }); - tasks.insert(config.id.clone(), task); - } - - tracing::info!( - trigger_instance = %config.id, - trigger_id = %trigger_id, - function_id = %config.function_id, - "query-poll trigger registered" - ); - Ok(()) - } - - async fn unregister_trigger(&self, config: TriggerConfig) -> Result<(), IIIError> { - // Lock order: `by_trigger_id` → `tasks`, matching `register_trigger`. - // Reverse ordering would deadlock against a concurrent register. - let mut by_id = self.by_trigger_id.lock().await; - let mut tasks = self.tasks.lock().await; - if let Some(task) = tasks.remove(&config.id) { - task.abort(); - tracing::info!(trigger_instance = %config.id, "query-poll trigger unregistered"); - } - // Best-effort: drop any reverse-index entries that point at this - // instance. If a different instance has since taken the trigger_id - // slot, leave that mapping alone. - by_id.retain(|_, instance| instance != &config.id); - Ok(()) - } -} - /// `iii-database::row-change` trigger handler. v1.0 stubs the streaming decoder /// pending an upstream tokio-postgres replication API release. `register_trigger` /// returns Unsupported so callers see a clear error instead of silently never diff --git a/iii-database/src/triggers/mod.rs b/iii-database/src/triggers/mod.rs index 5ca0c457..1e7e0df5 100644 --- a/iii-database/src/triggers/mod.rs +++ b/iii-database/src/triggers/mod.rs @@ -2,8 +2,7 @@ //! at worker startup. //! //! Only `handler` is part of the public crate surface (consumed by main.rs). -//! `query_poll` and `row_change` are implementation modules. +//! `row_change` is an implementation module. pub mod handler; -pub(crate) mod query_poll; pub(crate) mod row_change; diff --git a/iii-database/src/triggers/query_poll.rs b/iii-database/src/triggers/query_poll.rs deleted file mode 100644 index 66d862be..00000000 --- a/iii-database/src/triggers/query_poll.rs +++ /dev/null @@ -1,390 +0,0 @@ -//! query-poll trigger — cursor-based polling loop. -//! -//! On each tick: -//! 1. Read the cursor from `__iii_cursors` for this `trigger_id`. -//! 2. Run the user SQL with the cursor bound as the single positional parameter. -//! 3. If rows returned: dispatch a batch to the engine. -//! 4. On `ack: true`, write the max of `cursor_column` back to `__iii_cursors`. - -use crate::cursor; -use crate::driver; -use crate::error::DbError; -use crate::pool::Pool; -use crate::value::{JsonParam, RowValue}; -use async_trait::async_trait; -use serde::{Deserialize, Serialize}; -use serde_json::Value; -use std::sync::Arc; -use std::time::Duration; - -#[derive(Debug, Clone, Deserialize)] -pub struct QueryPollConfig { - pub trigger_id: String, - #[serde(rename = "db")] - pub db_name: String, - pub sql: String, - #[serde(default = "default_interval_ms")] - pub interval_ms: u64, - pub cursor_column: String, - #[serde(default = "default_cursor_table")] - pub cursor_table: String, -} - -fn default_interval_ms() -> u64 { - 1000 -} -fn default_cursor_table() -> String { - cursor::DEFAULT_CURSOR_TABLE.to_string() -} - -#[derive(Debug, Clone, Serialize)] -pub struct DispatchedBatch { - pub db: String, - pub rows: Vec>, - pub cursor: Option, - pub polled_at: chrono::DateTime, -} - -#[derive(Debug, Clone, Deserialize, Default)] -pub struct DispatchAck { - #[serde(default = "default_ack")] - pub ack: bool, - #[serde(default)] - pub commit_cursor: Option, -} - -fn default_ack() -> bool { - true -} - -#[async_trait] -pub trait Dispatch: Send + Sync { - async fn dispatch(&self, batch: DispatchedBatch) -> Result; -} - -impl QueryPollConfig { - /// Validate operator-supplied identifiers that get interpolated into - /// SQL strings (currently `cursor_table`). Call this once at config-load - /// time; `run_one_tick` also defends in depth on the chance the config - /// was constructed in code without going through `from_yaml`. - pub fn validate(&self) -> Result<(), DbError> { - crate::config::validate_sql_identifier(&self.cursor_table).map_err(|e| { - DbError::ConfigError { - message: format!("query-poll cursor_table: {e}"), - } - })?; - Ok(()) - } -} - -/// Compute the max cursor value across all rows in a poll batch. -/// -/// Two-pass design keeps the integer hot path zero-alloc: -/// 1. Try to compute `i64::max` over every row's cursor cell. Pure -/// `RowValue::Int`/`BigInt` go straight in; `Text`/`Decimal` are -/// `parse::()`'d (handles stringly-typed integer cursors). -/// 2. If any row's cell can't be coerced to i64, fall back to lexicographic -/// max — but only stringify cells that aren't already textual. -/// -/// On the common integer-cursor case (the example SQL in the README), this -/// allocates exactly one String at the end (the i64::to_string of the max), -/// regardless of batch size. -fn compute_cursor_max(rows: &[crate::driver::Row], col_idx: usize) -> Option { - let mut int_max: Option = None; - let mut all_ints = true; - for row in rows { - let Some(v) = row.0.get(col_idx) else { - continue; - }; - let parsed: Option = match v { - RowValue::Int(n) | RowValue::BigInt(n) => Some(*n), - RowValue::Text(s) | RowValue::Decimal(s) => s.parse::().ok(), - _ => None, - }; - match parsed { - Some(n) => int_max = Some(int_max.map_or(n, |m| m.max(n))), - None => { - all_ints = false; - break; - } - } - } - if all_ints { - return int_max.map(|n| n.to_string()); - } - // Fallback: stringly-typed cursor (UUIDs, ISO-8601, etc.). Borrow `&str` - // from `Text`/`Decimal` cells; for variants without a native string repr, - // stringify just-in-time. Cow keeps the per-row branch alloc-free on the - // common path where every row is text. - use std::borrow::Cow; - let mut best: Option> = None; - for row in rows { - let Some(v) = row.0.get(col_idx) else { - continue; - }; - let cur: Cow<'_, str> = match v { - RowValue::Text(s) | RowValue::Decimal(s) => Cow::Borrowed(s.as_str()), - other => Cow::Owned(other.to_json().to_string().trim_matches('"').to_string()), - }; - best = Some(match best { - Some(prev) if prev.as_ref() >= cur.as_ref() => prev, - _ => cur, - }); - } - best.map(Cow::into_owned) -} - -pub async fn run_one_tick( - pool: &Pool, - cfg: &QueryPollConfig, - dispatch: Arc, -) -> Result<(), DbError> { - cfg.validate()?; - cursor::ensure_table(pool, &cfg.cursor_table).await?; - let cur = cursor::read_cursor(pool, &cfg.cursor_table, &cfg.trigger_id).await?; - let cur_param = match cur.as_deref() { - Some(s) => match s.parse::() { - Ok(n) => JsonParam::Int(n), - Err(_) => JsonParam::Text(s.to_string()), - }, - None => JsonParam::Null, - }; - - let result = match pool { - Pool::Sqlite(p) => driver::sqlite::query(p, &cfg.sql, &[cur_param], 30_000).await?, - Pool::Postgres(p) => driver::postgres::query(p, &cfg.sql, &[cur_param], 30_000).await?, - Pool::Mysql(p) => driver::mysql::query(p, &cfg.sql, &[cur_param], 30_000).await?, - }; - - if result.rows.is_empty() { - return Ok(()); - } - - let col_idx = result - .columns - .iter() - .position(|c| c.name == cfg.cursor_column) - .ok_or_else(|| DbError::ConfigError { - message: format!( - "cursor_column `{}` not found in result columns", - cfg.cursor_column - ), - })?; - - let max_cursor: Option = compute_cursor_max(&result.rows, col_idx); - - let json_rows = crate::handlers::query_rows_to_objects(&result.columns, result.rows); - - let batch = DispatchedBatch { - db: cfg.db_name.clone(), - rows: json_rows, - cursor: max_cursor.clone(), - polled_at: chrono::Utc::now(), - }; - - let ack = dispatch.dispatch(batch).await?; - if ack.ack { - let new_cursor = ack.commit_cursor.or(max_cursor); - if let Some(c) = new_cursor { - cursor::write_cursor(pool, &cfg.cursor_table, &cfg.trigger_id, &c).await?; - } - } - Ok(()) -} - -pub async fn run_loop(pool: Pool, cfg: QueryPollConfig, dispatch: Arc) { - let mut interval = tokio::time::interval(Duration::from_millis(cfg.interval_ms)); - // Drop ticks that fire while a previous tick is still running, instead of - // bursting to catch up. Without this, a slow query would queue up multiple - // back-to-back polls and hammer the DB once the slow tick completes. - interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); - loop { - interval.tick().await; - if let Err(e) = run_one_tick(&pool, &cfg, dispatch.clone()).await { - tracing::warn!(trigger_id = %cfg.trigger_id, error = ?e, "query-poll tick failed"); - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::config::PoolConfig; - use crate::pool::{Pool, SqlitePool}; - use std::sync::{Arc, Mutex}; - - #[derive(Default)] - struct CapturingDispatch { - calls: Mutex>, - ack: bool, - } - - #[async_trait] - impl Dispatch for CapturingDispatch { - async fn dispatch( - &self, - batch: DispatchedBatch, - ) -> Result { - self.calls.lock().unwrap().push(batch); - Ok(DispatchAck { - ack: self.ack, - commit_cursor: None, - }) - } - } - - #[tokio::test(flavor = "multi_thread")] - async fn poll_emits_new_rows_and_advances_cursor_on_ack() { - let p = Pool::Sqlite(SqlitePool::new("sqlite::memory:", &PoolConfig::default()).unwrap()); - - // Setup table - let s = match &p { - Pool::Sqlite(s) => s, - _ => unreachable!(), - }; - crate::driver::sqlite::execute( - s, - "CREATE TABLE outbox (id INTEGER PRIMARY KEY, body TEXT)", - &[], - &[], - ) - .await - .unwrap(); - // Use separate inserts because driver::sqlite::execute uses Connection::execute (single statement) - for body in &["a", "b", "c"] { - crate::driver::sqlite::execute( - s, - "INSERT INTO outbox (body) VALUES (?)", - &[JsonParam::Text((*body).into())], - &[], - ) - .await - .unwrap(); - } - - let dispatch = Arc::new(CapturingDispatch { - calls: Default::default(), - ack: true, - }); - let cfg = QueryPollConfig { - trigger_id: "trig-1".into(), - db_name: "primary".into(), - sql: "SELECT id, body FROM outbox WHERE id > COALESCE(?, 0) ORDER BY id LIMIT 50" - .into(), - interval_ms: 25, - cursor_column: "id".into(), - cursor_table: crate::cursor::DEFAULT_CURSOR_TABLE.into(), - }; - - run_one_tick(&p, &cfg, dispatch.clone()).await.unwrap(); - let calls_len = dispatch.calls.lock().unwrap().len(); - assert_eq!(calls_len, 1); - assert_eq!(dispatch.calls.lock().unwrap()[0].rows.len(), 3); - - // Cursor should now be "3". - let v = crate::cursor::read_cursor(&p, &cfg.cursor_table, &cfg.trigger_id) - .await - .unwrap(); - assert_eq!(v.as_deref(), Some("3")); - - // Second tick produces no new rows. - run_one_tick(&p, &cfg, dispatch.clone()).await.unwrap(); - assert_eq!( - dispatch.calls.lock().unwrap().len(), - 1, - "no new rows should produce no dispatch" - ); - } - - #[tokio::test(flavor = "multi_thread")] - async fn poll_does_not_advance_on_nack() { - let p = Pool::Sqlite(SqlitePool::new("sqlite::memory:", &PoolConfig::default()).unwrap()); - let s = match &p { - Pool::Sqlite(s) => s, - _ => unreachable!(), - }; - crate::driver::sqlite::execute( - s, - "CREATE TABLE outbox (id INTEGER PRIMARY KEY, body TEXT)", - &[], - &[], - ) - .await - .unwrap(); - crate::driver::sqlite::execute(s, "INSERT INTO outbox (body) VALUES ('x')", &[], &[]) - .await - .unwrap(); - - let dispatch = Arc::new(CapturingDispatch { - calls: Default::default(), - ack: false, - }); - let cfg = QueryPollConfig { - trigger_id: "trig-x".into(), - db_name: "primary".into(), - sql: "SELECT id, body FROM outbox WHERE id > COALESCE(?, 0) ORDER BY id".into(), - interval_ms: 25, - cursor_column: "id".into(), - cursor_table: crate::cursor::DEFAULT_CURSOR_TABLE.into(), - }; - run_one_tick(&p, &cfg, dispatch.clone()).await.unwrap(); - let v = crate::cursor::read_cursor(&p, &cfg.cursor_table, &cfg.trigger_id) - .await - .unwrap(); - assert!(v.is_none(), "cursor should not advance on nack"); - } - - #[tokio::test(flavor = "multi_thread")] - async fn poll_cursor_advances_numerically_across_digit_boundary() { - // Regression: lexicographic max("9","10") == "9", which would replay - // row 10 forever. The driver must compare cursor values numerically - // when every value parses as i64. - let p = Pool::Sqlite(SqlitePool::new("sqlite::memory:", &PoolConfig::default()).unwrap()); - let s = match &p { - Pool::Sqlite(s) => s, - _ => unreachable!(), - }; - crate::driver::sqlite::execute( - s, - "CREATE TABLE outbox (id INTEGER PRIMARY KEY, body TEXT)", - &[], - &[], - ) - .await - .unwrap(); - // 12 rows so the batch crosses the 9→10 boundary. - for i in 1..=12 { - crate::driver::sqlite::execute( - s, - "INSERT INTO outbox (body) VALUES (?)", - &[JsonParam::Text(format!("body-{i}"))], - &[], - ) - .await - .unwrap(); - } - - let dispatch = Arc::new(CapturingDispatch { - calls: Default::default(), - ack: true, - }); - let cfg = QueryPollConfig { - trigger_id: "trig-num".into(), - db_name: "primary".into(), - sql: "SELECT id, body FROM outbox WHERE id > COALESCE(?, 0) ORDER BY id LIMIT 50" - .into(), - interval_ms: 25, - cursor_column: "id".into(), - cursor_table: crate::cursor::DEFAULT_CURSOR_TABLE.into(), - }; - run_one_tick(&p, &cfg, dispatch.clone()).await.unwrap(); - let v = crate::cursor::read_cursor(&p, &cfg.cursor_table, &cfg.trigger_id) - .await - .unwrap(); - assert_eq!( - v.as_deref(), - Some("12"), - "cursor must be the numeric max (12), not the lexicographic max (9)" - ); - } -} diff --git a/iii-database/src/triggers/row_change.rs b/iii-database/src/triggers/row_change.rs index ebba14db..aab6faf4 100644 --- a/iii-database/src/triggers/row_change.rs +++ b/iii-database/src/triggers/row_change.rs @@ -382,7 +382,7 @@ mod tests { }; let tls = crate::config::TlsConfig { mode: crate::config::TlsMode::Disable, - ca_cert: None, + ..Default::default() }; let mut client = connect_for_setup(&u, &tls).await.unwrap(); // Cleanup from prior run diff --git a/iii-database/tests/e2e/README.md b/iii-database/tests/e2e/README.md index 3d0467fd..d710ca09 100644 --- a/iii-database/tests/e2e/README.md +++ b/iii-database/tests/e2e/README.md @@ -1,15 +1,20 @@ # iii-database worker — end-to-end harness -Self-asserting smoke harness for the `iii-database` worker. Validates the 5 -core RPC functions, the `query-poll` trigger, and the `row-change` slot/ -publication derivation contract against real **SQLite**, **PostgreSQL 16**, -and **MySQL 8.4** with one command. +Self-asserting smoke harness for the `iii-database` worker. Validates the +function surface (query / execute / prepareStatement / runStatement / +transaction), the **interactive-transaction** surface (begin / +transactionQuery / transactionExecute / commit / rollback), the +`row-change` slot/publication derivation contract, and the side-channel- +finalization repros from the `/review` of branch `feat/database-and-skills` +against real **SQLite**, **PostgreSQL 16**, and **MySQL 8.4** with one +command. Runs locally and in CI (`.github/workflows/iii-database-e2e.yml`). ## Prerequisites -- Docker (for the postgres + mysql containers) +- Docker (for the postgres + mysql containers). Rootless podman works too — + see the `COMPOSE` env override below. - Rust toolchain (`cargo` on `$PATH`) - Node.js 20+ (`npm` on `$PATH`) - The iii engine on `$PATH`. Install with: @@ -22,20 +27,26 @@ Runs locally and in CI (`.github/workflows/iii-database-e2e.yml`). ## Run ```sh -./run-tests.sh +./run-tests.sh # full suite + bypass repros +./run-tests.sh --bypass-only # ONLY the 4 side-channel-finalization repros +./run-tests.sh --no-bypass # full suite without the bypass repros (use + # when the worker hasn't shipped the fix yet) ``` Builds the worker (`cargo build --release --bin iii-database`), brings up -the docker stack with `wal_level=logical`, starts the engine, and runs ~90 -assertions across all 3 drivers. Exits 0 on PASS, 1 on any FAIL. +the docker stack with `wal_level=logical`, starts the engine, and runs the +selected case groups across all 3 drivers. Exits 0 on PASS, 1 on any FAIL. ## Flags | Flag | Effect | |---|---| -| `--keep` | Leave docker stack up after the run for debugging | +| `--keep` | Leave the compose stack up after the run for debugging | | `--no-build` | Skip the cargo build step | +| `--with-cargo-test` | Also run `cargo test --all-features` with live DB URLs (CI uses this) | | `--filter=` | Run only one driver | +| `--bypass-only` | Run only the side-channel-finalization bypass repros (4 cases × 3 drivers, gated per case) | +| `--no-bypass` | Run the full suite without the bypass repros | ## Env overrides @@ -48,9 +59,34 @@ overridden: | `III_BIN` | `$(command -v iii)` then `$HOME/.local/bin/iii` | Engine binary | | `WORKER_BIN_TARGET` | `$WORKER_SRC/target/release/iii-database` | Built worker | | `WORKER_BIN_LINK` | `$HOME/.iii/workers/iii-database` | Symlink the engine reads | +| `COMPOSE` | `docker compose` | Compose command. Set to `podman-compose` for rootless podman; the script auto-switches its healthcheck strategy to `podman inspect` (since podman-compose 1.x doesn't implement compose v2's `--wait`). | +| `HARNESS_MODE` | `full` | `full` / `no-bypass` / `bypass-only`. Set by the flags above; you usually don't need to set this directly. | | `HARNESS_TIMEOUT` | `180` | Seconds to wait for the test sentinel | | `HEALTH_TIMEOUT` | `60` | Seconds to wait for db healthchecks | +## Bypass repros (`--bypass-only`) + +The bypass-repro cases live in +`workers/harness/src/cases-tx-control-bypass.ts` and demonstrate the three +ways the `is_transaction_control_sql` filter in +`src/handlers/transaction_execute.rs` was bypassable before the fix in this +branch. Each case stages a row inside an interactive transaction, attempts +the bypass, and — if the worker accepts the SQL — proves the desync by +counting outside-txn rows that shouldn't be visible. PASS = defense holds; +FAIL = bypass confirmed (the failure message names the exact mode and the +leaked-row count). + +| # | Bypass | Drivers | Maps to | +|---|---|---|---| +| 1 | `/* */COMMIT` via `transactionExecute` | all 3 | finding #1 (block-comment strip) | +| 1b | `--\nCOMMIT` via `transactionExecute` | pg + mysql | finding #1 (line-comment strip) | +| 2 | `COMMIT` via `transactionQuery` | all 3 | finding #2 (missing guard on query path) | +| 3 | `START TRANSACTION` via `transactionExecute` | mysql only | finding #3 (`START TRANSACTION` keyword) | + +On the post-fix worker all four PASS. On a pre-fix worker each FAILing case +prints the desync evidence (e.g. `BYPASS CONFIRMED [#1]: '/* */COMMIT' was +accepted; outside-tx COUNT=1`). + ## Layout | File | Role | @@ -59,6 +95,8 @@ overridden: | `docker-compose.yml` | Postgres (wal_level=logical) + MySQL with healthchecks | | `config.yaml` | Engine config (queue, observability, iii-database, harness) | | `workers/harness/` | TypeScript smoke-test worker (runs as a host process) | +| `workers/harness/src/cases-interactive-tx.ts` | Interactive-transaction lifecycle cases | +| `workers/harness/src/cases-tx-control-bypass.ts` | Side-channel-finalization repros | | `reports/report.json` | Per-case results (latest run) | ## CI @@ -77,4 +115,7 @@ the same docker compose stack used locally, and shells out to - **`iii engine binary missing`**: install with the script above. - **Sentinel timeout**: tail `reports/harness-*.log` for the harness output. - **Docker daemon not running**: start Docker Desktop (or `colima start`) - and re-run. + and re-run. Or use rootless podman: `COMPOSE='podman-compose' ./run-tests.sh`. +- **Bypass cases FAIL**: expected on a worker that hasn't shipped the + side-channel-finalization fix. Re-run with `--no-bypass` to confirm the + rest of the suite is clean. diff --git a/iii-database/tests/e2e/run-tests.sh b/iii-database/tests/e2e/run-tests.sh index e5601372..99632298 100755 --- a/iii-database/tests/e2e/run-tests.sh +++ b/iii-database/tests/e2e/run-tests.sh @@ -12,6 +12,12 @@ III_BIN="${III_BIN:-$(command -v iii 2>/dev/null || echo "$HOME/.local/bin/iii") WORKER_BIN_TARGET="${WORKER_BIN_TARGET:-$WORKER_SRC/target/release/iii-database}" WORKER_BIN_LINK="${WORKER_BIN_LINK:-$HOME/.iii/workers/iii-database}" +# Container runtime. Defaults to `docker compose` (compose v2; what CI runs). +# Override to `podman-compose` for rootless podman on dev laptops; the script +# falls back to a healthcheck poll because podman-compose 1.x doesn't +# implement compose v2's `up --wait` flag. +COMPOSE="${COMPOSE:-docker compose}" + REPORT_PATH="$ROOT_DIR/reports/report.json" TS=$(date +%Y%m%d-%H%M%S) ENGINE_LOG="$ROOT_DIR/reports/engine-$TS.log" @@ -23,6 +29,7 @@ KEEP=0 NO_BUILD=0 WITH_CARGO_TEST=0 FILTER="" +MODE="full" for arg in "$@"; do case "$arg" in @@ -30,23 +37,34 @@ for arg in "$@"; do --no-build) NO_BUILD=1 ;; --with-cargo-test) WITH_CARGO_TEST=1 ;; --filter=*) FILTER="${arg#--filter=}" ;; + --bypass-only) MODE="bypass-only" ;; + --no-bypass) MODE="no-bypass" ;; -h|--help) cat <] +Usage: $0 [--keep] [--no-build] [--with-cargo-test] + [--filter=] [--bypass-only|--no-bypass] - --keep Leave docker compose stack running after the run. + --keep Leave the compose stack running after the run. --no-build Skip cargo build of the iii-database worker. --with-cargo-test Run \`cargo test --all-features\` after compose is healthy with TEST_POSTGRES_URL and TEST_MYSQL_URL pointing at the - docker stack — exercises gated driver/pool tests with - real DBs. CI uses this; local dev usually doesn't need it. + stack — exercises gated driver/pool tests with real DBs. + CI uses this; local dev usually doesn't need it. --filter=KEY Run only one driver (default: all 3). + --bypass-only Run ONLY the side-channel-finalization bypass repros. + Use this to focus on the /review findings: post-fix all + 4 cases must PASS; pre-fix at least one FAILs. + --no-bypass Run the full suite WITHOUT the bypass repros. Useful + when running against a worker that hasn't shipped the + fix yet so the rest of the suite reports cleanly. Env overrides: WORKER_SRC Path to the database worker crate (default: ../..). III_BIN Path to the iii engine binary (default: \$(command -v iii) or \$HOME/.local/bin/iii). WORKER_BIN_TARGET Path to the built worker binary (default: \$WORKER_SRC/target/release/iii-database). WORKER_BIN_LINK Path to the symlink the engine reads (default: \$HOME/.iii/workers/iii-database). + COMPOSE Compose command (default: 'docker compose'; set to + 'podman-compose' for rootless podman). HARNESS_TIMEOUT Seconds to wait for the harness sentinel (default: 180). HEALTH_TIMEOUT Seconds to wait for postgres/mysql healthchecks (default: 60). EOF @@ -69,7 +87,7 @@ cleanup() { wait "$ENGINE_PID" 2>/dev/null || true fi if [[ "$KEEP" -eq 0 ]]; then - (cd "$ROOT_DIR" && docker compose down -v >/dev/null 2>&1) || true + (cd "$ROOT_DIR" && $COMPOSE down -v >/dev/null 2>&1) || true fi exit "$code" } @@ -100,14 +118,58 @@ if [[ ! -x "$III_BIN" ]]; then exit 1 fi -# 4. Bring up postgres + mysql and wait for healthchecks via compose's --wait -# (compose v2 native; exits non-zero if any service fails to become healthy -# within HEALTH_TIMEOUT). Beats parsing `compose ps --format json` with regex. -echo "[run-tests] docker compose up -d --wait (timeout=${HEALTH_TIMEOUT}s)" -if ! (cd "$ROOT_DIR" && docker compose up -d --wait --wait-timeout "$HEALTH_TIMEOUT"); then - echo "[run-tests] FATAL: services did not become healthy within ${HEALTH_TIMEOUT}s" >&2 - (cd "$ROOT_DIR" && docker compose logs --tail 40) >&2 - exit 1 +# 4. Bring up postgres + mysql. +# +# docker compose v2 supports `up -d --wait --wait-timeout N`, which exits +# non-zero if any service fails to become healthy within the budget. That's +# the happy path and what CI uses. podman-compose 1.x doesn't implement +# `--wait`, so we detect the runtime and fall back to a healthcheck poll. +if [[ "$COMPOSE" == "docker compose" || "$COMPOSE" == "docker-compose" ]]; then + echo "[run-tests] $COMPOSE up -d --wait (timeout=${HEALTH_TIMEOUT}s)" + if ! (cd "$ROOT_DIR" && $COMPOSE up -d --wait --wait-timeout "$HEALTH_TIMEOUT"); then + echo "[run-tests] FATAL: services did not become healthy within ${HEALTH_TIMEOUT}s" >&2 + (cd "$ROOT_DIR" && $COMPOSE logs --tail 40) >&2 + exit 1 + fi +else + # Fallback for podman-compose et al. Poll `podman inspect` (or `docker + # inspect`) for the `.State.Health.Status` field — works on both runtimes + # because Healthcheck status is an OCI-standard field. + echo "[run-tests] $COMPOSE up -d" + (cd "$ROOT_DIR" && $COMPOSE up -d) + + # Auto-detect the runtime CLI from the compose command. `podman-compose` + # → `podman`; everything else → `docker`. + INSPECT_BIN="docker" + if [[ "$COMPOSE" == "podman-compose" ]]; then + INSPECT_BIN="podman" + fi + project=$(basename "$ROOT_DIR") + echo "[run-tests] waiting up to ${HEALTH_TIMEOUT}s for postgres + mysql to be healthy ($INSPECT_BIN inspect)" + deadline=$(( $(date +%s) + HEALTH_TIMEOUT )) + while :; do + # Containers are named {_|-}{_|-}1 depending on the + # compose version — accept either separator. + pg_name=$("$INSPECT_BIN" ps --format '{{.Names}}' | grep -E "${project}[-_]postgres([-_]1)?$" | head -1 || true) + my_name=$("$INSPECT_BIN" ps --format '{{.Names}}' | grep -E "${project}[-_]mysql([-_]1)?$" | head -1 || true) + pg_status="" + my_status="" + if [[ -n "$pg_name" ]]; then + pg_status=$("$INSPECT_BIN" inspect "$pg_name" --format '{{.State.Health.Status}}' 2>/dev/null || echo "") + fi + if [[ -n "$my_name" ]]; then + my_status=$("$INSPECT_BIN" inspect "$my_name" --format '{{.State.Health.Status}}' 2>/dev/null || echo "") + fi + if [[ "$pg_status" == "healthy" && "$my_status" == "healthy" ]]; then + break + fi + if (( $(date +%s) > deadline )); then + echo "[run-tests] FATAL: services did not become healthy within ${HEALTH_TIMEOUT}s (pg=$pg_status mysql=$my_status)" >&2 + (cd "$ROOT_DIR" && $COMPOSE logs --tail 40) >&2 + exit 1 + fi + sleep 1 + done fi echo "[run-tests] both services healthy" @@ -128,7 +190,7 @@ if [[ "$WITH_CARGO_TEST" -eq 1 ]]; then fi # 6. Reset SQLite file -rm -f "$ROOT_DIR/data/test.sqlite" +rm -f "$ROOT_DIR/data/test.sqlite" "$ROOT_DIR/data/iii.db" # 7. Install harness deps if needed if [[ ! -d "$ROOT_DIR/workers/harness/node_modules" ]]; then @@ -169,13 +231,14 @@ done echo "[run-tests] engine listening" # 10. Launch the harness as a host node process -echo "[run-tests] starting harness" +echo "[run-tests] starting harness (mode=$MODE)" HARNESS_ENV=() if [[ -n "$FILTER" ]]; then HARNESS_ENV+=("HARNESS_FILTER=$FILTER") fi HARNESS_ENV+=("III_URL=ws://127.0.0.1:49134") HARNESS_ENV+=("HARNESS_REPORT_PATH=$REPORT_PATH") +HARNESS_ENV+=("HARNESS_MODE=$MODE") ( cd "$ROOT_DIR/workers/harness" && env "${HARNESS_ENV[@]}" npm run --silent dev ) > "$HARNESS_LOG" 2>&1 & HARNESS_PID=$! diff --git a/iii-database/tests/e2e/workers/harness/src/cases-boundary.ts b/iii-database/tests/e2e/workers/harness/src/cases-boundary.ts index 06cb2b26..b6d4af36 100644 --- a/iii-database/tests/e2e/workers/harness/src/cases-boundary.ts +++ b/iii-database/tests/e2e/workers/harness/src/cases-boundary.ts @@ -4,7 +4,7 @@ import { expect, expectEqual } from './cases.ts'; /** * Boundary-value cases targeting type encoding, NULL handling, and string * round-trip. Each test creates and drops its own scratch table so it stays - * independent of the shared `t` / `outbox` tables touched by the function suite. + * independent of the shared `t` table touched by the function suite. * * i64 boundary tests use inline SQL literals because JSON cannot carry an * exact i64 across all values (JS Number tops out at 2^53-1). The bug surface diff --git a/iii-database/tests/e2e/workers/harness/src/cases-interactive-tx.ts b/iii-database/tests/e2e/workers/harness/src/cases-interactive-tx.ts new file mode 100644 index 00000000..17e540c4 --- /dev/null +++ b/iii-database/tests/e2e/workers/harness/src/cases-interactive-tx.ts @@ -0,0 +1,251 @@ +import type { TestCase } from './cases.ts'; +import { expect, expectEqual } from './cases.ts'; + +/** + * Interactive-transaction surface end-to-end cases. Drives the full + * lifecycle (begin → query → execute → commit/rollback) against each + * driver, plus the negative paths: TRANSACTION_NOT_FOUND after + * finalization, COMMIT/ROLLBACK SQL rejection through transactionExecute, + * and timeout-driven auto-rollback. + * + * Each case creates and drops its own scratch table so it stays + * independent of the function-suite's shared `t`. + */ +export const INTERACTIVE_TX_CASES: TestCase[] = [ + { + name: 'interactive tx commit lands writes', + async run({ driver, dialect, call }) { + const ph1 = dialect.placeholder(1); + await call('iii-database::execute', { db: driver, sql: 'DROP TABLE IF EXISTS itx_commit' }); + await call('iii-database::execute', { + db: driver, + sql: `CREATE TABLE itx_commit (id ${dialect.idColumnDDL()}, n INT NOT NULL)`, + }); + try { + const begin = await call('iii-database::beginTransaction', { db: driver }); + const id = begin.transaction?.id; + expect(typeof id === 'string' && id.length === 36, `tx id must be UUID, got ${id}`); + + const ins = await call('iii-database::transactionExecute', { + transaction_id: id, + sql: `INSERT INTO itx_commit (n) VALUES (${ph1})`, + params: [42], + }); + expectEqual(ins.affected_rows, 1, 'insert affected_rows inside tx'); + + // Read-your-writes: the INSERT must be visible from inside the same tx. + const ryw = await call('iii-database::transactionQuery', { + transaction_id: id, + sql: 'SELECT n FROM itx_commit', + }); + expectEqual(ryw.row_count, 1, 'tx sees its own insert'); + expectEqual(Number(ryw.rows[0].n), 42, 'tx read-your-writes value'); + + const c = await call('iii-database::commitTransaction', { transaction_id: id }); + expectEqual(c.committed, true, 'commit returns committed=true'); + + // Verify outside the tx that the write landed. + const verify = await call('iii-database::query', { db: driver, sql: 'SELECT n FROM itx_commit' }); + expectEqual(verify.row_count, 1, 'committed write visible after commit'); + expectEqual(Number(verify.rows[0].n), 42, 'post-commit value'); + } finally { + await call('iii-database::execute', { db: driver, sql: 'DROP TABLE itx_commit' }); + } + }, + }, + { + name: 'interactive tx rollback discards writes', + async run({ driver, dialect, call }) { + const ph1 = dialect.placeholder(1); + await call('iii-database::execute', { db: driver, sql: 'DROP TABLE IF EXISTS itx_rollback' }); + await call('iii-database::execute', { + db: driver, + sql: `CREATE TABLE itx_rollback (id ${dialect.idColumnDDL()}, n INT NOT NULL)`, + }); + try { + const begin = await call('iii-database::beginTransaction', { db: driver }); + const id = begin.transaction.id; + await call('iii-database::transactionExecute', { + transaction_id: id, + sql: `INSERT INTO itx_rollback (n) VALUES (${ph1})`, + params: [99], + }); + const r = await call('iii-database::rollbackTransaction', { transaction_id: id }); + expectEqual(r.rolled_back, true, 'rollback returns rolled_back=true'); + + const verify = await call('iii-database::query', { + db: driver, + sql: 'SELECT COUNT(*) AS c FROM itx_rollback', + }); + expectEqual(Number(verify.rows[0].c), 0, 'rolled-back insert not visible'); + } finally { + await call('iii-database::execute', { db: driver, sql: 'DROP TABLE itx_rollback' }); + } + }, + }, + { + name: 'interactive tx commit then query returns TRANSACTION_NOT_FOUND', + async run({ driver, call, expectError }) { + const begin = await call('iii-database::beginTransaction', { db: driver }); + const id = begin.transaction.id; + await call('iii-database::commitTransaction', { transaction_id: id }); + await expectError( + () => call('iii-database::transactionQuery', { transaction_id: id, sql: 'SELECT 1' }), + 'TRANSACTION_NOT_FOUND', + ); + }, + }, + { + name: 'interactive tx double-commit returns TRANSACTION_NOT_FOUND', + async run({ driver, call, expectError }) { + const begin = await call('iii-database::beginTransaction', { db: driver }); + const id = begin.transaction.id; + await call('iii-database::commitTransaction', { transaction_id: id }); + await expectError( + () => call('iii-database::commitTransaction', { transaction_id: id }), + 'TRANSACTION_NOT_FOUND', + ); + }, + }, + { + name: 'transactionExecute rejects COMMIT/ROLLBACK SQL with INVALID_PARAM', + async run({ driver, call, expectError }) { + const begin = await call('iii-database::beginTransaction', { db: driver }); + const id = begin.transaction.id; + try { + await expectError( + () => call('iii-database::transactionExecute', { transaction_id: id, sql: 'COMMIT' }), + 'INVALID_PARAM', + ); + await expectError( + () => call('iii-database::transactionExecute', { transaction_id: id, sql: 'ROLLBACK' }), + 'INVALID_PARAM', + ); + await expectError( + () => call('iii-database::transactionExecute', { transaction_id: id, sql: 'BEGIN' }), + 'INVALID_PARAM', + ); + } finally { + // The tx is still open (rejections don't finalize); clean up. + try { + await call('iii-database::rollbackTransaction', { transaction_id: id }); + } catch {/* ignore */} + } + }, + }, + { + // Regression for the three side-channel-finalization bypasses identified + // in pre-landing review: `/* */COMMIT`, `--\nCOMMIT`, and `START + // TRANSACTION`. Without the strip-leading-comments fix and the + // START-keyword addition, all three slipped past `is_transaction_control_sql` + // and reached the driver, desyncing the registry from the conn's txn state. + name: 'transactionExecute rejects comment-prefixed and START-form finalization', + async run({ driver, call, expectError }) { + const begin = await call('iii-database::beginTransaction', { db: driver }); + const id = begin.transaction.id; + try { + await expectError( + () => + call('iii-database::transactionExecute', { + transaction_id: id, + sql: '/* sneak */COMMIT', + }), + 'INVALID_PARAM', + ); + await expectError( + () => + call('iii-database::transactionExecute', { + transaction_id: id, + sql: '-- sneak\nCOMMIT', + }), + 'INVALID_PARAM', + ); + await expectError( + () => + call('iii-database::transactionExecute', { + transaction_id: id, + sql: 'START TRANSACTION', + }), + 'INVALID_PARAM', + ); + } finally { + try { + await call('iii-database::rollbackTransaction', { transaction_id: id }); + } catch {/* ignore */} + } + }, + }, + { + // Regression: `transactionQuery` previously had no transaction-control + // filter at all. PG/MySQL/SQLite all execute COMMIT through the + // prepared-statement path used by `run_prepared`. Must now reject. + name: 'transactionQuery rejects COMMIT/ROLLBACK and comment-prefixed forms', + async run({ driver, call, expectError }) { + const begin = await call('iii-database::beginTransaction', { db: driver }); + const id = begin.transaction.id; + try { + await expectError( + () => call('iii-database::transactionQuery', { transaction_id: id, sql: 'COMMIT' }), + 'INVALID_PARAM', + ); + await expectError( + () => + call('iii-database::transactionQuery', { transaction_id: id, sql: 'ROLLBACK' }), + 'INVALID_PARAM', + ); + await expectError( + () => + call('iii-database::transactionQuery', { + transaction_id: id, + sql: '/* sneak */COMMIT', + }), + 'INVALID_PARAM', + ); + await expectError( + () => + call('iii-database::transactionQuery', { + transaction_id: id, + sql: 'START TRANSACTION', + }), + 'INVALID_PARAM', + ); + } finally { + try { + await call('iii-database::rollbackTransaction', { transaction_id: id }); + } catch {/* ignore */} + } + }, + }, + { + name: 'interactive tx beginTransaction unknown isolation returns INVALID_PARAM', + async run({ driver, call, expectError }) { + await expectError( + () => + call('iii-database::beginTransaction', { db: driver, isolation: 'bogus_level' }), + 'INVALID_PARAM', + ); + }, + }, + { + // Timeout-driven auto-rollback. timeout_ms is set low (300 ms) so the + // background watcher (1 s poll cadence) fires within a couple of ticks + // and the next call sees TRANSACTION_NOT_FOUND. Skipped on pg/mysql for + // brevity — the timeout logic is driver-agnostic and sqlite is the + // fastest path. + name: 'interactive tx timeout auto-rolls-back and frees the id', + applies: ['sqlite_db'], + async run({ driver, call, expectError }) { + const begin = await call('iii-database::beginTransaction', { + db: driver, + timeout_ms: 300, + }); + const id = begin.transaction.id; + // Wait past the deadline + one watcher tick + a safety margin. + await new Promise((r) => setTimeout(r, 1800)); + await expectError( + () => call('iii-database::transactionQuery', { transaction_id: id, sql: 'SELECT 1' }), + 'TRANSACTION_NOT_FOUND', + ); + }, + }, +]; diff --git a/iii-database/tests/e2e/workers/harness/src/cases-trigger.ts b/iii-database/tests/e2e/workers/harness/src/cases-trigger.ts deleted file mode 100644 index 753aaba6..00000000 --- a/iii-database/tests/e2e/workers/harness/src/cases-trigger.ts +++ /dev/null @@ -1,121 +0,0 @@ -import type { TestCase } from './cases.ts'; -import { expect } from './cases.ts'; - -/** - * Trigger config + lifecycle edge cases. Gated to sqlite_db because the - * trigger logic is driver-agnostic and sqlite is the fastest path; running - * across all three drivers buys nothing here. - * - * SDK note: `iii.registerTrigger` returns synchronously without awaiting the - * worker's `register_trigger` handler, so worker-side validation errors do - * NOT surface as a JS rejection. We test broken triggers by *absence of - * dispatches* — register, seed rows, assert nothing arrives. - */ -export const TRIGGER_CASES: TestCase[] = [ - { - name: 'trigger with invalid cursor_table never dispatches', - applies: ['sqlite_db'], - async run({ driver, dialect, iii, call, resetReceived, expectSilence }) { - // Use a unique scratch table per test to stay isolated from the polling case's outbox. - await call('iii-database::execute', { db: driver, sql: 'DROP TABLE IF EXISTS tg_bad_cursor_table' }); - await call('iii-database::execute', { - db: driver, - sql: `CREATE TABLE tg_bad_cursor_table (id ${dialect.idColumnDDL()}, body TEXT NOT NULL)`, - }); - resetReceived(); - const ph1 = dialect.placeholder(1); - // 'bad-name' contains a hyphen → fails validate_sql_identifier. Per - // query_poll.rs:70-78, validate() rejects at first tick before any - // dispatch can land. - const handle = iii.registerTrigger({ - type: 'iii-database::query-poll', - function_id: 'harness::on_outbox_row', - config: { - trigger_id: `harness-bad-cursor-table-${driver}`, - db: driver, - sql: `SELECT id, body, '${driver}' AS db FROM tg_bad_cursor_table WHERE id > COALESCE(${ph1}, 0) ORDER BY id LIMIT 50`, - interval_ms: 200, - cursor_column: 'id', - cursor_table: 'bad-name', - }, - }); - try { - await call('iii-database::execute', { - db: driver, - sql: 'INSERT INTO tg_bad_cursor_table (body) VALUES (?)', - params: ['x'], - }); - await expectSilence(1500); - } finally { - try { handle.unregister(); } catch { /* ignore */ } - await call('iii-database::execute', { db: driver, sql: 'DROP TABLE tg_bad_cursor_table' }); - } - }, - }, - { - name: 'trigger with cursor_column not in result never dispatches', - applies: ['sqlite_db'], - async run({ driver, dialect, iii, call, resetReceived, expectSilence }) { - await call('iii-database::execute', { db: driver, sql: 'DROP TABLE IF EXISTS tg_bad_col' }); - await call('iii-database::execute', { - db: driver, - sql: `CREATE TABLE tg_bad_col (id ${dialect.idColumnDDL()}, body TEXT NOT NULL)`, - }); - resetReceived(); - const ph1 = dialect.placeholder(1); - // SQL only selects `id` and `body`, but cursor_column is `nonexistent`. - // Per query_poll.rs:107-116, run_one_tick errors at column-lookup time - // and the loop logs+swallows. No dispatch ever lands. - const handle = iii.registerTrigger({ - type: 'iii-database::query-poll', - function_id: 'harness::on_outbox_row', - config: { - trigger_id: `harness-bad-col-${driver}`, - db: driver, - sql: `SELECT id, body, '${driver}' AS db FROM tg_bad_col WHERE id > COALESCE(${ph1}, 0) ORDER BY id LIMIT 50`, - interval_ms: 200, - cursor_column: 'nonexistent', - }, - }); - try { - await call('iii-database::execute', { - db: driver, - sql: 'INSERT INTO tg_bad_col (body) VALUES (?)', - params: ['y'], - }); - await expectSilence(1500); - } finally { - try { handle.unregister(); } catch { /* ignore */ } - await call('iii-database::execute', { db: driver, sql: 'DROP TABLE tg_bad_col' }); - } - }, - }, - { - name: 'unregister of already-unregistered trigger does not throw', - applies: ['sqlite_db'], - async run({ driver, dialect, iii }) { - const ph1 = dialect.placeholder(1); - const handle = iii.registerTrigger({ - type: 'iii-database::query-poll', - function_id: 'harness::on_outbox_row', - config: { - trigger_id: `harness-double-unreg-${driver}`, - db: driver, - sql: `SELECT id, body, '${driver}' AS db FROM outbox WHERE id > COALESCE(${ph1}, 0) ORDER BY id LIMIT 50`, - interval_ms: 1000, - cursor_column: 'id', - }, - }); - // First unregister: real cleanup. - handle.unregister(); - // Second unregister: should be a no-op rather than throwing. - let threw: unknown = null; - try { - handle.unregister(); - } catch (e) { - threw = e; - } - expect(threw === null, `second unregister threw: ${threw}`); - }, - }, -]; diff --git a/iii-database/tests/e2e/workers/harness/src/cases-tx-control-bypass.ts b/iii-database/tests/e2e/workers/harness/src/cases-tx-control-bypass.ts new file mode 100644 index 00000000..73b265e9 --- /dev/null +++ b/iii-database/tests/e2e/workers/harness/src/cases-tx-control-bypass.ts @@ -0,0 +1,320 @@ +import type { TestCase } from './cases.ts'; +import { expect, expectEqual } from './cases.ts'; + +/** + * Side-channel finalization bypass repros. + * + * Each case demonstrates a separate way the + * `transactionExecute`/`transactionQuery` defense in + * `iii-database/src/handlers/transaction_execute.rs::is_transaction_control_sql` + * can be bypassed to finalize an interactive transaction without going through + * `commitTransaction` / `rollbackTransaction`. + * + * Each case is structured as: + * - drive the worker through a sequence that SHOULD be rejected with + * `INVALID_PARAM` (per the docstring on `transactionExecute`) + * - if the worker rejects → PASS (defense holds, fix shipped) + * - if the worker accepts AND the transaction-registry/conn state desyncs + * → FAIL with a label describing the observed bypass + * + * Run with: `./run-tests.sh --bypass-only` + * + * Findings (see /review output on branch feat/database-and-skills): + * 1. comment-prefix in transactionExecute (handlers/transaction_execute.rs:42-63) + * 2. transactionQuery missing the guard (handlers/transaction_query.rs) + * 3. `START TRANSACTION` not in keyword set (handlers/transaction_execute.rs:55, MySQL-specific) + * + * These cases will FAIL until the fix is shipped. After the fix, they MUST + * pass on every driver they apply to. + */ +export const TX_CONTROL_BYPASS_CASES: TestCase[] = [ + { + // Finding #1: `/* ... */COMMIT` — the first whitespace-delimited token is + // `/*`, never matches the COMMIT keyword, the SQL is passed to the driver, + // and PG/MySQL strip server-side comments and execute the COMMIT. SQLite + // does the same. The registry still tracks the txn handle; subsequent + // calls see TRANSACTION_NOT_FOUND only after the watcher sweep removes + // the entry (1+ second later), or — worse — succeed against a non-tx conn + // until the watcher fires. + name: 'bypass #1: transactionExecute /* */COMMIT must be rejected', + async run({ driver, dialect, call }) { + const ph1 = dialect.placeholder(1); + const table = 'bypass_comment_commit'; + await call('iii-database::execute', { db: driver, sql: `DROP TABLE IF EXISTS ${table}` }); + await call('iii-database::execute', { + db: driver, + sql: `CREATE TABLE ${table} (id ${dialect.idColumnDDL()}, n INT NOT NULL)`, + }); + + const begin = await call('iii-database::beginTransaction', { db: driver }); + const id = begin.transaction.id; + + try { + // Stage a row inside the txn. Visible from inside, not yet committed. + await call('iii-database::transactionExecute', { + transaction_id: id, + sql: `INSERT INTO ${table} (n) VALUES (${ph1})`, + params: [1], + }); + + // The bypass attempt. The defense MUST reject this — the user must + // be told to use `commitTransaction`. + let rejected = false; + let acceptedErr: unknown = null; + try { + await call('iii-database::transactionExecute', { + transaction_id: id, + sql: '/* sneak */COMMIT', + }); + } catch (e: any) { + const msg = e?.message ?? String(e); + if (msg.includes('INVALID_PARAM')) { + rejected = true; + } else { + acceptedErr = e; + } + } + + if (!rejected) { + // The worker accepted the SQL. Now prove it desynced the state. + // Read from outside the txn — if the COMMIT landed at the driver + // level, the staged row is visible from a fresh connection. + const outside = await call('iii-database::query', { + db: driver, + sql: `SELECT COUNT(*) AS c FROM ${table}`, + }); + const leakedCount = Number(outside.rows[0].c); + throw new Error( + `BYPASS CONFIRMED [#1 comment-prefix]: '/* */COMMIT' was accepted ` + + `by transactionExecute; outside-tx COUNT=${leakedCount} ` + + `(>0 means the side-channel COMMIT landed at the driver, ` + + `desyncing the registry from the connection state)` + + (acceptedErr ? ` — non-INVALID_PARAM error: ${acceptedErr}` : ''), + ); + } + expect(rejected, 'transactionExecute must reject /* */COMMIT with INVALID_PARAM'); + } finally { + try { + await call('iii-database::rollbackTransaction', { transaction_id: id }); + } catch {/* may already be gone if the bypass succeeded */} + await call('iii-database::execute', { db: driver, sql: `DROP TABLE IF EXISTS ${table}` }); + } + }, + }, + { + // Finding #2: `transactionQuery` has NO transaction-control filter at all. + // PG/MySQL/SQLite all happily execute COMMIT through the prepared-statement + // path that `run_prepared` uses. Once COMMIT lands, the registry still + // holds the entry — subsequent calls operate on a now-non-transactional + // pinned connection until the watcher sweeps. + name: 'bypass #2: transactionQuery COMMIT must be rejected', + async run({ driver, dialect, call }) { + const ph1 = dialect.placeholder(1); + const table = 'bypass_query_commit'; + await call('iii-database::execute', { db: driver, sql: `DROP TABLE IF EXISTS ${table}` }); + await call('iii-database::execute', { + db: driver, + sql: `CREATE TABLE ${table} (id ${dialect.idColumnDDL()}, n INT NOT NULL)`, + }); + + const begin = await call('iii-database::beginTransaction', { db: driver }); + const id = begin.transaction.id; + + try { + await call('iii-database::transactionExecute', { + transaction_id: id, + sql: `INSERT INTO ${table} (n) VALUES (${ph1})`, + params: [2], + }); + + let rejected = false; + let acceptedErr: unknown = null; + try { + await call('iii-database::transactionQuery', { + transaction_id: id, + sql: 'COMMIT', + }); + } catch (e: any) { + const msg = e?.message ?? String(e); + if (msg.includes('INVALID_PARAM')) { + rejected = true; + } else { + acceptedErr = e; + } + } + + if (!rejected) { + const outside = await call('iii-database::query', { + db: driver, + sql: `SELECT COUNT(*) AS c FROM ${table}`, + }); + const leakedCount = Number(outside.rows[0].c); + throw new Error( + `BYPASS CONFIRMED [#2 transactionQuery]: 'COMMIT' was accepted by ` + + `transactionQuery (no is_transaction_control_sql guard exists ` + + `on the query path); outside-tx COUNT=${leakedCount} ` + + `(>0 means the side-channel COMMIT landed at the driver)` + + (acceptedErr ? ` — non-INVALID_PARAM error: ${acceptedErr}` : ''), + ); + } + expect(rejected, 'transactionQuery must reject COMMIT with INVALID_PARAM'); + } finally { + try { + await call('iii-database::rollbackTransaction', { transaction_id: id }); + } catch {/* ignore */} + await call('iii-database::execute', { db: driver, sql: `DROP TABLE IF EXISTS ${table}` }); + } + }, + }, + { + // Finding #3: `START TRANSACTION` is the MySQL/ANSI SQL-92 spelling of + // `BEGIN`. The keyword set in `is_transaction_control_sql` only catches + // `BEGIN`, not `START`. On MySQL, `START TRANSACTION` while inside an + // existing txn implicitly COMMITs the in-progress one and opens a new + // one — directly breaking the atomicity contract. + // + // On Postgres, `START TRANSACTION` inside a txn issues a NOTICE and + // no-ops, so the worst case is "user is mildly confused" rather than + // "registry desyncs"; we still expect the worker to reject for + // consistency with how `BEGIN` is handled. Gated to mysql_db where the + // observable atomicity violation lives. + name: 'bypass #3: transactionExecute START TRANSACTION must be rejected (mysql)', + applies: ['mysql_db'], + async run({ driver, dialect, call }) { + const ph1 = dialect.placeholder(1); + const table = 'bypass_start_transaction'; + await call('iii-database::execute', { db: driver, sql: `DROP TABLE IF EXISTS ${table}` }); + await call('iii-database::execute', { + db: driver, + sql: `CREATE TABLE ${table} (id ${dialect.idColumnDDL()}, n INT NOT NULL)`, + }); + + const begin = await call('iii-database::beginTransaction', { db: driver }); + const id = begin.transaction.id; + + try { + // Stage a row in the OUTER txn that — on a healthy worker — would be + // discarded when we `rollbackTransaction` below. If `START TRANSACTION` + // is accepted, MySQL implicitly COMMITs that row before opening the + // new (untracked) txn, leaking the row past the eventual rollback. + await call('iii-database::transactionExecute', { + transaction_id: id, + sql: `INSERT INTO ${table} (n) VALUES (${ph1})`, + params: [3], + }); + + let rejected = false; + let acceptedErr: unknown = null; + try { + await call('iii-database::transactionExecute', { + transaction_id: id, + sql: 'START TRANSACTION', + }); + } catch (e: any) { + const msg = e?.message ?? String(e); + if (msg.includes('INVALID_PARAM')) { + rejected = true; + } else { + acceptedErr = e; + } + } + + if (!rejected) { + // Roll back the (now-second) txn the user thinks they're in. + try { + await call('iii-database::rollbackTransaction', { transaction_id: id }); + } catch {/* ignore */} + + const outside = await call('iii-database::query', { + db: driver, + sql: `SELECT COUNT(*) AS c FROM ${table}`, + }); + const leakedCount = Number(outside.rows[0].c); + throw new Error( + `BYPASS CONFIRMED [#3 START TRANSACTION on MySQL]: ` + + `'START TRANSACTION' was accepted by transactionExecute; ` + + `outside-tx COUNT=${leakedCount} after a rollback that should ` + + `have undone the staged INSERT (>0 means MySQL implicitly ` + + `COMMITed the outer txn when START TRANSACTION ran)` + + (acceptedErr ? ` — non-INVALID_PARAM error: ${acceptedErr}` : ''), + ); + } + expect(rejected, 'transactionExecute must reject START TRANSACTION with INVALID_PARAM'); + } finally { + try { + await call('iii-database::rollbackTransaction', { transaction_id: id }); + } catch {/* may already be gone */} + await call('iii-database::execute', { db: driver, sql: `DROP TABLE IF EXISTS ${table}` }); + } + }, + }, + { + // Extra repro: `--COMMIT` (line-comment-prefixed) — same shape as #1 but + // using `--` instead of `/* */`. The first token after `trim_start_matches` + // is `--COMMIT`, doesn't match the keyword set, falls through. Postgres + // strips line comments server-side and executes COMMIT. + name: 'bypass #1b: transactionExecute --COMMIT (line-comment) must be rejected', + // Skipped on sqlite_db: rusqlite's prepare on a sql string consisting of + // only `--text\n…COMMIT` may interpret the line-comment + COMMIT as a + // single statement OR raise its multi-statement guard; behavior is less + // predictable. PG/MySQL both reliably strip the line comment. + applies: ['pg_db', 'mysql_db'], + async run({ driver, dialect, call }) { + const ph1 = dialect.placeholder(1); + const table = 'bypass_line_comment'; + await call('iii-database::execute', { db: driver, sql: `DROP TABLE IF EXISTS ${table}` }); + await call('iii-database::execute', { + db: driver, + sql: `CREATE TABLE ${table} (id ${dialect.idColumnDDL()}, n INT NOT NULL)`, + }); + + const begin = await call('iii-database::beginTransaction', { db: driver }); + const id = begin.transaction.id; + + try { + await call('iii-database::transactionExecute', { + transaction_id: id, + sql: `INSERT INTO ${table} (n) VALUES (${ph1})`, + params: [4], + }); + + let rejected = false; + let acceptedErr: unknown = null; + try { + // Newline is required after `--` so the server sees COMMIT as a + // statement; otherwise the comment swallows the rest of the line. + await call('iii-database::transactionExecute', { + transaction_id: id, + sql: '-- sneak\nCOMMIT', + }); + } catch (e: any) { + const msg = e?.message ?? String(e); + if (msg.includes('INVALID_PARAM')) { + rejected = true; + } else { + acceptedErr = e; + } + } + + if (!rejected) { + const outside = await call('iii-database::query', { + db: driver, + sql: `SELECT COUNT(*) AS c FROM ${table}`, + }); + const leakedCount = Number(outside.rows[0].c); + throw new Error( + `BYPASS CONFIRMED [#1b line-comment]: '-- sneak\\nCOMMIT' was ` + + `accepted by transactionExecute; outside-tx COUNT=${leakedCount}` + + (acceptedErr ? ` — non-INVALID_PARAM error: ${acceptedErr}` : ''), + ); + } + expectEqual(rejected, true, 'transactionExecute must reject --COMMIT'); + } finally { + try { + await call('iii-database::rollbackTransaction', { transaction_id: id }); + } catch {/* ignore */} + await call('iii-database::execute', { db: driver, sql: `DROP TABLE IF EXISTS ${table}` }); + } + }, + }, +]; diff --git a/iii-database/tests/e2e/workers/harness/src/cases.ts b/iii-database/tests/e2e/workers/harness/src/cases.ts index 8f42a4be..1712ac00 100644 --- a/iii-database/tests/e2e/workers/harness/src/cases.ts +++ b/iii-database/tests/e2e/workers/harness/src/cases.ts @@ -6,10 +6,6 @@ export interface CaseContext { dialect: Dialect; /** Calls a database worker function; returns parsed JSON or throws on engine error. */ call: (functionId: string, payload: unknown) => Promise; - /** Resolves once N rows for `driver` have arrived via the query-poll sink. */ - waitForRows: (n: number, timeoutMs: number) => Promise>>; - /** Resets the per-driver received-rows buffer used by `waitForRows`. */ - resetReceived: () => void; /** Direct SDK access for trigger-config edge-case tests. */ iii: ISdk; /** @@ -19,11 +15,6 @@ export interface CaseContext { * strict JSON parsing across SDK versions. */ expectError: (fn: () => Promise, expectedCode: string) => Promise; - /** - * Resolves true if NO rows arrive on the polling sink within `timeoutMs`. - * Used by trigger validation tests to assert a broken trigger never dispatches. - */ - expectSilence: (timeoutMs: number) => Promise; } export interface TestCase { @@ -46,23 +37,17 @@ export function expect(cond: boolean, msg: string): asserts cond { export const SCHEMA_RESET: TestCase = { name: 'schema-reset', async run({ driver, dialect, call }) { + // `outbox` and `__iii_cursors` are vestigial from the query-poll trigger + // surface that was removed on feat/database-and-skills. Drop defensively + // so re-runs against a stale data volume (docker / podman named volumes + // preserved across `--keep`) still come up clean. await call('iii-database::execute', { db: driver, sql: 'DROP TABLE IF EXISTS outbox' }); - await call('iii-database::execute', { db: driver, sql: 'DROP TABLE IF EXISTS t' }); - // The query-poll trigger persists cursor state in __iii_cursors. Without - // dropping it here, stale cursor values from a prior run survive (Postgres - // and MySQL via docker volumes; SQLite via ./data/iii.db) and cause the - // first poll to filter out the freshly-inserted ids — producing a "got 0 - // rows" timeout. Dropping the table here makes the test idempotent across - // runs without requiring a manual `docker compose down -v && rm data/iii.db`. await call('iii-database::execute', { db: driver, sql: 'DROP TABLE IF EXISTS __iii_cursors' }); + await call('iii-database::execute', { db: driver, sql: 'DROP TABLE IF EXISTS t' }); await call('iii-database::execute', { db: driver, sql: `CREATE TABLE t (id ${dialect.idColumnDDL()}, n INT NOT NULL)`, }); - await call('iii-database::execute', { - db: driver, - sql: `CREATE TABLE outbox (id ${dialect.idColumnDDL()}, body TEXT NOT NULL)`, - }); }, }; @@ -166,36 +151,3 @@ export const FUNCTION_CASES: TestCase[] = [ }, }, ]; - -export const POLLING_CASE: TestCase = { - name: 'query-poll dispatches new rows incrementally', - async run({ driver, dialect, call, waitForRows, resetReceived }) { - resetReceived(); - const ph1 = dialect.placeholder(1); - const ph2 = dialect.placeholder(2); - const ph3 = dialect.placeholder(3); - // Seed 3 rows. - await call('iii-database::execute', { - db: driver, - sql: `INSERT INTO outbox (body) VALUES (${ph1}), (${ph2}), (${ph3})`, - params: ['a', 'b', 'c'], - }); - - const first = await waitForRows(3, 5_000); - expectEqual(first.length, 3, 'first batch row count'); - const bodies1 = first.map((r) => r.body); - expectEqual(bodies1, ['a', 'b', 'c'], 'first batch body order'); - - // Insert 2 more after the first batch was acked. - resetReceived(); - await call('iii-database::execute', { - db: driver, - sql: `INSERT INTO outbox (body) VALUES (${ph1}), (${ph2})`, - params: ['d', 'e'], - }); - const second = await waitForRows(2, 5_000); - expectEqual(second.length, 2, 'second batch row count'); - const bodies2 = second.map((r) => r.body); - expectEqual(bodies2, ['d', 'e'], 'second batch is delta only'); - }, -}; diff --git a/iii-database/tests/e2e/workers/harness/src/runner.ts b/iii-database/tests/e2e/workers/harness/src/runner.ts index 6e9b4373..cfe6c790 100644 --- a/iii-database/tests/e2e/workers/harness/src/runner.ts +++ b/iii-database/tests/e2e/workers/harness/src/runner.ts @@ -5,16 +5,16 @@ import { DRIVER_KEYS, dialects, type DriverKey } from './dialect.ts'; import { SCHEMA_RESET, FUNCTION_CASES, - POLLING_CASE, type CaseContext, type TestCase, } from './cases.ts'; import { BOUNDARY_CASES } from './cases-boundary.ts'; import { PROTOCOL_CASES } from './cases-protocol.ts'; import { TRANSACTION_EDGE_CASES } from './cases-transaction.ts'; +import { INTERACTIVE_TX_CASES } from './cases-interactive-tx.ts'; import { CONCURRENCY_CASES } from './cases-concurrency.ts'; -import { TRIGGER_CASES } from './cases-trigger.ts'; import { ROW_CHANGE_CASES } from './cases-row-change.ts'; +import { TX_CONTROL_BYPASS_CASES } from './cases-tx-control-bypass.ts'; interface CaseResult { driver: DriverKey; @@ -24,75 +24,29 @@ interface CaseResult { duration_ms: number; } -type Pending = { - n: number; - resolve: (rows: Array>) => void; - reject: (err: Error) => void; - timer: NodeJS.Timeout; -}; - -type ReceivedBuffer = { - rows: Array>; - pending: Pending[]; -}; +/** + * Harness mode controls which case groups run: + * + * - `full` — full suite + bypass repros (default; this is what CI runs) + * - `no-bypass` — full suite WITHOUT the bypass repros. Useful when running + * against a worker that hasn't shipped the side-channel- + * finalization fix yet, so the rest of the suite still + * reports clean instead of being drowned in bypass FAILs. + * - `bypass-only` — ONLY the bypass repros. Use this to focus on the + * /review findings (post-fix it must be all-PASS). + */ +export type RunnerMode = 'full' | 'no-bypass' | 'bypass-only'; export interface RunnerOptions { iii: ISdk; reportPath: string; filterDriver?: DriverKey; + mode?: RunnerMode; } export class Runner { - private buffers: Record = { - sqlite_db: { rows: [], pending: [] }, - pg_db: { rows: [], pending: [] }, - mysql_db: { rows: [], pending: [] }, - }; - - /** - * Active poll-trigger handles keyed by driver. We track them so we can - * unregister at end-of-run; without that, every harness invocation against - * a long-running worker leaves a zombie polling task alive in the worker - * process. On the next run, the zombie keeps polling against freshly-reset - * tables and races with the new task that registerPollingTrigger spawns, - * producing "second batch is delta only" failures even though the trigger - * implementation itself is correct. - */ - private triggers: Partial void }>> = {}; - constructor(private opts: RunnerOptions) {} - /** Sink for `iii-database::query-poll` dispatches; routes by `payload.db`. */ - onOutboxBatch = async (payload: any): Promise<{ ack: boolean; commit_cursor?: string }> => { - const db = String(payload?.db ?? '') as DriverKey; - if (!this.buffers[db]) { - console.error(`[harness] unexpected db in dispatch: ${db}`); - return { ack: false }; - } - const rows = (payload.rows ?? []) as Array>; - const buf = this.buffers[db]; - buf.rows.push(...rows); - - // Compute max id seen so the worker advances the cursor. - let maxId: number | undefined; - for (const r of buf.rows) { - const id = Number((r as any).id); - if (Number.isFinite(id)) maxId = maxId === undefined ? id : Math.max(maxId, id); - } - - // Resolve any waiters that are now satisfied. - buf.pending = buf.pending.filter((p) => { - if (buf.rows.length >= p.n) { - clearTimeout(p.timer); - p.resolve(buf.rows.slice(0, p.n)); - return false; - } - return true; - }); - - return { ack: true, commit_cursor: maxId !== undefined ? String(maxId) : undefined }; - }; - private async callOnce(functionId: string, payload: unknown): Promise { return await this.opts.iii.trigger({ function_id: functionId, payload }); } @@ -110,36 +64,12 @@ export class Runner { throw lastErr; } - private waitForRows(driver: DriverKey, n: number, timeoutMs: number): Promise>> { - const buf = this.buffers[driver]; - return new Promise((resolveP, rejectP) => { - if (buf.rows.length >= n) { - resolveP(buf.rows.slice(0, n)); - return; - } - const timer = setTimeout(() => { - buf.pending = buf.pending.filter((p) => p.resolve !== resolveP); - rejectP(new Error(`timeout waiting for ${n} rows on ${driver} (got ${buf.rows.length})`)); - }, timeoutMs); - buf.pending.push({ n, resolve: resolveP, reject: rejectP, timer }); - }); - } - - private resetReceived(driver: DriverKey): void { - const buf = this.buffers[driver]; - for (const p of buf.pending) clearTimeout(p.timer); - buf.pending = []; - buf.rows = []; - } - private async runCase(driver: DriverKey, c: TestCase): Promise { const start = Date.now(); const ctx: CaseContext = { driver, dialect: dialects[driver], call: (id, payload) => this.callOnce(id, payload), - waitForRows: (n, t) => this.waitForRows(driver, n, t), - resetReceived: () => this.resetReceived(driver), iii: this.opts.iii, expectError: async (fn, expectedCode) => { try { @@ -153,19 +83,6 @@ export class Runner { } throw new Error(`expected throw with code "${expectedCode}", but call resolved`); }, - expectSilence: async (timeoutMs) => { - // Reset, wait the window, then assert the per-driver buffer is still empty. - // Used by trigger validation tests to prove a broken trigger never dispatches. - const buf = this.buffers[driver]; - const startLen = buf.rows.length; - await new Promise((r) => setTimeout(r, timeoutMs)); - const drift = buf.rows.length - startLen; - if (drift > 0) { - throw new Error( - `expected silence for ${timeoutMs}ms but received ${drift} rows; latest=${JSON.stringify(buf.rows.slice(-Math.min(drift, 3)))}`, - ); - } - }, }; try { await c.run(ctx); @@ -186,38 +103,8 @@ export class Runner { await this.callWithRetry('iii-database::query', { db: driver, sql: 'SELECT 1' }); } - private async registerPollingTrigger(driver: DriverKey): Promise { - const ph1 = dialects[driver].placeholder(1); - const handle = this.opts.iii.registerTrigger({ - type: 'iii-database::query-poll', - function_id: 'harness::on_outbox_row', - config: { - trigger_id: `harness-poll-${driver}`, - db: driver, - sql: `SELECT id, body FROM outbox WHERE id > COALESCE(${ph1}, 0) ORDER BY id LIMIT 50`, - interval_ms: 500, - cursor_column: 'id', - }, - }); - this.triggers[driver] = handle; - } - - /** Unregister all active poll triggers. Idempotent. */ - private async unregisterAllTriggers(): Promise { - for (const driver of DRIVER_KEYS) { - const t = this.triggers[driver]; - if (t) { - try { - t.unregister(); - } catch (e) { - console.error(`[harness] unregister ${driver}: ${e}`); - } - delete this.triggers[driver]; - } - } - } - async runAll(): Promise<{ pass: number; total: number; results: CaseResult[] }> { + const mode: RunnerMode = this.opts.mode ?? 'full'; const drivers: DriverKey[] = this.opts.filterDriver ? [this.opts.filterDriver] : [...DRIVER_KEYS]; // Wait for the database worker to be reachable on the first driver before kicking off. await this.waitForDatabaseWorker(drivers[0]); @@ -245,71 +132,59 @@ export class Runner { return r; }; - for (const driver of drivers) { - // Always run the schema reset; not a counted case but failures abort this driver. - const reset = record(await this.runCase(driver, SCHEMA_RESET)); - if (reset.status === 'FAIL') continue; - - // Function suite (6 cases). - for (const c of FUNCTION_CASES) { - record(await this.runCase(driver, c)); - } + // The full suite (`full` and `no-bypass`) needs the shared `t` table from + // SCHEMA_RESET. `bypass-only` doesn't, since every bypass case manages its + // own scratch table — skip the reset to avoid letting a stale schema from + // a prior run block the focused repro. + const includeFullSuite = mode === 'full' || mode === 'no-bypass'; + const includeBypassRepros = mode === 'full' || mode === 'bypass-only'; - // Boundary, protocol, transaction-edge, concurrency, row-change cases. - // Each test is self-contained (creates and drops its own scratch tables - // / replication slots) so order doesn't matter. - for (const c of [ - ...BOUNDARY_CASES, - ...PROTOCOL_CASES, - ...TRANSACTION_EDGE_CASES, - ...CONCURRENCY_CASES, - ...ROW_CHANGE_CASES, - ]) { - if (!matchesDriver(driver, c)) continue; - record(await this.runCase(driver, c)); - } + for (const driver of drivers) { + if (includeFullSuite) { + // Always run the schema reset; not a counted case but failures abort this driver. + const reset = record(await this.runCase(driver, SCHEMA_RESET)); + if (reset.status === 'FAIL') continue; + + // Function suite. + for (const c of FUNCTION_CASES) { + record(await this.runCase(driver, c)); + } - // Register the per-driver query-poll trigger before the polling case. - try { - await this.registerPollingTrigger(driver); - } catch (e: any) { - record({ - driver, - case: 'register-poll-trigger', - status: 'FAIL', - error: e?.message ?? String(e), - duration_ms: 0, - }); - continue; + // Boundary, protocol, transaction-edge, interactive-tx, concurrency, + // row-change cases. Each test is self-contained (creates and drops + // its own scratch tables / replication slots) so order doesn't matter. + for (const c of [ + ...BOUNDARY_CASES, + ...PROTOCOL_CASES, + ...TRANSACTION_EDGE_CASES, + ...INTERACTIVE_TX_CASES, + ...CONCURRENCY_CASES, + ...ROW_CHANGE_CASES, + ]) { + if (!matchesDriver(driver, c)) continue; + record(await this.runCase(driver, c)); + } } - record(await this.runCase(driver, POLLING_CASE)); - - // Trigger validation cases (sqlite_db only). Run after the polling case - // so the long-lived polling trigger has already been registered and the - // per-driver buffer state is well-defined. Each trigger case unregisters - // its own ad-hoc trigger; the long-lived polling trigger keeps running. - // We pause the long-lived polling trigger's effect by relying on a - // unique scratch table per case (so the polling trigger's outbox SELECT - // is unaffected by trigger-test inserts), and reset the buffer at the - // start of each case via resetReceived(). - for (const c of TRIGGER_CASES) { - if (!matchesDriver(driver, c)) continue; - record(await this.runCase(driver, c)); + if (includeBypassRepros) { + // Side-channel-finalization repros are kept as their own group so + // they can be re-run in isolation via `--bypass-only` and appear + // together in the per-case summary as the documented coverage for + // the /review findings on this branch. + for (const c of TX_CONTROL_BYPASS_CASES) { + if (!matchesDriver(driver, c)) continue; + record(await this.runCase(driver, c)); + } } } - // Cleanup: unregister all active triggers so re-running the harness - // against the same worker process doesn't leave zombie pollers running. - await this.unregisterAllTriggers(); - - const counted = results.filter((r) => r.case !== 'schema-reset' && r.case !== 'register-poll-trigger'); + const counted = results.filter((r) => r.case !== 'schema-reset'); const pass = counted.filter((r) => r.status === 'PASS').length; mkdirSync(resolve(this.opts.reportPath, '..'), { recursive: true }); writeFileSync( this.opts.reportPath, - JSON.stringify({ pass, total: counted.length, results }, null, 2), + JSON.stringify({ pass, total: counted.length, results, mode }, null, 2), ); return { pass, total: counted.length, results }; diff --git a/iii-database/tests/e2e/workers/harness/src/worker.ts b/iii-database/tests/e2e/workers/harness/src/worker.ts index 4cde2934..27bf5664 100644 --- a/iii-database/tests/e2e/workers/harness/src/worker.ts +++ b/iii-database/tests/e2e/workers/harness/src/worker.ts @@ -1,23 +1,29 @@ import { registerWorker, Logger } from 'iii-sdk'; import { resolve } from 'node:path'; -import { Runner } from './runner.ts'; +import { Runner, type RunnerMode } from './runner.ts'; import type { DriverKey } from './dialect.ts'; const URL = process.env.III_URL ?? 'ws://127.0.0.1:49134'; const REPORT_PATH = resolve(process.env.HARNESS_REPORT_PATH ?? './reports/report.json'); const FILTER = process.env.HARNESS_FILTER as DriverKey | undefined; +// `HARNESS_MODE` controls which case groups run. Validated here so a typo in +// run-tests.sh becomes "ran the full suite" rather than silently disabling +// the bypass repros that CI relies on. +const MODE_ENV = (process.env.HARNESS_MODE ?? 'full') as string; +const MODE: RunnerMode = + MODE_ENV === 'bypass-only' || MODE_ENV === 'no-bypass' ? MODE_ENV : 'full'; + const iii = registerWorker(URL); const logger = new Logger(); -const runner = new Runner({ iii, reportPath: REPORT_PATH, filterDriver: FILTER }); - -iii.registerFunction( - 'harness::on_outbox_row', - async (payload: unknown) => runner.onOutboxBatch(payload), - { description: 'Sink for iii-database::query-poll dispatches; routes by payload.db.' }, -); +const runner = new Runner({ iii, reportPath: REPORT_PATH, filterDriver: FILTER, mode: MODE }); -logger.info('harness: registered, kicking off suite', { url: URL, filter: FILTER ?? 'all', reportPath: REPORT_PATH }); +logger.info('harness: registered, kicking off suite', { + url: URL, + filter: FILTER ?? 'all', + mode: MODE, + reportPath: REPORT_PATH, +}); (async () => { // ANSI colors only when stdout is a TTY — run-tests.sh redirects to a log file, @@ -40,15 +46,6 @@ logger.info('harness: registered, kicking off suite', { url: URL, filter: FILTER console.log(`HARNESS_DONE: ${RED}FAIL${RESET} 0/0`); exitCode = 1; } - // runAll() called runner.unregisterAllTriggers() which writes UnregisterTrigger - // messages to the websocket synchronously. The SDK's Trigger.unregister() is - // fire-and-forget — sendMessage queues bytes but doesn't await the engine ACK. - // Without this drain step, process.exit() terminates before the OS flushes - // the TCP send buffer, the database worker never sees the unregister, and its - // QueryPollTrigger tasks keep polling — causing the engine to log - // "Function not found: harness::on_outbox_row" every 500ms until the worker - // is restarted (or the next harness run evicts the zombie via trigger_id dedup). - // // 200ms grace lets the OS flush ws bytes; iii.shutdown() then closes the ws // and drains OTel queues. iii.shutdown() itself does NOT await the ws close // handshake, hence the explicit delay. diff --git a/iii-database/tests/integration.rs b/iii-database/tests/integration.rs index 5be3ebda..dba1429b 100644 --- a/iii-database/tests/integration.rs +++ b/iii-database/tests/integration.rs @@ -10,7 +10,8 @@ use iii_database::handlers::run_statement::RunReq; use iii_database::handlers::transaction::TxReq; use iii_database::handlers::{execute, prepare, query, run_statement, transaction, AppState}; use iii_database::pool; -use iii_sdk::RegisterFunction; +use iii_database::transaction::TxRegistry; +use iii_sdk::{Logger, RegisterFunction}; use serde_json::json; use std::collections::HashMap; use std::sync::Arc; @@ -26,6 +27,8 @@ async fn build_state() -> AppState { AppState { pools: Arc::new(pools), handles: Arc::new(HandleRegistry::new()), + transactions: TxRegistry::new(), + log: Logger::new(), } } @@ -172,10 +175,40 @@ fn registered_functions_carry_request_and_response_schemas() { async fn _t(_: TxReq) -> Result { unreachable!() } + async fn _bt( + _: iii_database::handlers::begin_transaction::BeginTxReq, + ) -> Result { + unreachable!() + } + async fn _tq( + _: iii_database::handlers::transaction_query::TxQueryReq, + ) -> Result { + unreachable!() + } + async fn _te( + _: iii_database::handlers::transaction_execute::TxExecuteReq, + ) -> Result { + unreachable!() + } + async fn _ct( + _: iii_database::handlers::commit_transaction::CommitTxReq, + ) -> Result { + unreachable!() + } + async fn _rt( + _: iii_database::handlers::rollback_transaction::RollbackTxReq, + ) -> Result { + unreachable!() + } assert_schemas("iii-database::query", _q); assert_schemas("iii-database::execute", _e); assert_schemas("iii-database::prepareStatement", _p); assert_schemas("iii-database::runStatement", _r); assert_schemas("iii-database::transaction", _t); + assert_schemas("iii-database::beginTransaction", _bt); + assert_schemas("iii-database::transactionQuery", _tq); + assert_schemas("iii-database::transactionExecute", _te); + assert_schemas("iii-database::commitTransaction", _ct); + assert_schemas("iii-database::rollbackTransaction", _rt); }