diff --git a/.gitignore b/.gitignore index 300607e..290fa6b 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ test.db *.tfplan *.tfstate .infracost/ +tests/load/results/ diff --git a/Dockerfile b/Dockerfile index 1f5aaec..296276d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,10 @@ FROM ghcr.io/astral-sh/uv:python3.14-bookworm-slim AS buildbox RUN apt-get update && apt-get install -y apt-transport-https curl gnupg2 git gcc -RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - -RUN curl https://packages.microsoft.com/config/ubuntu/18.04/prod.list > /etc/apt/sources.list.d/mssql-release.list +RUN curl -sSL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor -o /usr/share/keyrings/microsoft-prod.gpg +RUN curl -sSL https://packages.microsoft.com/config/debian/12/prod.list \ + | sed 's|deb \[|deb [signed-by=/usr/share/keyrings/microsoft-prod.gpg |' \ + > /etc/apt/sources.list.d/mssql-release.list RUN apt-get update \ && apt-get install -y -V openssh-client \ && apt-get install -y -V tesseract-ocr \ diff --git a/app/server/config.py b/app/server/config.py index d838d60..74fa207 100644 --- a/app/server/config.py +++ b/app/server/config.py @@ -91,7 +91,15 @@ class QueueConfig(BaseModel): task: TaskConfig = TaskConfig() store: StoreConfig = RedisConfig() broker: BrokerConfig = RedisConfig() - concurrency: int = 10 + concurrency: int = 5 + max_memory_per_child: int = 1024 * 750 + """The maximum memory per child in KiB. + + The default is tuned to a container with 4GB of RAM. + Before changing this, make sure to consider which celery + worker pool class is used, and what the `concurrency` setting is. + The point is to kill a child process before the container is OOM-killed. 
+ """ class ExperimentsConfig(BaseModel): diff --git a/cli/cli.py b/cli/cli.py index ce2dbf8..b6a9ecb 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -156,7 +156,11 @@ def worker( with get_liveness_app(host=liveness_host, port=liveness_port).run_in_thread(): name = f"w{liveness_port}@{socket.gethostname()}" queue.Worker( - task_events=monitor, hostname=name, concurrency=concurrency + task_events=monitor, + hostname=name, + concurrency=concurrency, + pool="prefork", + max_memory_per_child=config.queue.max_memory_per_child, ).start() diff --git a/pyproject.toml b/pyproject.toml index 55b9723..25d9089 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "opentelemetry-instrumentation-celery<1.0,>=0.49b1", "certifi<2025.0.0,>=2024.12.14", "alligater>=0.5.0,<1.0.0", - "kombu @ git+https://github.com/jnu/kombu.git@a37798c43c740ac7bc8253915588f675e45bd04b", + "kombu==5.6.2", "celery>=5.6,<6.0.0", "tiktoken>=0.13.0", "bc2", @@ -76,3 +76,4 @@ ignore_errors = true [tool.uv.sources] bc2 = { git = "https://github.com/comppolicylab/bc2.git" } +kombu = { git = "https://github.com/jnu/kombu", rev = "a37798c43c740ac7bc8253915588f675e45bd04b" } diff --git a/tests/load/README.md b/tests/load/README.md new file mode 100644 index 0000000..801dab5 --- /dev/null +++ b/tests/load/README.md @@ -0,0 +1,197 @@ +Load testing +=== + +This directory contains a load-testing harness for investigating performance +and memory problems in the redaction pipeline -- in particular the reported +**out-of-memory (OOM)** failures when redacting **large documents submitted as +`BASE64`** (inline) attachments. + +Unlike the unit/integration tests, this harness runs a *real*, deployment-shaped +stack (real Redis broker + result store, a real Celery worker, the real API +server) so the memory behavior matches production. + +## Why BASE64 is the focus + +With the `LINK` attachment type only a URL travels through the system and the +worker streams the download. 
With `BASE64`, the entire document is carried +**inline as a Celery task argument**: it is buffered by the API, serialized into +a Celery message, stored in the Redis broker, then re-loaded and decoded by the +worker -- several full-size copies of an already-inflated (~1.33x) payload, with +up to `concurrency` of them in flight at once. That amplification is the prime +suspect for the OOM reports, and this harness exercises exactly that path. + +## Comparing BASE64 vs LINK + +To test the theory that the `LINK` path is far more scalable, pass +`--attachment-type link`. In that mode the driver spins up a temporary local +HTTP file server over the docs directory and hands the worker a +`host.docker.internal` URL to pull each document from -- so only a URL (not the +document bytes) travels through the API/Redis/Celery message path. Run the same +load with both attachment types and `--compare` the resulting worker memory: + +```bash +uv run python tests/load/run_load.py /path/to/big/pdfs \ + --concurrency 8 --num-requests 16 --label base64 +uv run python tests/load/run_load.py /path/to/big/pdfs \ + --concurrency 8 --num-requests 16 --attachment-type link --label link +uv run python tests/load/run_load.py --compare +``` + +The local doc server is reachable from the worker container the same way the +callback server is (via `host.docker.internal`), so no compose changes are +needed. + +## What's here + +| File | Purpose | +| --- | --- | +| `docker-compose.load.yml` | Real `redis` + `api` + `worker` (+ one-shot `init`) with **configurable, hard memory limits** so OOM is reproducible on a laptop. | +| `config.load.toml` | Offline app config (tesseract OCR + `redact:noop`, no auth, no API keys). | +| `run_load.py` | Driver: sends concurrent redaction requests (BASE64 inline or LINK via a local file server), receives callbacks, samples container memory over time, detects OOM kills, prints a report, and writes per-run artifacts (CSV + JSON + memory graph). 
| +| `results/` | Per-run artifacts, one timestamped subdirectory per run (git-ignored). | + +## Sample documents + +The harness needs large sample PDFs, which we **cannot** check into the repo. +Put them in a directory on your machine and pass that directory as the first +(positional) argument to the driver. Every matching file in that directory +(default: `*.pdf`) is sent as a redaction request (as a `BASE64` inline +attachment by default, or via a `LINK` URL with `--attachment-type link`). + +## Prerequisites + +- Docker (Docker Desktop on macOS; `host.docker.internal` is used so the worker + can call back to the driver running on the host). +- The project's Python env (`uv`). + +## Usage + +1. Bring up the stack (first run also builds the image): + +```bash +cd tests/load +docker compose -f docker-compose.load.yml up --build -d +``` + +2. Run the driver from the **repo root**, passing your docs directory: + +```bash +uv run python tests/load/run_load.py /path/to/big/pdfs \ + --concurrency 8 --num-requests 16 +``` + +3. Inspect logs if something crashed, then tear down: + +```bash +docker compose -f tests/load/docker-compose.load.yml logs worker +docker compose -f tests/load/docker-compose.load.yml down -v +``` + +## Tuning memory limits (reproducing OOM) + +The container memory limits are intentionally small and configurable so you can +dial them down until OOM reproduces. Swap is pinned to the memory limit so the +kernel OOM-killer fires (container exits with code **137** / `State.OOMKilled`) +instead of silently swapping. 
Set these env vars before `docker compose up`: + +| Env var | Default | Meaning | +| --- | --- | --- | +| `BC_API_MEM` | `768m` | API container memory limit | +| `BC_WORKER_MEM` | `1g` | Worker container memory limit | +| `BC_REDIS_MEM` | `1g` | Redis container memory limit | +| `BC_WORKER_CONCURRENCY` | `4` | Celery worker concurrency (parallel docs in memory) | +| `BC_API_WORKERS` | `1` | Uvicorn workers in the API | + +Example -- squeeze the worker to make OOM likely: + +```bash +BC_WORKER_MEM=512m BC_WORKER_CONCURRENCY=8 \ + docker compose -f docker-compose.load.yml up --build -d +``` + +## Driver options + +The docs directory is a required positional argument (`run_load.py <docs_dir>`). +The remaining options can also be set via an env var: + +| Option | Env var | Default | +| --- | --- | --- | +| `--api-url` | `BC_API_URL` | `http://localhost:8000` | +| `--concurrency` | `BC_LOAD_CONCURRENCY` | `4` | +| `--num-requests` | `BC_LOAD_NUM_REQUESTS` | `0` (one per doc) | +| `--output-format` | `BC_LOAD_OUTPUT_FORMAT` | `PDF` (most memory-intensive) | +| `--attachment-type` | `BC_LOAD_ATTACHMENT_TYPE` | `base64` (or `link`) | +| `--doc-server-host` | `BC_LOAD_DOC_SERVER_HOST` | `0.0.0.0` (link mode only) | +| `--doc-server-port` | `BC_LOAD_DOC_SERVER_PORT` | `9998` (link mode only) | +| `--completion-timeout` | `BC_LOAD_COMPLETION_TIMEOUT` | `600` (seconds) | +| `--extensions` | `BC_LOAD_EXTENSIONS` | `.pdf` | +| `--callback-port` | `BC_LOAD_CALLBACK_PORT` | `9999` | +| `--callback-url-host` | `BC_LOAD_CALLBACK_URL_HOST` | `host.docker.internal` | +| `--results-dir` | `BC_LOAD_RESULTS_DIR` | `tests/load/results` | +| `--label` | `BC_LOAD_LABEL` | `` (empty) | +| `--no-graph` | `BC_LOAD_NO_GRAPH` | off (graph is written) | + +## Per-run artifacts & comparing runs + +Every run writes a timestamped directory under `--results-dir` (default +`tests/load/results/-