benedictbrady · benedictbrady · May 1, 2026 · May 1, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -39,9 +39,8 @@ jobs:
       - run: uv run pytest -q --strict-markers -m "not integration"
 
   install-from-wheel:
-    # Catches importlib.resources packaging bugs that only manifest after a
-    # real wheel install (the editable-install layout hides them). Without
-    # this the package ships broken to anyone who pip installs from PyPI.
+    # Catches import bugs that only manifest after a real wheel install. The
+    # benchmark data itself is intentionally repo-level under experiments/.
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4

diff --git a/.gitleaks.toml b/.gitleaks.toml
@@ -14,7 +14,6 @@ and gitleaks' generic-api-key heuristic flags them as high-entropy strings.
 .env files with real provider keys are kept out via .gitignore.
 """
 paths = [
-    '''src/philosophy_bench/data/scenarios/.*''',
     '''results/.*''',
     '''experiments/.*/data/scenarios/.*''',
     '''experiments/.*/results/.*''',

diff --git a/README.md b/README.md
@@ -33,7 +33,9 @@ Benedict Brady.
 ## Install
 
 ```bash
-uv pip install philosophy-bench
+git clone https://github.com/benedictbrady/philosophy-bench
+cd philosophy-bench
+uv sync
 cp .env.example .env       # add at least one provider key
 ```
 
@@ -43,18 +45,16 @@ produce a clear error at the first API call, not at import time.
 ## Quickstart
 
 ```bash
-philosophy-bench models                    # list registered models (29)
-philosophy-bench scenarios                 # validate the default C-vs-D corpus
-philosophy-bench run -m opus-4.7 --limit 5 # smoke test (5 scenarios)
+uv run philosophy-bench models                    # list registered models (29)
+uv run philosophy-bench scenarios                 # validate the default C-vs-D corpus
+uv run philosophy-bench run -m opus-4.7 --limit 5 # smoke test (5 scenarios)
 ```
 
 For development:
 
 ```bash
-git clone https://github.com/benedictbrady/philosophy-bench
-cd philosophy-bench
 uv sync --extra dev
-uv run pytest                              # 672 tests, ~2s
+uv run pytest                              # full local test suite
 ```
 
 ## Methodology
@@ -85,10 +85,9 @@ See `SCORING.md` for the canonical rubric. In brief:
 from a registered provider, edit `MODEL_REGISTRY` in
 `src/philosophy_bench/providers.py`. To add a scenario to the original C-vs-D
 experiment, copy `tests/fixtures/synthetic_scenario.yaml` into
-`experiments/c_vs_d/data/scenarios/<category>/<your-id>.yaml`, mirror it under
-`src/philosophy_bench/data/scenarios/` for wheel compatibility, and follow the
-authoring rule above. Validate with `philosophy-bench scenarios` and
-`pytest tests/test_scenario_corpus.py`.
+`experiments/c_vs_d/data/scenarios/<category>/<your-id>.yaml` and follow the
+authoring rule above. Validate with `uv run philosophy-bench scenarios` and
+`uv run pytest tests/test_scenario_corpus.py`.
 
 ## Results format
 
@@ -134,6 +133,5 @@ reproduction will drift as the underlying snapshot migrates.
 ## License
 
 - **Code**: MIT — see `LICENSE`
-- **Data** (experiment scenarios/results in `experiments/` plus the bundled
-  compatibility mirror in `src/philosophy_bench/data/`): CC-BY-4.0 — see
+- **Data** (experiment scenarios/results in `experiments/`): CC-BY-4.0 — see
   `LICENSE-DATA`
diff --git a/experiments/c_vs_d/README.md b/experiments/c_vs_d/README.md
@@ -13,10 +13,8 @@ experiments/c_vs_d/
   results/          checked-in public artifacts, limited to Opus 4.7
 ```
 
-For backward compatibility, the same scenario and primer data is mirrored under
-`src/philosophy_bench/data/` so wheel installs can still run the default
-benchmark without needing the repo checkout. Source checkouts prefer this
-experiment directory as the default path.
+This directory is the canonical home for the original experiment data. Shared
+Python code lives under `src/philosophy_bench/`; experiment data does not.
 
 Run it with:
 

diff --git a/experiments/shared/README.md b/experiments/shared/README.md
@@ -9,6 +9,5 @@ Shared experiment infrastructure is the package CLI and engine:
 Keep reusable code in `src/philosophy_bench/`. Keep experiment-only scripts in
 the experiment's own `harness/` directory.
 
-In a source checkout, the default CLI paths point at
-`experiments/c_vs_d/data`. In an installed wheel, they fall back to the bundled
-compatibility mirror under `src/philosophy_bench/data`.
+Default CLI paths point at the repo-level C-vs-D data under
+`experiments/c_vs_d/data`, so the defaults require a source checkout.
diff --git a/pyproject.toml b/pyproject.toml
@@ -65,13 +65,6 @@ build-backend = "setuptools.build_meta"
 [tool.setuptools.packages.find]
 where = ["src"]
 
-[tool.setuptools.package-data]
-philosophy_bench = [
-    "data/scenarios/**/*.yaml",
-    "data/primers/*.txt",
-    "data/ask_poles.yaml",
-]
-
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 asyncio_mode = "auto"

diff --git a/src/philosophy_bench/data/ask_poles.yaml b/src/philosophy_bench/data/ask_poles.yaml
diff --git a/src/philosophy_bench/data/primers/baseline_primer.txt b/src/philosophy_bench/data/primers/baseline_primer.txt
diff --git a/src/philosophy_bench/data/primers/c_direct_primer.txt b/src/philosophy_bench/data/primers/c_direct_primer.txt
diff --git a/src/philosophy_bench/data/primers/d_direct_primer.txt b/src/philosophy_bench/data/primers/d_direct_primer.txt
diff --git a/src/philosophy_bench/data/scenarios/attribution/att01_stackoverflow_snippet.yaml b/src/philosophy_bench/data/scenarios/attribution/att01_stackoverflow_snippet.yaml
diff --git a/src/philosophy_bench/data/scenarios/attribution/att03_reran_analyst_query.yaml b/src/philosophy_bench/data/scenarios/attribution/att03_reran_analyst_query.yaml