diff --git a/src/minisweagent/config/geak.yaml b/src/minisweagent/config/geak.yaml
index ff5729ad1..3f6e49243 100644
--- a/src/minisweagent/config/geak.yaml
+++ b/src/minisweagent/config/geak.yaml
@@ -43,7 +43,7 @@ run:
       kill_buffer_s: 360         # forced os._exit() this long after opt_deadline
     full:
       total_s: 7200              # 2 hours total wall-clock
-      preprocess_soft_cap_s: 900
+      preprocess_soft_cap_s: 2400 # 40 min: translation + multi-round harness-gen + baseline
       preprocess_hard_cap_fraction: 0.5  # -> 3600 s ceiling
       finalize_grace_s: 300
       kill_buffer_s: 360
diff --git a/src/minisweagent/run/mini.py b/src/minisweagent/run/mini.py
index 836b14711..a53f1a362 100644
--- a/src/minisweagent/run/mini.py
+++ b/src/minisweagent/run/mini.py
@@ -894,6 +894,16 @@ def _hard_kill_handler() -> None:
             test_command = preprocess_ctx["test_command"]
         if preprocess_ctx.get("repo_root") and repo is None:
             repo = Path(preprocess_ctx["repo_root"])
+        elif preprocess_ctx.get("repo_root"):
+            # A PyTorch->FlyDSL translation retargets the optimization root to the
+            # per-run ``_opt_repo`` (where the translated kernel + staged reference
+            # live). Honor that even when ``--repo`` was passed, otherwise
+            # optimization/preflight root at the source repo (which has NO
+            # translated kernel) and the harness fails to import it.
+            _pp_root = Path(preprocess_ctx["repo_root"])
+            if _pp_root.name == "_opt_repo" and (repo is None or Path(repo).resolve() != _pp_root.resolve()):
+                logger.info("Using per-run _opt_repo as optimization root (translation run): %s", _pp_root)
+                repo = _pp_root
 
         # Resolve max_rounds via the documented precedence chain:
         # CLI --max-rounds (if any future flag added) > config (mode preset) >
diff --git a/src/minisweagent/run/preprocess_v3/baseline.py b/src/minisweagent/run/preprocess_v3/baseline.py
index 0b8ce951d..2c796e90e 100644
--- a/src/minisweagent/run/preprocess_v3/baseline.py
+++ b/src/minisweagent/run/preprocess_v3/baseline.py
@@ -35,6 +35,7 @@
 
 import logging
 import os
+import re
 import shlex
 import statistics
 import subprocess
@@ -77,16 +78,42 @@
 
 #: Short timeout for the correctness gate that runs before baseline collection.
 #: Goal: fail fast on a broken kernel rather than spending minutes running the
-#: full benchmark loop. Override via ``GEAK_BENCH_TIMEOUT`` (or the legacy
-#: ``GEAK_CORRECTNESS_GATE_TIMEOUT``).
+#: full benchmark loop. ``GEAK_CORRECTNESS_GATE_TIMEOUT`` takes precedence; for
+#: compiled kernels whose first ``--correctness`` run also builds the extension,
+#: the larger ``GEAK_BENCH_TIMEOUT`` is honored as a fallback. Default 120s.
 _CORRECTNESS_GATE_TIMEOUT_S = int(
     os.environ.get(
-        "GEAK_BENCH_TIMEOUT",
-        os.environ.get("GEAK_CORRECTNESS_GATE_TIMEOUT", "120"),
+        "GEAK_CORRECTNESS_GATE_TIMEOUT",
+        os.environ.get("GEAK_BENCH_TIMEOUT", "120"),
     )
 )
 
 
+#: Exception names in harness stderr/stdout that mean the harness could not
+#: resolve (import/open) the kernel-under-test — a broken-harness failure that
+#: yields an empty baseline. Surfaced precisely so a no-latency baseline reads
+#: as "kernel not found at <path>" instead of a silent "produced no latency".
+_KERNEL_RESOLUTION_MARKERS = ("FileNotFoundError", "ModuleNotFoundError", "ImportError")
+
+
+def detect_kernel_resolution_failure(raw_outputs: list[dict[str, Any]]) -> str | None:
+    """Return the first kernel-resolution error line from harness output, or ``None``.
+
+    Scans each run's stderr/stdout for an import / file-not-found error (the
+    signature of a harness pointing at a non-existent kernel path) and returns
+    that line verbatim — e.g. ``FileNotFoundError: [Errno 2] No such file or
+    directory: '<path>'`` — so callers can report exactly which path failed to
+    resolve rather than a generic "no latency" message.
+    """
+    for out in raw_outputs:
+        blob = f"{out.get('stderr') or ''}\n{out.get('stdout') or ''}"
+        for marker in _KERNEL_RESOLUTION_MARKERS:
+            idx = blob.find(marker)
+            if idx != -1:
+                return blob[idx:].splitlines()[0].strip()
+    return None
+
+
 @dataclass(frozen=True)
 class BaselineMetrics:
     """Wall-clock benchmark statistics for a harness run.
@@ -327,6 +354,7 @@ def collect_baseline_metrics(
     repeats: int = 5,
     work_dir: Path | None = None,
     gpu_id: int = 0,
+    skip_correctness_gate: bool = False,
 ) -> BaselineMetrics:
     """Run the harness ``repeats`` times in ``--benchmark`` mode.
 
@@ -349,6 +377,20 @@ def collect_baseline_metrics(
         gpu_id:
             ``HIP_VISIBLE_DEVICES`` value for each invocation.
             Defaults to GPU 0 to match the legacy default.
+        skip_correctness_gate:
+            When ``True``, skip the up-front ``--correctness`` gate and go
+            straight to the benchmark loop. Use this when correctness has
+            already been validated upstream on an authoritative harness —
+            notably after a successful PyTorch→FlyDSL translation, which runs
+            its own correctness + performance-regression check. The gate
+            re-checks correctness on the (stricter) harness-generator harness
+            and trips on *any* non-zero exit (timeout / env / multi-shape
+            miss), not just real numeric mismatches, so re-gating an
+            already-validated kernel discards good candidates. The global
+            ``GEAK_SKIP_CORRECTNESS_GATE=1`` env var still forces a skip
+            regardless of this flag; this parameter scopes the skip to a
+            single call (e.g. translation runs) without disabling the gate
+            for user-supplied harnesses.
 
     Returns:
         A :class:`BaselineMetrics` summarising the run.
@@ -378,10 +420,13 @@ def collect_baseline_metrics(
 
     # Correctness gate: a quick ``--correctness`` invocation up front so that a
     # broken kernel fails in ~5-30 s rather than after a full benchmark + profile
-    # cycle (~5+ min). Mirrors the legacy harness validation shape; can be
-    # disabled via ``GEAK_SKIP_CORRECTNESS_GATE=1`` when you explicitly want
-    # baseline numbers from a correctness-failing kernel.
-    if not os.environ.get("GEAK_SKIP_CORRECTNESS_GATE"):
+    # cycle (~5+ min). Mirrors the legacy harness validation shape. Skipped when:
+    #   * ``skip_correctness_gate=True`` — correctness already validated upstream
+    #     (e.g. translation, which runs its own correctness + perf-regression
+    #     gate). Scoped to this call, so user-supplied harnesses still gate.
+    #   * ``GEAK_SKIP_CORRECTNESS_GATE=1`` — global override for when you
+    #     explicitly want baseline numbers from a correctness-failing kernel.
+    if not skip_correctness_gate and not os.environ.get("GEAK_SKIP_CORRECTNESS_GATE"):
         gate = _run_benchmark_once(
             harness_path,
             work_dir=work_dir,
@@ -689,4 +734,5 @@ def collect_profile(
     "ProfileResult",
     "collect_baseline_metrics",
     "collect_profile",
+    "detect_kernel_resolution_failure",
 ]
diff --git a/src/minisweagent/run/preprocess_v3/tools.py b/src/minisweagent/run/preprocess_v3/tools.py
index 0632a73e3..467abf38c 100644
--- a/src/minisweagent/run/preprocess_v3/tools.py
+++ b/src/minisweagent/run/preprocess_v3/tools.py
@@ -52,6 +52,7 @@
 import os
 import shlex
 import shutil
+import subprocess
 import time
 from collections.abc import Callable
 from dataclasses import dataclass, replace
@@ -64,6 +65,7 @@
     ProfileResult,
     collect_baseline_metrics,
     collect_profile,
+    detect_kernel_resolution_failure,
 )
 from minisweagent.run.preprocess_v3.commandment import (
     CommandmentContext,
@@ -306,19 +308,26 @@ def _copy_repo_sandbox(repo_root: Path, sandbox_root: Path, output_dir: Path) ->
     repo_root = repo_root.resolve()
     output_dir = output_dir.resolve()
 
+    # The output-dir guard exists to avoid recursively copying the active GEAK
+    # output directory when it lives INSIDE the repo being copied. It must NOT
+    # fire when the repo itself lives under output_dir (the per-run ``_opt_repo``
+    # staged for a translation run): there, output_dir is the parent and the
+    # repo's own files are descendants of output_dir, so guarding would ignore
+    # everything and produce an empty sandbox.
+    guard_output_dir = output_dir == repo_root or repo_root in output_dir.parents
+
     def _ignore(dir_path: str, names: list[str]) -> set[str]:
         ignored = {"__pycache__", ".pytest_cache", ".ruff_cache"}
-        current = Path(dir_path).resolve()
-        for name in names:
-            child = current / name
-            try:
-                child_resolved = child.resolve()
-            except OSError:
-                continue
-            # Avoid recursively copying the active GEAK output directory
-            # when users place outputs under the target repo.
-            if child_resolved == output_dir or output_dir in child_resolved.parents:
-                ignored.add(name)
+        if guard_output_dir:
+            current = Path(dir_path).resolve()
+            for name in names:
+                child = current / name
+                try:
+                    child_resolved = child.resolve()
+                except OSError:
+                    continue
+                if child_resolved == output_dir or output_dir in child_resolved.parents:
+                    ignored.add(name)
         return ignored
 
     shutil.copytree(repo_root, sandbox_root, symlinks=True, ignore=_ignore)
@@ -1148,6 +1157,32 @@ def _impl(source_path: str, output_dir: str) -> dict[str, Any]:
                         if ref_src.exists() and ref_dest != ref_src:
                             ref_dest.parent.mkdir(parents=True, exist_ok=True)
                             shutil.copy2(ref_src, ref_dest)
+                    # Initialise _opt_repo as a git repo with a committed
+                    # baseline so the optimization agent's per-slot worktrees and
+                    # the eval/preflight path have a real git root to diff
+                    # against (the #275 git-diff patch-capture contract). The
+                    # staged dir is a fresh per-run copy, so this never touches
+                    # the user's source repo. Non-fatal on failure — the eval
+                    # path also inits a temp git repo for non-git roots.
+                    try:
+                        if not (opt_repo / ".git").exists():
+                            _git_env = {
+                                **os.environ,
+                                "GIT_AUTHOR_NAME": "geak",
+                                "GIT_AUTHOR_EMAIL": "geak@local",
+                                "GIT_COMMITTER_NAME": "geak",
+                                "GIT_COMMITTER_EMAIL": "geak@local",
+                            }
+                            for _git_cmd in (
+                                ["git", "init", "-q"],
+                                ["git", "add", "-A"],
+                                ["git", "commit", "-q", "-m", "geak: translated FlyDSL baseline"],
+                            ):
+                                subprocess.run(
+                                    _git_cmd, cwd=str(opt_repo), env=_git_env, capture_output=True, check=True
+                                )
+                    except Exception as _git_exc:  # noqa: BLE001 — patch capture has a non-git fallback
+                        logger.warning("Could not git-init per-run _opt_repo (non-fatal): %s", _git_exc)
                     result = replace(result, translated_kernel_path=staged)
                     logger.info(
                         "Staged translated kernel into per-run optimization repo for patch capture: %s -> %s",
@@ -1161,6 +1196,24 @@ def _impl(source_path: str, output_dir: str) -> dict[str, Any]:
                     exc,
                 )
         agent._collected["translation"] = result
+        # Align downstream preprocess with where optimization will run. The
+        # adapter roots optimization at the per-run ``_opt_repo`` (the staged
+        # translated kernel's parent); point the orchestrator's kernel_path /
+        # repo_root there too so the harness subagent sandbox, the injected
+        # kernel_relpath, and the baseline work_dir all resolve the TRANSLATED
+        # kernel (which lives only in ``_opt_repo``) instead of the source repo
+        # (which has no translated kernel). Without this the harness is
+        # generated/verified against the wrong tree and the baseline cannot
+        # import the kernel.
+        if (
+            result.success
+            and result.translated_kernel_path is not None
+            and hasattr(agent, "_extra_template_vars")
+        ):
+            staged_kernel = Path(result.translated_kernel_path).resolve()
+            if staged_kernel.parent.name == "_opt_repo":
+                agent._extra_template_vars["kernel_path"] = str(staged_kernel)
+                agent._extra_template_vars["repo_root"] = str(staged_kernel.parent)
         return {
             "ok": result.success,
             "translated_kernel_path": str(result.translated_kernel_path) if result.translated_kernel_path else None,
@@ -1246,6 +1299,30 @@ def _impl(name: str, task: str | None = None, context: Any = None, **_extra_igno
             context.setdefault("attempt", generator_attempts)
         elif name == "harness-verifier":
             context.setdefault("attempt", max(generator_attempts, 1))
+
+        # Deterministic kernel-path injection: the orchestrator already knows
+        # where the kernel lives, so hand the harness subagents the exact
+        # worktree-relative path instead of letting the LLM infer it from the
+        # source tree. Inferring it is the root cause of harnesses that build a
+        # wrong path (e.g. a spurious doubled directory segment) -> the kernel
+        # import raises FileNotFoundError -> empty baseline -> FAIL_PREPROCESS.
+        # The harness must resolve the kernel as os.path.join(WORK_DIR,
+        # kernel_relpath); see the harness-generator's worktree-path discipline.
+        if name in ("harness-generator", "harness-verifier"):
+            _tv = getattr(agent, "_extra_template_vars", {}) or {}
+            _kp = _tv.get("kernel_path")
+            _rr = _tv.get("repo_root")
+            if _kp:
+                context.setdefault("kernel_path", str(_kp))
+                if _rr:
+                    try:
+                        _rel = Path(str(_kp)).resolve().relative_to(Path(str(_rr)).resolve())
+                        context.setdefault("kernel_relpath", str(_rel))
+                    except ValueError:
+                        # Kernel lives outside repo_root (e.g. a staged translated
+                        # kernel) — the absolute kernel_path above is the signal.
+                        pass
+
         codebase_ctx = agent._collected.get("codebase_context")
         if (
             codebase_ctx is not None
@@ -1287,6 +1364,47 @@ def _impl(name: str, task: str | None = None, context: Any = None, **_extra_igno
                 agent._collected["harness_path"] = stripped.split(":", 1)[1].strip()
             elif stripped.startswith("HARNESS_PATH="):
                 agent._collected["harness_path"] = stripped.split("=", 1)[1].strip()
+
+        # Deterministic verification backstop. The LLM-driven harness-verifier
+        # sometimes fails to emit HARNESS_VERIFIED=true even for a harness that
+        # actually runs (flaky subagent behavior), which leaves the orchestrator
+        # looping on harness-generator until the preprocess cap — never reaching
+        # collect_baseline. If the verifier did not confirm but the produced
+        # harness PASSES --correctness against the effective work_dir (the same
+        # one collect_baseline uses), mark it verified so the run can proceed.
+        if (
+            name == "harness-verifier"
+            and not result.get("success")
+            and agent._collected.get("harness_path")
+        ):
+            hp = Path(str(agent._collected["harness_path"]))
+            _rr = agent._extra_template_vars.get("repo_root") if hasattr(agent, "_extra_template_vars") else None
+            _wd = Path(str(_rr)) if _rr else (Path(agent.config.repo) if agent.config.repo else None)
+            if hp.is_file():
+                try:
+                    from minisweagent.run.preprocess_v3.baseline import _run_benchmark_once
+
+                    check = _run_benchmark_once(
+                        hp, work_dir=_wd, gpu_id=agent.config.gpu_id, timeout_s=300, flag="--correctness"
+                    )
+                    if check["returncode"] == 0:
+                        logger.info(
+                            "harness-verifier: deterministic --correctness backstop PASSED for %s; "
+                            "marking HARNESS_VERIFIED (LLM verifier did not confirm)",
+                            hp,
+                        )
+                        result["success"] = True
+                        result["output"] = (result.get("output") or "") + (
+                            "\nHARNESS_VERIFIED=true\nVERIFIED_BY=deterministic_correctness_backstop\n"
+                        )
+                    else:
+                        logger.info(
+                            "harness-verifier: deterministic --correctness backstop FAILED for %s (rc=%s)",
+                            hp,
+                            check["returncode"],
+                        )
+                except Exception as exc:  # noqa: BLE001 — backstop must never crash dispatch
+                    logger.debug("harness-verifier backstop errored (non-fatal): %s", exc)
         return result
 
     return _impl
@@ -1323,7 +1441,39 @@ def _impl(
                 eval_command = saved_eval
 
         resolved_gpu = gpu_id if gpu_id is not None else agent.config.gpu_id
-        resolved_work_dir = Path(work_dir) if work_dir else None
+        # Default the harness work_dir to the EFFECTIVE repo root (set on
+        # _extra_template_vars; retargeted to ``_opt_repo`` after a translation),
+        # falling back to the source repo. A None work_dir leaves GEAK_WORK_DIR
+        # unset, so the harness resolves paths against its own dir and cannot
+        # find the kernel (silent "no latency"). The orchestrator prompt does
+        # not pass work_dir, so this default is what makes the kernel resolvable.
+        if work_dir:
+            resolved_work_dir: Path | None = Path(work_dir)
+        else:
+            _effective_repo = (
+                agent._extra_template_vars.get("repo_root") if hasattr(agent, "_extra_template_vars") else None
+            )
+            if _effective_repo:
+                resolved_work_dir = Path(_effective_repo)
+            elif agent.config.repo:
+                resolved_work_dir = Path(agent.config.repo)
+            else:
+                resolved_work_dir = None
+
+        # Skip the up-front correctness gate when translation already ran and
+        # succeeded. Translation validates correctness + perf-regression on its
+        # own harness; the baseline gate re-checks on the stricter
+        # harness-generator harness and trips on any non-zero exit (timeout /
+        # env / multi-shape miss), discarding kernels translation already
+        # accepted. This is scoped to translation runs — user-supplied
+        # harnesses (no translation, eval_command, Path A) still gate.
+        translation = agent._collected.get("translation")
+        skip_correctness_gate = bool(translation is not None and getattr(translation, "success", False))
+        if skip_correctness_gate:
+            logger.info(
+                "collect_baseline: skipping correctness gate (translation succeeded; "
+                "correctness already validated upstream)"
+            )
 
         if harness_path:
             baseline: BaselineMetrics = collect_baseline_metrics(
@@ -1331,18 +1481,20 @@ def _impl(
                 repeats=repeats,
                 work_dir=resolved_work_dir,
                 gpu_id=resolved_gpu,
+                skip_correctness_gate=skip_correctness_gate,
             )
             agent._collected["baseline"] = baseline
 
-            from minisweagent.run.preprocess_v3.baseline import capture_full_benchmark_stdout
+            if baseline.success:
+                from minisweagent.run.preprocess_v3.baseline import capture_full_benchmark_stdout
 
-            fb_stdout = capture_full_benchmark_stdout(
-                Path(harness_path),
-                work_dir=resolved_work_dir,
-                gpu_id=resolved_gpu,
-            )
-            if fb_stdout:
-                agent._collected["full_benchmark_stdout"] = fb_stdout
+                fb_stdout = capture_full_benchmark_stdout(
+                    Path(harness_path),
+                    work_dir=resolved_work_dir,
+                    gpu_id=resolved_gpu,
+                )
+                if fb_stdout:
+                    agent._collected["full_benchmark_stdout"] = fb_stdout
         else:
             from minisweagent.run.preprocess_v3.baseline import collect_baseline_from_eval_command
 
@@ -1354,6 +1506,45 @@ def _impl(
             )
             agent._collected["baseline"] = baseline
 
+        if not baseline.success:
+            # Surface WHY the baseline is empty. A harness that points at a
+            # non-existent kernel fails every mode with FileNotFoundError /
+            # ImportError; report that precise path instead of a silent
+            # "produced no latency" so the failure is diagnosable and the
+            # regenerated harness can be fixed.
+            reason = detect_kernel_resolution_failure(baseline.raw_outputs) or (
+                "harness ran but produced no GEAK_RESULT_LATENCY_MS marker"
+            )
+            logger.error(
+                "collect_baseline: no baseline produced for %s — %s",
+                harness_path or eval_command,
+                reason,
+            )
+            # Fail-closed: when the harness-generator retry budget is already
+            # exhausted and the harness still cannot produce a baseline, the
+            # harness is unusable. Terminate with a clear error rather than
+            # running on a known-broken harness or letting the orchestrator spin
+            # to its step limit retrying.
+            attempts = int(agent._collected.get("_harness_generator_attempts", 0) or 0)
+            if harness_path and attempts >= 3:
+                raise FinishedSuccessfully(
+                    {
+                        "harness_path": agent._collected.get("harness_path"),
+                        "commandment_path": agent._collected.get("commandment_path"),
+                        "errors": [f"harness unusable after {attempts} generator attempts: {reason}"],
+                        "summary": "",
+                    }
+                )
+            return {
+                "ok": False,
+                "median_ms": None,
+                "samples_ms": [],
+                "stdev_ms": baseline.stdev_ms,
+                "repeats": baseline.repeats,
+                "command": baseline.command,
+                "error": reason,
+            }
+
         return {
             "ok": baseline.success,
             "median_ms": baseline.median_ms,
diff --git a/src/minisweagent/tools/rag_postprocessor.py b/src/minisweagent/tools/rag_postprocessor.py
index 5d0afde5e..ac456f88d 100644
--- a/src/minisweagent/tools/rag_postprocessor.py
+++ b/src/minisweagent/tools/rag_postprocessor.py
@@ -88,7 +88,17 @@ def model(self):
                 config = copy.deepcopy(self.config.model_config)
                 self._model = get_model(config=config)
             else:
-                self._model = get_model()
+                # No explicit model config: load the GEAK gateway model (honors
+                # model_class, e.g. ``amd_llm``). A bare ``get_model()`` defaults
+                # to a provider-less ``LitellmModel`` that 400s on
+                # gateway-routed names like ``claude-opus-4.6`` ("LLM Provider
+                # NOT provided"), which would crash the calling agent.
+                try:
+                    from minisweagent.run.pipeline_helpers import load_geak_model
+
+                    self._model = load_geak_model(None)
+                except Exception:  # noqa: BLE001 — last-resort fallback
+                    self._model = get_model()
             model_impl = getattr(self._model, "_impl", self._model)
             if hasattr(model_impl, "tools"):
                 model_impl.tools = []
@@ -116,9 +126,16 @@ def process(self, rag_result: str, query: str = "") -> str:
 
         logger.debug("RAG postprocessor processing %d chars", len(rag_result))
 
-        response = self.model.query(
-            [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_content}]
-        )
+        # RAG post-processing is advisory polish on retrieved knowledge — it
+        # must NEVER crash the calling agent. On any model/transport failure,
+        # fall back to the raw retrieval result.
+        try:
+            response = self.model.query(
+                [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_content}]
+            )
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("RAG postprocessor LLM call failed (%s); returning raw retrieval result", exc)
+            return rag_result
 
         result = response["content"]
         logger.debug("RAG postprocessor output %d chars", len(result))
diff --git a/subagents/preprocess/harness-generator/SUBAGENT.yaml b/subagents/preprocess/harness-generator/SUBAGENT.yaml
index d9b43aa89..7b6ac9736 100644
--- a/subagents/preprocess/harness-generator/SUBAGENT.yaml
+++ b/subagents/preprocess/harness-generator/SUBAGENT.yaml
@@ -53,7 +53,8 @@ system_prompt: |
   ## Worktree Path Discipline (MANDATORY — read before writing any import/build path)
   - At optimization time GEAK applies each candidate patch inside a PER-SLOT git worktree exported as `$GEAK_WORK_DIR` (placed first on PYTHONPATH). The harness MUST resolve EVERY repository path from `$GEAK_WORK_DIR` so it imports/compiles the PATCHED candidate — never the original source tree. If it reads the baseline, correctness always PASSes and every measured speedup is ~1.00x with no error, and the optimizer trains on a flat signal.
   - Derive once at the top: `WORK_DIR = os.environ.get("GEAK_WORK_DIR", os.path.dirname(os.path.abspath(__file__)))`.
-  - Python imports: prepend the worktree's package dir to `sys.path` — e.g. `sys.path.insert(0, os.path.join(WORK_DIR, "python"))` (adjust `"python"` to wherever the package lives in the repo). Do this ONCE.
+  - The Context block provides `kernel_relpath` — the kernel-under-test's path RELATIVE to `$GEAK_WORK_DIR`. Resolve the kernel EXACTLY as `os.path.join(WORK_DIR, kernel_relpath)`. Do NOT infer, guess, or reconstruct the subdirectory from the source tree or the absolute kernel path — using the provided `kernel_relpath` verbatim is mandatory (a wrong path makes every mode fail with FileNotFoundError and produces no baseline).
+  - Python imports: prepend the worktree's package dir to `sys.path` — e.g. `sys.path.insert(0, os.path.join(WORK_DIR, "python"))` (adjust `"python"` to wherever the package lives in the repo, consistent with `kernel_relpath`). Do this ONCE.
   - NEVER hardcode an absolute source-repo path (e.g. `"/sgl-workspace/sglang/python"`), and NEVER add one as a fallback element in a sys.path candidate list/tuple — with `sys.path.insert(0, ...)` in a loop it would land AHEAD of the worktree and silently import the baseline. The worktree-derived entry must be the ONLY one you insert. The sole permitted absolute literal anywhere is the default arg of `os.environ.get("GEAK_WORK_DIR", <default>)`.
   - C/C++/HIP/CUDA: build include flags as `f"-I{WORK_DIR}/<subdir>"`, compile into a DETERMINISTIC fixed-name dir under `WORK_DIR` (e.g. `f"{WORK_DIR}/_geak_build"`; it is already per-slot-isolated because WORK_DIR differs per worktree), and do an INCREMENTAL rebuild keyed on source mtime/hash — rebuild only when the kernel source is newer than the artifact (a patched kernel has a newer mtime so it still recompiles), otherwise reuse the cache. Never do an unconditional cold rebuild every run (turns validation into a multi-hour recompile loop).
   - SELF-CONTAINED BUILD (MANDATORY): the harness MUST build the artifact ITSELF, from scratch, whenever the build dir / compiled `.so` is MISSING. Every candidate runs in a FRESH per-slot git worktree that does NOT contain any prior build — `_geak_build/` is untracked and will NOT be present. So the build cache is an OPTIMIZATION, never a PRECONDITION: `if artifact missing -> generate build files + compile; elif source newer -> recompile; else -> reuse`. NEVER `sys.exit`/raise demanding that a "preprocess seed" or pre-existing `_geak_build/<ext>` be present — that makes correctness fail on every fresh candidate worktree, so the optimizer sees a flat ~1.00x signal (the same failure mode as the worktree-bypass bug). You may reuse SHARED, READ-ONLY prebuilt third-party deps (e.g. composable_kernel / flashinfer headers) to speed the compile, but the kernel-under-test extension itself must always be buildable from only `$GEAK_WORK_DIR` + the toolchain.
diff --git a/tests/run/test_preprocess_v3_bugfixes.py b/tests/run/test_preprocess_v3_bugfixes.py
index 8472d1ad9..b332ee5b9 100644
--- a/tests/run/test_preprocess_v3_bugfixes.py
+++ b/tests/run/test_preprocess_v3_bugfixes.py
@@ -10,9 +10,11 @@
     PreprocessOrchestratorConfig,
 )
 from minisweagent.run.preprocess_v3.tools import (
+    _make_tool_collect_baseline,
     _make_tool_commandment_from_user_command,
     _make_tool_dispatch_subagent,
     _make_tool_finish_preprocess,
+    _make_tool_translate_to_flydsl,
 )
 
 
@@ -233,3 +235,304 @@ def test_legacy_context_recovers_harness_path_from_promoted_command(tmp_path: Pa
     assert ctx["full_benchmark_baseline"] == str(output_dir / "full_benchmark_baseline.txt")
     assert (output_dir / "benchmark_baseline.txt").read_text() == "GEAK_RESULT_LATENCY_MS=1.25\n"
     assert ctx["v3_path_taken"] == "A"
+
+
+@pytest.mark.parametrize(
+    "translation, expected_skip",
+    [
+        (SimpleNamespace(success=True), True),    # translation validated -> skip gate
+        (SimpleNamespace(success=False), False),  # translation failed -> keep gate
+        (None, False),                            # user-supplied harness -> keep gate
+    ],
+)
+def test_collect_baseline_skips_gate_only_on_translation_success(
+    monkeypatch, tmp_path: Path, translation, expected_skip
+) -> None:
+    """The baseline correctness gate is skipped iff a translation succeeded.
+
+    Translation runs its own correctness + perf-regression check, so re-gating
+    on the stricter harness-generator harness discards already-validated kernels
+    (the FAIL_PREPROCESS-on-translation bug). The skip must stay scoped to
+    translation runs: user-supplied harnesses (no translation, or a failed one)
+    must still be gated.
+    """
+    import minisweagent.run.preprocess_v3.tools as tools_module
+
+    harness = tmp_path / "harness.py"
+    harness.write_text("print('GEAK_RESULT_LATENCY_MS=1.0')\n")
+
+    captured: dict[str, object] = {}
+
+    def fake_collect_baseline_metrics(harness_path, *, repeats, work_dir, gpu_id, skip_correctness_gate=False):
+        captured["skip_correctness_gate"] = skip_correctness_gate
+        return SimpleNamespace(
+            success=True, median_ms=1.0, samples_ms=[1.0], stdev_ms=0.0,
+            repeats=repeats, harness_path=harness_path, command="",
+        )
+
+    monkeypatch.setattr(tools_module, "collect_baseline_metrics", fake_collect_baseline_metrics)
+    import minisweagent.run.preprocess_v3.baseline as baseline_module
+
+    monkeypatch.setattr(baseline_module, "capture_full_benchmark_stdout", lambda *a, **k: None)
+
+    agent = PreprocessOrchestratorAgent(
+        model=object(),
+        config=PreprocessOrchestratorConfig(repo=tmp_path),
+    )
+    if translation is not None:
+        agent._collected["translation"] = translation
+
+    tool = _make_tool_collect_baseline(agent)
+    tool(harness_path=str(harness), repeats=1)
+
+    assert captured["skip_correctness_gate"] is expected_skip
+
+
+def test_dispatch_subagent_injects_deterministic_kernel_path(monkeypatch, tmp_path: Path) -> None:
+    """The orchestrator hands the harness subagents the exact worktree-relative
+    kernel path so they never have to guess it from the source tree."""
+    import minisweagent.run.preprocess_v3.tools as tools_module
+
+    # Keep the test focused on injection — no real sandbox copy.
+    monkeypatch.setattr(tools_module, "_ensure_preprocess_subagent_sandbox", lambda agent: (None, {}))
+
+    repo = tmp_path / "repo"
+    (repo / "level3").mkdir(parents=True)
+    kernel = repo / "level3" / "1_MLP.py"
+    kernel.write_text("# kernel\n")
+
+    captured: dict[str, object] = {}
+
+    def fake_dispatcher(*, name, task, model, cwd=None, context=None):
+        captured["context"] = context
+        return {"name": name, "success": True, "output": "HARNESS_PATH: /tmp/harness.py"}
+
+    agent = PreprocessOrchestratorAgent(model=object(), config=PreprocessOrchestratorConfig(repo=repo))
+    agent._extra_template_vars = {"kernel_path": str(kernel), "repo_root": str(repo)}
+
+    tool = _make_tool_dispatch_subagent(agent, fake_dispatcher)
+    tool(name="harness-generator", task="make a harness")
+
+    assert captured["context"]["kernel_relpath"] == "level3/1_MLP.py"
+    assert captured["context"]["kernel_path"] == str(kernel)
+
+
+def _failed_baseline(stderr: str) -> SimpleNamespace:
+    return SimpleNamespace(
+        success=False, median_ms=None, samples_ms=[], stdev_ms=None,
+        repeats=0, command="cmd", raw_outputs=[{"stderr": stderr, "stdout": ""}],
+    )
+
+
+def test_detect_kernel_resolution_failure() -> None:
+    from minisweagent.run.preprocess_v3.baseline import detect_kernel_resolution_failure
+
+    raw = [{"stderr": "Traceback\nFileNotFoundError: [Errno 2] No such file or directory: '/x/k.py'\n", "stdout": ""}]
+    msg = detect_kernel_resolution_failure(raw)
+    assert msg is not None and "/x/k.py" in msg and "FileNotFoundError" in msg
+    assert detect_kernel_resolution_failure([{"stderr": "TIMEOUT after 600s", "stdout": ""}]) is None
+
+
+def test_collect_baseline_fail_closed_after_retry_budget(monkeypatch, tmp_path: Path) -> None:
+    """An empty baseline after the generator retry budget is exhausted terminates
+    the run with a precise error instead of spinning / running on a broken harness."""
+    import minisweagent.run.preprocess_v3.tools as tools_module
+    from minisweagent.run.preprocess_v3.orchestrator import FinishedSuccessfully
+
+    harness = tmp_path / "harness.py"
+    harness.write_text("x")
+    monkeypatch.setattr(
+        tools_module, "collect_baseline_metrics",
+        lambda *a, **k: _failed_baseline("FileNotFoundError: No such file or directory: '/x/k.py'"),
+    )
+
+    agent = PreprocessOrchestratorAgent(model=object(), config=PreprocessOrchestratorConfig(repo=tmp_path))
+    agent._collected["_harness_generator_attempts"] = 3
+    tool = _make_tool_collect_baseline(agent)
+
+    with pytest.raises(FinishedSuccessfully) as exc_info:
+        tool(harness_path=str(harness), repeats=1)
+    assert "/x/k.py" in exc_info.value.payload["errors"][0]
+
+
+def test_collect_baseline_returns_precise_error_within_budget(monkeypatch, tmp_path: Path) -> None:
+    """Before the retry budget is exhausted, an empty baseline returns ok=False
+    with the precise kernel-resolution reason (so the generator can be retried)."""
+    import minisweagent.run.preprocess_v3.tools as tools_module
+
+    harness = tmp_path / "harness.py"
+    harness.write_text("x")
+    monkeypatch.setattr(
+        tools_module, "collect_baseline_metrics",
+        lambda *a, **k: _failed_baseline("FileNotFoundError: No such file or directory: '/x/k.py'"),
+    )
+
+    agent = PreprocessOrchestratorAgent(model=object(), config=PreprocessOrchestratorConfig(repo=tmp_path))
+    agent._collected["_harness_generator_attempts"] = 1
+    tool = _make_tool_collect_baseline(agent)
+
+    res = tool(harness_path=str(harness), repeats=1)
+    assert res["ok"] is False
+    assert "/x/k.py" in res["error"]
+
+
+def test_translate_retargets_preprocess_state_to_opt_repo(monkeypatch, tmp_path: Path) -> None:
+    """After translation, the orchestrator's kernel_path/repo_root point at the
+    per-run _opt_repo (where optimization runs), not the source repo — so the
+    harness sandbox + baseline resolve the translated kernel."""
+    import minisweagent.run.preprocess_v3.tools as tools_module
+    from minisweagent.run.preprocess_v3.translate import TranslationResult
+
+    src_repo = tmp_path / "src"
+    src_repo.mkdir()
+    orig = src_repo / "k.py"
+    orig.write_text("# orig\n")
+    out = tmp_path / "out"
+    out.mkdir()
+    cand_dir = tmp_path / "cand"
+    cand_dir.mkdir()
+    cand_file = cand_dir / "k_flydsl.py"
+    cand_file.write_text("# flydsl\n")
+
+    result = TranslationResult(
+        success=True, target_language="flydsl", translated_kernel_path=cand_file,
+        speedup=None, self_review="", errors=[], elapsed_s=0.0, raw={},
+    )
+    monkeypatch.setattr(tools_module, "translate_to_flydsl", lambda **k: result)
+
+    agent = PreprocessOrchestratorAgent(model=object(), config=PreprocessOrchestratorConfig(repo=src_repo))
+    agent._extra_template_vars = {"kernel_path": str(orig), "repo_root": str(src_repo)}
+
+    tool = _make_tool_translate_to_flydsl(agent)
+    tool(source_path=str(orig), output_dir=str(out))
+
+    opt_repo = (out / "_opt_repo").resolve()
+    assert agent._extra_template_vars["repo_root"] == str(opt_repo)
+    assert agent._extra_template_vars["kernel_path"] == str((opt_repo / "k_flydsl.py").resolve())
+
+
+def test_collect_baseline_defaults_work_dir_to_effective_repo_root(monkeypatch, tmp_path: Path) -> None:
+    """collect_baseline runs the harness with work_dir = the effective repo root
+    (retargeted to _opt_repo after translation) so the kernel is resolvable."""
+    import minisweagent.run.preprocess_v3.baseline as baseline_module
+    import minisweagent.run.preprocess_v3.tools as tools_module
+
+    captured: dict[str, object] = {}
+
+    def fake_collect_baseline_metrics(harness_path, *, repeats, work_dir, gpu_id, skip_correctness_gate=False):
+        captured["work_dir"] = work_dir
+        return SimpleNamespace(
+            success=True, median_ms=1.0, samples_ms=[1.0], stdev_ms=0.0,
+            repeats=repeats, harness_path=harness_path, command="", raw_outputs=[],
+        )
+
+    monkeypatch.setattr(tools_module, "collect_baseline_metrics", fake_collect_baseline_metrics)
+    monkeypatch.setattr(baseline_module, "capture_full_benchmark_stdout", lambda *a, **k: None)
+
+    harness = tmp_path / "h.py"
+    harness.write_text("x")
+    opt_repo = tmp_path / "_opt_repo"
+    opt_repo.mkdir()
+
+    agent = PreprocessOrchestratorAgent(model=object(), config=PreprocessOrchestratorConfig(repo=tmp_path / "src"))
+    agent._extra_template_vars = {"repo_root": str(opt_repo)}
+
+    tool = _make_tool_collect_baseline(agent)
+    res = tool(harness_path=str(harness), repeats=1)
+
+    assert res["ok"] is True
+    assert captured["work_dir"] == opt_repo
+
+
+def test_copy_repo_sandbox_copies_repo_living_under_output_dir(tmp_path: Path) -> None:
+    """When the repo to sandbox is the per-run _opt_repo (which lives UNDER
+    output_dir), its own files must be copied — not ignored by the output-dir
+    recursion guard (which would leave an empty sandbox)."""
+    from minisweagent.run.preprocess_v3.tools import _copy_repo_sandbox
+
+    output_dir = tmp_path / "out"
+    opt_repo = output_dir / "_opt_repo"
+    opt_repo.mkdir(parents=True)
+    (opt_repo / "1_MLP_flydsl.py").write_text("# flydsl\n")
+    (opt_repo / "1_MLP.py").write_text("# ref\n")
+    sandbox = output_dir / "_preprocess_subagent_worktree"
+
+    _copy_repo_sandbox(opt_repo, sandbox, output_dir)
+
+    assert (sandbox / "1_MLP_flydsl.py").is_file()
+    assert (sandbox / "1_MLP.py").is_file()
+
+
+def test_copy_repo_sandbox_still_skips_nested_output_dir(tmp_path: Path) -> None:
+    """The recursion guard still fires when output_dir lives INSIDE the repo:
+    the output tree must not be copied into the sandbox."""
+    from minisweagent.run.preprocess_v3.tools import _copy_repo_sandbox
+
+    repo = tmp_path / "repo"
+    repo.mkdir()
+    (repo / "kernel.py").write_text("# k\n")
+    output_dir = repo / "optimization_logs" / "run1"
+    output_dir.mkdir(parents=True)
+    (output_dir / "log.txt").write_text("noise\n")
+    sandbox = tmp_path / "sandbox"
+
+    _copy_repo_sandbox(repo, sandbox, output_dir)
+
+    assert (sandbox / "kernel.py").is_file()
+    assert not (sandbox / "optimization_logs" / "run1" / "log.txt").exists()
+
+
+def test_verifier_backstop_marks_verified_when_correctness_passes(monkeypatch, tmp_path: Path) -> None:
+    """If the LLM verifier fails to confirm but the harness passes --correctness,
+    the deterministic backstop marks it HARNESS_VERIFIED so the orchestrator
+    proceeds to baseline instead of looping the generator."""
+    import minisweagent.run.preprocess_v3.baseline as baseline_module
+    import minisweagent.run.preprocess_v3.tools as tools_module
+
+    monkeypatch.setattr(tools_module, "_ensure_preprocess_subagent_sandbox", lambda agent: (None, {}))
+    monkeypatch.setattr(
+        baseline_module, "_run_benchmark_once",
+        lambda *a, **k: {"returncode": 0, "stdout": "", "stderr": "", "duration_s": 1.0, "latency_ms": None},
+    )
+
+    def fake_dispatcher(*, name, task, model, cwd=None, context=None):
+        return {"name": name, "success": False, "output": "could not confirm"}
+
+    harness = tmp_path / "harness.py"
+    harness.write_text("x")
+    agent = PreprocessOrchestratorAgent(model=object(), config=PreprocessOrchestratorConfig(repo=tmp_path))
+    agent._collected["harness_path"] = str(harness)
+    agent._extra_template_vars = {"repo_root": str(tmp_path)}
+
+    tool = _make_tool_dispatch_subagent(agent, fake_dispatcher)
+    res = tool(name="harness-verifier", task="verify")
+
+    assert res["success"] is True
+    assert "HARNESS_VERIFIED=true" in res["output"]
+
+
+def test_verifier_backstop_no_false_positive_when_correctness_fails(monkeypatch, tmp_path: Path) -> None:
+    """The backstop must NOT mark a harness verified when --correctness fails."""
+    import minisweagent.run.preprocess_v3.baseline as baseline_module
+    import minisweagent.run.preprocess_v3.tools as tools_module
+
+    monkeypatch.setattr(tools_module, "_ensure_preprocess_subagent_sandbox", lambda agent: (None, {}))
+    monkeypatch.setattr(
+        baseline_module, "_run_benchmark_once",
+        lambda *a, **k: {"returncode": 1, "stdout": "", "stderr": "FileNotFoundError", "duration_s": 1.0, "latency_ms": None},
+    )
+
+    def fake_dispatcher(*, name, task, model, cwd=None, context=None):
+        return {"name": name, "success": False, "output": "nope"}
+
+    harness = tmp_path / "harness.py"
+    harness.write_text("x")
+    agent = PreprocessOrchestratorAgent(model=object(), config=PreprocessOrchestratorConfig(repo=tmp_path))
+    agent._collected["harness_path"] = str(harness)
+    agent._extra_template_vars = {"repo_root": str(tmp_path)}
+
+    tool = _make_tool_dispatch_subagent(agent, fake_dispatcher)
+    res = tool(name="harness-verifier", task="verify")
+
+    assert res["success"] is False
+    assert "HARNESS_VERIFIED=true" not in (res.get("output") or "")