diff --git a/README.md b/README.md index 83e6ef6..611c2ad 100644 --- a/README.md +++ b/README.md @@ -446,6 +446,67 @@ N-tier dispatch with the bundled 3-tier profile, or write your own. Schema reference: `nadirclaw/tier_config/schema.py`. Sample profiles: `nadirclaw/tier_config/profiles/`. +### Trained verifier (NadirRouter/cascade-verifier-v1) + +The default `n2_default` profile escalates via the rule-based +`HeuristicVerifier` shipped in this repo — no extra dependencies, runs +in under 1 ms per call, catches the obvious failure modes (refusals, +truncation, JSON parse failure). For the subtler "looks right but is +factually wrong" tail, NadirClaw v0.19 ships an opt-in trained +cross-encoder verifier. + +This is the frozen DeBERTa-v3-small snapshot used in the +[RouterArena PR #112](https://github.com/RouteWorks/RouterArena/pull/112) +submission (arena_F 0.7358). It is released under MIT as +[`NadirRouter/cascade-verifier-v1`](https://huggingface.co/NadirRouter/cascade-verifier-v1) +on HuggingFace so the RouterArena number is reproducible end-to-end +with the open-source router. + +**Install with the optional extras:** + +```bash +pip install nadirclaw[trained] +``` + +This pulls in `transformers>=4.40` and `torch>=2.0`. Users who do not +want the transformer stack pay nothing — the heuristic remains the +default. + +**Activate the trained verifier:** + +```bash +export NADIRCLAW_TIERS_PROFILE=n2_trained +``` + +The `n2_trained` profile uses the same N=2 cascade ladder as +`n2_default` but routes verifier decisions through the trained +DeBERTa-v3-small cross-encoder. Weights load lazily on first cascade +call (~500 MB checkpoint, ~10 s download into the HF cache; subsequent +runs hit the cache). 
+ +**Direct API:** + +```python +from nadirclaw.trained_verifier import TrainedVerifier + +verifier = TrainedVerifier(threshold=0.80) +result = verifier.score(prompt, cheap_answer) +print(result.score, result.accepted) +``` + +**What is and is not released** + +| | OSS (NadirClaw v0.19) | Pro (Nadir hosted) | +| --- | --- | --- | +| Frozen verifier weights | YES (`cascade-verifier-v1`, MIT) | YES | +| Training pipeline | NO | YES (corpus + judge + curriculum) | +| Adaptive retraining loop | NO | YES | +| Custom-routed quality scoring | NO | YES | + +The frozen snapshot is enough to reproduce the RouterArena result; the +adaptive retraining keeps the production verifier current as new model +families ship. + ## Usage with Gemini Gemini is the default simple model. NadirClaw calls Gemini natively via the Google GenAI SDK for best performance. diff --git a/nadirclaw/__init__.py b/nadirclaw/__init__.py index a50043e..9d54bcb 100644 --- a/nadirclaw/__init__.py +++ b/nadirclaw/__init__.py @@ -1,3 +1,3 @@ """NadirClaw — Open-source LLM router.""" -__version__ = "0.18.0" +__version__ = "0.19.0" diff --git a/nadirclaw/cascade.py b/nadirclaw/cascade.py index c1a07c3..b300b0d 100644 --- a/nadirclaw/cascade.py +++ b/nadirclaw/cascade.py @@ -469,7 +469,27 @@ def __init__( self.tier_callers = dict(tier_callers) self.selector = TierSelector(tier_profile) self.threshold = float(tier_profile.cascade.acceptance_threshold) - self.verifier = verifier or get_heuristic_verifier(threshold=self.threshold) + # Pick verifier: explicit constructor arg wins; otherwise read + # `cascade.verifier` from the profile. "heuristic" (default) + # keeps the zero-dependency rule-based verifier. "trained" + # lazily loads the DeBERTa-v3-small cross-encoder from + # NadirRouter/cascade-verifier-v1 (requires the optional + # `nadirclaw[trained]` extras). 
+ if verifier is not None: + self.verifier = verifier + elif tier_profile.cascade.verifier == "trained": + # Local import so the heuristic-only install path does not + # pull in transformers/torch at module load time. + from nadirclaw.trained_verifier import ( # noqa: PLC0415 + get_trained_verifier, + ) + + self.verifier = get_trained_verifier( + threshold=self.threshold, + model_id=tier_profile.cascade.verifier_model, + ) + else: + self.verifier = get_heuristic_verifier(threshold=self.threshold) self.rule_engine = rule_engine self._consecutive_errors: int = 0 self._kill_switch: bool = False diff --git a/nadirclaw/tier_config/profiles/n2_trained.yaml b/nadirclaw/tier_config/profiles/n2_trained.yaml new file mode 100644 index 0000000..ee6a5ba --- /dev/null +++ b/nadirclaw/tier_config/profiles/n2_trained.yaml @@ -0,0 +1,58 @@ +# N=2 default + trained verifier profile (NadirClaw, MIT). +# +# Same N=2 tier layout as n2_default, but routes verifier decisions +# through the trained DeBERTa-v3-small cross-encoder released as +# NadirRouter/cascade-verifier-v1 on HuggingFace, instead of the +# rule-based HeuristicVerifier. +# +# Use this profile to reproduce the RouterArena PR #112 result +# (arena_F 0.7358) end-to-end with the open-source router. Requires +# the `trained` extras: +# +# pip install nadirclaw[trained] +# +# Activate with: +# +# export NADIRCLAW_TIERS_PROFILE=n2_trained +# +# The heuristic verifier remains the default (n2_default) so users +# who do not want the transformer stack pay nothing for it. + +version: 1 +mode: tiered + +selector: + classifier: wide_deep_asym_v3 + lambda_cost: 1.0 + +cascade: + escalation: adjacent + acceptance_threshold: 0.80 + rules_profile: default + max_escalations: 1 + # Use the trained DeBERTa-v3-small cross-encoder instead of the + # rule-based heuristic. Loaded lazily on first cascade call. + verifier: trained + verifier_model: NadirRouter/cascade-verifier-v1 + +tiers: + # ----- Cheap tier: workhorses for simple/mid prompts. 
----- + - name: cheap + score_min: 0.00 + model_pool: + - gpt-4o-mini + - qwen3-235b-a22b-2507 + - deepseek-v3.2 + - claude-3-haiku-20240307 + max_output_tokens: 2048 + + # ----- Strong tier: reasoning models for the verifier-rejected tail. ----- + - name: strong + score_min: 0.65 + model_pool: + - gpt-5-mini + - deepseek-reasoner + - deepseek-v4-flash + - grok-4-1-fast-reasoning + - claude-sonnet-4 + max_output_tokens: 4096 diff --git a/nadirclaw/tier_config/schema.py b/nadirclaw/tier_config/schema.py index 3cfe73e..d5e5b8e 100644 --- a/nadirclaw/tier_config/schema.py +++ b/nadirclaw/tier_config/schema.py @@ -64,6 +64,15 @@ class CascadeConfig(BaseModel): # Safety cap: never escalate more than this many hops, even in # adjacent mode. None = N-1 (walk the full ladder). max_escalations: Optional[int] = Field(default=None, ge=0) + # Which verifier the cascade should use. `heuristic` (default) uses + # the rule-based HeuristicVerifier shipped in this repo. `trained` + # loads NadirRouter/cascade-verifier-v1 from HuggingFace and + # requires the `trained` extras (pip install nadirclaw[trained]). + verifier: str = "heuristic" + # HuggingFace model id or local path for the trained verifier. + # Only consulted when verifier == "trained". Defaults to the + # released v1 snapshot. + verifier_model: str = "NadirRouter/cascade-verifier-v1" @model_validator(mode="after") def _check_mode(self) -> "CascadeConfig": @@ -72,6 +81,11 @@ def _check_mode(self) -> "CascadeConfig": f"cascade.escalation must be 'adjacent' or 'jump', " f"got {self.escalation!r}. ('learned' is a Pro-only mode.)" ) + if self.verifier not in ("heuristic", "trained"): + raise ValueError( + f"cascade.verifier must be 'heuristic' or 'trained', " + f"got {self.verifier!r}." 
+ ) return self diff --git a/nadirclaw/trained_verifier.py b/nadirclaw/trained_verifier.py new file mode 100644 index 0000000..195e9e1 --- /dev/null +++ b/nadirclaw/trained_verifier.py @@ -0,0 +1,297 @@ +"""Trained DeBERTa-v3-small cross-encoder verifier (MIT-licensed snapshot). + +This is the frozen snapshot used in the Nadir RouterArena submission +(PR #112, arena_F 0.7358). It is the same DeBERTa-v3-small cross-encoder +that powers the Nadir Pro hosted service, released here under MIT so +that the RouterArena result is reproducible end-to-end with the +open-source NadirClaw router. + +What's released +--------------- +* Frozen INT8 / FP32 weights at the HuggingFace model id + ``NadirRouter/cascade-verifier-v1``. +* Tokenizer + config to load via the standard + ``transformers.AutoModelForSequenceClassification`` pipeline. + +What's NOT released +------------------- +* The training pipeline (corpus builder, judge prompts, the curated + RouterBench-derived triples). +* The adaptive retraining loop that keeps the production verifier + current. + +These remain proprietary to Nadir Pro. The frozen snapshot is enough +to reproduce the RouterArena number, but not enough to keep the +verifier fresh as new model families ship. + +Interface +--------- +``TrainedVerifier`` exposes the same shape as ``HeuristicVerifier``: + + >>> v = TrainedVerifier() # downloads weights on first use + >>> result = v.score(prompt, cheap_answer) + >>> result.score, result.accepted # float in [0, 1], bool + +The ``reference_answer`` and ``expect_json`` arguments are accepted for +parity with ``HeuristicVerifier`` but are currently ignored — the +trained model scores ``(prompt, cheap_answer)`` only. + +Dependencies +------------ +This module imports ``torch`` and ``transformers`` lazily. Install with:: + + pip install nadirclaw[trained] + +The heuristic verifier remains the default for the rule-engine cascade, +so users who do not want the transformer stack pay nothing for it. 
+"""
+from __future__ import annotations
+
+import logging
+import os
+from dataclasses import dataclass, field
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
+# Default HuggingFace model id for the released snapshot. Override with
+# the ``NADIRCLAW_TRAINED_VERIFIER_MODEL`` env var or the ``model_id``
+# constructor argument if you mirror the weights elsewhere.
+DEFAULT_MODEL_ID = "NadirRouter/cascade-verifier-v1"
+
+# Default acceptance threshold. Calibrated against the same held-out
+# RouterBench test split as the HeuristicVerifier default. The trained
+# verifier is sharper, so the same 0.80 cutoff lands at higher precision
+# and lower false-accept rate. Override via ``threshold=`` constructor arg.
+DEFAULT_TRAINED_THRESHOLD = 0.80
+
+# Maximum sequence length we feed to the cross-encoder. DeBERTa-v3-small
+# is positional-embedding capped at 512. This value is passed to the
+# tokenizer as ``max_length`` with ``truncation=True``, so it bounds the
+# whole encoded (prompt, answer) pair; the [CLS] + separator tokens are
+# expected to count toward it rather than being added on top
+# (NOTE(review): per the HF tokenizer docs — confirm).
+_MAX_SEQ_LEN = 512
+
+
+@dataclass
+class TrainedScore:
+    """Structured score from the trained verifier.
+
+    Mirrors the public shape of ``HeuristicScore`` so downstream code
+    (cascade, analytics) does not need to special-case the verifier
+    type.
+    """
+
+    # Verifier score; ``TrainedVerifier.score`` clamps it to [0.0, 1.0].
+    score: float
+    # Verdict; for scored answers this is ``score >= threshold``.
+    accepted: bool
+    # Cutoff the score was compared against.
+    threshold: float
+    # Short tags explaining the verdict, e.g. ``"empty_response"``.
+    reasons: list[str] = field(default_factory=list)
+
+    def to_dict(self) -> dict:
+        """Return the fields as a plain dict, tagged ``"verifier": "trained"``."""
+        return {
+            "score": self.score,
+            "accepted": self.accepted,
+            "threshold": self.threshold,
+            "reasons": list(self.reasons),
+            "verifier": "trained",
+        }
+
+
+class TrainedVerifier:
+    """Cross-encoder verifier loaded from a HuggingFace checkpoint.
+
+    Parameters
+    ----------
+    model_id:
+        HuggingFace model id or local path. When omitted, falls back to
+        the ``NADIRCLAW_TRAINED_VERIFIER_MODEL`` env var and then to
+        ``NadirRouter/cascade-verifier-v1``.
+    threshold:
+        Score cutoff for acceptance. Defaults to 0.80 (same as the
+        heuristic, so cascade behaviour is consistent when you swap).
+    device:
+        ``"cpu"`` (default), ``"cuda"``, or ``"mps"``. ``"auto"`` picks
+        the best available device (cuda, then mps, then cpu).
+    cache_dir:
+        Optional local cache directory passed through to
+        ``transformers.from_pretrained``.
+
+    Notes
+    -----
+    Construction is lazy: the model is loaded on first use (a
+    ``score()`` call with a non-empty answer, or ``is_available()``)
+    so that import time stays cheap. Pass ``preload=True`` to force
+    the load at construction time (useful in test setup and warm-up).
+    """
+
+    def __init__(
+        self,
+        model_id: Optional[str] = None,
+        threshold: float = DEFAULT_TRAINED_THRESHOLD,
+        device: str = "cpu",
+        cache_dir: Optional[str] = None,
+        preload: bool = False,
+    ):
+        self.model_id = model_id or os.environ.get(
+            "NADIRCLAW_TRAINED_VERIFIER_MODEL", DEFAULT_MODEL_ID
+        )
+        self.threshold = float(threshold)
+        self.device = device
+        self.cache_dir = cache_dir
+        self._model = None
+        self._tokenizer = None
+        self._resolved_device = None
+        if preload:
+            self._ensure_loaded()
+
+    # ------------------------------------------------------------------
+    # Loading
+
+    def _resolve_device(self) -> str:
+        """Return the device string to run on.
+
+        Any value other than ``"auto"`` is returned unchanged. ``"auto"``
+        probes cuda, then mps, and falls back to ``"cpu"``.
+        """
+        if self.device != "auto":
+            return self.device
+        try:  # pragma: no cover - hardware-dependent
+            import torch
+
+            if torch.cuda.is_available():
+                return "cuda"
+            if (
+                getattr(torch.backends, "mps", None) is not None
+                and torch.backends.mps.is_available()
+            ):
+                return "mps"
+        except Exception:  # noqa: BLE001
+            # Best-effort probe: never fail the load over device detection,
+            # but leave a trace instead of swallowing the error silently.
+            logger.debug(
+                "TrainedVerifier device probe failed; using cpu", exc_info=True
+            )
+        return "cpu"
+
+    def _ensure_loaded(self) -> None:
+        """Load the tokenizer and model once; no-op when both are set.
+
+        Raises
+        ------
+        ImportError
+            If ``torch`` or ``transformers`` cannot be imported; the
+            message carries the ``nadirclaw[trained]`` install hint.
+        """
+        if self._model is not None and self._tokenizer is not None:
+            return
+        try:
+            import torch  # noqa: F401 (used implicitly by transformers)
+            from transformers import (
+                AutoModelForSequenceClassification,
+                AutoTokenizer,
+            )
+        except ImportError as exc:  # pragma: no cover
+            raise ImportError(
+                "TrainedVerifier requires the 'trained' extras. "
+                "Install with: pip install nadirclaw[trained]"
+            ) from exc
+
+        logger.info(
+            "Loading TrainedVerifier weights from %s (cache_dir=%s)",
+            self.model_id,
+            self.cache_dir,
+        )
+        kwargs = {}
+        if self.cache_dir is not None:
+            kwargs["cache_dir"] = self.cache_dir
+        self._tokenizer = AutoTokenizer.from_pretrained(self.model_id, **kwargs)
+        self._model = AutoModelForSequenceClassification.from_pretrained(
+            self.model_id, **kwargs
+        )
+        self._resolved_device = self._resolve_device()
+        self._model.to(self._resolved_device)
+        self._model.eval()
+
+    def is_available(self) -> bool:
+        """Return True if the model can be loaded.
+
+        Performs the lazy load if it hasn't happened yet. Returns False
+        on any ImportError or download failure so callers can fall back
+        to the heuristic without crashing.
+        """
+        try:
+            self._ensure_loaded()
+            return True
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("TrainedVerifier not available: %s", exc)
+            return False
+
+    # ------------------------------------------------------------------
+    # Scoring
+
+    def score(
+        self,
+        prompt: str,
+        cheap_answer: str,
+        reference_answer: Optional[str] = None,
+        expect_json: bool = False,
+    ) -> TrainedScore:
+        """Score how acceptable ``cheap_answer`` is for ``prompt``.
+
+        ``reference_answer`` and ``expect_json`` are accepted for
+        interface parity with ``HeuristicVerifier`` and are currently
+        ignored by the trained model. The cross-encoder was trained
+        on ``(prompt, cheap_answer)`` pairs only.
+
+        Returns
+        -------
+        TrainedScore
+            Score clamped to [0.0, 1.0], with ``accepted`` set to
+            ``score >= threshold``. An empty or whitespace-only answer
+            yields 0.0 / rejected with reason ``"empty_response"``.
+
+        Raises
+        ------
+        ImportError
+            From the lazy load, when the ``trained`` extras are missing
+            and the answer is non-empty.
+        """
+        # Empty response gets a deterministic 0.0 without bothering the
+        # model. Checked before the lazy load so the obvious-reject case
+        # never triggers the checkpoint download or a forward pass, and
+        # works even when the optional extras are not installed.
+        cheap = (cheap_answer or "").strip()
+        if not cheap:
+            return TrainedScore(
+                score=0.0,
+                accepted=False,
+                threshold=self.threshold,
+                reasons=["empty_response"],
+            )
+
+        # Load before importing torch so a missing install surfaces as the
+        # ImportError with the install hint rather than a bare one.
+        self._ensure_loaded()
+
+        import torch
+
+        enc = self._tokenizer(
+            prompt or "",
+            cheap,
+            truncation=True,
+            max_length=_MAX_SEQ_LEN,
+            padding=False,
+            return_tensors="pt",
+        )
+        enc = {k: v.to(self._resolved_device) for k, v in enc.items()}
+        with torch.no_grad():
+            logits = self._model(**enc).logits
+
+        # Two-class head: probability of the "accept" class via softmax.
+        # Single-class head: sigmoid of the logit.
+        if logits.shape[-1] == 1:
+            score = torch.sigmoid(logits.squeeze(-1)).item()
+        else:
+            probs = torch.softmax(logits, dim=-1)
+            score = probs[..., 1].item()
+
+        score = max(0.0, min(1.0, float(score)))
+        return TrainedScore(
+            score=score,
+            accepted=score >= self.threshold,
+            threshold=self.threshold,
+            reasons=[],
+        )
+
+
+# ----------------------------------------------------------------------
+# Convenience accessor mirroring ``get_heuristic_verifier``.
+
+_singleton: Optional[TrainedVerifier] = None
+
+
+def get_trained_verifier(
+    threshold: float = DEFAULT_TRAINED_THRESHOLD,
+    model_id: Optional[str] = None,
+) -> TrainedVerifier:
+    """Return a process-wide singleton TrainedVerifier instance.
+
+    Threshold and model_id are pinned by the first caller; later calls
+    that pass different values get a fresh instance (not cached).
+ """ + global _singleton + if ( + _singleton is not None + and _singleton.threshold == threshold + and (model_id is None or _singleton.model_id == model_id) + ): + return _singleton + instance = TrainedVerifier(model_id=model_id, threshold=threshold) + if _singleton is None: + _singleton = instance + return instance diff --git a/pyproject.toml b/pyproject.toml index cde9b6a..a940217 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,6 +76,14 @@ telemetry = [ "opentelemetry-exporter-otlp-proto-grpc>=1.20.0", "opentelemetry-instrumentation-fastapi>=0.41b0", ] +trained = [ + # Optional dependency for the TrainedVerifier (DeBERTa-v3-small + # cross-encoder loaded from NadirRouter/cascade-verifier-v1). + # The heuristic verifier remains the default for the cascade, + # so users who do not want the transformer stack pay nothing. + "transformers>=4.40", + "torch>=2.0", +] [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/tests/test_trained_verifier.py b/tests/test_trained_verifier.py new file mode 100644 index 0000000..cada2b3 --- /dev/null +++ b/tests/test_trained_verifier.py @@ -0,0 +1,223 @@ +"""Tests for the TrainedVerifier (RouterArena snapshot bridge). + +We do not mock ``torch`` or ``transformers`` at the ``sys.modules`` level +because PyTorch's TORCH_LIBRARY registration is one-shot per process and +swapping the real torch out (even temporarily, even via monkeypatch +teardown) can corrupt later tests in the same session. + +Instead we use real torch (available in the dev venv via the +``trained`` extras) for the few tests that exercise a forward pass — +the verifier itself takes care of running on CPU with random init. +The "missing extras" path is tested by a separate fast unit test that +simulates the ImportError without touching sys.modules. 
+"""
+from __future__ import annotations
+
+import pytest
+
+
+# Skip the whole module if torch / transformers are missing; the design
+# contract is that without `pip install nadirclaw[trained]` the verifier
+# is unavailable. Confirmed by ``test_unavailable_without_extras``.
+# NOTE(review): no test with that name is defined in this module —
+# confirm where it lives or add it.
+torch = pytest.importorskip("torch")
+pytest.importorskip("transformers")
+
+
+# ---------------------------------------------------------------------------
+# Helpers — build a TrainedVerifier around a tiny HF model constructed
+# in-process from a config (no checkpoint download; the tokenizer is
+# still fetched via ``from_pretrained``), then exercise score().
+# ---------------------------------------------------------------------------
+
+def _tiny_verifier(threshold: float = 0.8):
+    """Construct a TrainedVerifier whose loaded model is a tiny
+    randomly-initialised DeBERTa-v2 head. No model checkpoint is
+    loaded — we just want a model object that behaves like the real
+    one for the interface-level tests. The tokenizer, however, comes
+    from ``AutoTokenizer.from_pretrained("microsoft/deberta-v3-small")``,
+    which needs network access unless it is already in the local HF
+    cache.
+
+    Returns a TrainedVerifier with ``_model``, ``_tokenizer``, and
+    ``_resolved_device`` already populated.
+    """
+    from transformers import (
+        AutoTokenizer,
+        DebertaV2Config,
+        DebertaV2ForSequenceClassification,
+    )
+
+    from nadirclaw.trained_verifier import TrainedVerifier
+
+    # Tiny config — actual production model is DeBERTa-v3-small (6 layers,
+    # 768 hidden). For tests we shrink the encoder to keep the random init
+    # fast. The vocabulary is kept at full size to match the tokenizer, so
+    # the 128100 x 64 embedding table (roughly 33 MB in fp32) dominates
+    # the memory footprint.
+    config = DebertaV2Config(
+        vocab_size=128100,
+        hidden_size=64,
+        num_hidden_layers=1,
+        num_attention_heads=2,
+        intermediate_size=128,
+        max_position_embeddings=128,
+        pad_token_id=0,
+        num_labels=2,
+        relative_attention=True,
+        position_biased_input=False,
+    )
+    model = DebertaV2ForSequenceClassification(config).eval()
+    # Reuse the production tokenizer config (the spm.model is shipped
+    # in the released HF repo; for the test we use microsoft's base
+    # tokenizer which has the same vocab).
+    tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-small")
+
+    v = TrainedVerifier(threshold=threshold, device="cpu")
+    v._model = model
+    v._tokenizer = tokenizer
+    v._resolved_device = "cpu"
+    return v
+
+
+# ---------------------------------------------------------------------------
+# Interface parity with HeuristicVerifier
+# ---------------------------------------------------------------------------
+
+@pytest.mark.skipif(
+    "NADIRCLAW_RUN_SLOW_TESTS" not in __import__("os").environ,
+    reason=(
+        "Requires real DeBERTa tokenizer download (~10 MB). "
+        "Set NADIRCLAW_RUN_SLOW_TESTS=1 to enable."
+    ),
+)
+def test_trained_verifier_score_returns_struct():
+    """A real forward pass on a tiny random init returns a valid
+    TrainedScore with score in [0, 1]. Gated behind an env var because
+    it has to fetch the DeBERTa tokenizer (a network download unless
+    the tokenizer is already in the local HF cache).
+    """
+    v = _tiny_verifier(threshold=0.8)
+    out = v.score("What is 2+2?", "4")
+
+    assert 0.0 <= out.score <= 1.0
+    assert out.threshold == 0.8
+    assert isinstance(out.accepted, bool)
+    d = out.to_dict()
+    assert d["verifier"] == "trained"
+    assert {"score", "accepted", "threshold", "reasons"} <= d.keys()
+
+
+def test_trained_verifier_empty_response_short_circuits():
+    """Empty cheap_answer returns 0.0 without invoking the model — we
+    test this without needing a real model load.
+    """
+    from nadirclaw.trained_verifier import TrainedVerifier
+
+    v = TrainedVerifier(threshold=0.5, device="cpu")
+    # Mark loaded with sentinels so .score() doesn't try to download.
+    # The empty short-circuit returns before either is touched.
+    v._tokenizer = object()
+    v._model = object()
+    v._resolved_device = "cpu"
+
+    out = v.score("anything", "")
+    assert out.score == 0.0
+    assert out.accepted is False
+    assert "empty_response" in out.reasons
+
+
+def test_trained_verifier_interface_matches_heuristic():
+    """The cascade calls verifier.score(prompt, cheap_answer, expect_json=...).
+ + Both verifiers must accept the same kwargs and return objects that + expose .score / .accepted / .threshold / .to_dict() / .reasons. We + use the empty-string short-circuit so this test runs without a + forward pass. + """ + from nadirclaw.heuristic_verifier import HeuristicVerifier + from nadirclaw.trained_verifier import TrainedVerifier + + h = HeuristicVerifier(threshold=0.8) + t = TrainedVerifier(threshold=0.8, device="cpu") + t._tokenizer = object() + t._model = object() + t._resolved_device = "cpu" + + h_out = h.score(prompt="anything", cheap_answer="", expect_json=False) + t_out = t.score(prompt="anything", cheap_answer="", expect_json=False) + + for out in (h_out, t_out): + assert hasattr(out, "score") + assert hasattr(out, "accepted") + assert hasattr(out, "threshold") + assert hasattr(out, "reasons") + d = out.to_dict() + assert {"score", "accepted", "threshold", "reasons", "verifier"} <= d.keys() + + +def test_trained_verifier_get_singleton_caches(): + """The module-level singleton accessor should cache same-threshold calls + and return fresh instances for mismatched thresholds. Construction + only — no .score() call, no model load. 
+ """ + import nadirclaw.trained_verifier as tv + + tv._singleton = None + a = tv.get_trained_verifier(threshold=0.8) + b = tv.get_trained_verifier(threshold=0.8) + assert a is b + + c = tv.get_trained_verifier(threshold=0.5) + assert c is not a + + tv._singleton = None # cleanup + + +def test_trained_verifier_default_model_id(): + """The released v1 snapshot id should be the constructor default.""" + from nadirclaw.trained_verifier import DEFAULT_MODEL_ID, TrainedVerifier + + assert DEFAULT_MODEL_ID == "NadirRouter/cascade-verifier-v1" + v = TrainedVerifier(threshold=0.8, device="cpu") + assert v.model_id == DEFAULT_MODEL_ID + + +def test_trained_verifier_env_override(monkeypatch): + """NADIRCLAW_TRAINED_VERIFIER_MODEL overrides the default model id.""" + from nadirclaw.trained_verifier import TrainedVerifier + + monkeypatch.setenv( + "NADIRCLAW_TRAINED_VERIFIER_MODEL", "local/path/to/weights" + ) + v = TrainedVerifier(threshold=0.8, device="cpu") + assert v.model_id == "local/path/to/weights" + + +# --------------------------------------------------------------------------- +# Profile wiring +# --------------------------------------------------------------------------- + +def test_n2_trained_profile_loads(): + """The n2_trained YAML must parse and select the trained verifier.""" + from nadirclaw.tier_config.loader import load_profile + + profile = load_profile("n2_trained") + assert profile.profile_name == "n2_trained" + assert profile.num_tiers == 2 + assert profile.cascade.verifier == "trained" + assert profile.cascade.verifier_model == "NadirRouter/cascade-verifier-v1" + assert profile.cascade.acceptance_threshold == 0.80 + + +def test_n2_default_profile_still_uses_heuristic(): + """Backward-compat: the existing n2_default profile must keep the + heuristic verifier as its default (so users without the trained + extras keep working). 
+ """ + from nadirclaw.tier_config.loader import load_profile + + profile = load_profile("n2_default") + assert profile.cascade.verifier == "heuristic" + + +def test_cascade_config_rejects_unknown_verifier(): + """Schema must reject typos to avoid silently falling back.""" + from pydantic import ValidationError + + from nadirclaw.tier_config.schema import CascadeConfig + + with pytest.raises(ValidationError): + CascadeConfig(verifier="trianed") # typo