Bartok9 · Bartok9 · May 30, 2026 · May 30, 2026 · cursor · May 30, 2026
diff --git a/agent/credential_pool.py b/agent/credential_pool.py
@@ -1403,14 +1403,37 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
 
     # Resolve API keys using the canonical precedence: os.environ first, then
     # ~/.hermes/.env (see hermes_cli.config.get_env_value). A live shell export
-    # must win over a stale .env value so an operator can override a persisted
-    # key for the current process without editing the file. This matches the
-    # auth-side resolver (_resolve_api_key_provider_secret) so the pool and the
-    # auth path never disagree about which key is active.
+    # wins over a .env value so an operator can override a persisted key for
+    # the current process without editing the file. This matches the auth-side
+    # resolver (_resolve_api_key_provider_secret) so the pool and the auth
+    # path never disagree about which key is active.
     def _get_env_value_stripped(key: str) -> str:
         val = get_env_value(key) or ""
         return val.strip()
 
+    # OpenRouter-only resolver: ~/.hermes/.env wins over os.environ.
+    #
+    # Regression #18254 was specific to OpenRouter key rotation: after the
+    # user rotated their OpenRouter key and wrote the fresh value into
+    # ~/.hermes/.env, a *stale* OPENROUTER_API_KEY still exported in the
+    # parent shell (or inherited into a long-lived process's os.environ)
+    # shadowed it. Seeding the pool from the stale os.environ value silently
+    # persisted it into auth.json and produced unrecoverable 401s. For this
+    # one provider the freshly-edited .env file is authoritative.
+    #
+    # When ~/.hermes/.env does NOT define the key (Docker / K8s / systemd
+    # deployments that inject the key at runtime), fall back to os.environ so
+    # production runtime-injected keys still seed the pool.
+    def _get_dotenv_first_stripped(key: str) -> str:
+        try:
+            from hermes_cli.config import load_env as _load_env
+            dotenv_val = (_load_env() or {}).get(key)
+        except Exception:
+            dotenv_val = None
+        if dotenv_val is not None and str(dotenv_val).strip():
+            return str(dotenv_val).strip()
+        return (os.environ.get(key) or "").strip()
+
     # Honour user suppression — `hermes auth remove <provider> <N>` for an
     # env-seeded credential marks the env:<VAR> source as suppressed so it
     # won't be re-seeded from the user's shell environment or ~/.hermes/.env.
@@ -1422,8 +1445,9 @@ def _get_env_value_stripped(key: str) -> str:
         def _is_source_suppressed(_p, _s):  # type: ignore[misc]
             return False
     if provider == "openrouter":
-        # os.environ wins, then ~/.hermes/.env (see _get_env_value_stripped).
-        token = _get_env_value_stripped("OPENROUTER_API_KEY")
+        # ~/.hermes/.env wins over os.environ for OpenRouter (regression
+        # #18254 — see _get_dotenv_first_stripped).
+        token = _get_dotenv_first_stripped("OPENROUTER_API_KEY")
         if token:
             source = "env:OPENROUTER_API_KEY"
             if _is_source_suppressed(provider, source):

diff --git a/hermes_cli/goals.py b/hermes_cli/goals.py
@@ -163,7 +163,16 @@ def _get_session_db() -> Optional[Any]:
     if cached is not None:
         return cached
     try:
-        db = SessionDB()
+        # Resolve the DB path from the *live* HERMES_HOME rather than relying
+        # on SessionDB's import-time DEFAULT_DB_PATH. hermes_state computes
+        # DEFAULT_DB_PATH = get_hermes_home() / "state.db" at module import,
+        # so a bare SessionDB() is permanently pinned to whatever HERMES_HOME
+        # was set when hermes_state first imported. That made the GoalManager
+        # ignore profile/HERMES_HOME switches at runtime and (in tests) leak
+        # goal state across cases that point HERMES_HOME at fresh temp dirs.
+        # Passing the path explicitly keeps the per-home cache correct.
+        from pathlib import Path as _Path
+        db = SessionDB(db_path=_Path(home) / "state.db")
     except Exception as exc:  # pragma: no cover
         logger.debug("GoalManager: SessionDB() raised (%s)", exc)
         return None

diff --git a/plugins/platforms/google_chat/adapter.py b/plugins/platforms/google_chat/adapter.py
@@ -2856,18 +2856,26 @@ def _validate_config(config: PlatformConfig) -> bool:
 
 
 def _check_for_registry() -> bool:
-    """``check_fn`` for the platform registry pass — stricter than the
-    deps-only ``check_google_chat_requirements``.
+    """``check_fn`` for the platform registry pass.
 
     The registry pass at ``gateway/config.py:_apply_env_overrides`` adds
     the platform to ``cfg.platforms`` whenever ``check_fn`` returns True.
-    For backward compat with the pre-plugin behavior, we ALSO require
-    the minimum Pub/Sub env vars so an unconfigured user doesn't
-    accidentally see ``google_chat`` enabled. This matches the legacy
-    ``if gc_project and gc_subscription`` gate.
+    Enablement is driven purely by the minimum Pub/Sub env vars so an
+    unconfigured user doesn't accidentally see ``google_chat`` enabled.
+    This matches the legacy ``if gc_project and gc_subscription`` gate.
+
+    Note: we intentionally do NOT gate on ``check_google_chat_requirements()``
+    (the optional ``google-cloud-pubsub`` SDK) here. Enablement reflects
+    *configuration intent* — whether the user pointed Hermes at a Pub/Sub
+    project + subscription. The missing-dependency case is surfaced with a
+    clear, actionable error at adapter construction/connect time (see
+    ``GoogleChatAdapter.__init__`` / ``connect``), not by silently dropping a
+    configured platform from ``cfg.platforms``. Gating enablement on an
+    import-time module flag also made the result depend on plugin-loader
+    import ordering (the loader imports the adapter under a separate module
+    name from the one tests patch), producing flaky enablement under
+    parallel test execution.
     """
-    if not check_google_chat_requirements():
-        return False
     project = (
         os.getenv("GOOGLE_CHAT_PROJECT_ID")
         or os.getenv("GOOGLE_CLOUD_PROJECT")

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -427,6 +427,35 @@ def _reset_module_state():
     except Exception:
         pass
 
+    # --- agent.i18n — per-process catalog + language caches ---
+    # i18n._catalog_cache memoizes the flattened locale YAML per language for
+    # the process lifetime. Tests that point i18n._locales_dir at a temp
+    # directory (e.g. test_t_missing_key_in_non_english_falls_back_to_english)
+    # populate the cache with a *fake* catalog. monkeypatch restores
+    # _locales_dir afterward, but the stale fake catalog stays cached — so a
+    # later test's t("gateway.draining", lang="en") returns the raw key
+    # instead of the translated string. reset_language_cache() forces the
+    # real catalogs to reload from the on-disk locales/ dir.
+    try:
+        from agent import i18n as _i18n_mod
+        _i18n_mod.reset_language_cache()
+    except Exception:
+        pass
+
+    # --- tools.skill_provenance — write-origin ContextVar ---
+    # run_agent.process_message() binds this to "assistant_tool" (the
+    # foreground default for a normal AIAgent) on whatever thread runs a
+    # turn, and never resets it. Any test that exercises a turn therefore
+    # leaves the main context's write origin set to "assistant_tool", which
+    # then leaks into copy_context() snapshots taken by later tests (e.g.
+    # test_default_origin_is_foreground). Reset to the fresh-process default
+    # so the ContextVar's own default("foreground") governs untouched tests.
+    try:
+        from tools import skill_provenance as _prov_mod
+        _prov_mod._write_origin.set("foreground")
+    except Exception:
+        pass
+
     # --- tools.file_tools — per-task read history + file-ops cache ---
     # _read_tracker accumulates per-task_id read history for loop detection,
     # capped by _READ_HISTORY_CAP. If entries from a prior test persist, the

diff --git a/tests/gateway/test_agent_cache.py b/tests/gateway/test_agent_cache.py
@@ -967,9 +967,24 @@ def test_concurrent_inserts_settle_at_cap(self, monkeypatch):
         N_THREADS = 8
         PER_THREAD = 20  # 8 * 20 = 160 inserts into a 16-slot cache
 
+        # Pre-build the real AIAgent instances BEFORE the timed/joined
+        # section. The behavior under test is concurrent cache insertion +
+        # _enforce_agent_cache_cap() under the shared lock, not agent
+        # construction. Each AIAgent() costs ~0.5s of GIL-bound I/O, so
+        # building 160 of them inside worker threads pushed the per-thread
+        # join window past its 30s timeout on shared CI runners (the cause
+        # of the spurious "possible deadlock?" timeout). Constructing them
+        # up front keeps the concurrency stress identical while removing the
+        # unrelated construction cost from the join window.
+        prebuilt = {
+            (tid, j): self._real_agent()
+            for tid in range(N_THREADS)
+            for j in range(PER_THREAD)
+        }
+
         def worker(tid: int):
             for j in range(PER_THREAD):
-                a = self._real_agent()
+                a = prebuilt[(tid, j)]
                 key = f"t{tid}-s{j}"
                 with runner._agent_cache_lock:
                     runner._agent_cache[key] = (a, "sig")