From bde50d08322b606d6e65e448a28e42827cdf554f Mon Sep 17 00:00:00 2001 From: Will Drach Date: Fri, 5 Jun 2026 07:11:25 -0600 Subject: [PATCH 01/25] feat(api-rs): serve tools + gerard overlay to agent sandboxes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit api-rs sandboxes had no tools and no overlay. Give api-rs-spawned agents the same base + overlay tools and overlay system-prompt the chart already wires for the api-rs pod, using upstream's CLI-shim tool model rather than a sidecar. Upstream direction: tools are shell CLI shims, not an HTTP registry. The agent image's install-tool-shims (services/sandbox/install_tool_shims.py) scans TOOL_DIRS at entrypoint and `uvx`-installs each pyproject [project.scripts] as a CLI; the SYSTEM_PROMPT points agents at those CLIs and `centaur-tools list`. The old `call <tool>` HTTP registry is deprecated to control-plane-only. Tool secrets are already handled upstream: codex_app_server_env_template pushes the tool placeholder creds onto the agent env, iron-control grants the per-sandbox principal the real secrets, and Postgres rides proxied `*_DSN` env from apply_proxy_env. So the agent needs only the tool SOURCES at the right paths — no sidecar, no HMAC sandbox token, no loopback tool server. - tools.rs (replaces tool_server.rs): a `tools-bootstrap` init container copies /app/tools out of the shared centaur-api image into an emptyDir mounted at /app/tools in the agent, and an `overlay-bootstrap` init container copies the org overlay tree into overlay-root mounted at overlay.mountPath (the same path the api-rs Deployment uses) and stages the overlay's SYSTEM_PROMPT.md as $HOME/AGENTS_OVERLAY.md, which the sandbox entrypoint appends to the base prompt. TOOL_DIRS is set on the agent env to /app/tools (or /app/tools:<overlay.mountPath>/tools with the overlay) — identical to the value the api-rs pod computes for its own tool discovery, set deterministically in the spec builder rather than via passthrough env. 
- lib.rs: build_agent_sandbox layers the tools/overlay env over spec.env, mounts the bootstrapped sources read-only into the agent, and appends the tools-bootstrap + overlay-bootstrap init containers and their volumes. No sidecar container, no token minting. - args.rs: a minimal ToolsArgs (source image/pull-policy, reusing the KUBERNETES_TOOL_SERVER_IMAGE* env the chart sets from the shared api image) and OverlayArgs (image/pull-policy/source-path/mount-path) wired into AgentSandboxConfig. Explicit clap arg ids avoid id collisions with the other flattened arg structs. - chart apirs.yaml: render the tools source image (api.image.*, gated on toolServer.enabled) and overlay (overlay.*) onto the api-rs env, replacing the KUBERNETES_TOOL_SERVER_* sidecar block. Gone vs the sidecar port: tool_server.rs, the sbx1 HMAC token minting and its SANDBOX_SIGNING_KEY requirement, CENTAUR_TOOLS_URL, the sidecar pg-DSN/proxy-env collection, and the hmac/base64/sha2 dependency additions (nothing else in the agent-k8s crate uses them). Warm-pool sandboxes route through the same build_agent_sandbox path, so they get the tools/overlay init containers and volumes for free. Co-Authored-By: Claude Opus 4.8 (1M context) --- contrib/chart/templates/apirs.yaml | 22 ++ .../crates/centaur-api-server/src/args.rs | 112 ++++++- .../centaur-sandbox-agent-k8s/src/lib.rs | 75 ++++- .../centaur-sandbox-agent-k8s/src/tools.rs | 308 ++++++++++++++++++ 4 files changed, 513 insertions(+), 4 deletions(-) create mode 100644 services/api-rs/crates/centaur-sandbox-agent-k8s/src/tools.rs diff --git a/contrib/chart/templates/apirs.yaml b/contrib/chart/templates/apirs.yaml index 71acb8a8..99fd6686 100644 --- a/contrib/chart/templates/apirs.yaml +++ b/contrib/chart/templates/apirs.yaml @@ -205,6 +205,28 @@ spec: name: {{ include "centaur.secretEnvName" . 
}} key: {{ printf "%sIRON_CONTROL_INITIAL_API_KEY" .Values.secretManager.envPrefix }} {{- end }} +{{- if .Values.toolServer.enabled }} + # Base tools source image (the shared api image). A tools-bootstrap + # init container copies its /app/tools into each sandbox so the agent + # installs the same tool CLI shims api-rs discovers, at the same path. + - name: KUBERNETES_TOOL_SERVER_IMAGE + value: {{ printf "%s:%s" .Values.api.image.repository .Values.api.image.tag | quote }} + - name: KUBERNETES_TOOL_SERVER_IMAGE_PULL_POLICY + value: {{ .Values.api.image.pullPolicy | quote }} +{{- end }} +{{- if .Values.overlay.image.repository }} + # Org overlay (tools/workflows/skills/system-prompt) mounted into + # api-rs sandboxes at overlay.mountPath via an overlay-bootstrap init + # container — the same path api-rs's own TOOL_DIRS points at. + - name: CENTAUR_OVERLAY_IMAGE + value: {{ printf "%s:%s" .Values.overlay.image.repository .Values.overlay.image.tag | quote }} + - name: CENTAUR_OVERLAY_IMAGE_PULL_POLICY + value: {{ .Values.overlay.image.pullPolicy | quote }} + - name: CENTAUR_OVERLAY_IMAGE_SOURCE_PATH + value: {{ .Values.overlay.image.sourcePath | quote }} + - name: CENTAUR_OVERLAY_MOUNT_PATH + value: {{ .Values.overlay.mountPath | quote }} +{{- end }} {{- range $name, $value := .Values.apiRs.extraEnv }} - name: {{ $name }} value: {{ $value | quote }} diff --git a/services/api-rs/crates/centaur-api-server/src/args.rs b/services/api-rs/crates/centaur-api-server/src/args.rs index bd32d4c8..8ff6c44c 100644 --- a/services/api-rs/crates/centaur-api-server/src/args.rs +++ b/services/api-rs/crates/centaur-api-server/src/args.rs @@ -15,7 +15,8 @@ use centaur_iron_proxy::{ ProxyFragment, SourceKind, SourcePolicy, harness_auth_fragment, infra_fragment, }; use centaur_sandbox_agent_k8s::{ - AgentSandboxBackend, AgentSandboxConfig, IronControlSettings, IronProxyConfig, + AgentSandboxBackend, AgentSandboxConfig, IronControlSettings, IronProxyConfig, OverlayConfig, + ToolsConfig, }; 
use centaur_sandbox_core::{Mount, MountKind}; use centaur_sandbox_local::LocalSandboxBackend; @@ -196,6 +197,10 @@ struct SandboxArgs { iron_proxy: IronProxyArgs, #[command(flatten)] iron_control: IronControlArgs, + #[command(flatten)] + tools_source: ToolsArgs, + #[command(flatten)] + overlay: OverlayArgs, } impl SandboxArgs { @@ -442,6 +447,8 @@ impl TryFrom<&SandboxArgs> for AgentSandboxConfig { proxy.fragments = fragments; } config.iron_control = args.iron_control.settings(); + config.tools = args.tools_source.to_config(); + config.overlay = args.overlay.to_config(); // iron-control is the only proxy mode: a per-sandbox proxy syncs its // secrets from the control plane, so configuring iron-proxy without // iron-control would produce a non-functional proxy. Fail fast. @@ -455,6 +462,85 @@ impl TryFrom<&SandboxArgs> for AgentSandboxConfig { } } +#[derive(Debug, ClapArgs)] +struct ToolsArgs { + // Explicit `id`s avoid clap arg-id collisions with the other flattened + // structs (IronProxyArgs/OverlayArgs also carry `image`/`image_pull_policy`). + // The source image (the shared `centaur-api` image) carries `/app/tools`, + // which a `tools-bootstrap` init container copies into each sandbox. + #[arg( + id = "tools_source_image", + long = "kubernetes-tool-server-image", + env = "KUBERNETES_TOOL_SERVER_IMAGE" + )] + image: Option, + #[arg( + id = "tools_source_image_pull_policy", + long = "kubernetes-tool-server-image-pull-policy", + env = "KUBERNETES_TOOL_SERVER_IMAGE_PULL_POLICY" + )] + image_pull_policy: Option, +} + +impl ToolsArgs { + /// `None` when no source image is configured (tools disabled). 
+ fn to_config(&self) -> Option { + let image = clean_optional_value(self.image.as_deref())?; + let mut config = ToolsConfig::new(image); + config.image_pull_policy = self.image_pull_policy.clone(); + Some(config) + } +} + +#[derive(Debug, ClapArgs)] +struct OverlayArgs { + #[arg( + id = "overlay_image", + long = "centaur-overlay-image", + env = "CENTAUR_OVERLAY_IMAGE" + )] + image: Option, + #[arg( + id = "overlay_image_pull_policy", + long = "centaur-overlay-image-pull-policy", + env = "CENTAUR_OVERLAY_IMAGE_PULL_POLICY" + )] + image_pull_policy: Option, + #[arg( + id = "overlay_image_source_path", + long = "centaur-overlay-image-source-path", + env = "CENTAUR_OVERLAY_IMAGE_SOURCE_PATH", + default_value = "/overlay" + )] + source_path: String, + // The overlay tree mounts at the same path the api-rs pod uses + // (`overlay.mountPath`), so the agent's `/tools` matches the path + // api-rs discovered tools at. + #[arg( + id = "overlay_mount_path", + long = "centaur-overlay-mount-path", + env = "CENTAUR_OVERLAY_MOUNT_PATH", + default_value = "/app/overlay/org" + )] + mount_path: String, +} + +impl OverlayArgs { + /// `None` when no overlay image is configured (overlay disabled). 
+ fn to_config(&self) -> Option { + let image = clean_optional_value(self.image.as_deref())?; + let mut config = OverlayConfig::new(image); + config.image_pull_policy = self.image_pull_policy.clone(); + if let Some(path) = clean_optional_value(Some(self.source_path.as_str())) { + config.source_path = path; + } + if let Some(path) = clean_optional_value(Some(self.mount_path.as_str())) { + config.mount_path = path; + } + Some(config) + } +} + #[derive(Debug, ClapArgs)] struct IronProxyArgs { #[arg( @@ -881,6 +967,30 @@ mod tests { assert!(config.iron_proxy.is_none()); } + #[test] + fn tools_and_overlay_config_read_from_flags() { + let args = Args::try_parse_from([ + "centaur-api-server", + "--database-url", + "postgres://postgres:postgres@localhost/centaur", + "--session-sandbox-backend", + "agent-k8s", + "--kubernetes-sandbox-iron-proxy-mode", + "disabled", + "--kubernetes-tool-server-image", + "centaur-api:test", + "--centaur-overlay-image", + "centaur-overlay:test", + ]) + .unwrap(); + let config = AgentSandboxConfig::try_from(&args.sandbox).unwrap(); + let tools = config.tools.expect("tools should be Some"); + assert_eq!(tools.image, "centaur-api:test"); + let overlay = config.overlay.expect("overlay should be Some"); + assert_eq!(overlay.image, "centaur-overlay:test"); + assert_eq!(overlay.mount_path, "/app/overlay/org"); + } + #[test] fn codex_app_server_env_template_injects_auth_mode_and_placeholder() { let args = Args::try_parse_from([ diff --git a/services/api-rs/crates/centaur-sandbox-agent-k8s/src/lib.rs b/services/api-rs/crates/centaur-sandbox-agent-k8s/src/lib.rs index 46b6b594..0ba429bb 100644 --- a/services/api-rs/crates/centaur-sandbox-agent-k8s/src/lib.rs +++ b/services/api-rs/crates/centaur-sandbox-agent-k8s/src/lib.rs @@ -25,9 +25,11 @@ use tokio::time::{Instant, sleep}; pub use generated::agents_x_k8s_io as crd; pub use iron_proxy::IronProxyConfig; +pub use tools::{OverlayConfig, ToolsConfig}; pub mod generated; mod iron_proxy; +mod tools; const 
BACKEND_NAME: &str = "agent-sandbox-k8s"; const DEFAULT_CONTAINER_NAME: &str = "agent"; @@ -52,6 +54,13 @@ pub struct AgentSandboxConfig { pub state_volume: Option, pub iron_proxy: Option, pub iron_control: Option, + /// When set, every sandbox gets a `tools-bootstrap` init container that + /// copies the source image's `/app/tools` into the agent's `/app/tools`, and + /// `TOOL_DIRS` is set so the agent's shim installer finds them. + pub tools: Option, + /// When set, the gerard overlay tree is mounted into the sandbox (tools, + /// workflows, skills, system-prompt overlay). + pub overlay: Option, pub ready_timeout: Duration, } @@ -81,6 +90,8 @@ impl AgentSandboxConfig { state_volume: None, iron_proxy: None, iron_control: None, + tools: None, + overlay: None, ready_timeout: Duration::from_secs(60), } } @@ -99,6 +110,16 @@ impl AgentSandboxConfig { self.iron_control = Some(iron_control); self } + + pub fn tools(mut self, tools: ToolsConfig) -> Self { + self.tools = Some(tools); + self + } + + pub fn overlay(mut self, overlay: OverlayConfig) -> Self { + self.overlay = Some(overlay); + self + } } #[derive(Clone, Debug, Eq, PartialEq)] @@ -450,13 +471,26 @@ fn build_agent_sandbox( "args", (!spec.args.is_empty()).then(|| spec.args.clone()), ); + // Agent container env: spec env + tools/overlay wiring (deduped). `TOOL_DIRS` + // is set deterministically here (not via passthrough) so it always matches + // the value the api-rs pod computes for its own tool discovery. 
+ let mut agent_env: Vec<(String, String)> = spec + .env + .iter() + .map(|env| (env.name.clone(), env.value.clone())) + .collect(); + if config.tools.is_some() { + for (name, value) in tools::agent_env(config.overlay.as_ref()) { + upsert_env(&mut agent_env, &name, value); + } + } insert_optional( &mut container, "env", - (!spec.env.is_empty()).then(|| { - spec.env + (!agent_env.is_empty()).then(|| { + agent_env .iter() - .map(|env| json!({ "name": env.name, "value": env.value })) + .map(|(name, value)| json!({ "name": name, "value": value })) .collect::>() }), ); @@ -474,17 +508,42 @@ fn build_agent_sandbox( volume_mounts.push(iron_proxy::sandbox_ca_volume_mount_json()); volumes.push(iron_proxy::sandbox_ca_volume_json(iron_proxy)); } + // Tool sources (and the overlay tree) are bootstrapped into emptyDirs by init + // containers and mounted read-only into the agent at the same paths `TOOL_DIRS` + // points at. + if config.tools.is_some() { + volume_mounts.extend(tools::agent_volume_mounts_json( + true, + config.overlay.as_ref(), + )); + volumes.extend(tools::volumes_json(true, config.overlay.is_some())); + } insert_optional( &mut container, "volumeMounts", (!volume_mounts.is_empty()).then_some(volume_mounts), ); + // Init containers: tools-bootstrap copies /app/tools out of the source image; + // overlay-bootstrap populates the overlay tree and stages AGENTS_OVERLAY.md. 
+ let mut init_containers: Vec = Vec::new(); + if let Some(tools) = &config.tools { + init_containers.push(tools::tools_init_container_json(tools)); + } + if let Some(overlay) = &config.overlay { + init_containers.push(tools::overlay_init_container_json(overlay)); + } + let mut pod_spec = json!({ "containers": [container], "restartPolicy": "Never", "automountServiceAccountToken": false, }); + insert_optional( + &mut pod_spec, + "initContainers", + (!init_containers.is_empty()).then_some(init_containers), + ); insert_optional( &mut pod_spec, "volumes", @@ -605,6 +664,16 @@ where } } +/// Override-or-append an env entry, so the agent container never emits a +/// duplicate env name when we layer tools/overlay wiring over `spec.env`. +fn upsert_env(env: &mut Vec<(String, String)>, name: &str, value: String) { + if let Some(entry) = env.iter_mut().find(|(existing, _)| existing == name) { + entry.1 = value; + } else { + env.push((name.to_owned(), value)); + } +} + fn next_sandbox_name() -> String { let millis = SystemTime::now() .duration_since(UNIX_EPOCH) diff --git a/services/api-rs/crates/centaur-sandbox-agent-k8s/src/tools.rs b/services/api-rs/crates/centaur-sandbox-agent-k8s/src/tools.rs new file mode 100644 index 00000000..7996a556 --- /dev/null +++ b/services/api-rs/crates/centaur-sandbox-agent-k8s/src/tools.rs @@ -0,0 +1,308 @@ +//! Tool sources + gerard overlay wiring for agent sandboxes. +//! +//! api-rs serves no `/tools` HTTP registry and the agent's `call ` HTTP +//! registry is deprecated upstream (control-plane-only). Instead the agent +//! image installs each tool as a shell CLI shim at entrypoint +//! (`services/sandbox/install_tool_shims.py`) by scanning `TOOL_DIRS` for +//! `pyproject.toml [project.scripts]` and `uvx`-installing each. Secrets ride +//! proxied env (tool placeholder creds + `*_DSN` from `apply_proxy_env`, +//! granted per-sandbox by iron-control) — none of that lives here. +//! +//! 
What this module provides is the *sources* the shims install from, mounted +//! INTO the agent container at the SAME paths the api-rs pod's own `TOOL_DIRS` +//! points at (so api-rs's `tool_discovery` and the agent agree on tool paths): +//! +//! * a `tools-bootstrap` init container copies `/app/tools` out of the shared +//! `centaur-api` image into an emptyDir mounted at `/app/tools`; +//! * an `overlay-bootstrap` init container copies the org overlay image's tree +//! into the overlay-root emptyDir, mounted at the overlay `mount_path` (and +//! stages the overlay's `SYSTEM_PROMPT.md` as `$HOME/AGENTS_OVERLAY.md`, which +//! the sandbox entrypoint appends to the base prompt). +//! +//! `TOOL_DIRS` is set explicitly on the agent env to `/app/tools` (or +//! `/app/tools:/tools` when the overlay is configured), matching the +//! value the api-rs Deployment computes for itself. + +use serde_json::{Value, json}; + +const AGENT_UID: i64 = 1001; + +/// Base tools path inside both the api-rs pod and the agent sandbox. +pub(crate) const BASE_TOOL_DIR: &str = "/app/tools"; +/// emptyDir the `tools-bootstrap` init container populates from the source image. +const TOOLS_VOLUME: &str = "tools-root"; + +/// Shared overlay-tree volume (populated by `overlay-bootstrap`). +const OVERLAY_VOLUME: &str = "overlay-root"; + +// The overlay's `SYSTEM_PROMPT.md` is staged by the init container into a tiny +// shared volume and surfaced to the agent at `$HOME/AGENTS_OVERLAY.md`, which the +// sandbox entrypoint appends to the base prompt. +const OVERLAY_PROMPT_VOLUME: &str = "overlay-prompt"; +const OVERLAY_PROMPT_DIR: &str = "/overlay-prompt"; +const OVERLAY_PROMPT_FILE: &str = "AGENTS_OVERLAY.md"; +const AGENT_OVERLAY_PROMPT_PATH: &str = "/home/agent/AGENTS_OVERLAY.md"; +const OVERLAY_SYSTEM_PROMPT_REL: &str = "services/sandbox/SYSTEM_PROMPT.md"; + +/// Source image carrying the base tools at `/app/tools` (the shared +/// `centaur-api` image). 
When set, every sandbox gets a `tools-bootstrap` init +/// container that copies those tools into the agent's `/app/tools`. +#[derive(Clone, Debug)] +pub struct ToolsConfig { + pub image: String, + pub image_pull_policy: Option, +} + +impl ToolsConfig { + pub fn new(image: impl Into) -> Self { + Self { + image: image.into(), + image_pull_policy: None, + } + } +} + +/// Org overlay image + where its tree lands in the sandbox. `mount_path` matches +/// the api-rs pod's `overlay.mountPath` so the agent's `/tools` is +/// the same path api-rs discovered tools at. +#[derive(Clone, Debug)] +pub struct OverlayConfig { + pub image: String, + pub image_pull_policy: Option, + /// Path the overlay tree is copied from inside the overlay image. + pub source_path: String, + /// Path the overlay tree is mounted at in the sandbox (e.g. `/app/overlay/org`). + pub mount_path: String, +} + +impl OverlayConfig { + pub fn new(image: impl Into) -> Self { + Self { + image: image.into(), + image_pull_policy: None, + source_path: "/overlay".to_owned(), + mount_path: "/app/overlay/org".to_owned(), + } + } + + /// Parent dir the overlay-root emptyDir is mounted at (so the copy lands at + /// `mount_path`). Falls back to `mount_path` itself if it has no parent. + fn overlay_root(&self) -> &str { + match self.mount_path.rfind('/') { + Some(0) | None => &self.mount_path, + Some(idx) => &self.mount_path[..idx], + } + } +} + +fn security_context_json() -> Value { + json!({ + "allowPrivilegeEscalation": false, + "capabilities": {"drop": ["ALL"]}, + "runAsGroup": AGENT_UID, + "runAsNonRoot": true, + "runAsUser": AGENT_UID, + "seccompProfile": {"type": "RuntimeDefault"}, + }) +} + +/// `TOOL_DIRS` for the agent: base tools plus the overlay's tools when present. +/// Matches the value the api-rs Deployment computes for its own `TOOL_DIRS`. 
+pub(crate) fn agent_tool_dirs(overlay: Option<&OverlayConfig>) -> String { + match overlay { + Some(overlay) => format!("{BASE_TOOL_DIR}:{}/tools", overlay.mount_path), + None => BASE_TOOL_DIR.to_owned(), + } +} + +/// Agent env added for tools/overlay wiring: `TOOL_DIRS` (always) and +/// `CENTAUR_OVERLAY_DIR` (when the overlay is configured). +pub(crate) fn agent_env(overlay: Option<&OverlayConfig>) -> Vec<(String, String)> { + let mut env = vec![("TOOL_DIRS".to_owned(), agent_tool_dirs(overlay))]; + if let Some(overlay) = overlay { + env.push(("CENTAUR_OVERLAY_DIR".to_owned(), overlay.mount_path.clone())); + } + env +} + +/// The `tools-bootstrap` init container: copies `/app/tools` out of the source +/// image into the shared `tools-root` emptyDir mounted at `/app/tools`. +pub(crate) fn tools_init_container_json(tools: &ToolsConfig) -> Value { + let script = format!( + "src=\"{BASE_TOOL_DIR}\"\n\ + target=\"{BASE_TOOL_DIR}\"\n\ + mkdir -p \"$target\"\n\ + cp -R \"$src\"/. \"$target\"/", + ); + let mut container = json!({ + "name": "tools-bootstrap", + "image": tools.image, + "command": ["/bin/sh", "-ec", script], + "volumeMounts": [ + {"name": TOOLS_VOLUME, "mountPath": BASE_TOOL_DIR}, + ], + "securityContext": security_context_json(), + }); + if let Some(policy) = &tools.image_pull_policy { + container["imagePullPolicy"] = json!(policy); + } + container +} + +/// The `overlay-bootstrap` init container: copies the overlay image's tree into +/// the shared `overlay-root` emptyDir, and stages the overlay's +/// `SYSTEM_PROMPT.md` as `AGENTS_OVERLAY.md` in a small shared volume. +pub(crate) fn overlay_init_container_json(overlay: &OverlayConfig) -> Value { + let script = format!( + "src=\"{src}\"\n\ + target=\"{target}\"\n\ + mkdir -p \"$target\"\n\ + cp -R \"$src\"/. 
\"$target\"/\n\ + if [ -f \"$target/{prompt_rel}\" ]; then\n\ + \x20 cp \"$target/{prompt_rel}\" \"{prompt_dir}/{prompt_file}\"\n\ + else\n\ + \x20 : > \"{prompt_dir}/{prompt_file}\"\n\ + fi", + src = overlay.source_path, + target = overlay.mount_path, + prompt_rel = OVERLAY_SYSTEM_PROMPT_REL, + prompt_dir = OVERLAY_PROMPT_DIR, + prompt_file = OVERLAY_PROMPT_FILE, + ); + let mut container = json!({ + "name": "overlay-bootstrap", + "image": overlay.image, + "command": ["/bin/sh", "-ec", script], + "volumeMounts": [ + {"name": OVERLAY_VOLUME, "mountPath": overlay.overlay_root()}, + {"name": OVERLAY_PROMPT_VOLUME, "mountPath": OVERLAY_PROMPT_DIR}, + ], + "securityContext": security_context_json(), + }); + if let Some(policy) = &overlay.image_pull_policy { + container["imagePullPolicy"] = json!(policy); + } + container +} + +/// Volumes added to the pod for tool sources (and, when enabled, the overlay +/// tree + prompt-handoff volume). +pub(crate) fn volumes_json(tools: bool, overlay: bool) -> Vec { + let mut volumes = Vec::new(); + if tools { + volumes.push(json!({"name": TOOLS_VOLUME, "emptyDir": {}})); + } + if overlay { + volumes.push(json!({"name": OVERLAY_VOLUME, "emptyDir": {}})); + volumes.push(json!({"name": OVERLAY_PROMPT_VOLUME, "emptyDir": {}})); + } + volumes +} + +/// Volume mounts added to the AGENT container: the base tools tree at +/// `/app/tools` and, when the overlay is enabled, the overlay tree plus the +/// staged overlay prompt at `$HOME/AGENTS_OVERLAY.md`. 
+pub(crate) fn agent_volume_mounts_json(tools: bool, overlay: Option<&OverlayConfig>) -> Vec { + let mut mounts = Vec::new(); + if tools { + mounts.push(json!({"name": TOOLS_VOLUME, "mountPath": BASE_TOOL_DIR, "readOnly": true})); + } + if let Some(overlay) = overlay { + mounts.push(json!({ + "name": OVERLAY_VOLUME, + "mountPath": overlay.overlay_root(), + "readOnly": true, + })); + mounts.push(json!({ + "name": OVERLAY_PROMPT_VOLUME, + "mountPath": AGENT_OVERLAY_PROMPT_PATH, + "subPath": OVERLAY_PROMPT_FILE, + "readOnly": true, + })); + } + mounts +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tool_dirs_match_api_rs_pod_value() { + assert_eq!(agent_tool_dirs(None), "/app/tools"); + let overlay = OverlayConfig::new("centaur-overlay:test"); + assert_eq!( + agent_tool_dirs(Some(&overlay)), + "/app/tools:/app/overlay/org/tools" + ); + } + + #[test] + fn agent_env_sets_tool_dirs_and_overlay_dir() { + let env = agent_env(None); + assert_eq!(env, vec![("TOOL_DIRS".to_owned(), "/app/tools".to_owned())]); + + let overlay = OverlayConfig::new("centaur-overlay:test"); + let env = agent_env(Some(&overlay)); + assert!(env.contains(&( + "TOOL_DIRS".to_owned(), + "/app/tools:/app/overlay/org/tools".to_owned() + ))); + assert!(env.contains(&( + "CENTAUR_OVERLAY_DIR".to_owned(), + "/app/overlay/org".to_owned() + ))); + } + + #[test] + fn overlay_root_is_mount_path_parent() { + let overlay = OverlayConfig::new("img"); + assert_eq!(overlay.overlay_root(), "/app/overlay"); + + let mut shallow = OverlayConfig::new("img"); + shallow.mount_path = "/overlay".to_owned(); + assert_eq!(shallow.overlay_root(), "/overlay"); + } + + #[test] + fn tools_init_copies_base_tools_into_emptydir() { + let tools = ToolsConfig::new("centaur-api:test"); + let c = tools_init_container_json(&tools); + assert_eq!(c["name"], "tools-bootstrap"); + assert_eq!(c["image"], "centaur-api:test"); + let mount = &c["volumeMounts"][0]; + assert_eq!(mount["name"], TOOLS_VOLUME); + 
assert_eq!(mount["mountPath"], "/app/tools"); + } + + #[test] + fn overlay_init_stages_prompt_and_mounts_root() { + let overlay = OverlayConfig::new("centaur-overlay:test"); + let c = overlay_init_container_json(&overlay); + assert_eq!(c["name"], "overlay-bootstrap"); + let script = c["command"][2].as_str().unwrap(); + assert!(script.contains("target=\"/app/overlay/org\"")); + assert!(script.contains("services/sandbox/SYSTEM_PROMPT.md")); + assert!(script.contains("AGENTS_OVERLAY.md")); + let root_mount = &c["volumeMounts"][0]; + assert_eq!(root_mount["mountPath"], "/app/overlay"); + } + + #[test] + fn agent_mounts_tools_and_overlay_prompt() { + let overlay = OverlayConfig::new("centaur-overlay:test"); + let mounts = agent_volume_mounts_json(true, Some(&overlay)); + // base tools, overlay tree, overlay prompt + assert_eq!(mounts.len(), 3); + assert!(mounts.iter().any(|m| m["mountPath"] == "/app/tools")); + assert!(mounts.iter().any(|m| m["mountPath"] == "/app/overlay")); + let prompt = mounts + .iter() + .find(|m| m["mountPath"] == AGENT_OVERLAY_PROMPT_PATH) + .unwrap(); + assert_eq!(prompt["subPath"], "AGENTS_OVERLAY.md"); + + let mounts = agent_volume_mounts_json(true, None); + assert_eq!(mounts.len(), 1); + } +} From aa9f848564ddae038b60703deaa86c584f9fa229 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Sat, 6 Jun 2026 12:46:45 +0000 Subject: [PATCH 02/25] fix(api-rs): stage tools-bootstrap copy outside /app/tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tools-bootstrap init container mounted the tools emptyDir at /app/tools — the same path it copies FROM. The mount shadows the source image's tools tree, so the script self-copies the empty volume and GNU cp rejects it (exit 1); every sandbox dies with 'reached terminal state before running' and no agent ever starts. 
Mount the volume at /tools-bootstrap instead (mirroring how overlay-bootstrap stages to a distinct target) and copy the image's /app/tools into it. The agent container keeps mounting the same volume at /app/tools, so TOOL_DIRS and the shim installer are unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../centaur-sandbox-agent-k8s/src/tools.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/services/api-rs/crates/centaur-sandbox-agent-k8s/src/tools.rs b/services/api-rs/crates/centaur-sandbox-agent-k8s/src/tools.rs index 7996a556..6dd77336 100644 --- a/services/api-rs/crates/centaur-sandbox-agent-k8s/src/tools.rs +++ b/services/api-rs/crates/centaur-sandbox-agent-k8s/src/tools.rs @@ -31,6 +31,12 @@ const AGENT_UID: i64 = 1001; pub(crate) const BASE_TOOL_DIR: &str = "/app/tools"; /// emptyDir the `tools-bootstrap` init container populates from the source image. const TOOLS_VOLUME: &str = "tools-root"; +/// Staging path where `tools-bootstrap` mounts the tools emptyDir. Must differ +/// from `BASE_TOOL_DIR`: mounting the volume at `/app/tools` would shadow the +/// source image's own tools tree, so the copy would read the empty volume and +/// `cp` would reject the self-copy (exit 1, sandbox never starts). The agent +/// container mounts the same volume at `BASE_TOOL_DIR`. +const TOOLS_BOOTSTRAP_DIR: &str = "/tools-bootstrap"; /// Shared overlay-tree volume (populated by `overlay-bootstrap`). const OVERLAY_VOLUME: &str = "overlay-root"; @@ -130,7 +136,7 @@ pub(crate) fn agent_env(overlay: Option<&OverlayConfig>) -> Vec<(String, String) pub(crate) fn tools_init_container_json(tools: &ToolsConfig) -> Value { let script = format!( "src=\"{BASE_TOOL_DIR}\"\n\ - target=\"{BASE_TOOL_DIR}\"\n\ + target=\"{TOOLS_BOOTSTRAP_DIR}\"\n\ mkdir -p \"$target\"\n\ cp -R \"$src\"/. 
\"$target\"/", ); @@ -139,7 +145,7 @@ pub(crate) fn tools_init_container_json(tools: &ToolsConfig) -> Value { "image": tools.image, "command": ["/bin/sh", "-ec", script], "volumeMounts": [ - {"name": TOOLS_VOLUME, "mountPath": BASE_TOOL_DIR}, + {"name": TOOLS_VOLUME, "mountPath": TOOLS_BOOTSTRAP_DIR}, ], "securityContext": security_context_json(), }); @@ -270,9 +276,14 @@ mod tests { let c = tools_init_container_json(&tools); assert_eq!(c["name"], "tools-bootstrap"); assert_eq!(c["image"], "centaur-api:test"); + let script = c["command"][2].as_str().unwrap(); + assert!(script.contains("src=\"/app/tools\"")); + assert!(script.contains("target=\"/tools-bootstrap\"")); + // The staging mount must NOT shadow the source image's /app/tools — + // that would make the copy a self-copy of the empty volume. let mount = &c["volumeMounts"][0]; assert_eq!(mount["name"], TOOLS_VOLUME); - assert_eq!(mount["mountPath"], "/app/tools"); + assert_eq!(mount["mountPath"], "/tools-bootstrap"); } #[test] From 074b69af165e86cf4b515b26b5c7997e374ef71d Mon Sep 17 00:00:00 2001 From: Will Drach Date: Mon, 8 Jun 2026 11:16:14 -0600 Subject: [PATCH 03/25] fix: wire sandbox overlays without tools Gate overlay env, volumes, and mounts independently from the tools source image so overlay-only sandbox configs produce valid pod specs. 
--- .../centaur-sandbox-agent-k8s/src/lib.rs | 57 ++++++++++++++++--- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/services/api-rs/crates/centaur-sandbox-agent-k8s/src/lib.rs b/services/api-rs/crates/centaur-sandbox-agent-k8s/src/lib.rs index 0ba429bb..96a5bbfa 100644 --- a/services/api-rs/crates/centaur-sandbox-agent-k8s/src/lib.rs +++ b/services/api-rs/crates/centaur-sandbox-agent-k8s/src/lib.rs @@ -479,7 +479,7 @@ fn build_agent_sandbox( .iter() .map(|env| (env.name.clone(), env.value.clone())) .collect(); - if config.tools.is_some() { + if config.tools.is_some() || config.overlay.is_some() { for (name, value) in tools::agent_env(config.overlay.as_ref()) { upsert_env(&mut agent_env, &name, value); } @@ -508,15 +508,18 @@ fn build_agent_sandbox( volume_mounts.push(iron_proxy::sandbox_ca_volume_mount_json()); volumes.push(iron_proxy::sandbox_ca_volume_json(iron_proxy)); } - // Tool sources (and the overlay tree) are bootstrapped into emptyDirs by init - // containers and mounted read-only into the agent at the same paths `TOOL_DIRS` - // points at. - if config.tools.is_some() { + // Tool sources and overlay sources are bootstrapped independently into + // emptyDirs by init containers and mounted read-only into the agent at the + // same paths `TOOL_DIRS` points at. 
+ if config.tools.is_some() || config.overlay.is_some() { volume_mounts.extend(tools::agent_volume_mounts_json( - true, + config.tools.is_some(), config.overlay.as_ref(), )); - volumes.extend(tools::volumes_json(true, config.overlay.is_some())); + volumes.extend(tools::volumes_json( + config.tools.is_some(), + config.overlay.is_some(), + )); } insert_optional( &mut container, @@ -739,6 +742,46 @@ mod tests { assert!(container.resources.as_ref().unwrap().limits.is_some()); } + #[test] + fn builds_agent_sandbox_spec_with_overlay_without_tools() { + let spec = SandboxSpec::new("centaur-agent:latest"); + let config = AgentSandboxConfig::new("centaur").overlay(OverlayConfig::new("overlay:test")); + + let sandbox = build_agent_sandbox(&SandboxId::new("asbx-test"), &spec, &config).unwrap(); + let pod_spec = &sandbox.spec.pod_template.spec; + let container = &pod_spec.containers[0]; + + let env = container.env.as_ref().unwrap(); + assert!(env.iter().any(|env| { + env.name == "TOOL_DIRS" + && env.value.as_deref() == Some("/app/tools:/app/overlay/org/tools") + })); + assert!(env.iter().any(|env| { + env.name == "CENTAUR_OVERLAY_DIR" && env.value.as_deref() == Some("/app/overlay/org") + })); + + let volumes = pod_spec.volumes.as_ref().unwrap(); + assert!( + volumes + .iter() + .any(|volume| { volume.name == "overlay-root" && volume.empty_dir.is_some() }) + ); + assert!( + volumes + .iter() + .any(|volume| { volume.name == "overlay-prompt" && volume.empty_dir.is_some() }) + ); + assert!(!volumes.iter().any(|volume| volume.name == "tools-root")); + + let mounts = container.volume_mounts.as_ref().unwrap(); + assert!(mounts.iter().any(|mount| mount.name == "overlay-root")); + assert!(mounts.iter().any(|mount| mount.name == "overlay-prompt")); + + let init_containers = pod_spec.init_containers.as_ref().unwrap(); + assert_eq!(init_containers.len(), 1); + assert_eq!(init_containers[0].name, "overlay-bootstrap"); + } + #[test] fn 
maps_agent_sandbox_replicas_and_pod_readiness_to_status() { let ready_pod = pod_with_phase_and_ready("Running", true); From 96c247fb320d369799649518345a444a68ac64ba Mon Sep 17 00:00:00 2001 From: Will Drach Date: Mon, 8 Jun 2026 11:17:37 -0600 Subject: [PATCH 04/25] fix: make sandbox bootstrap volumes writable Set an fsGroup on sandbox pods that use tools or overlays so non-root bootstrap init containers can populate their emptyDir mounts. --- .../centaur-sandbox-agent-k8s/src/lib.rs | 19 +++++++++++++++++++ .../centaur-sandbox-agent-k8s/src/tools.rs | 7 +++++++ 2 files changed, 26 insertions(+) diff --git a/services/api-rs/crates/centaur-sandbox-agent-k8s/src/lib.rs b/services/api-rs/crates/centaur-sandbox-agent-k8s/src/lib.rs index 96a5bbfa..3284f328 100644 --- a/services/api-rs/crates/centaur-sandbox-agent-k8s/src/lib.rs +++ b/services/api-rs/crates/centaur-sandbox-agent-k8s/src/lib.rs @@ -542,6 +542,9 @@ fn build_agent_sandbox( "restartPolicy": "Never", "automountServiceAccountToken": false, }); + if config.tools.is_some() || config.overlay.is_some() { + pod_spec["securityContext"] = tools::pod_security_context_json(); + } insert_optional( &mut pod_spec, "initContainers", @@ -782,6 +785,22 @@ mod tests { assert_eq!(init_containers[0].name, "overlay-bootstrap"); } + #[test] + fn bootstrap_empty_dirs_are_writable_by_agent_uid() { + let spec = SandboxSpec::new("centaur-agent:latest"); + let config = AgentSandboxConfig::new("centaur").tools(ToolsConfig::new("api:test")); + + let sandbox = build_agent_sandbox(&SandboxId::new("asbx-test"), &spec, &config).unwrap(); + let pod_spec = &sandbox.spec.pod_template.spec; + + let security_context = pod_spec.security_context.as_ref().unwrap(); + assert_eq!(security_context.fs_group, Some(1001)); + assert_eq!( + security_context.fs_group_change_policy.as_deref(), + Some("OnRootMismatch") + ); + } + #[test] fn maps_agent_sandbox_replicas_and_pod_readiness_to_status() { let ready_pod = pod_with_phase_and_ready("Running", true); 
diff --git a/services/api-rs/crates/centaur-sandbox-agent-k8s/src/tools.rs b/services/api-rs/crates/centaur-sandbox-agent-k8s/src/tools.rs index 6dd77336..dcee8ede 100644 --- a/services/api-rs/crates/centaur-sandbox-agent-k8s/src/tools.rs +++ b/services/api-rs/crates/centaur-sandbox-agent-k8s/src/tools.rs @@ -112,6 +112,13 @@ fn security_context_json() -> Value { }) } +pub(crate) fn pod_security_context_json() -> Value { + json!({ + "fsGroup": AGENT_UID, + "fsGroupChangePolicy": "OnRootMismatch", + }) +} + /// `TOOL_DIRS` for the agent: base tools plus the overlay's tools when present. /// Matches the value the api-rs Deployment computes for its own `TOOL_DIRS`. pub(crate) fn agent_tool_dirs(overlay: Option<&OverlayConfig>) -> String { From 615ba221e88c755b4d7d441db3514c7862dbac59 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Mon, 8 Jun 2026 12:43:15 -0600 Subject: [PATCH 05/25] fix(api-rs): source sandbox tools image from the api-rs image The tools-bootstrap init container copied /app/tools from .Values.api.image (centaur-api), but api-rs discovers its tools from /app/tools in its own container (.Values.apiRs.image). Sourcing from a different image risked the agent installing a different tool set than api-rs granted per-sandbox creds for. Source from the same api-rs image the Deployment runs so the two match by construction. Co-Authored-By: Claude Opus 4.8 (1M context) --- contrib/chart/templates/apirs.yaml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/contrib/chart/templates/apirs.yaml b/contrib/chart/templates/apirs.yaml index 99fd6686..75340621 100644 --- a/contrib/chart/templates/apirs.yaml +++ b/contrib/chart/templates/apirs.yaml @@ -206,13 +206,15 @@ spec: key: {{ printf "%sIRON_CONTROL_INITIAL_API_KEY" .Values.secretManager.envPrefix }} {{- end }} {{- if .Values.toolServer.enabled }} - # Base tools source image (the shared api image). 
A tools-bootstrap - # init container copies its /app/tools into each sandbox so the agent - # installs the same tool CLI shims api-rs discovers, at the same path. + # Base tools source image: the same api-rs image this Deployment runs. + # api-rs discovers its tools from /app/tools in its own container, so + # sourcing the tools-bootstrap copy from the identical image guarantees + # each sandbox installs the exact tool set api-rs granted creds for — + # any other image risks a silent /app/tools content drift. - name: KUBERNETES_TOOL_SERVER_IMAGE - value: {{ printf "%s:%s" .Values.api.image.repository .Values.api.image.tag | quote }} + value: {{ printf "%s:%s" .Values.apiRs.image.repository .Values.apiRs.image.tag | quote }} - name: KUBERNETES_TOOL_SERVER_IMAGE_PULL_POLICY - value: {{ .Values.api.image.pullPolicy | quote }} + value: {{ .Values.apiRs.image.pullPolicy | quote }} {{- end }} {{- if .Values.overlay.image.repository }} # Org overlay (tools/workflows/skills/system-prompt) mounted into From 72f322c121c738c3d9912b37baa392d31181d1b9 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Wed, 3 Jun 2026 16:43:10 -0600 Subject: [PATCH 06/25] feat(discordbot): Discord chat ingress as a clone of slackbotv2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rebased onto api-rs-control-plane. discordbot is a direct clone of the current slackbotv2 — its own inlined session-api (no shared bridge package, to avoid coupling to slackbotv2's churn) — pointing at the api-rs session control plane, including the upstream handoff-before-ack flow, retryable SessionApiError classification, execute idempotency keys, client_message_id append dedupe, and render obligations with on-start recovery. Discord-specific pieces: createDiscordAdapter, a long-lived Gateway controller, fail-closed guild allowlist + DM-deny, native-thread naming, and a typing-indicator keepalive in place of Slack status/title. 
Where slackbotv2 answers 503 to request a Slack webhook retry, the Gateway has no re-delivery, so handoff failures render in-thread. Carries the prior P1 fixes (concurrency:'drop', generic api-rs error messages). 21 unit tests pass. Co-Authored-By: Claude Opus 4.8 (1M context) --- .npmrc | 4 + pnpm-lock.yaml | 244 +++++- pnpm-workspace.yaml | 1 + services/discordbot/README.md | 89 ++ services/discordbot/package.json | 29 + services/discordbot/src/discord-allowlist.ts | 78 ++ services/discordbot/src/discord-threading.ts | 76 ++ services/discordbot/src/gateway.ts | 81 ++ services/discordbot/src/index.ts | 761 ++++++++++++++++++ services/discordbot/src/server.ts | 115 +++ services/discordbot/src/session-api.ts | 666 +++++++++++++++ services/discordbot/src/types.ts | 166 ++++ services/discordbot/src/utils.ts | 61 ++ .../discordbot/test/discord-allowlist.test.ts | 141 ++++ .../discordbot/test/discord-threading.test.ts | 108 +++ services/discordbot/test/gateway.test.ts | 80 ++ services/discordbot/tsconfig.json | 24 + 17 files changed, 2722 insertions(+), 2 deletions(-) create mode 100644 .npmrc create mode 100644 services/discordbot/README.md create mode 100644 services/discordbot/package.json create mode 100644 services/discordbot/src/discord-allowlist.ts create mode 100644 services/discordbot/src/discord-threading.ts create mode 100644 services/discordbot/src/gateway.ts create mode 100644 services/discordbot/src/index.ts create mode 100644 services/discordbot/src/server.ts create mode 100644 services/discordbot/src/session-api.ts create mode 100644 services/discordbot/src/types.ts create mode 100644 services/discordbot/src/utils.ts create mode 100644 services/discordbot/test/discord-allowlist.test.ts create mode 100644 services/discordbot/test/discord-threading.test.ts create mode 100644 services/discordbot/test/gateway.test.ts create mode 100644 services/discordbot/tsconfig.json diff --git a/.npmrc b/.npmrc new file mode 100644 index 00000000..1d90af05 --- /dev/null 
+++ b/.npmrc @@ -0,0 +1,4 @@ +# Hoist tslib to the root node_modules. discord.js (used by services/discordbot) ships its +# source and requires tslib; with pnpm strict layout, Bun cannot resolve the nested .pnpm symlink +# ("Cannot find package 'tslib'"). Public-hoisting makes it resolvable by walking up to root. +public-hoist-pattern[]=tslib diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9271da46..817b9a00 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -55,6 +55,49 @@ importers: specifier: 5.9.3 version: 5.9.3 + services/discordbot: + dependencies: + '@centaur/harness-events': + specifier: workspace:* + version: link:../../packages/harness-events + '@centaur/rendering': + specifier: workspace:* + version: link:../../packages/rendering + '@chat-adapter/discord': + specifier: ^4.30.0 + version: 4.30.0(zod@4.4.3) + '@chat-adapter/state-pg': + specifier: ^4.30.0 + version: 4.30.0(zod@4.4.3) + chat: + specifier: ^4.30.0 + version: 4.30.0(zod@4.4.3) + discord.js: + specifier: ^14.25.1 + version: 14.26.4 + hono: + specifier: ^4.12.18 + version: 4.12.19 + tslib: + specifier: ^2.8.1 + version: 2.8.1 + devDependencies: + '@chat-adapter/state-memory': + specifier: ^4.30.0 + version: 4.30.0(zod@4.4.3) + '@types/bun': + specifier: ^1.3.13 + version: 1.3.14 + '@types/node': + specifier: ^25.7.0 + version: 25.9.0 + '@typescript/native-preview': + specifier: ^7.0.0-dev.20260512.1 + version: 7.0.0-dev.20260518.1 + typescript: + specifier: ^6.0.3 + version: 6.0.3 + services/slackbot: dependencies: '@opentelemetry/api': @@ -170,6 +213,10 @@ importers: packages: + '@chat-adapter/discord@4.30.0': + resolution: {integrity: sha512-hWMwCwVgMeQWB5F2JL91GLTWcF8rlE4eewzXzfPSoAsM7Y71yBDCfQ9QfL5VODwPKtYnVPU5Go2pGsuauaZ/yw==} + engines: {node: '>=20'} + '@chat-adapter/shared@4.30.0': resolution: {integrity: sha512-IuYtbn/p1FBXvp7JYGEMLCt07GHOMlyjx7OlZXPJwLTravcyJuP7Q6N31r6c1yubMhM8PLb8eT8l/YnjwYjs9Q==} engines: {node: '>=20'} @@ -186,6 +233,34 @@ packages: resolution: {integrity: 
sha512-8qymxX34Fg7B0PJCoYi60Bck68Gnd4cW4U8hgvJBc5ZHm8K4XT4srV8G3AeDT7rVSmrW5diidcmROo8Z1ke6iw==} engines: {node: '>=20'} + '@discordjs/builders@1.14.1': + resolution: {integrity: sha512-gSKkhXLqs96TCzk66VZuHHl8z2bQMJFGwrXC0f33ngK+FLNau4hU1PYny3DNJfNdSH+gVMzE85/d5FQ2BpcNwQ==} + engines: {node: '>=16.11.0'} + + '@discordjs/collection@1.5.3': + resolution: {integrity: sha512-SVb428OMd3WO1paV3rm6tSjM4wC+Kecaa1EUGX7vc6/fddvw/6lg90z4QtCqm21zvVe92vMMDt9+DkIvjXImQQ==} + engines: {node: '>=16.11.0'} + + '@discordjs/collection@2.1.1': + resolution: {integrity: sha512-LiSusze9Tc7qF03sLCujF5iZp7K+vRNEDBZ86FT9aQAv3vxMLihUvKvpsCWiQ2DJq1tVckopKm1rxomgNUc9hg==} + engines: {node: '>=18'} + + '@discordjs/formatters@0.6.2': + resolution: {integrity: sha512-y4UPwWhH6vChKRkGdMB4odasUbHOUwy7KL+OVwF86PvT6QVOwElx+TiI1/6kcmcEe+g5YRXJFiXSXUdabqZOvQ==} + engines: {node: '>=16.11.0'} + + '@discordjs/rest@2.6.1': + resolution: {integrity: sha512-wwQdgjeaoYFiaG+atbqx6aJDpqW7JHAo0HrQkBTbYzM3/PJ3GweQIpgElNcGZ26DCUOXMyawYd0YF7vtr+fZXg==} + engines: {node: '>=18'} + + '@discordjs/util@1.2.0': + resolution: {integrity: sha512-3LKP7F2+atl9vJFhaBjn4nOaSWahZ/yWjOvA4e5pnXkt2qyXRCHLxoBQy81GFtLGCq7K9lPm9R517M1U+/90Qg==} + engines: {node: '>=18'} + + '@discordjs/ws@1.2.3': + resolution: {integrity: sha512-wPlQDxEmlDg5IxhJPuxXr3Vy9AjYq5xCvFWGJyD7w7Np8ZGu+Mc+97LCoEc/+AYCo2IDpKioiH0/c/mj5ZR9Uw==} + engines: {node: '>=16.11.0'} + '@emnapi/core@1.8.1': resolution: {integrity: sha512-AvT9QFpxK0Zd8J0jopedNm+w/2fIzvtPKPjqyw9jwvBaReTTqPBk9Hixaz7KbjimP+QNz605/XnjFcDAL2pqBg==} @@ -639,6 +714,22 @@ packages: '@rolldown/pluginutils@1.0.0-rc.9': resolution: {integrity: sha512-w6oiRWgEBl04QkFZgmW+jnU1EC9b57Oihi2ot3HNWIQRqgHp5PnYDia5iZ5FF7rpa4EQdiqMDXjlqKGXBhsoXw==} + '@sapphire/async-queue@1.5.5': + resolution: {integrity: sha512-cvGzxbba6sav2zZkH8GPf2oGk9yYoD5qrNWdu9fRehifgnFZJMV+nuy2nON2roRO4yQQ+v7MK/Pktl/HgfsUXg==} + engines: {node: '>=v14.0.0', npm: '>=7.0.0'} + + '@sapphire/shapeshift@4.0.0': + resolution: 
{integrity: sha512-d9dUmWVA7MMiKobL3VpLF8P2aeanRTu6ypG2OIaEv/ZHH/SUQ2iHOVyi5wAPjQ+HmnMuL0whK9ez8I/raWbtIg==} + engines: {node: '>=v16'} + + '@sapphire/snowflake@3.5.3': + resolution: {integrity: sha512-jjmJywLAFoWeBi1W7994zZyiNWPIiqRRNAmSERxyg93xRGzNYvGjlZ0gR6x0F4gPRi2+0O6S71kOZYyr3cxaIQ==} + engines: {node: '>=v14.0.0', npm: '>=7.0.0'} + + '@sapphire/snowflake@3.5.5': + resolution: {integrity: sha512-xzvBr1Q1c4lCe7i6sRnrofxeO1QTP/LKQ6A6qy0iB4x5yfiSfARMEQEghojzTNALDTcv8En04qYNIco9/K9eZQ==} + engines: {node: '>=v14.0.0', npm: '>=7.0.0'} + '@slack/logger@4.0.1': resolution: {integrity: sha512-6cmdPrV/RYfd2U0mDGiMK8S7OJqpCTm7enMLRR3edccsPX8j7zXTLnaEF4fhxxJJTAIOil6+qZrnUPTuaLvwrQ==} engines: {node: '>= 18', npm: '>= 8.6.0'} @@ -780,6 +871,10 @@ packages: '@vitest/utils@4.1.0': resolution: {integrity: sha512-XfPXT6a8TZY3dcGY8EdwsBulFCIw+BeeX0RZn2x/BtiY/75YGh8FeWGG8QISN/WhaqSrE2OrlDgtF8q5uhOTmw==} + '@vladfrangu/async_event_emitter@2.4.7': + resolution: {integrity: sha512-Xfe6rpCTxSxfbswi/W/Pz7zp1WWSNn4A0eW4mLkQUewCrXXtMj31lCg+iQyTkh/CkusZSq9eDflu7tjEDXUY6g==} + engines: {node: '>=v14.0.0', npm: '>=7.0.0'} + '@workflow/serde@4.1.0-beta.2': resolution: {integrity: sha512-8kkeoQKLDaKXefjV5dbhBj2aErfKp1Mc4pb6tj8144cF+Em5SPbyMbyLCHp+BVrFfFVCBluCtMx+jjvaFVZGww==} @@ -870,6 +965,20 @@ packages: devlop@1.1.0: resolution: {integrity: sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==} + discord-api-types@0.37.120: + resolution: {integrity: sha512-7xpNK0EiWjjDFp2nAhHXezE4OUWm7s1zhc/UXXN6hnFFU8dfoPHgV0Hx0RPiCa3ILRpdeh152icc68DGCyXYIw==} + + discord-api-types@0.38.48: + resolution: {integrity: sha512-WFUE/2o0lBlLeCQonQ+Pu2RqHAqbytBJ2RlXR91gzk05InSS6k9ShzzLYoymrA4c2oRgRKGE7/VqQJNNdGWSxQ==} + + discord-interactions@4.4.0: + resolution: {integrity: sha512-jjJx8iwAeJcj8oEauV43fue9lNqkf38fy60aSs2+G8D1nJmDxUIrk08o3h0F3wgwuBWWJUZO+X/VgfXsxpCiJA==} + engines: {node: '>=18.4.0'} + + discord.js@14.26.4: + resolution: {integrity: 
sha512-4oBp8tc6Kf8IDBwAHhbsMaAqx1b5fob9SNasZT7V6yyyUydoO5i5fGuX7TmvRtR+q/WgKRnRViRoAWnG7fNyvA==} + engines: {node: '>=18'} + dunder-proto@1.0.1: resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} engines: {node: '>= 0.4'} @@ -921,6 +1030,9 @@ packages: extend@3.0.2: resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==} + fast-deep-equal@3.1.3: + resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==} + fdir@6.5.0: resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==} engines: {node: '>=12.0.0'} @@ -1077,9 +1189,18 @@ packages: resolution: {integrity: sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ==} engines: {node: '>= 12.0.0'} + lodash.snakecase@4.1.1: + resolution: {integrity: sha512-QZ1d4xoBHYUeuouhEq3lk3Uq7ldgyFXGBhg04+oRLnIz8o9T65Eh+8YdroUwn846zchkA9yDsDl5CVVaV2nqYw==} + + lodash@4.18.1: + resolution: {integrity: sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q==} + longest-streak@3.1.0: resolution: {integrity: sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==} + magic-bytes.js@1.13.0: + resolution: {integrity: sha512-afO2mnxW7GDTXMm5/AoN1WuOcdoKhtgXjIvHmobqTD1grNplhGdv3PFOyjCVmrnOZBIT/gD/koDKpYG+0mvHcg==} + magic-string@0.30.21: resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==} @@ -1397,6 +1518,9 @@ packages: trough@2.2.0: resolution: {integrity: sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==} + ts-mixer@6.0.4: + resolution: {integrity: sha512-ufKpbmrugz5Aou4wcr5Wc1UUFWOLhq+Fm6qa6P0w0K5Qw2yhaUoiWszhCVuNQyNwrlGiscHOmqYoAox1PtvgjA==} + 
tslib@2.8.1: resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} @@ -1413,6 +1537,10 @@ packages: undici-types@7.24.6: resolution: {integrity: sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg==} + undici@6.24.1: + resolution: {integrity: sha512-sC+b0tB1whOCzbtlx20fx3WgCXwkW627p4EA9uM+/tNNPkSS+eSEld6pAs9nDv7WbY1UUljBMYPtu9BCOrCWKA==} + engines: {node: '>=18.17'} + unified@11.0.5: resolution: {integrity: sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==} @@ -1546,6 +1674,20 @@ packages: snapshots: + '@chat-adapter/discord@4.30.0(zod@4.4.3)': + dependencies: + '@chat-adapter/shared': 4.30.0(zod@4.4.3) + chat: 4.30.0(zod@4.4.3) + discord-api-types: 0.37.120 + discord-interactions: 4.4.0 + discord.js: 14.26.4 + transitivePeerDependencies: + - ai + - bufferutil + - supports-color + - utf-8-validate + - zod + '@chat-adapter/shared@4.30.0(zod@4.4.3)': dependencies: chat: 4.30.0(zod@4.4.3) @@ -1586,6 +1728,55 @@ snapshots: - supports-color - zod + '@discordjs/builders@1.14.1': + dependencies: + '@discordjs/formatters': 0.6.2 + '@discordjs/util': 1.2.0 + '@sapphire/shapeshift': 4.0.0 + discord-api-types: 0.38.48 + fast-deep-equal: 3.1.3 + ts-mixer: 6.0.4 + tslib: 2.8.1 + + '@discordjs/collection@1.5.3': {} + + '@discordjs/collection@2.1.1': {} + + '@discordjs/formatters@0.6.2': + dependencies: + discord-api-types: 0.38.48 + + '@discordjs/rest@2.6.1': + dependencies: + '@discordjs/collection': 2.1.1 + '@discordjs/util': 1.2.0 + '@sapphire/async-queue': 1.5.5 + '@sapphire/snowflake': 3.5.5 + '@vladfrangu/async_event_emitter': 2.4.7 + discord-api-types: 0.38.48 + magic-bytes.js: 1.13.0 + tslib: 2.8.1 + undici: 6.24.1 + + '@discordjs/util@1.2.0': + dependencies: + discord-api-types: 0.38.48 + + '@discordjs/ws@1.2.3': + dependencies: + '@discordjs/collection': 2.1.1 + '@discordjs/rest': 2.6.1 + '@discordjs/util': 1.2.0 + 
'@sapphire/async-queue': 1.5.5 + '@types/ws': 8.18.1 + '@vladfrangu/async_event_emitter': 2.4.7 + discord-api-types: 0.38.48 + tslib: 2.8.1 + ws: 8.21.0 + transitivePeerDependencies: + - bufferutil + - utf-8-validate + '@emnapi/core@1.8.1': dependencies: '@emnapi/wasi-threads': 1.1.0 @@ -1878,6 +2069,17 @@ snapshots: '@rolldown/pluginutils@1.0.0-rc.9': {} + '@sapphire/async-queue@1.5.5': {} + + '@sapphire/shapeshift@4.0.0': + dependencies: + fast-deep-equal: 3.1.3 + lodash: 4.18.1 + + '@sapphire/snowflake@3.5.3': {} + + '@sapphire/snowflake@3.5.5': {} + '@slack/logger@4.0.1': dependencies: '@types/node': 25.9.0 @@ -2040,6 +2242,8 @@ snapshots: convert-source-map: 2.0.0 tinyrainbow: 3.1.0 + '@vladfrangu/async_event_emitter@2.4.7': {} + '@workflow/serde@4.1.0-beta.2': {} agent-base@6.0.2: @@ -2127,6 +2331,31 @@ snapshots: dependencies: dequal: 2.0.3 + discord-api-types@0.37.120: {} + + discord-api-types@0.38.48: {} + + discord-interactions@4.4.0: {} + + discord.js@14.26.4: + dependencies: + '@discordjs/builders': 1.14.1 + '@discordjs/collection': 1.5.3 + '@discordjs/formatters': 0.6.2 + '@discordjs/rest': 2.6.1 + '@discordjs/util': 1.2.0 + '@discordjs/ws': 1.2.3 + '@sapphire/snowflake': 3.5.3 + discord-api-types: 0.38.48 + fast-deep-equal: 3.1.3 + lodash.snakecase: 4.1.1 + magic-bytes.js: 1.13.0 + tslib: 2.8.1 + undici: 6.24.1 + transitivePeerDependencies: + - bufferutil + - utf-8-validate + dunder-proto@1.0.1: dependencies: call-bind-apply-helpers: 1.0.2 @@ -2175,6 +2404,8 @@ snapshots: extend@3.0.2: {} + fast-deep-equal@3.1.3: {} + fdir@6.5.0(picomatch@4.0.3): optionalDependencies: picomatch: 4.0.3 @@ -2293,8 +2524,14 @@ snapshots: lightningcss-win32-arm64-msvc: 1.32.0 lightningcss-win32-x64-msvc: 1.32.0 + lodash.snakecase@4.1.1: {} + + lodash@4.18.1: {} + longest-streak@3.1.0: {} + magic-bytes.js@1.13.0: {} + magic-string@0.30.21: dependencies: '@jridgewell/sourcemap-codec': 1.5.5 @@ -2817,8 +3054,9 @@ snapshots: trough@2.2.0: {} - tslib@2.8.1: - optional: true + 
ts-mixer@6.0.4: {} + + tslib@2.8.1: {} typescript@5.9.3: {} @@ -2826,6 +3064,8 @@ snapshots: undici-types@7.24.6: {} + undici@6.24.1: {} + unified@11.0.5: dependencies: '@types/unist': 3.0.3 diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 8fb502e0..03fab927 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -2,3 +2,4 @@ packages: - 'packages/*' - 'services/slackbot' - 'services/slackbotv2' + - 'services/discordbot' diff --git a/services/discordbot/README.md b/services/discordbot/README.md new file mode 100644 index 00000000..595c7823 --- /dev/null +++ b/services/discordbot/README.md @@ -0,0 +1,89 @@ +# discordbot + +Discord chat ingress for the Centaur agent. Mirrors `slackbotv2` (streamed, session-backed +replies to `@`-mentions) using Vercel's Chat SDK Discord adapter. The session logic is +inlined in `src/session-api.ts` (no shared bridge package with Slack); the Rust `api-rs` control plane is +unchanged (`discord:…` thread keys flow through identically). + +## Behavior + +- **`@`-mention in a channel** → the adapter creates a **public thread from that message**, the + bot streams the answer inside it, and the thread is renamed to the message text. The session is + keyed by the new thread (`discord:{guild}:{channel}:{threadId}`). +- **`@`-mention inside an existing thread** → the bot answers in that thread. +- **Follow-ups inside an active thread** append to the same session without a re-mention. + +## Ingress model + +Discord delivers normal messages over a **Gateway WebSocket** (outbound), not HTTP webhooks. The +bot opens a single long-lived Gateway connection in "direct mode" (`startGatewayListener` with a +large duration; discord.js maintains the session with native RESUME). There is **no public +ingress** — only a `GET /health` endpoint that reflects the Gateway connection state. + +> ⚠️ **Run exactly one replica.** Two pods on the same bot token open two Gateway sessions and +> every message is handled twice.
Deploy with `replicas: 1` + `strategy: Recreate`, never autoscale. + +> ⚠️ **Do not proxy the Gateway.** discord.js ignores `HTTPS_PROXY` for the WebSocket. Give the pod +> direct `:443` egress to Discord and exclude Discord hosts via `NO_PROXY`. + +## Environment + +| Var | Required | Notes | +|-----|----------|-------| +| `DISCORD_BOT_TOKEN` | ✅ | Bot token (account-level credential — keep secret). | +| `DISCORD_PUBLIC_KEY` | ✅ | Ed25519 public key (used by the adapter for any HTTP interactions). | +| `DISCORD_APPLICATION_ID` | ✅ | Doubles as the bot user id for mention detection. | +| `DISCORDBOT_GUILD_ALLOWLIST` | ✅ to do anything | Comma/space-separated guild IDs. **Fail-closed: empty ⇒ the bot ignores all messages.** | +| `DISCORDBOT_API_KEY` | – | Bearer to api-rs (falls back to `CENTAUR_API_KEY`). Use a dedicated key, not the Slack one. | +| `CENTAUR_API_URL` | – | api-rs base URL (default `http://127.0.0.1:8080`). | +| `DISCORDBOT_DATABASE_URL` / `DATABASE_URL` / `POSTGRES_URL` | – | Thread-state store. | +| `DISCORD_MENTION_ROLE_IDS` | – | Role mentions that also trigger the bot. | +| `DISCORDBOT_NAME_THREADS` | – | Set `false` to keep the adapter's generic thread names. | +| `DISCORD_API_URL` | – | Override Discord API base. | +| `PORT` | – | Health server port (default 3001). | +| `SESSION_IDLE_TIMEOUT_MS` / `SESSION_MAX_DURATION_MS` | – | Forwarded to api-rs execute. | + +DMs are denied unconditionally (DM intents are not requested). + +## Discord application setup + +1. **Create the application** at <https://discord.com/developers/applications>. Note the + **Application ID** and **Public Key** (General Information). +2. **Bot** tab → reveal/reset the **token** (`DISCORD_BOT_TOKEN`). +3. **Bot → Privileged Gateway Intents** → enable **Message Content Intent**. Without it, + non-mention messages arrive with empty content and follow-ups break. (Bots in 100+ servers must + apply for it; below that it's a toggle.) +4.
**Invite the bot** (OAuth2 → URL Generator) with scope `bot` and permissions: + _View Channels_, _Send Messages_, _Send Messages in Threads_, **Create Public Threads**, + _Embed Links_, _Read Message History_. +5. Set `DISCORDBOT_GUILD_ALLOWLIST` to the server(s) you invited it to — the bot is **inert** until + this is set. + +## Runtime assumptions (validated 2026-06-02) + +A throwaway spike confirmed the three things the static build couldn't prove: discord.js's Gateway +runs under Bun, a Gateway `MESSAGE_CREATE` dispatches in-process to `chat.onNewMention`, and a +channel mention auto-creates a thread that the bot streams into. An `@`-mention produced a threaded +reply end-to-end. The spike has served its purpose and been removed. + +## Develop / test + +```bash +bun run check:types # tsgo +bun test test # allowlist, threading, gateway controller (no Discord needed) +bun run dev # run the server locally (needs env above) +``` + +## Known limitations + +- The Gateway listener can't expose the precise close code on a fatal end; an unexpected + disconnect exits the process so Kubernetes restarts it (CrashLoopBackOff surfaces bad + token/intents). `/health` liveness is "listener still running", not a deep socket probe. +- Concurrency is `'drop'`: the per-thread lock serializes handling so two near-simultaneous mentions + can't double-execute. The tradeoff is that a follow-up sent *while a stream is still running* is + dropped rather than appended mid-stream; send it again once the reply finishes. +- Thread renaming is best-effort and applies on the first execution; a first mention inside a + user-created thread will rename that thread (set `DISCORDBOT_NAME_THREADS=false` to disable). +- A Gateway RESUME that replays a channel mention before state commits could, in rare cases, let + the adapter create a second thread (the dedup guards execution, but thread creation happens + inside the adapter). See the plan's invariant #2. 
diff --git a/services/discordbot/package.json b/services/discordbot/package.json new file mode 100644 index 00000000..91c5f4c5 --- /dev/null +++ b/services/discordbot/package.json @@ -0,0 +1,29 @@ +{ + "name": "discordbot", + "version": "0.0.0", + "private": true, + "type": "module", + "scripts": { + "dev": "bun --hot --watch src/server.ts", + "start": "bun src/server.ts", + "test": "bun test test", + "check:types": "bun tsgo --project tsconfig.json --noEmit" + }, + "dependencies": { + "@centaur/harness-events": "workspace:*", + "@centaur/rendering": "workspace:*", + "@chat-adapter/discord": "^4.30.0", + "@chat-adapter/state-pg": "^4.30.0", + "chat": "^4.30.0", + "discord.js": "^14.25.1", + "hono": "^4.12.18", + "tslib": "^2.8.1" + }, + "devDependencies": { + "@chat-adapter/state-memory": "^4.30.0", + "@types/bun": "^1.3.13", + "@types/node": "^25.7.0", + "@typescript/native-preview": "^7.0.0-dev.20260512.1", + "typescript": "^6.0.3" + } +} diff --git a/services/discordbot/src/discord-allowlist.ts b/services/discordbot/src/discord-allowlist.ts new file mode 100644 index 00000000..9d5cec1e --- /dev/null +++ b/services/discordbot/src/discord-allowlist.ts @@ -0,0 +1,78 @@ +import type { Logger, Message } from "chat"; +import type { DiscordbotOptions } from "./types"; + +/** + * Decode a Discord thread key `discord:{guildId}:{channelId}[:{threadId}]` into parts. + * Returns an empty object if the id is not a Discord thread key. + */ +export function parseDiscordThreadKey(threadKey: string): { + guildId?: string; + channelId?: string; + threadId?: string; +} { + const parts = threadKey.split(":"); + if (parts[0] !== "discord") return {}; + return { guildId: parts[1], channelId: parts[2], threadId: parts[3] }; +} + +/** + * Authorization gate for inbound Discord messages. + * + * Unlike the Slack allowlist (which is fail-open), this is intentionally **fail-closed**: + * the api-rs control plane has no ingress auth, so this guard is the primary authorization + * boundary. 
Direct messages are denied outright, and an empty/unset guild allowlist means the + * bot is inert until configured. + */ +export function isAllowedDiscordMessage( + message: Message, + options: DiscordbotOptions, + logger: Logger, +): boolean { + if (message.author.isBot === true || message.author.isMe === true) { + return false; + } + + const { guildId } = parseDiscordThreadKey(message.threadId); + if (!guildId || guildId === "@me") { + logger.warn("discordbot_message_ignored_dm", { + message_id: message.id, + thread_id: message.threadId, + }); + return false; + } + + const allowlist = + options.guildAllowlist ?? + splitEnvList(process.env.DISCORDBOT_GUILD_ALLOWLIST); + if (allowlist.length === 0) { + logger.warn("discordbot_message_ignored_allowlist_empty", { + message_id: message.id, + guild_id: guildId, + }); + return false; + } + if (!new Set(allowlist).has(guildId)) { + logger.warn("discordbot_message_ignored_guild_not_allowlisted", { + message_id: message.id, + guild_id: guildId, + }); + return false; + } + + return true; +} + +/** True when the bot has no guild allowlist configured and will ignore every message. */ +export function isGuildAllowlistEmpty(options: DiscordbotOptions): boolean { + const allowlist = + options.guildAllowlist ?? + splitEnvList(process.env.DISCORDBOT_GUILD_ALLOWLIST); + return allowlist.length === 0; +} + +function splitEnvList(value: string | undefined): string[] { + return (value ?? 
"") + .split(/[\s,]+/) + .map((part) => part.trim()) + .filter(Boolean); +} diff --git a/services/discordbot/src/discord-threading.ts b/services/discordbot/src/discord-threading.ts new file mode 100644 index 00000000..71dfc0b1 --- /dev/null +++ b/services/discordbot/src/discord-threading.ts @@ -0,0 +1,76 @@ +import type { Logger } from "chat"; +import { parseDiscordThreadKey } from "./discord-allowlist"; +import type { DiscordbotOptions } from "./types"; + +const DISCORD_THREAD_NAME_LIMIT = 100; +const DEFAULT_DISCORD_API_URL = "https://discord.com/api/v10"; + +/** + * Derive a Discord thread name from the triggering message text. The `@chat-adapter/discord` + * adapter auto-creates a thread on a channel mention but names it generically + * (`Thread `); this reproduces the Slack "assistant title" feel by naming the thread + * after what the user actually asked. + */ +export function deriveThreadName(text: string, userName = "centaur"): string { + const mentionless = text + .replace(/<@!?\d+>/g, "") // user mentions <@123> / <@!123> + .replace(/<@&\d+>/g, "") // role mentions <@&123> + .replace( + new RegExp(`^\\s*@?${escapeRegExp(userName)}\\b[:,]?\\s*`, "i"), + "", + ) + .trim(); + return clipOneLine(mentionless || "Centaur task", DISCORD_THREAD_NAME_LIMIT); +} + +/** + * Best-effort rename of the thread the session lives in. No-ops when the key carries no thread + * segment (i.e. the message was not threaded). Failures are swallowed — naming is cosmetic and + * must never block streaming. + */ +export async function renameThreadFromMessage( + options: DiscordbotOptions, + threadKey: string, + name: string, + logger: Logger, +): Promise { + const { threadId } = parseDiscordThreadKey(threadKey); + if (!threadId) return; + + const fetchFn = options.fetch ?? fetch; + const apiBase = (options.discordApiUrl ?? 
DEFAULT_DISCORD_API_URL).replace( + /\/$/, + "", + ); + try { + const response = await fetchFn(`${apiBase}/channels/${threadId}`, { + method: "PATCH", + headers: { + authorization: `Bot ${options.botToken}`, + "content-type": "application/json", + }, + body: JSON.stringify({ name }), + }); + if (!response.ok) { + logger.warn("discordbot_thread_rename_failed", { + status: response.status, + thread_id: threadId, + }); + } + } catch (error) { + logger.warn("discordbot_thread_rename_error", { + error: error instanceof Error ? error.message : String(error), + thread_id: threadId, + }); + } +} + +function clipOneLine(value: string, max: number): string { + const oneLine = value.replace(/\s+/g, " ").trim(); + if (oneLine.length <= max) return oneLine; + return `${oneLine.slice(0, Math.max(0, max - 1)).trimEnd()}…`; +} + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} diff --git a/services/discordbot/src/gateway.ts b/services/discordbot/src/gateway.ts new file mode 100644 index 00000000..0bf77c85 --- /dev/null +++ b/services/discordbot/src/gateway.ts @@ -0,0 +1,81 @@ +import type { Chat, Logger } from "chat"; +import type { GatewayCapableAdapter } from "./types"; + +/** + * One year. `startGatewayListener` treats `durationMs` as a self-destruct timer; within that + * window discord.js maintains a single Gateway session with native RESUME, so a very large value + * gives us one long-lived connection rather than a re-IDENTIFY loop (which would burn the + * 1000/24h IDENTIFY budget). If the connection ends before this elapses it's a fatal/login error + * and we let the process exit so Kubernetes restarts the pod. + */ +const LONG_RUNNING_MS = 365 * 24 * 60 * 60 * 1000; + +export type GatewayController = { + /** True once the listener has started and the connection has not ended. */ + isActive(): boolean; + /** Initialize the chat instance and open the single long-lived Gateway connection. 
/**
 * Create the controller that owns the single long-lived Discord Gateway connection.
 *
 * - `start` initializes the chat instance, starts the adapter's Gateway listener with
 *   `LONG_RUNNING_MS` as its duration, and installs a monitor on the promises the adapter
 *   registered through `waitUntil`.
 * - `isActive` is true from the moment the listener has started until the monitor settles.
 * - `shutdown` aborts the listener and waits for the monitor to settle.
 *
 * When the monitor settles without `shutdown` having been called, the connection is
 * considered to have ended unexpectedly and `onFatalEnd` runs (default: `process.exit(1)`).
 *
 * @param deps Logger plus an optional `onFatalEnd` override for tests.
 */
export function createGatewayController(
  deps: GatewayControllerDeps,
): GatewayController {
  const { logger } = deps;
  const onFatalEnd = deps.onFatalEnd ?? (() => process.exit(1));
  const abort = new AbortController();
  let active = false;
  let shuttingDown = false;
  let monitor: Promise<void> | undefined;

  return {
    isActive: () => active,

    async start(chat, adapter) {
      // Adapters initialize lazily (normally on the first webhook). Direct-mode Gateway
      // processing needs the adapter wired to the chat instance up front.
      await chat.initialize();

      // Each tracked promise is wrapped so a rejection cannot reject the monitor.
      const tracked: Array<Promise<unknown>> = [];
      // Direct mode: no webhookUrl, so MessageCreate is dispatched through Chat in-process.
      await adapter.startGatewayListener(
        {
          waitUntil: (promise) =>
            tracked.push(Promise.resolve(promise).catch(() => undefined)),
        },
        LONG_RUNNING_MS,
        abort.signal,
        undefined,
      );
      active = true;
      logger.info("discordbot_gateway_started");

      // NOTE(review): `Promise.all` snapshots `tracked` here, once. Promises registered
      // through `waitUntil` after this point are not monitored, and if the adapter
      // registered none the monitor settles immediately and takes the fatal-end path.
      // Confirm the adapter registers its listener promise before the call above resolves.
      monitor = Promise.all(tracked)
        .then(() => undefined)
        .finally(() => {
          active = false;
          if (shuttingDown) {
            logger.info("discordbot_gateway_stopped");
            return;
          }
          // A single long-lived connection ended on its own — almost always a fatal error
          // (invalid token / disallowed intents). Exit so k8s restarts with backoff.
          logger.error("discordbot_gateway_ended_unexpectedly");
          onFatalEnd();
        });
    },

    async shutdown() {
      // Set the flag before aborting so the monitor logs a clean stop, not a fatal end.
      shuttingDown = true;
      abort.abort();
      if (monitor) await monitor;
    },
  };
}
/**
 * Assemble the Discord bot: the Discord adapter, the chat instance with its mention and
 * subscribed-message handlers, and a Hono app exposing `/health`.
 *
 * New mentions subscribe to the thread and are forwarded in "execute" mode. Messages in
 * already-subscribed threads are forwarded in "execute" mode when they mention the bot and
 * in "append" mode otherwise. Messages rejected by `isAllowedDiscordMessage` are ignored.
 * Unless `recoverRenderObligationsOnStart` is `false`, render-obligation recovery is
 * scheduled before returning.
 *
 * @param options Bot configuration; `userName` defaults to "centaur", `logger` to a no-op
 *   logger and `state` to the default state from `createDefaultState`.
 * @returns The HTTP app, the chat instance and the Discord adapter.
 */
export function createDiscordbot(options: DiscordbotOptions): Discordbot {
  const logger = options.logger ?? noopLogger;
  const userName = options.userName ?? "centaur";

  const allowlistMissing = isGuildAllowlistEmpty(options);
  if (allowlistMissing) {
    logger.warn("discordbot_guild_allowlist_empty_inert", {
      hint: "Set DISCORDBOT_GUILD_ALLOWLIST; the bot ignores all messages until configured.",
    });
  }

  const discord = createDiscordAdapter({
    apiUrl: options.discordApiUrl,
    applicationId: options.applicationId,
    botToken: options.botToken,
    publicKey: options.publicKey,
    mentionRoleIds: options.mentionRoleIds,
    userName,
    logger,
  });
  const state = options.state ?? createDefaultState(options, logger);
  const chat = new Chat<{ discord: typeof discord }, DiscordbotThreadState>({
    userName,
    adapters: { discord },
    state,
    // Handlers are serialized per thread through the SDK's per-thread lock. With the
    // deprecated `onLockConflict: 'force'` the lock was force-released, so two handlers could
    // run at once on one thread and two near-simultaneous mentions could both pass the
    // `activeExecution` check and double-execute. `'drop'` keeps the lock instead: a message
    // arriving while a handler holds it is dropped rather than run in parallel. Without
    // contention the code path is the same as before, so single-message streaming is
    // unchanged.
    concurrency: "drop",
    logger,
  });

  chat.onNewMention(async (thread, message) => {
    const allowed = isAllowedDiscordMessage(message, options, logger);
    if (!allowed) return;
    await thread.subscribe();
    await syncThreadMessageToSession(thread, message, {
      mode: "execute",
      options,
      state,
    });
  });

  chat.onSubscribedMessage(async (thread, message) => {
    const allowed = isAllowedDiscordMessage(message, options, logger);
    if (!allowed) return;
    const mode = message.isMention === true ? "execute" : "append";
    await syncThreadMessageToSession(thread, message, { mode, options, state });
  });

  const app = new Hono();
  app.get("/health", (c) => {
    // Without an `isGatewayActive` probe the service reports itself healthy.
    let gatewayActive = true;
    if (options.isGatewayActive) {
      gatewayActive = options.isGatewayActive();
    }
    const status = gatewayActive ? 200 : 503;
    return c.json(
      { ok: gatewayActive, service: "discordbot", gateway: gatewayActive },
      status,
    );
  });

  if (options.recoverRenderObligationsOnStart !== false) {
    scheduleRenderObligationRecovery(chat, state, options);
  }

  return { app, chat, adapter: discord };
}
[]); + const shouldStartExecution = + input.mode === "execute" && + state.activeExecution !== true && + !executedMessageIds.has(message.id); + const shouldIncludeContext = + shouldStartExecution && state.historyForwarded !== true; + const isDuplicateIncrementalMessage = + messageIds.has(message.id) && + !shouldStartExecution && + !shouldIncludeContext; + const trace: DiscordbotTrace = { + includeContext: shouldIncludeContext, + messageId: message.id, + mode: input.mode, + openStream: shouldStartExecution, + startedAtMs: traceStartedAtMs, + threadId: thread.id, + }; + if (isDuplicateIncrementalMessage) { + traceLog(input.options, "discordbot_forward_duplicate_skipped", trace); + return; + } + traceLog(input.options, "discordbot_forward_started", trace, { + active_execution: state.activeExecution === true, + history_forwarded: state.historyForwarded === true, + }); + + const serializeStartedAtMs = nowMs(); + const serializedMessage = await serializeMessage(message); + traceLog(input.options, "discordbot_forward_message_serialized", trace, { + attachment_count: serializedMessage.attachments.length, + phase_ms: elapsedMs(serializeStartedAtMs), + }); + let context: DiscordbotApiMessage[] | undefined; + + if (shouldIncludeContext && !state.historyForwarded) { + const contextStartedAtMs = nowMs(); + context = await collectInitialContext(thread, message); + traceLog(input.options, "discordbot_forward_context_collected", trace, { + message_count: context.length, + phase_ms: elapsedMs(contextStartedAtMs), + }); + } else { + traceLog(input.options, "discordbot_forward_context_skipped", trace, { + message_count: 1, + }); + } + + let lastEventId = state.lastEventId ?? 0; + const candidateMessages = context ?? [serializedMessage]; + const messagesToAppend = candidateMessages.filter( + (item) => !messageIds.has(item.id), + ); + + const forwardInput: ForwardSessionInput = { + afterEventId: lastEventId, + executeMessage: shouldStartExecution ? 
serializedMessage : undefined, + messages: messagesToAppend, + onEventId: (eventId) => { + lastEventId = Math.max(lastEventId, eventId); + }, + openStream: false, + threadId: thread.id, + trace, + }; + + const commitMessagesAppended = async (): Promise => { + const latest = (await thread.state) ?? {}; + const latestMessageIds = new Set(latest.forwardedMessageIds ?? []); + for (const item of messagesToAppend) latestMessageIds.add(item.id); + await thread.setState({ + forwardedMessageIds: Array.from(latestMessageIds).slice(-1000), + historyForwarded: latest.historyForwarded || shouldIncludeContext, + lastEventId, + }); + traceLog(input.options, "discordbot_forward_messages_committed", trace, { + appended_message_count: messagesToAppend.length, + forwarded_message_count: Math.min(latestMessageIds.size, 1000), + }); + }; + + const commitExecutionStarted = async ( + execution: DiscordbotExecuteSessionResponse, + ): Promise => { + const latest = (await thread.state) ?? {}; + const latestExecutedMessageIds = new Set(latest.executedMessageIds ?? 
[]); + latestExecutedMessageIds.add(serializedMessage.id); + await thread.setState({ + activeExecution: true, + executedMessageIds: Array.from(latestExecutedMessageIds).slice(-1000), + lastEventId, + renderObligation: { + afterEventId: lastEventId, + executionId: execution.execution_id, + message: serializedMessage, + }, + }); + await indexRenderObligation(input.state, { + options: input.options, + threadId: thread.id, + trace, + }); + traceLog(input.options, "discordbot_forward_execution_committed", trace, { + execution_id: execution.execution_id, + executed_message_count: Math.min(latestExecutedMessageIds.size, 1000), + }); + }; + + if (!shouldStartExecution) { + if (messagesToAppend.length > 0) { + await forwardToSessionApi(input.options, forwardInput, { + onMessagesAppended: commitMessagesAppended, + }); + } + traceLog(input.options, "discordbot_forward_complete", trace); + return; + } + + try { + await thread.setState({ activeExecution: true }); + traceLog( + input.options, + "discordbot_forward_active_execution_marked", + trace, + ); + await forwardToSessionApi(input.options, forwardInput, { + onExecutionStarted: commitExecutionStarted, + onMessagesAppended: commitMessagesAppended, + }); + scheduleExecutionRender( + thread, + serializedMessage, + input.options, + forwardInput, + () => lastEventId, + shouldIncludeContext, + trace, + ); + traceLog(input.options, "discordbot_forward_complete", trace, { + last_event_id: lastEventId, + }); + } catch (error) { + const latest = (await thread.state) ?? {}; + await thread.setState({ + activeExecution: false, + lastEventId: Math.max(latest.lastEventId ?? 0, lastEventId), + }); + // Discord ingress arrives via the Gateway, so there is no webhook retry to request + // (slackbotv2 answers 503 to make Slack re-deliver). Surface the failure in-thread instead. 
/**
 * Start rendering an execution's event stream in the background and keep retrying until
 * an attempt reports "complete".
 *
 * Each attempt is delegated to `renderExecutionAttempt` with the same arguments. After a
 * "retry" result the loop waits `renderRetryDelayMs(n)` (n = number of retries so far) and
 * tries again. An attempt that throws ends the loop; the rejection is handed to
 * `backgroundWaitUntil`, so this function itself never throws and returns immediately.
 *
 * @param thread Thread the execution is rendered into.
 * @param message Serialized message that triggered the execution.
 * @param options Bot options (used for tracing and passed to each attempt).
 * @param input Forward input describing which session stream to open.
 * @param getLastEventId Accessor for the newest event id seen so far.
 * @param isInitialExecution Passed unchanged to every attempt, including retries.
 * @param trace Optional trace context for log correlation.
 */
function scheduleExecutionRender(
  thread: Thread<DiscordbotThreadState>,
  message: DiscordbotApiMessage,
  options: DiscordbotOptions,
  input: ForwardSessionInput,
  getLastEventId: () => number,
  isInitialExecution: boolean,
  trace?: DiscordbotTrace,
): void {
  const renderUntilComplete = async (): Promise<void> => {
    for (let retries = 0; ; retries += 1) {
      const outcome = await renderExecutionAttempt(
        thread,
        message,
        options,
        input,
        getLastEventId,
        isInitialExecution,
        trace,
      );
      if (outcome === "complete") return;

      // The delay is derived from the retries already made; the log reports the
      // 1-based number of the retry that is about to run.
      const delayMs = renderRetryDelayMs(retries);
      traceLog(options, "discordbot_render_retry_scheduled", trace, {
        retry_delay_ms: delayMs,
        retry_attempt: retries + 1,
      });
      await sleep(delayMs);
    }
  };
  backgroundWaitUntil(renderUntilComplete());
}
/**
 * Scan the render-obligation index once and try to finish every render that is still
 * outstanding.
 *
 * For each distinct indexed thread id that still carries a `renderObligation`, a lease is
 * taken with `setIfNotExists` so that only one caller works on the thread at a time;
 * threads whose lease is already held are skipped. The lease is released afterwards only
 * if it still holds this caller's token.
 *
 * A failure while recovering one thread is logged and does not stop the scan: the
 * remaining threads are still processed, and the failed thread keeps whatever obligation
 * state `recoverRenderObligation` left behind.
 *
 * @param chat Chat instance used to resolve threads by id.
 * @param state State adapter holding the obligation index and the leases.
 * @param options Bot options (tracing and session API access).
 * @returns Number of threads whose recovery was deferred and should be retried.
 */
async function recoverRenderObligations(
  chat: Chat<Record<string, Adapter>, DiscordbotThreadState>,
  state: StateAdapter,
  options: DiscordbotOptions,
): Promise<number> {
  const startedAtMs = nowMs();
  await chat.initialize();
  const indexedThreadIds = await state.getList<string>(
    RENDER_OBLIGATION_INDEX_KEY,
  );
  // The index is append-only, so a thread can be listed more than once.
  const threadIds = Array.from(new Set(indexedThreadIds));
  let deferredCount = 0;
  traceLog(options, "discordbot_render_recovery_scan", undefined, {
    obligation_count: threadIds.length,
    phase_ms: elapsedMs(startedAtMs),
  });

  for (const threadId of threadIds) {
    const thread = chat.thread(threadId);
    const threadState = await thread.state;
    const obligation = threadState?.renderObligation;
    if (!obligation) continue;

    const leaseKey = renderRecoveryLeaseKey(threadId);
    const leaseToken = randomUUID();
    // NOTE(review): the lease expires after RENDER_RECOVERY_LEASE_TTL_MS and is not
    // renewed here; a render that outlives it could be picked up a second time.
    const leaseAcquired = await state.setIfNotExists(
      leaseKey,
      leaseToken,
      RENDER_RECOVERY_LEASE_TTL_MS,
    );
    if (!leaseAcquired) {
      traceLog(options, "discordbot_render_recovery_lease_skipped", undefined, {
        thread_id: threadId,
      });
      continue;
    }

    try {
      const deferred = await recoverRenderObligation(
        chat,
        state,
        options,
        threadId,
        obligation,
      );
      if (deferred) deferredCount += 1;
    } catch (error) {
      // Keep one broken thread from aborting recovery for every thread after it.
      traceLog(options, "discordbot_render_recovery_thread_failed", undefined, {
        error: errorMessage(error),
        thread_id: threadId,
      });
    } finally {
      // Only release a lease we still own; after expiry it may belong to someone else.
      const activeLeaseToken = await state.get<string>(leaseKey);
      if (activeLeaseToken === leaseToken) {
        await state.delete(leaseKey);
      }
    }
  }
  return deferredCount;
}
0, + obligation.afterEventId, + ); + const input: ForwardSessionInput = { + afterEventId: lastEventId, + messages: [], + onEventId: (eventId) => { + lastEventId = Math.max(lastEventId, eventId); + }, + openStream: false, + threadId, + trace, + }; + + let openedStream: AsyncIterable; + try { + openedStream = await openSessionEventStream(options, input); + } catch (error) { + const retryable = isRetryableSessionApiError(error); + traceLog(options, "discordbot_render_recovery_deferred", trace, { + error: errorMessage(error), + last_event_id: lastEventId, + retryable, + }); + if (retryable) return true; + await renderRecoveredExecutionStream( + thread, + streamError(error), + obligation.message, + options, + trace, + ); + await thread.setState({ + activeExecution: false, + lastEventId, + renderObligation: null, + }); + return false; + } + + let rendered = false; + try { + await thread.setState({ + activeExecution: true, + lastEventId, + }); + await renderRecoveredExecutionStream( + thread, + streamOpenedSession(input, openedStream), + obligation.message, + options, + trace, + ); + rendered = true; + traceLog(options, "discordbot_render_recovery_complete", trace); + } catch (error) { + traceLog(options, "discordbot_render_recovery_render_failed", trace, { + error: errorMessage(error), + }); + throw error; + } finally { + const latest = (await thread.state) ?? {}; + await thread.setState({ + activeExecution: false, + lastEventId: Math.max(latest.lastEventId ?? 0, lastEventId), + ...(rendered ? 
/**
 * Render a stream of session events into the thread as one streaming post.
 *
 * On an initial execution (and unless `options.nameThreads` is `false`) the thread is
 * first renamed from the triggering message. A typing keepalive runs for as long as the
 * post is streaming and is always stopped, even when posting throws.
 *
 * @param thread Thread to post into.
 * @param stream Session events to render.
 * @param message Serialized triggering message; its text is the source of the thread name.
 * @param options Bot options (logger, naming switch, renderer mapper).
 * @param isInitialExecution Whether this render may rename the thread.
 * @param trace Optional trace context for log correlation.
 * @throws Whatever `thread.post` or the underlying stream throws.
 */
async function renderExecutionStream(
  thread: Thread<DiscordbotThreadState>,
  stream: AsyncIterable<DiscordbotRendererSource>,
  message: DiscordbotApiMessage,
  options: DiscordbotOptions,
  isInitialExecution: boolean,
  trace?: DiscordbotTrace,
): Promise<void> {
  const logger = options.logger ?? noopLogger;
  if (isInitialExecution && options.nameThreads !== false) {
    // NOTE(review): the rename is awaited, so streaming does not start until the rename
    // request has settled — confirm that latency is acceptable for a cosmetic step.
    await renameThreadFromMessage(
      options,
      thread.id,
      deriveThreadName(message.text, options.userName),
      logger,
    );
    traceLog(options, "discordbot_thread_named", trace);
  }

  const stopTyping = startTypingKeepalive(thread, logger);
  try {
    await thread.post(
      new StreamingPlan(
        codexAppServerToChatSdkStream(stream, rendererOptions(options)),
        {},
      ),
    );
  } finally {
    stopTyping();
  }
}
/**
 * Open the session event stream after the execute handoff and yield its events.
 *
 * Opening is attempted when iteration starts:
 * - a retryable failure is logged and rethrown so the caller can schedule a retry;
 * - any other failure is logged, yielded as a single stream-error event, and ends the
 *   stream normally.
 *
 * On success a synthetic "starting" notification is yielded first, followed by every
 * event from the opened stream in order.
 *
 * @param options Bot options (session API access and tracing).
 * @param input Forward input naming the thread and the event id to resume after.
 */
async function* streamSessionAfterHandoff(
  options: DiscordbotOptions,
  input: ForwardSessionInput,
): AsyncIterable<DiscordbotRendererSource> {
  let stream: AsyncIterable<DiscordbotRendererSource>;
  try {
    stream = await openSessionEventStream(options, input);
  } catch (error) {
    traceLog(options, "discordbot_forward_failed", input.trace, {
      error: errorMessage(error),
    });
    if (isRetryableSessionApiError(error)) throw error;
    // Non-retryable: surface the failure in the rendered output instead of throwing.
    yield sessionStreamError(error);
    return;
  }

  // Emitted before any real event so the renderer has something to show immediately.
  yield startingStreamNotification(input.threadId);
  traceLog(options, "discordbot_stream_heartbeat_emitted", input.trace);
  for await (const event of stream) yield event;
}
/**
 * Keep the typing indicator alive while a stream is open.
 *
 * The indicator is fired once immediately and then every `TYPING_KEEPALIVE_MS`. Failures
 * are logged at debug level and otherwise ignored, since typing is cosmetic. Adapters
 * without `startTyping` get a no-op.
 *
 * @param thread Thread whose adapter should show the typing indicator.
 * @param logger Logger for swallowed typing errors.
 * @returns A function that stops the keepalive.
 */
function startTypingKeepalive(thread: Thread, logger: Logger): () => void {
  const typingAdapter = thread.adapter as TypingCapableAdapter;
  if (!typingAdapter.startTyping) {
    return () => undefined;
  }

  function sendTyping(): void {
    void typingAdapter.startTyping?.(thread.id).catch((error) => {
      const reason = error instanceof Error ? error.message : String(error);
      logger.debug("discordbot_typing_error", { error: reason });
    });
  }

  sendTyping();
  const timer = globalThis.setInterval(sendTyping, TYPING_KEEPALIVE_MS);
  return () => {
    globalThis.clearInterval(timer);
  };
}
/**
 * Graceful shutdown for SIGTERM / SIGINT.
 *
 * Order matters: the Gateway is stopped first so no new work arrives, then the chat
 * instance is shut down (errors ignored), then the HTTP server is stopped, and finally
 * the process exits with code 0.
 *
 * @param signal Name of the signal that triggered the shutdown; used only for logging.
 */
const shutdown = async (signal: string): Promise<void> => {
  log("info", "discordbot_shutdown_started", { signal });
  await gateway.shutdown();
  // Best effort: a failing chat shutdown must not keep the process alive.
  await chat.shutdown().catch(() => undefined);
  server.stop();
  log("info", "discordbot_shutdown_complete", { signal });
  process.exit(0);
};
/**
 * Read an optional positive-integer environment variable.
 *
 * @param name Environment variable name.
 * @returns The parsed value, or `undefined` when the variable is unset or blank.
 * @throws Error when the value is not a plain positive decimal integer.
 */
function optionalNumberEnv(name: string): number | undefined {
  const value = optionalEnv(name);
  if (!value) return undefined;
  // `Number.parseInt` stops at the first character it cannot use, so "30s", "1.5" and
  // "1e3" would be accepted as 30, 1 and 1. Require the whole value to be digits
  // (an explicit leading "+" is still allowed) before parsing.
  const parsed = /^\+?\d+$/.test(value)
    ? Number.parseInt(value, 10)
    : Number.NaN;
  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
    throw new Error(`${name} must be a positive integer`);
  }
  return parsed;
}
/**
 * Error raised when a Centaur session API call returns a failure response.
 *
 * The message contains only the action, status code and status text. The response body is
 * kept on the `body` property but deliberately left out of the message, because the
 * message is shown verbatim in the user-facing Discord thread and api-rs error bodies can
 * carry internals.
 */
export class SessionApiError extends Error {
  /** Name of the session API action that failed. */
  readonly action: string;
  /** Raw response body; never included in `message`. */
  readonly body: string;
  /** Whether the caller may retry the request. */
  readonly retryable: boolean;
  /** HTTP status code of the failed response. */
  readonly status: number;
  /** HTTP status text of the failed response. */
  readonly statusText: string;

  constructor(input: {
    action: string;
    body: string;
    retryable: boolean;
    status: number;
    statusText: string;
  }) {
    super(
      `Centaur session ${input.action} failed: ${input.status} ${input.statusText}`,
    );
    const { action, body, retryable, status, statusText } = input;
    this.name = "SessionApiError";
    this.action = action;
    this.body = body;
    this.retryable = retryable;
    this.status = status;
    this.statusText = statusText;
  }
}
async function forwardToSessionApi( + options: DiscordbotOptions, + input: ForwardSessionInput, + callbacks: ForwardSessionApiCallbacks = {}, +): Promise | null> { + const createStartedAtMs = nowMs(); + await createSession(options, input.threadId); + traceLog(options, "discordbot_session_create_complete", input.trace, { + phase_ms: elapsedMs(createStartedAtMs), + }); + if (input.messages.length > 0) { + const appendStartedAtMs = nowMs(); + await appendSessionMessages(options, input.threadId, input.messages); + traceLog(options, "discordbot_session_append_complete", input.trace, { + message_count: input.messages.length, + phase_ms: elapsedMs(appendStartedAtMs), + }); + await callbacks.onMessagesAppended?.(); + } else { + traceLog(options, "discordbot_session_append_skipped", input.trace, { + message_count: 0, + }); + } + if (!input.executeMessage) return null; + + const executeStartedAtMs = nowMs(); + const execution = await executeSession( + options, + input.threadId, + input.executeMessage, + ); + traceLog(options, "discordbot_session_execute_complete", input.trace, { + execution_id: execution.execution_id, + phase_ms: elapsedMs(executeStartedAtMs), + }); + await callbacks.onExecutionStarted?.(execution); + if (!input.openStream) return null; + + return openSessionEventStream(options, input); +} + +export async function openSessionEventStream( + options: DiscordbotOptions, + input: Pick< + ForwardSessionInput, + "afterEventId" | "onEventId" | "threadId" | "trace" + >, +): Promise> { + const streamStartedAtMs = nowMs(); + const stream = await streamSessionNotifications( + options, + input.threadId, + input.afterEventId, + input.onEventId, + ); + traceLog(options, "discordbot_session_events_opened", input.trace, { + after_event_id: input.afterEventId, + phase_ms: elapsedMs(streamStartedAtMs), + }); + return stream; +} + +export function startingStreamNotification(threadId: string): JsonObject { + return { + method: "item/started", + params: { + threadId, + turnId: 
"discordbot-starting-turn", + startedAtMs: Date.now(), + item: { + id: "discordbot-starting", + memoryCitation: null, + phase: "commentary", + text: "", + type: "agentMessage", + }, + }, + }; +} + +export function sessionStreamError(error: unknown): RustSessionStreamEvent { + return { + data: { error: error instanceof Error ? error.message : String(error) }, + event: "session.stream_error", + eventKind: "session.stream_error", + }; +} + +async function serializeAttachment( + attachment: Attachment, +): Promise { + const serialized: DiscordbotApiAttachment = { + fetchMetadata: attachment.fetchMetadata, + height: attachment.height, + mimeType: attachment.mimeType, + name: attachment.name, + size: attachment.size, + type: attachment.type, + url: attachment.url, + width: attachment.width, + }; + + try { + const data = attachment.data ?? (await attachment.fetchData?.()); + if (data) { + serialized.dataBase64 = await bytesToBase64(data); + } + } catch (error) { + serialized.fetchError = + error instanceof Error ? error.message : String(error); + } + + return serialized; +} + +async function bytesToBase64(data: Buffer | Blob): Promise { + if (Buffer.isBuffer(data)) return data.toString("base64"); + const bytes = await data.arrayBuffer(); + return Buffer.from(bytes).toString("base64"); +} + +async function createSession( + options: DiscordbotOptions, + threadId: string, +): Promise { + const fetchFn = options.fetch ?? 
fetch; + const body: DiscordbotCreateSessionRequest = { + harness_type: "codex", + metadata: { + source: "discordbot", + platform: "discord", + thread_id: threadId, + }, + }; + const response = await fetchFn(apiSessionUrl(options.apiUrl, threadId), { + method: "POST", + headers: apiHeaders(options), + body: JSON.stringify(body), + }); + await ensureApiOk(response, "create session", options); +} + +async function appendSessionMessages( + options: DiscordbotOptions, + threadId: string, + messages: DiscordbotApiMessage[], +): Promise { + const fetchFn = options.fetch ?? fetch; + const body: DiscordbotAppendMessagesRequest = { + messages: messages.map(toSessionMessage), + }; + const response = await fetchFn( + apiSessionUrl(options.apiUrl, threadId, "messages"), + { + method: "POST", + headers: apiHeaders(options), + body: JSON.stringify(body), + }, + ); + await ensureApiOk(response, "append session messages", options); +} + +async function executeSession( + options: DiscordbotOptions, + threadId: string, + message: DiscordbotApiMessage, +): Promise { + const fetchFn = options.fetch ?? fetch; + const body: DiscordbotExecuteSessionRequest = { + idempotency_key: message.id, + metadata: sessionMetadata(message, { action: "execute" }), + input_lines: [toCodexInputLine(message, threadId)], + ...(options.idleTimeoutMs === undefined + ? {} + : { idle_timeout_ms: options.idleTimeoutMs }), + ...(options.maxDurationMs === undefined + ? 
{} + : { max_duration_ms: options.maxDurationMs }), + }; + const response = await fetchFn( + apiSessionUrl(options.apiUrl, threadId, "execute"), + { + method: "POST", + headers: apiHeaders(options), + body: JSON.stringify(body), + }, + ); + await ensureApiOk(response, "execute session", options); + return (await response.json()) as DiscordbotExecuteSessionResponse; +} + +async function ensureApiOk( + response: Response, + action: string, + options: DiscordbotOptions, +): Promise { + if (response.ok) return; + let body = ""; + try { + body = await response.text(); + } catch { + body = ""; + } + // api-rs is internal and unauthenticated; its error bodies can carry stack traces, internal + // hostnames, or echoed payloads. Log the full body server-side, but the thrown message stays + // generic — it is surfaced verbatim into the user-facing Discord thread via sessionStreamError. + if (body) { + (options.logger ?? noopLogger).warn("discordbot_session_api_error", { + action, + status: response.status, + status_text: response.statusText, + body, + }); + } + throw new SessionApiError({ + action, + body, + retryable: isRetryableApiStatus(response.status), + status: response.status, + statusText: response.statusText, + }); +} + +function isRetryableApiStatus(status: number): boolean { + return status === 408 || status === 425 || status === 429 || status >= 500; +} + +async function streamSessionNotifications( + options: DiscordbotOptions, + threadId: string, + afterEventId: number, + onEventId: (eventId: number) => void, +): Promise> { + const fetchFn = options.fetch ?? 
fetch; + const response = await fetchFn( + `${apiSessionUrl(options.apiUrl, threadId, "events")}?after_event_id=${afterEventId}`, + { + method: "GET", + headers: apiHeaders(options, false), + }, + ); + await ensureApiOk(response, "stream events", options); + if (!response.body) return toAsyncIterable([]); + return parseSessionEventStream(response.body, onEventId); +} + +function apiSessionUrl( + apiUrl: string, + threadId: string, + suffix?: "messages" | "execute" | "events", +): string { + const path = `/api/session/${encodeURIComponent(threadId)}${suffix ? `/${suffix}` : ""}`; + return new URL(path, ensureTrailingSlash(apiUrl)).toString(); +} + +function ensureTrailingSlash(value: string): string { + return value.endsWith("/") ? value : `${value}/`; +} + +function apiHeaders(options: DiscordbotOptions, jsonBody = true): HeadersInit { + const apiKey = + options.apiKey ?? + process.env.DISCORDBOT_API_KEY ?? + process.env.CENTAUR_API_KEY; + return { + ...(jsonBody ? { "content-type": "application/json" } : {}), + ...(apiKey ? { authorization: `Bearer ${apiKey}` } : {}), + }; +} + +function toSessionMessage( + message: DiscordbotApiMessage, +): DiscordbotSessionMessage { + return { + client_message_id: message.id, + role: message.author.isMe ? "assistant" : "user", + parts: sessionMessageParts(message), + metadata: sessionMetadata(message), + }; +} + +function sessionMessageParts(message: DiscordbotApiMessage): JsonValue[] { + const parts: JsonValue[] = []; + if (message.text.trim()) { + parts.push({ type: "text", text: message.text }); + } + for (const attachment of message.attachments) { + parts.push({ + ...attachment, + attachment_type: attachment.type, + type: "attachment", + }); + } + return parts.length > 0 ? 
parts : [{ type: "text", text: "" }]; +} + +function sessionMetadata( + message: DiscordbotApiMessage, + extra: JsonObject = {}, +): JsonObject { + return { + source: "discordbot", + platform: "discord", + message_id: message.id, + thread_id: message.threadId, + is_mention: message.isMention, + timestamp: message.timestamp, + user_id: message.author.userId, + user_name: message.author.userName, + ...extra, + }; +} + +function toCodexInputLine( + message: DiscordbotApiMessage, + threadId: string, +): string { + return JSON.stringify({ + type: "user", + thread_key: threadId, + trace_metadata: sessionMetadata(message, { action: "execute" }), + message: { + role: "user", + content: codexInputContent(message), + }, + }); +} + +function codexInputContent(message: DiscordbotApiMessage): JsonValue[] { + const content: JsonValue[] = []; + if (message.text.trim()) { + content.push({ type: "text", text: message.text }); + } + for (const attachment of message.attachments) { + content.push(codexAttachmentInput(attachment)); + } + return content.length > 0 ? content : [{ type: "text", text: "continue" }]; +} + +function codexAttachmentInput(attachment: DiscordbotApiAttachment): JsonValue { + const dataUrl = + attachment.dataBase64 && attachment.mimeType + ? `data:${attachment.mimeType};base64,${attachment.dataBase64}` + : undefined; + if (attachment.type === "image" && (dataUrl || attachment.url)) { + return { + type: "image", + url: dataUrl ?? attachment.url, + detail: "auto", + name: attachment.name, + }; + } + return { + type: "text", + text: attachmentDescription(attachment), + }; +} + +function attachmentDescription(attachment: DiscordbotApiAttachment): string { + const fields = [ + `name=${attachment.name ?? "attachment"}`, + `type=${attachment.type}`, + attachment.mimeType ? `mime=${attachment.mimeType}` : undefined, + attachment.url ? `url=${attachment.url}` : undefined, + attachment.dataBase64 ? `base64=${attachment.dataBase64}` : undefined, + attachment.fetchError ? 
`fetch_error=${attachment.fetchError}` : undefined, + ].filter(Boolean); + return `[Discord attachment: ${fields.join(" ")}]`; +} + +type ParsedSessionEvent = { + data: string; + event?: string; + id?: number; +}; + +async function* parseSessionEventStream( + stream: ReadableStream, + onEventId: (eventId: number) => void, +): AsyncIterable { + for await (const event of parseSseEvents(stream)) { + if (typeof event.id === "number") onEventId(event.id); + if (event.event === "session.output.line") { + yield { + data: event.data, + event: event.event, + eventId: event.id, + eventKind: event.event, + } satisfies RustSessionStreamEvent; + if (isTerminalCodexOutputLine(event.data)) return; + continue; + } + if ( + event.event === "session.execution_failed" || + event.event === "session.stream_error" + ) { + yield { + data: { error: sessionErrorMessage(event) }, + event: event.event, + eventId: event.id, + eventKind: event.event, + } satisfies RustSessionStreamEvent; + return; + } + if (event.event === "session.execution_cancelled") { + yield { + data: { error: sessionErrorMessage(event, "Execution cancelled") }, + event: event.event, + eventId: event.id, + eventKind: event.event, + } satisfies RustSessionStreamEvent; + return; + } + if (event.event === "session.execution_completed") { + yield { + data: sessionEventData(event), + event: event.event, + eventId: event.id, + eventKind: event.event, + } satisfies RustSessionStreamEvent; + return; + } + } +} + +async function* parseSseEvents( + stream: ReadableStream, +): AsyncIterable { + const reader = stream.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + let eventName: string | undefined; + let eventId: number | undefined; + let data: string[] = []; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split(/\r?\n/); + buffer = lines.pop() ?? 
""; + + for (const line of lines) { + const emitted = parseSseLine(line, { data, eventId, eventName }); + data = emitted.state.data; + eventId = emitted.state.eventId; + eventName = emitted.state.eventName; + if (emitted.event) yield emitted.event; + } + } + + buffer += decoder.decode(); + if (buffer) { + const emitted = parseSseLine(buffer, { data, eventId, eventName }); + data = emitted.state.data; + eventId = emitted.state.eventId; + eventName = emitted.state.eventName; + if (emitted.event) yield emitted.event; + } + if (data.length > 0) { + yield { data: data.join("\n"), event: eventName, id: eventId }; + } +} + +function parseSseLine( + line: string, + state: { + data: string[]; + eventId?: number; + eventName?: string; + }, +): { + event?: ParsedSessionEvent; + state: { data: string[]; eventId?: number; eventName?: string }; +} { + if (!line.trim()) { + const event = + state.data.length > 0 + ? { + data: state.data.join("\n"), + event: state.eventName, + id: state.eventId, + } + : undefined; + return { event, state: { data: [] } }; + } + if (line.startsWith(":")) return { state }; + + const separator = line.indexOf(":"); + const field = separator >= 0 ? line.slice(0, separator) : line; + const value = + separator >= 0 ? line.slice(separator + 1).replace(/^ /, "") : ""; + if (field === "event") return { state: { ...state, eventName: value } }; + if (field === "id") { + const id = Number.parseInt(value, 10); + return { + state: { ...state, eventId: Number.isFinite(id) ? 
id : undefined }, + }; + } + if (field === "data" && value !== "[DONE]") { + return { state: { ...state, data: [...state.data, value] } }; + } + + return { state }; +} + +function isTerminalCodexOutputLine(line: string): boolean { + let payload: unknown; + try { + payload = JSON.parse(line); + } catch { + return true; + } + if (!isJsonObject(payload)) return false; + + return ( + payload.type === "turn.completed" || + payload.type === "turn.failed" || + payload.type === "turn.done" || + payload.method === "error" || + payload.method === "turn/completed" + ); +} + +function sessionEventData(event: ParsedSessionEvent): unknown { + try { + return JSON.parse(event.data); + } catch { + return event.data; + } +} + +function sessionErrorMessage( + event: ParsedSessionEvent, + fallback?: string, +): string { + let message = fallback ?? `${event.event ?? "session error"}`; + try { + const payload = JSON.parse(event.data); + if (isJsonObject(payload)) { + message = + stringValue(payload.error) ?? stringValue(payload.message) ?? 
message; + } + } catch { + if (event.data.trim()) message = event.data.trim(); + } + return message; +} diff --git a/services/discordbot/src/types.ts b/services/discordbot/src/types.ts new file mode 100644 index 00000000..f66cdbc5 --- /dev/null +++ b/services/discordbot/src/types.ts @@ -0,0 +1,166 @@ +import type { RustSessionStreamEvent } from "@centaur/harness-events"; +import type { CodexAppServerToChatStreamOptions } from "@centaur/rendering"; +import type { Attachment, Chat, Logger, StateAdapter } from "chat"; +import type { Hono } from "hono"; + +export type JsonPrimitive = string | number | boolean | null; +export type JsonValue = JsonPrimitive | JsonObject | JsonValue[]; +export type JsonObject = { [key: string]: JsonValue | undefined }; + +export type DiscordbotApiAuthor = { + fullName: string; + isBot: boolean | "unknown"; + isMe: boolean; + userId: string; + userName: string; +}; + +export type DiscordbotApiAttachment = { + dataBase64?: string; + fetchError?: string; + fetchMetadata?: Record; + height?: number; + mimeType?: string; + name?: string; + size?: number; + type: Attachment["type"]; + url?: string; + width?: number; +}; + +export type DiscordbotApiMessage = { + attachments: DiscordbotApiAttachment[]; + author: DiscordbotApiAuthor; + id: string; + isMention: boolean; + raw: unknown; + text: string; + threadId: string; + timestamp: string; +}; + +export type DiscordbotSessionMessageRole = + | "user" + | "assistant" + | "system" + | "tool"; + +export type DiscordbotSessionMessage = { + client_message_id?: string; + metadata: JsonObject; + parts: JsonValue[]; + role: DiscordbotSessionMessageRole; +}; + +export type DiscordbotAppendMessagesRequest = { + messages: DiscordbotSessionMessage[]; +}; + +export type DiscordbotCreateSessionRequest = { + harness_type: string; + metadata: JsonObject; +}; + +export type DiscordbotExecuteSessionRequest = { + idempotency_key?: string; + idle_timeout_ms?: number; + input_lines: string[]; + max_duration_ms?: 
number; + metadata: JsonObject; +}; + +export type DiscordbotExecuteSessionResponse = { + execution_id: string; + ok: boolean; + status: string; + thread_key: string; +}; + +export type DiscordbotFetch = ( + input: RequestInfo | URL, + init?: RequestInit, +) => Promise; + +export type DiscordbotOptions = { + apiKey?: string; + apiUrl: string; + applicationId: string; + botToken: string; + discordApiUrl?: string; + fetch?: DiscordbotFetch; + guildAllowlist?: readonly string[]; + idleTimeoutMs?: number; + /** Liveness probe for `/health`; reflects the Gateway connection state. */ + isGatewayActive?: () => boolean; + logger?: Logger; + mapper?: CodexAppServerToChatStreamOptions; + maxDurationMs?: number; + mentionRoleIds?: string[]; + /** Rename auto-created threads to the message-derived title. Defaults to true. */ + nameThreads?: boolean; + postgresUrl?: string; + publicKey: string; + recoverRenderObligationsOnStart?: boolean; + state?: StateAdapter; + stateKeyPrefix?: string; + userName?: string; +}; + +export type Discordbot = { + app: Hono; + chat: Chat; + adapter: GatewayCapableAdapter; +}; + +export type DiscordbotThreadState = { + activeExecution?: boolean; + executedMessageIds?: string[]; + forwardedMessageIds?: string[]; + historyForwarded?: boolean; + lastEventId?: number; + renderObligation?: DiscordbotRenderObligation | null; +}; + +export type DiscordbotRenderObligation = { + afterEventId: number; + executionId: string; + message: DiscordbotApiMessage; +}; + +export type DiscordbotMessageMode = "append" | "execute"; + +export type DiscordbotRendererSource = RustSessionStreamEvent | JsonObject; + +export type DiscordbotTrace = { + includeContext: boolean; + messageId: string; + mode: DiscordbotMessageMode; + openStream: boolean; + startedAtMs: number; + threadId: string; +}; + +export type ForwardSessionInput = { + afterEventId: number; + executeMessage?: DiscordbotApiMessage; + messages: DiscordbotApiMessage[]; + onEventId(eventId: number): void; + 
openStream: boolean; + threadId: string; + trace?: DiscordbotTrace; +}; + +/** Minimal slice of the Discord adapter the Gateway runner needs. */ +export type GatewayCapableAdapter = { + startGatewayListener( + options: { waitUntil(promise: Promise): void }, + durationMs?: number, + abortSignal?: AbortSignal, + webhookUrl?: string, + ): Promise; +}; + +/** Minimal slice of the Discord adapter used to send a typing indicator. */ +export type TypingCapableAdapter = { + startTyping?(threadId: string, status?: string): Promise; +}; diff --git a/services/discordbot/src/utils.ts b/services/discordbot/src/utils.ts new file mode 100644 index 00000000..20d5c8b9 --- /dev/null +++ b/services/discordbot/src/utils.ts @@ -0,0 +1,61 @@ +import type { Logger } from "chat"; +import type { DiscordbotOptions, DiscordbotTrace, JsonObject } from "./types"; + +export const noopLogger: Logger = { + debug: () => undefined, + info: () => undefined, + warn: () => undefined, + error: () => undefined, + child: () => noopLogger, +}; + +export function nowMs(): number { + return globalThis.performance?.now?.() ?? Date.now(); +} + +export function elapsedMs(startedAtMs: number): number { + return Math.max(0, Math.round(nowMs() - startedAtMs)); +} + +export function traceLog( + options: DiscordbotOptions, + event: string, + trace?: DiscordbotTrace, + fields: JsonObject = {}, +): void { + const logger = options.logger ?? noopLogger; + logger.info(event, { + ...(trace + ? { + elapsed_ms: elapsedMs(trace.startedAtMs), + include_context: trace.includeContext, + message_id: trace.messageId, + mode: trace.mode, + open_stream: trace.openStream, + thread_id: trace.threadId, + } + : {}), + ...fields, + }); +} + +export function errorMessage(error: unknown): string { + if (error instanceof Error) return error.message; + return String(error); +} + +export function stringValue(value: unknown): string | undefined { + return typeof value === "string" && value.trim() ? 
value.trim() : undefined; +} + +export function isJsonObject(value: unknown): value is JsonObject { + return Boolean(value && typeof value === "object" && !Array.isArray(value)); +} + +export async function* toAsyncIterable( + source: Iterable, +): AsyncIterable { + for await (const item of source) { + yield item; + } +} diff --git a/services/discordbot/test/discord-allowlist.test.ts b/services/discordbot/test/discord-allowlist.test.ts new file mode 100644 index 00000000..4be3811e --- /dev/null +++ b/services/discordbot/test/discord-allowlist.test.ts @@ -0,0 +1,141 @@ +import { describe, expect, it } from "bun:test"; +import type { Logger, Message } from "chat"; +import { + isAllowedDiscordMessage, + isGuildAllowlistEmpty, + parseDiscordThreadKey, +} from "../src/discord-allowlist"; +import type { DiscordbotOptions } from "../src/types"; + +const silentLogger: Logger = { + debug: () => undefined, + info: () => undefined, + warn: () => undefined, + error: () => undefined, + child: () => silentLogger, +}; + +function message(overrides: { + threadId: string; + isBot?: boolean | "unknown"; + isMe?: boolean; +}): Message { + return { + id: "m1", + threadId: overrides.threadId, + isMention: true, + author: { + isBot: overrides.isBot ?? false, + isMe: overrides.isMe ?? 
false, + userId: "u1", + userName: "alice", + fullName: "Alice", + }, + } as unknown as Message; +} + +function options( + overrides: Partial = {}, +): DiscordbotOptions { + return { + apiUrl: "http://localhost", + applicationId: "app", + botToken: "token", + publicKey: "key", + guildAllowlist: ["G1", "G2"], + ...overrides, + }; +} + +describe("parseDiscordThreadKey", () => { + it("decodes guild/channel/thread", () => { + expect(parseDiscordThreadKey("discord:G1:C1:T1")).toEqual({ + guildId: "G1", + channelId: "C1", + threadId: "T1", + }); + }); + + it("handles missing thread segment", () => { + expect(parseDiscordThreadKey("discord:G1:C1")).toEqual({ + guildId: "G1", + channelId: "C1", + threadId: undefined, + }); + }); + + it("returns empty for non-discord keys", () => { + expect(parseDiscordThreadKey("slack:C1:123")).toEqual({}); + }); +}); + +describe("isAllowedDiscordMessage", () => { + it("allows an allowlisted guild from a human", () => { + const allowed = isAllowedDiscordMessage( + message({ threadId: "discord:G1:C1:T1" }), + options(), + silentLogger, + ); + expect(allowed).toBe(true); + }); + + it("denies DMs (guildId @me)", () => { + expect( + isAllowedDiscordMessage( + message({ threadId: "discord:@me:C1" }), + options(), + silentLogger, + ), + ).toBe(false); + }); + + it("denies a guild not on the allowlist", () => { + expect( + isAllowedDiscordMessage( + message({ threadId: "discord:G9:C1:T1" }), + options(), + silentLogger, + ), + ).toBe(false); + }); + + it("is fail-closed: empty allowlist denies everything", () => { + expect( + isAllowedDiscordMessage( + message({ threadId: "discord:G1:C1:T1" }), + options({ guildAllowlist: [] }), + silentLogger, + ), + ).toBe(false); + }); + + it("denies bot-authored messages", () => { + expect( + isAllowedDiscordMessage( + message({ threadId: "discord:G1:C1:T1", isBot: true }), + options(), + silentLogger, + ), + ).toBe(false); + }); + + it("denies the bot’s own messages", () => { + expect( + 
isAllowedDiscordMessage( + message({ threadId: "discord:G1:C1:T1", isMe: true }), + options(), + silentLogger, + ), + ).toBe(false); + }); +}); + +describe("isGuildAllowlistEmpty", () => { + it("is true when no guilds are configured", () => { + expect(isGuildAllowlistEmpty(options({ guildAllowlist: [] }))).toBe(true); + }); + + it("is false when guilds are configured", () => { + expect(isGuildAllowlistEmpty(options())).toBe(false); + }); +}); diff --git a/services/discordbot/test/discord-threading.test.ts b/services/discordbot/test/discord-threading.test.ts new file mode 100644 index 00000000..02980de6 --- /dev/null +++ b/services/discordbot/test/discord-threading.test.ts @@ -0,0 +1,108 @@ +import { describe, expect, it } from "bun:test"; +import type { Logger } from "chat"; +import { + deriveThreadName, + renameThreadFromMessage, +} from "../src/discord-threading"; +import type { DiscordbotFetch } from "../src/types"; +import type { DiscordbotOptions } from "../src/types"; + +const silentLogger: Logger = { + debug: () => undefined, + info: () => undefined, + warn: () => undefined, + error: () => undefined, + child: () => silentLogger, +}; + +describe("deriveThreadName", () => { + it("strips a leading user mention", () => { + expect( + deriveThreadName("<@123456> deploy the staging app", "centaur"), + ).toBe("deploy the staging app"); + }); + + it("strips nickname and role mentions", () => { + expect(deriveThreadName("<@!123> <@&456> check the logs", "centaur")).toBe( + "check the logs", + ); + }); + + it("falls back when only a mention is present", () => { + expect(deriveThreadName("<@123>", "centaur")).toBe("Centaur task"); + }); + + it("clips to Discord’s 100-char thread-name limit", () => { + const long = "a".repeat(200); + const name = deriveThreadName(long, "centaur"); + expect(name.length).toBe(100); + }); +}); + +describe("renameThreadFromMessage", () => { + function options(fetchFn: DiscordbotFetch): DiscordbotOptions { + return { + apiUrl: 
"http://localhost", + applicationId: "app", + botToken: "bot-token", + publicKey: "key", + discordApiUrl: "https://discord.com/api/v10", + fetch: fetchFn, + }; + } + + it("PATCHes the thread channel with the new name", async () => { + const calls: Array<{ url: string; init?: RequestInit }> = []; + const fetchFn = (async (url: RequestInfo | URL, init?: RequestInit) => { + calls.push({ url: String(url), init }); + return new Response("{}", { status: 200 }); + }) as DiscordbotFetch; + + await renameThreadFromMessage( + options(fetchFn), + "discord:G1:C1:T9", + "deploy app", + silentLogger, + ); + + expect(calls).toHaveLength(1); + expect(calls[0]?.url).toBe("https://discord.com/api/v10/channels/T9"); + expect(calls[0]?.init?.method).toBe("PATCH"); + expect(JSON.parse(String(calls[0]?.init?.body))).toEqual({ + name: "deploy app", + }); + const headers = calls[0]?.init?.headers as Record; + expect(headers.authorization).toBe("Bot bot-token"); + }); + + it("no-ops when the key has no thread segment", async () => { + let called = false; + const fetchFn = (async () => { + called = true; + return new Response("{}"); + }) as DiscordbotFetch; + + await renameThreadFromMessage( + options(fetchFn), + "discord:G1:C1", + "x", + silentLogger, + ); + expect(called).toBe(false); + }); + + it("swallows fetch errors", async () => { + const fetchFn = (async () => { + throw new Error("network down"); + }) as DiscordbotFetch; + + await expect( + renameThreadFromMessage( + options(fetchFn), + "discord:G1:C1:T9", + "x", + silentLogger, + ), + ).resolves.toBeUndefined(); + }); +}); diff --git a/services/discordbot/test/gateway.test.ts b/services/discordbot/test/gateway.test.ts new file mode 100644 index 00000000..5ad8e98f --- /dev/null +++ b/services/discordbot/test/gateway.test.ts @@ -0,0 +1,80 @@ +import { describe, expect, it } from "bun:test"; +import type { Chat, Logger } from "chat"; +import { createGatewayController } from "../src/gateway"; +import type { GatewayCapableAdapter } from 
"../src/types"; + +const silentLogger: Logger = { + debug: () => undefined, + info: () => undefined, + warn: () => undefined, + error: () => undefined, + child: () => silentLogger, +}; + +const fakeChat = { initialize: async () => undefined } as unknown as Chat; + +/** + * Fake adapter mirroring `startGatewayListener`'s contract: it registers a long-lived promise + * via `waitUntil` and resolves it when the abort signal fires (graceful stop). + */ +function fakeAdapter(): { + adapter: GatewayCapableAdapter; + endListener: () => void; +} { + let endListener!: () => void; + const listenerPromise = new Promise((resolve) => { + endListener = resolve; + }); + const adapter: GatewayCapableAdapter = { + async startGatewayListener(options, _durationMs, abortSignal) { + abortSignal?.addEventListener("abort", () => endListener()); + options.waitUntil(listenerPromise); + return new Response("ok"); + }, + }; + return { adapter, endListener }; +} + +describe("createGatewayController", () => { + it("marks active once started", async () => { + const { adapter } = fakeAdapter(); + const controller = createGatewayController({ + logger: silentLogger, + onFatalEnd: () => undefined, + }); + expect(controller.isActive()).toBe(false); + await controller.start(fakeChat, adapter); + expect(controller.isActive()).toBe(true); + }); + + it("does not treat a shutdown-triggered end as fatal", async () => { + let fatal = false; + const { adapter } = fakeAdapter(); + const controller = createGatewayController({ + logger: silentLogger, + onFatalEnd: () => { + fatal = true; + }, + }); + await controller.start(fakeChat, adapter); + await controller.shutdown(); + expect(controller.isActive()).toBe(false); + expect(fatal).toBe(false); + }); + + it("treats an unexpected connection end as fatal", async () => { + let fatal = false; + const { adapter, endListener } = fakeAdapter(); + const controller = createGatewayController({ + logger: silentLogger, + onFatalEnd: () => { + fatal = true; + }, + }); + 
await controller.start(fakeChat, adapter); + endListener(); // connection dropped without a shutdown request + await Bun.sleep(5); + expect(fatal).toBe(true); + expect(controller.isActive()).toBe(false); + }); +}); diff --git a/services/discordbot/tsconfig.json b/services/discordbot/tsconfig.json new file mode 100644 index 00000000..806cb668 --- /dev/null +++ b/services/discordbot/tsconfig.json @@ -0,0 +1,24 @@ +{ + "schema": "https://json.schemastore.org/tsconfig.json", + "compilerOptions": { + "strict": true, + "noEmit": true, + "lib": ["DOM", "ESNext", "DOM.Iterable"], + "target": "ESNext", + "module": "ESNext", + "moduleResolution": "Bundler", + "types": ["bun", "node"], + "skipLibCheck": true, + "esModuleInterop": true, + "isolatedModules": true, + "verbatimModuleSyntax": true, + "allowImportingTsExtensions": true, + "resolvePackageJsonExports": true, + "resolvePackageJsonImports": true, + "forceConsistentCasingInFileNames": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedIndexedAccess": true, + "useUnknownInCatchVariables": true + }, + "include": ["src/**/*", "test/**/*"] +} From c1502f12a8e1d514f104e1bccd94649b12ce8843 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Wed, 3 Jun 2026 16:46:57 -0600 Subject: [PATCH 07/25] build(discordbot): deploy plumbing mirroring slackbotv2 on api-rs Mirror the slackbotv2 deploy pattern for discordbot against the api-rs control plane: Dockerfile (copies packages/ + .npmrc for the tslib hoist), Helm discordbot.yaml (Deployment + Service, CENTAUR_API_URL -> api-rs:{apiRs.port}, replicas:1 + Recreate + 35s grace for the singleton Gateway session), a dedicated NetworkPolicy (egress to api-rs + postgres + direct :443 for the Gateway, since the cluster is default-deny), values + dev override (off by default, guild allowlist required), Justfile _build-discordbot + import/ghcr wiring, and Discord keys in the secrets bootstrap. CI matrix unchanged (slackbotv2/api-rs build locally too). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- Justfile | 12 +- contrib/chart/templates/discordbot.yaml | 125 +++++++++++++++++++++ contrib/chart/templates/networkpolicy.yaml | 51 +++++++++ contrib/chart/values.dev.yaml | 6 + contrib/chart/values.yaml | 19 ++++ contrib/scripts/bootstrap-k8s-secrets.sh | 18 +++ services/discordbot/Dockerfile | 22 ++++ 7 files changed, 250 insertions(+), 3 deletions(-) create mode 100644 contrib/chart/templates/discordbot.yaml create mode 100644 services/discordbot/Dockerfile diff --git a/Justfile b/Justfile index ad560948..c53a02f7 100644 --- a/Justfile +++ b/Justfile @@ -31,7 +31,7 @@ build: just _build-all-sequential else pids=() - for recipe in _build-api _build-api-rs _build-iron-proxy _build-slackbot _build-slackbotv2 _build-agent; do + for recipe in _build-api _build-api-rs _build-iron-proxy _build-slackbot _build-slackbotv2 _build-discordbot _build-agent; do just "$recipe" & pids+=("$!") done @@ -48,6 +48,7 @@ _build-all-sequential: just _build-iron-proxy just _build-slackbot just _build-slackbotv2 + just _build-discordbot just _build-agent build-one service: @@ -59,6 +60,7 @@ build-one service: iron-proxy) just _build-iron-proxy ;; slackbot) just _build-slackbot ;; slackbotv2) just _build-slackbotv2 ;; + discordbot) just _build-discordbot ;; agent|sandbox) just _build-agent ;; agent-thin|sandbox-thin) just _build-agent-thin ;; *) echo "unknown service: {{service}}" >&2; exit 2 ;; @@ -79,6 +81,9 @@ _build-slackbot: _build-slackbotv2: docker build -t centaur-slackbotv2:latest -f services/slackbotv2/Dockerfile . +_build-discordbot: + docker build -t centaur-discordbot:latest -f services/discordbot/Dockerfile . + _build-agent: docker build --target "{{agent_build_target}}" -t "{{agent_image}}" -f "{{agent_dockerfile}}" . 
@@ -91,7 +96,7 @@ _build-agent-thin: _push-registry: #!/usr/bin/env bash set -euo pipefail - for img in centaur-api centaur-api-rs centaur-iron-proxy centaur-slackbot centaur-slackbotv2 centaur-agent; do + for img in centaur-api centaur-api-rs centaur-iron-proxy centaur-slackbot centaur-slackbotv2 centaur-discordbot centaur-agent; do target="{{registry}}/library/${img}:latest" echo "pushing ${img}:latest -> ${target}..." docker tag "${img}:latest" "${target}" @@ -104,7 +109,7 @@ _push-registry: _import-k3s: #!/usr/bin/env bash set -euo pipefail - for img in centaur-api centaur-api-rs centaur-iron-proxy centaur-slackbot centaur-slackbotv2 centaur-agent; do + for img in centaur-api centaur-api-rs centaur-iron-proxy centaur-slackbot centaur-slackbotv2 centaur-discordbot centaur-agent; do echo "importing ${img}:latest into k3s containerd..." docker save "${img}:latest" | {{k3s_ctr}} images import - done @@ -126,6 +131,7 @@ deploy: --set ironProxy.image.repository=ghcr.io/paradigmxyz/centaur/centaur-iron-proxy --set slackbot.image.repository=ghcr.io/paradigmxyz/centaur/centaur-slackbot --set slackbotv2.image.repository=ghcr.io/paradigmxyz/centaur/centaur-slackbotv2 + --set discordbot.image.repository=ghcr.io/paradigmxyz/centaur/centaur-discordbot --set sandbox.image.repository=ghcr.io/paradigmxyz/centaur/centaur-agent ) ;; diff --git a/contrib/chart/templates/discordbot.yaml b/contrib/chart/templates/discordbot.yaml new file mode 100644 index 00000000..291b0bf2 --- /dev/null +++ b/contrib/chart/templates/discordbot.yaml @@ -0,0 +1,125 @@ +{{- if .Values.discordbot.enabled }} +{{- $apiRsName := include "centaur.componentName" (dict "root" . "component" "api-rs") -}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "centaur.componentName" (dict "root" . "component" "discordbot") }} + labels: +{{ include "centaur.componentLabels" (dict "root" . 
"component" "discordbot") | nindent 4 }} +spec: + # Exactly one replica: two pods on the same bot token open two Gateway sessions and every message + # is handled twice. Recreate tears the old pod down before the new one logs in, so the token never + # holds two sessions during a rollout. Do NOT add an HPA. + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: +{{ include "centaur.componentSelectorLabels" (dict "root" . "component" "discordbot") | nindent 6 }} + template: + metadata: + annotations: + checksum/infra-secrets: {{ include "centaur.infraSecretsChecksum" . }} + labels: +{{ include "centaur.componentSelectorLabels" (dict "root" . "component" "discordbot") | nindent 8 }} + spec: + automountServiceAccountToken: false + # Allow the Gateway to drain and close cleanly on SIGTERM (protects the IDENTIFY budget). + terminationGracePeriodSeconds: 35 + {{- with .Values.global.imagePullSecrets }} + imagePullSecrets: +{{ toYaml . | nindent 8 }} + {{- end }} + containers: + - name: discordbot + image: {{ printf "%s:%s" .Values.discordbot.image.repository .Values.discordbot.image.tag | quote }} + imagePullPolicy: {{ .Values.discordbot.image.pullPolicy }} + env: + - name: PORT + value: "3001" + - name: CENTAUR_API_URL + value: {{ printf "http://%s:%v" $apiRsName .Values.apiRs.port | quote }} + - name: DISCORD_BOT_TOKEN + valueFrom: + secretKeyRef: + name: {{ include "centaur.secretEnvName" . }} + key: {{ printf "%sDISCORD_BOT_TOKEN" .Values.secretManager.envPrefix }} + - name: DISCORD_PUBLIC_KEY + valueFrom: + secretKeyRef: + name: {{ include "centaur.secretEnvName" . }} + key: {{ printf "%sDISCORD_PUBLIC_KEY" .Values.secretManager.envPrefix }} + - name: DISCORD_APPLICATION_ID + valueFrom: + secretKeyRef: + name: {{ include "centaur.secretEnvName" . }} + key: {{ printf "%sDISCORD_APPLICATION_ID" .Values.secretManager.envPrefix }} + - name: DISCORDBOT_API_KEY + valueFrom: + secretKeyRef: + name: {{ include "centaur.secretEnvName" . 
}} + key: {{ printf "%sDISCORDBOT_API_KEY" .Values.secretManager.envPrefix }} + # api-rs has no auth middleware yet, but discordbot still sends the key as a header; + # source it from the same infra secret. + - name: CENTAUR_API_KEY + valueFrom: + secretKeyRef: + name: {{ include "centaur.secretEnvName" . }} + key: {{ printf "%sDISCORDBOT_API_KEY" .Values.secretManager.envPrefix }} + - name: DISCORDBOT_DATABASE_URL + valueFrom: + secretKeyRef: + name: {{ include "centaur.secretEnvName" . }} + key: {{ printf "%sDATABASE_URL" .Values.secretManager.envPrefix }} + - name: DISCORDBOT_USER_NAME + value: {{ .Values.discordbot.userName | quote }} + - name: DISCORDBOT_GUILD_ALLOWLIST + value: {{ .Values.discordbot.guildAllowlist | quote }} +{{- if .Values.discordbot.mentionRoleIds }} + - name: DISCORD_MENTION_ROLE_IDS + value: {{ .Values.discordbot.mentionRoleIds | quote }} +{{- end }} +{{- if not .Values.discordbot.nameThreads }} + - name: DISCORDBOT_NAME_THREADS + value: "false" +{{- end }} +{{- range $name, $value := .Values.discordbot.extraEnv }} + - name: {{ $name }} + value: {{ $value | quote }} +{{- end }} + envFrom: + - secretRef: + name: {{ include "centaur.secretEnvName" . }} + ports: + - containerPort: 3001 + name: http + readinessProbe: + httpGet: + path: /health + port: 3001 + livenessProbe: + httpGet: + path: /health + port: 3001 + # /health reflects the Gateway connection; give the listener time to connect on boot. + initialDelaySeconds: 20 + periodSeconds: 15 + securityContext: +{{ toYaml .Values.containerSecurityContext | nindent 12 }} + resources: +{{ toYaml .Values.discordbot.resources | nindent 12 }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "centaur.componentName" (dict "root" . "component" "discordbot") }} + labels: +{{ include "centaur.componentLabels" (dict "root" . "component" "discordbot") | nindent 4 }} +spec: + selector: +{{ include "centaur.componentSelectorLabels" (dict "root" . 
"component" "discordbot") | nindent 4 }} + ports: + - name: http + port: 3001 + targetPort: 3001 +{{- end }} diff --git a/contrib/chart/templates/networkpolicy.yaml b/contrib/chart/templates/networkpolicy.yaml index 06615227..981c5c16 100644 --- a/contrib/chart/templates/networkpolicy.yaml +++ b/contrib/chart/templates/networkpolicy.yaml @@ -220,6 +220,57 @@ spec: port: 443 --- {{- end }} +{{- if .Values.discordbot.enabled }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "centaur.componentName" (dict "root" . "component" "discordbot") }} + labels: +{{ include "centaur.componentLabels" (dict "root" . "component" "discordbot") | nindent 4 }} +spec: + podSelector: + matchLabels: +{{ include "centaur.componentSelectorLabels" (dict "root" . "component" "discordbot") | nindent 6 }} + policyTypes: + - Ingress + - Egress + ingress: + # Only health probes reach discordbot; nothing routes traffic to it (the Gateway is outbound). + - from: +{{- range $ingressSourceNamespaces }} + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: {{ . | quote }} +{{- end }} + ports: + - protocol: TCP + port: 3001 + egress: +{{- if .Values.apiRs.enabled }} + - to: + - podSelector: + matchLabels: +{{ include "centaur.componentSelectorLabels" (dict "root" . "component" "api-rs") | nindent 14 }} + ports: + - protocol: TCP + port: {{ .Values.apiRs.port }} +{{- end }} +{{- if .Values.postgres.enabled }} + - to: + - podSelector: + matchLabels: +{{ include "centaur.componentSelectorLabels" (dict "root" . "component" "postgres") | nindent 14 }} + ports: + - protocol: TCP + port: 5432 +{{- end }} + # Discord Gateway (wss), REST, and CDN all need direct HTTPS egress. The Gateway cannot be + # proxied (discord.js ignores HTTPS_PROXY), so this rule is load-bearing. 
+ - ports: + - protocol: TCP + port: 443 +--- +{{- end }} {{- if .Values.api.enabled }} apiVersion: networking.k8s.io/v1 kind: NetworkPolicy diff --git a/contrib/chart/values.dev.yaml b/contrib/chart/values.dev.yaml index 846ac325..23e14b13 100644 --- a/contrib/chart/values.dev.yaml +++ b/contrib/chart/values.dev.yaml @@ -21,6 +21,12 @@ slackbotv2: image: pullPolicy: IfNotPresent +discordbot: + # Enable once DISCORD_* secrets + a guild allowlist are configured. + enabled: false + image: + pullPolicy: IfNotPresent + apiRs: image: pullPolicy: IfNotPresent diff --git a/contrib/chart/values.yaml b/contrib/chart/values.yaml index 47d9199e..f692f577 100644 --- a/contrib/chart/values.yaml +++ b/contrib/chart/values.yaml @@ -285,6 +285,25 @@ slackbotv2: extraEnv: {} resources: {} +# Discord chat ingress — mirrors slackbotv2: receives messages over a persistent Discord Gateway +# connection and forwards them to the api-rs control plane (:8080). Off by default; needs a Discord +# app + Message Content Intent + a guild allowlist. Always exactly one replica (singleton Gateway session). +discordbot: + enabled: false + image: + repository: centaur-discordbot + tag: latest + pullPolicy: Always + userName: centaur + # Comma/space-separated Discord guild IDs. The bot is INERT (ignores all messages) until set. + guildAllowlist: "" + # Comma/space-separated role IDs whose mentions also trigger the bot. + mentionRoleIds: "" + # Rename auto-created threads to the triggering message; set false to keep generic names.
+ nameThreads: true + extraEnv: {} + resources: {} + postgres: enabled: true image: diff --git a/contrib/scripts/bootstrap-k8s-secrets.sh b/contrib/scripts/bootstrap-k8s-secrets.sh index 9d160061..0c45a661 100755 --- a/contrib/scripts/bootstrap-k8s-secrets.sh +++ b/contrib/scripts/bootstrap-k8s-secrets.sh @@ -123,6 +123,16 @@ if secret_exists centaur-infra-env; then if [[ -n "${LOCAL_DEV_API_KEY:-}" ]]; then patch_data+=("\"LOCAL_DEV_API_KEY\":\"$(printf '%s' "$LOCAL_DEV_API_KEY" | base64 | tr -d '\n')\"") fi + # Discord ingress (discordbot) keys: added when DISCORD_BOT_TOKEN is in the env. DISCORD_* are + # overwritten on each run (so rotation works); DISCORDBOT_API_KEY is generated once if absent. + if [[ -n "${DISCORD_BOT_TOKEN:-}" ]]; then + patch_data+=("\"DISCORD_BOT_TOKEN\":\"$(printf '%s' "$DISCORD_BOT_TOKEN" | base64 | tr -d '\n')\"") + patch_data+=("\"DISCORD_PUBLIC_KEY\":\"$(printf '%s' "${DISCORD_PUBLIC_KEY:-}" | base64 | tr -d '\n')\"") + patch_data+=("\"DISCORD_APPLICATION_ID\":\"$(printf '%s' "${DISCORD_APPLICATION_ID:-}" | base64 | tr -d '\n')\"") + if ! secret_key_present DISCORDBOT_API_KEY; then + patch_data+=("\"DISCORDBOT_API_KEY\":\"$(printf '%s' "${DISCORDBOT_API_KEY:-$(rand_hex)}" | base64 | tr -d '\n')\"") + fi + fi # iron-control keys: top up only when absent so we never rotate them out from # under a running pod (its ActiveRecord-encrypted data would become # undecryptable). Generated values mirror the create path. 
@@ -197,6 +207,14 @@ else --from-literal=IRON_CONTROL_AR_ENCRYPTION_KEY_DERIVATION_SALT="$(rand_hex)" --from-literal=IRON_CONTROL_SECRET_KEY_BASE="$(rand_hex)$(rand_hex)" ) + if [[ -n "${DISCORD_BOT_TOKEN:-}" ]]; then + secret_args+=( + --from-literal=DISCORD_BOT_TOKEN="$DISCORD_BOT_TOKEN" + --from-literal=DISCORD_PUBLIC_KEY="${DISCORD_PUBLIC_KEY:-}" + --from-literal=DISCORD_APPLICATION_ID="${DISCORD_APPLICATION_ID:-}" + --from-literal=DISCORDBOT_API_KEY="${DISCORDBOT_API_KEY:-$(rand_hex)}" + ) + fi if [[ -n "${OP_CONNECT_TOKEN:-}" ]]; then secret_args+=(--from-literal=OP_CONNECT_TOKEN="$OP_CONNECT_TOKEN") fi diff --git a/services/discordbot/Dockerfile b/services/discordbot/Dockerfile new file mode 100644 index 00000000..1f0f7149 --- /dev/null +++ b/services/discordbot/Dockerfile @@ -0,0 +1,22 @@ +FROM oven/bun:1.3.13-slim + +WORKDIR /repo +ENV NODE_ENV=production + +RUN bun install -g pnpm@10.28.1 + +# Workspace manifests first so dependency install is a cached layer. discordbot depends on the +# @centaur/* workspace packages, so they must be present for pnpm to link the `workspace:*` ranges. +# .npmrc is required so tslib is public-hoisted (discord.js ships source + requires tslib, which Bun +# can't resolve via the nested pnpm symlink otherwise). 
+COPY package.json pnpm-lock.yaml pnpm-workspace.yaml .npmrc ./ +COPY packages/ packages/ +COPY services/discordbot/package.json services/discordbot/package.json +RUN pnpm install --filter discordbot --prod --frozen-lockfile + +COPY services/discordbot/ services/discordbot/ +WORKDIR /repo/services/discordbot + +EXPOSE 3001 +ENV PORT=3001 +CMD ["bun", "src/server.ts"] From 79d74a872e13b5b575266d58e43e8429daf3e184 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Tue, 2 Jun 2026 20:57:32 +0000 Subject: [PATCH 08/25] =?UTF-8?q?fix(chart):=20allow=20discordbot=20?= =?UTF-8?q?=E2=86=92=20api-rs=20in=20the=20api-rs=20NetworkPolicy=20ingres?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The discordbot (a clone of slackbotv2) forwards every message to api-rs (CENTAUR_API_URL → centaur-centaur-api-rs:8080). Its own egress NetworkPolicy already permits →api-rs, but the api-rs ingress `from` list allowed only slackbotv2 and managed-by=api-rs sandboxes — not the discordbot. On a cluster that enforces NetworkPolicy the forward is rejected at connect time, so the bot can never reach the control plane. Add a discordbot podSelector to the api-rs ingress, gated on `.Values.discordbot.enabled` and mirroring the slackbotv2 block. Co-Authored-By: Claude Opus 4.8 (1M context) --- contrib/chart/templates/networkpolicy.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/contrib/chart/templates/networkpolicy.yaml b/contrib/chart/templates/networkpolicy.yaml index 981c5c16..f6252734 100644 --- a/contrib/chart/templates/networkpolicy.yaml +++ b/contrib/chart/templates/networkpolicy.yaml @@ -111,6 +111,11 @@ spec: - podSelector: matchLabels: {{ include "centaur.componentSelectorLabels" (dict "root" . "component" "slackbotv2") | nindent 14 }} +{{- end }} +{{- if .Values.discordbot.enabled }} + - podSelector: + matchLabels: +{{ include "centaur.componentSelectorLabels" (dict "root" . 
"component" "discordbot") | nindent 14 }} {{- end }} # Sandboxes call back into the control plane. agent-k8s labels its pods # centaur.ai/managed-by=api-rs (MANAGED_BY_VALUE in centaur-sandbox-agent-k8s), From 7ec1e821b9b900ad07925f4eb490f177ac6d87bd Mon Sep 17 00:00:00 2001 From: Will Drach Date: Tue, 2 Jun 2026 21:07:42 +0000 Subject: [PATCH 09/25] =?UTF-8?q?fix(chart):=20allow=20discordbot=20?= =?UTF-8?q?=E2=86=92=20postgres=20in=20the=20postgres=20NetworkPolicy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The discordbot (like slackbotv2) connects to Postgres directly for its per-thread advisory locks / session state, and its egress NetworkPolicy already permits →postgres:5432. But the postgres ingress `from` list allowed api / api-rs / slackbotv2 / slackbot / iron-control — not the discordbot — so on a NetworkPolicy-enforcing cluster the bot crash-looped at startup with `ECONNREFUSED 5432`. Add a discordbot podSelector to the postgres ingress, gated on `.Values.discordbot.enabled` and mirroring the slackbotv2 block. Co-Authored-By: Claude Opus 4.8 (1M context) --- contrib/chart/templates/networkpolicy.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/contrib/chart/templates/networkpolicy.yaml b/contrib/chart/templates/networkpolicy.yaml index f6252734..5807c3a6 100644 --- a/contrib/chart/templates/networkpolicy.yaml +++ b/contrib/chart/templates/networkpolicy.yaml @@ -66,6 +66,11 @@ spec: matchLabels: {{ include "centaur.componentSelectorLabels" (dict "root" . "component" "slackbot") | nindent 14 }} {{- end }} +{{- if .Values.discordbot.enabled }} + - podSelector: + matchLabels: +{{ include "centaur.componentSelectorLabels" (dict "root" . 
"component" "discordbot") | nindent 14 }} +{{- end }} {{- if .Values.ironControl.enabled }} - podSelector: matchLabels: From 0f173cbc5a15e5ab316168e6e467e6f04c67946b Mon Sep 17 00:00:00 2001 From: Will Drach Date: Tue, 2 Jun 2026 21:07:42 +0000 Subject: [PATCH 10/25] fix(discordbot): cap gateway listener duration to setTimeout's 32-bit max MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LONG_RUNNING_MS was 365*24*60*60*1000 (31_536_000_000), passed as the Gateway listener's self-destruct durationMs. The adapter backs it with a single setTimeout, whose delay is a 32-bit signed int: any value above 2^31-1 ms silently clamps to 1ms (Node/Bun logs `TimeoutOverflowWarning ... set to 1`). The "stay connected" timer fired almost immediately — the bot connected, logged "duration elapsed, disconnecting", and exited via onFatalEnd, crash-looping the pod. Cap at 2_147_483_647 (~24.8 days), the largest delay setTimeout can represent. discord.js holds one session via RESUME within the window; the timer then forces at most one re-IDENTIFY per ~24.8 days, well under the 1000/24h budget. A re-arm loop is avoided: the adapter can't tell timer-expiry from a fatal login error, so looping would mask a bad token into an infinite reconnect. Co-Authored-By: Claude Opus 4.8 (1M context) --- services/discordbot/src/gateway.ts | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/services/discordbot/src/gateway.ts b/services/discordbot/src/gateway.ts index 0bf77c85..168fd649 100644 --- a/services/discordbot/src/gateway.ts +++ b/services/discordbot/src/gateway.ts @@ -2,13 +2,18 @@ import type { Chat, Logger } from "chat"; import type { GatewayCapableAdapter } from "./types"; /** - * One year. 
`startGatewayListener` treats `durationMs` as a self-destruct timer; within that - * window discord.js maintains a single Gateway session with native RESUME, so a very large value - * gives us one long-lived connection rather than a re-IDENTIFY loop (which would burn the - * 1000/24h IDENTIFY budget). If the connection ends before this elapses it's a fatal/login error - * and we let the process exit so Kubernetes restarts the pod. + * `startGatewayListener` treats `durationMs` as a self-destruct timer backed by a single + * `setTimeout`; within that window discord.js maintains one Gateway session with native RESUME, + * so a large value gives us one long-lived connection rather than a re-IDENTIFY loop (which would + * burn the 1000/24h IDENTIFY budget). If the connection ends before this elapses it's a + * fatal/login error and we let the process exit so Kubernetes restarts the pod. + * + * This is capped at the maximum delay a 32-bit `setTimeout` can represent (2^31-1 ms ≈ 24.8 days). + * A larger value (e.g. one year) silently overflows and clamps to 1ms, firing the self-destruct + * almost immediately and crash-looping the pod. At ~24.8 days the timer forces at most one + * reconnect/IDENTIFY per window — negligible against the 1000/24h budget. */ -const LONG_RUNNING_MS = 365 * 24 * 60 * 60 * 1000; +const LONG_RUNNING_MS = 2_147_483_647; export type GatewayController = { /** True once the listener has started and the connection has not ended. */ From d5d2d76696c6244cb72b9b142eed3fdaa9605fe2 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Thu, 4 Jun 2026 04:41:56 +0000 Subject: [PATCH 11/25] =?UTF-8?q?feat(discordbot):=20friendlier=20streamin?= =?UTF-8?q?g=20placeholder=20("=E2=9C=A8=20thinking...")?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chat SDK posts a bare "..." while the agent works before any streamed content. Set ChatConfig.fallbackStreamingPlaceholderText to "✨ thinking..." 
(overridable via DiscordbotOptions.streamingPlaceholderText). Co-Authored-By: Claude Opus 4.8 (1M context) --- services/discordbot/src/index.ts | 4 ++++ services/discordbot/src/types.ts | 2 ++ 2 files changed, 6 insertions(+) diff --git a/services/discordbot/src/index.ts b/services/discordbot/src/index.ts index 24a34a16..5e8b1e09 100644 --- a/services/discordbot/src/index.ts +++ b/services/discordbot/src/index.ts @@ -92,6 +92,10 @@ export function createDiscordbot(options: DiscordbotOptions): Discordbot { userName, adapters: { discord }, state, + // Initial placeholder posted while the agent works, before any streamed + // content (the chat SDK default is a bare "..."). Overridable via options. + fallbackStreamingPlaceholderText: + options.streamingPlaceholderText ?? "✨ thinking...", // Serialize handlers per thread via the SDK's per-thread lock. The deprecated // `onLockConflict: 'force'` force-released the lock so two handlers ran concurrently on one // thread — two near-simultaneous mentions could both pass the `activeExecution` check and diff --git a/services/discordbot/src/types.ts b/services/discordbot/src/types.ts index f66cdbc5..327480af 100644 --- a/services/discordbot/src/types.ts +++ b/services/discordbot/src/types.ts @@ -103,6 +103,8 @@ export type DiscordbotOptions = { recoverRenderObligationsOnStart?: boolean; state?: StateAdapter; stateKeyPrefix?: string; + /** Placeholder posted while the agent works, before streamed content. Defaults to "✨ thinking...". 
*/ + streamingPlaceholderText?: string; userName?: string; }; From 279e030622c802bcf841b3735aa84a76037b556b Mon Sep 17 00:00:00 2001 From: Will Drach Date: Thu, 4 Jun 2026 05:22:56 +0000 Subject: [PATCH 12/25] =?UTF-8?q?perf(discordbot):=20post=20"=E2=9C=A8=20t?= =?UTF-8?q?hinking..."=20instantly,=20run=20execute=20in=20the=20stream?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The placeholder only appeared after the ~9s cold sandbox spin-up because the discordbot awaited the execute call (which blocks on sandbox+tool-server readiness) before starting the render that posts the placeholder. Do create+append only up front (fast), then run executeSession INSIDE the render stream — after yielding the placeholder. The user now sees "✨ thinking..." in ~0.3s while the sandbox spins up. executeSession is idempotent (idempotency_key = message id), so a render retry won't re-spawn; sandbox-spawn failures surface as an error in the same message (api-rs writes no event if the spawn itself fails, so this avoids a hung placeholder). The activeExecution guard is still set synchronously under the per-thread lock before execute, so double-execution protection is unchanged. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- services/discordbot/src/index.ts | 52 ++++++++++++++++++++++---- services/discordbot/src/session-api.ts | 25 +++++++++++++ 2 files changed, 70 insertions(+), 7 deletions(-) diff --git a/services/discordbot/src/index.ts b/services/discordbot/src/index.ts index 5e8b1e09..a09a1564 100644 --- a/services/discordbot/src/index.ts +++ b/services/discordbot/src/index.ts @@ -23,6 +23,7 @@ import { import { deriveThreadName, renameThreadFromMessage } from "./discord-threading"; import { collectInitialContext, + executeSessionTurn, forwardToSessionApi, isRetryableSessionApiError, openSessionEventStream, @@ -296,10 +297,16 @@ async function syncThreadMessageToSession( "discordbot_forward_active_execution_marked", trace, ); - await forwardToSessionApi(input.options, forwardInput, { - onExecutionStarted: commitExecutionStarted, - onMessagesAppended: commitMessagesAppended, - }); + // Create + append the session message only (fast). The execute call blocks + // ~9s on cold sandbox spin-up (incl. the tool-server sidecar), so it's run + // inside the render stream below — after the "✨ thinking..." placeholder is + // posted — instead of before it. executeSession is idempotent + // (idempotency_key = message id), so a render retry won't re-spawn. 
+ await forwardToSessionApi( + input.options, + { ...forwardInput, executeMessage: undefined, openStream: false }, + { onMessagesAppended: commitMessagesAppended }, + ); scheduleExecutionRender( thread, serializedMessage, @@ -308,6 +315,7 @@ async function syncThreadMessageToSession( () => lastEventId, shouldIncludeContext, trace, + commitExecutionStarted, ); traceLog(input.options, "discordbot_forward_complete", trace, { last_event_id: lastEventId, @@ -343,6 +351,9 @@ function scheduleExecutionRender( getLastEventId: () => number, isInitialExecution: boolean, trace?: DiscordbotTrace, + onExecutionStarted?: ( + execution: DiscordbotExecuteSessionResponse, + ) => Promise, ): void { const promise = (async () => { let attempt = 0; @@ -355,6 +366,7 @@ function scheduleExecutionRender( getLastEventId, isInitialExecution, trace, + onExecutionStarted, ); if (result === "complete") return; const delayMs = renderRetryDelayMs(attempt); @@ -377,13 +389,16 @@ async function renderExecutionAttempt( getLastEventId: () => number, isInitialExecution: boolean, trace?: DiscordbotTrace, + onExecutionStarted?: ( + execution: DiscordbotExecuteSessionResponse, + ) => Promise, ): Promise<"complete" | "retry"> { let rendered = false; let retry = false; try { await renderExecutionStream( thread, - streamSessionAfterHandoff(options, input), + streamSessionAfterHandoff(options, input, onExecutionStarted), message, options, isInitialExecution, @@ -690,7 +705,32 @@ async function renderRecoveredExecutionStream( async function* streamSessionAfterHandoff( options: DiscordbotOptions, input: ForwardSessionInput, + onExecutionStarted?: ( + execution: DiscordbotExecuteSessionResponse, + ) => Promise, ): AsyncIterable { + // Post the placeholder BEFORE executing so the user sees "✨ thinking..." + // immediately, instead of waiting ~9s for the cold sandbox (incl. tool-server + // sidecar) to spin up. 
Execute runs here, inside the render stream, so a + // sandbox-spawn failure surfaces in the same message rather than hanging the + // placeholder (api-rs writes no event if the spawn itself fails). + yield startingStreamNotification(input.threadId); + traceLog(options, "discordbot_stream_heartbeat_emitted", input.trace); + + if (input.executeMessage) { + try { + const execution = await executeSessionTurn(options, input); + if (execution) await onExecutionStarted?.(execution); + } catch (error) { + traceLog(options, "discordbot_forward_failed", input.trace, { + error: errorMessage(error), + }); + if (isRetryableSessionApiError(error)) throw error; + yield sessionStreamError(error); + return; + } + } + let stream: AsyncIterable; try { stream = await openSessionEventStream(options, input); @@ -703,8 +743,6 @@ async function* streamSessionAfterHandoff( return; } - yield startingStreamNotification(input.threadId); - traceLog(options, "discordbot_stream_heartbeat_emitted", input.trace); for await (const event of stream) yield event; } diff --git a/services/discordbot/src/session-api.ts b/services/discordbot/src/session-api.ts index 91e363d2..5bdba306 100644 --- a/services/discordbot/src/session-api.ts +++ b/services/discordbot/src/session-api.ts @@ -157,6 +157,31 @@ export async function forwardToSessionApi( return openSessionEventStream(options, input); } +/** + * Execute the session turn on its own (start the agent run), returning the + * execution. Split out of forwardToSessionApi so the render stream can run it + * AFTER posting the placeholder — the execute call blocks on cold sandbox + * spin-up. Idempotent via the request's idempotency_key, so a render retry + * won't re-spawn the sandbox. 
+ */ +export async function executeSessionTurn( + options: DiscordbotOptions, + input: ForwardSessionInput, +): Promise { + if (!input.executeMessage) return null; + const executeStartedAtMs = nowMs(); + const execution = await executeSession( + options, + input.threadId, + input.executeMessage, + ); + traceLog(options, "discordbot_session_execute_complete", input.trace, { + execution_id: execution.execution_id, + phase_ms: elapsedMs(executeStartedAtMs), + }); + return execution; +} + export async function openSessionEventStream( options: DiscordbotOptions, input: Pick< From c0d510ef05ed1da4198b6bb0f7fe76f5c77c348c Mon Sep 17 00:00:00 2001 From: Will Drach Date: Fri, 5 Jun 2026 07:01:26 -0600 Subject: [PATCH 13/25] feat(discordbot): absorb upstream slackbotv2 stream-safety fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sync pass against slackbotv2 @ e391f113 (3-way merge per file): - Port #416/#418: discordSafeChatSdkStream omits task_update output and truncates details to 500 chars before streaming. - Port the thread_not_found tolerance in collectInitialContext with a Discord-shaped guard (NetworkError carrying "Unknown Channel"/10003). Deliberate deltas kept (commented in-code): the synthetic starting notification and no streamAfterFirstChunk deferral — both serve the instant "✨ thinking..." placeholder, where slackbotv2 instead posts nothing until the first visible chunk (#406/#415). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- services/discordbot/src/index.ts | 45 +++++++++++++++++++++++--- services/discordbot/src/session-api.ts | 23 +++++++++++-- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/services/discordbot/src/index.ts b/services/discordbot/src/index.ts index a09a1564..18ec2ebd 100644 --- a/services/discordbot/src/index.ts +++ b/services/discordbot/src/index.ts @@ -1,6 +1,7 @@ import { randomUUID } from "node:crypto"; import { codexAppServerToChatSdkStream, + type ChatSDKStreamChunk, type CodexAppServerToChatStreamOptions, type RendererEvent, } from "@centaur/rendering"; @@ -68,6 +69,7 @@ const RENDER_INDEX_TTL_MS = 30 * 24 * 60 * 60 * 1000; const RENDER_RECOVERY_LEASE_TTL_MS = 2 * 60 * 1000; const RENDER_RETRY_INITIAL_DELAY_MS = 250; const RENDER_RETRY_MAX_DELAY_MS = 5_000; +const DISCORD_TASK_DETAILS_MAX_CHARS = 500; export function createDiscordbot(options: DiscordbotOptions): Discordbot { const userName = options.userName ?? "centaur"; @@ -651,6 +653,8 @@ async function* streamOpenedSession( input: Pick, stream: AsyncIterable, ): AsyncIterable { + // Deliberate delta from slackbotv2 (which removed its synthetic starting + // task): the synthetic item drives the instant "✨ thinking..." placeholder. yield startingStreamNotification(input.threadId); for await (const event of stream) yield event; } @@ -680,12 +684,14 @@ async function renderExecutionStream( const stopTyping = startTypingKeepalive(thread, logger); try { - await thread.post( - new StreamingPlan( - codexAppServerToChatSdkStream(stream, rendererOptions(options)), - {}, - ), + // Deliberate delta from slackbotv2: no streamAfterFirstChunk deferral. + // The instant "✨ thinking..." placeholder covers the no-visible-output + // window, so the stream posts immediately instead of waiting for the + // first visible chunk. 
+ const visibleStream = discordSafeChatSdkStream( + codexAppServerToChatSdkStream(stream, rendererOptions(options)), ); + await thread.post(new StreamingPlan(visibleStream, {})); } finally { stopTyping(); } @@ -699,9 +705,38 @@ async function renderRecoveredExecutionStream( trace?: DiscordbotTrace, ): Promise { // Recovered renders never rename the thread; naming happens on the initial execution. + // The discordSafe stream wrapping comes via renderExecutionStream. await renderExecutionStream(thread, stream, message, options, false, trace); } +async function* discordSafeChatSdkStream( + stream: AsyncIterable, +): AsyncIterable { + for await (const chunk of stream) { + yield discordSafeChatSdkChunk(chunk); + } +} + +function discordSafeChatSdkChunk( + chunk: ChatSDKStreamChunk, +): ChatSDKStreamChunk { + if (chunk.type !== "task_update") return chunk; + const { output: _output, details, ...safeChunk } = chunk; + void _output; + return { + ...safeChunk, + ...(details ? { details: truncateDiscordTaskField(details) } : {}), + }; +} + +function truncateDiscordTaskField(value: string): string { + if (value.length <= DISCORD_TASK_DETAILS_MAX_CHARS) return value; + const omitted = value.length - DISCORD_TASK_DETAILS_MAX_CHARS; + const suffix = `\n[truncated ${omitted} chars from Discord task details]`; + const keep = Math.max(0, DISCORD_TASK_DETAILS_MAX_CHARS - suffix.length); + return `${value.slice(0, keep).trimEnd()}${suffix}`; +} + async function* streamSessionAfterHandoff( options: DiscordbotOptions, input: ForwardSessionInput, diff --git a/services/discordbot/src/session-api.ts b/services/discordbot/src/session-api.ts index 5bdba306..6d2687a3 100644 --- a/services/discordbot/src/session-api.ts +++ b/services/discordbot/src/session-api.ts @@ -70,8 +70,13 @@ export async function collectInitialContext( currentMessage: Message, ): Promise { const messages: Message[] = []; - for await (const message of thread.allMessages) { - messages.push(message); + try { + for await 
(const message of thread.allMessages) { + messages.push(message); + } + } catch (error) { + if (!isDiscordThreadNotFoundError(error)) throw error; + return [await serializeMessage(currentMessage)]; } const currentIndex = messages.findIndex( @@ -90,6 +95,18 @@ export async function collectInitialContext( return serialized; } +// Discord analog of slackbotv2's isSlackThreadNotFoundError: the Discord +// adapter throws a NetworkError carrying the raw Discord API body, e.g. +// `Discord API error: 404 {"message": "Unknown Channel", "code": 10003}`. +function isDiscordThreadNotFoundError(error: unknown): boolean { + if (!(error instanceof Error)) return false; + return ( + error.message.includes("Unknown Channel") || + error.message.includes("Unknown Message") || + error.message.includes('"code": 10003') + ); +} + export async function serializeMessage( message: Message, ): Promise { @@ -203,6 +220,8 @@ export async function openSessionEventStream( return stream; } +// Deliberate delta from slackbotv2 (which removed this entirely): the +// synthetic starting item drives the instant "✨ thinking..." placeholder. export function startingStreamNotification(threadId: string): JsonObject { return { method: "item/started", From 089c332093dcfb0ab97457818440ba918a0ae3ca Mon Sep 17 00:00:00 2001 From: Will Drach Date: Fri, 5 Jun 2026 08:18:36 -0600 Subject: [PATCH 14/25] =?UTF-8?q?build(discordbot):=20drop=20the=20workspa?= =?UTF-8?q?ce-wide=20tslib=20hoist=20=E2=80=94=20no=20longer=20needed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .npmrc public-hoist-pattern (and discordbot's direct tslib dep) worked around Bun failing to resolve tslib from discord.js under pnpm's strict layout. On the current lockfile discord.js@14.26.4 declares tslib properly, so pnpm nests a copy right next to it and Bun resolves it without help. 
Verified both locally and in the production image (build + import discord.js in the container) with the hoist and the direct dep removed. Removes the only workspace-wide file the discordbot work touched. Co-Authored-By: Claude Opus 4.8 (1M context) --- .npmrc | 4 ---- pnpm-lock.yaml | 3 --- services/discordbot/Dockerfile | 4 +--- services/discordbot/package.json | 3 +-- 4 files changed, 2 insertions(+), 12 deletions(-) delete mode 100644 .npmrc diff --git a/.npmrc b/.npmrc deleted file mode 100644 index 1d90af05..00000000 --- a/.npmrc +++ /dev/null @@ -1,4 +0,0 @@ -# Hoist tslib to the root node_modules. discord.js (used by services/discordbot) ships its -# source and requires tslib; with pnpm strict layout, Bun cannot resolve the nested .pnpm symlink -# ("Cannot find package 'tslib'"). Public-hoisting makes it resolvable by walking up to root. -public-hoist-pattern[]=tslib diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 817b9a00..e1e538d7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -78,9 +78,6 @@ importers: hono: specifier: ^4.12.18 version: 4.12.19 - tslib: - specifier: ^2.8.1 - version: 2.8.1 devDependencies: '@chat-adapter/state-memory': specifier: ^4.30.0 diff --git a/services/discordbot/Dockerfile b/services/discordbot/Dockerfile index 1f0f7149..b05aecd5 100644 --- a/services/discordbot/Dockerfile +++ b/services/discordbot/Dockerfile @@ -7,9 +7,7 @@ RUN bun install -g pnpm@10.28.1 # Workspace manifests first so dependency install is a cached layer. discordbot depends on the # @centaur/* workspace packages, so they must be present for pnpm to link the `workspace:*` ranges. -# .npmrc is required so tslib is public-hoisted (discord.js ships source + requires tslib, which Bun -# can't resolve via the nested pnpm symlink otherwise). 
-COPY package.json pnpm-lock.yaml pnpm-workspace.yaml .npmrc ./ +COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./ COPY packages/ packages/ COPY services/discordbot/package.json services/discordbot/package.json RUN pnpm install --filter discordbot --prod --frozen-lockfile diff --git a/services/discordbot/package.json b/services/discordbot/package.json index 91c5f4c5..2b5fac39 100644 --- a/services/discordbot/package.json +++ b/services/discordbot/package.json @@ -16,8 +16,7 @@ "@chat-adapter/state-pg": "^4.30.0", "chat": "^4.30.0", "discord.js": "^14.25.1", - "hono": "^4.12.18", - "tslib": "^2.8.1" + "hono": "^4.12.18" }, "devDependencies": { "@chat-adapter/state-memory": "^4.30.0", From 5441fdff43e691b42797f179c76b89d17e90b826 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Fri, 5 Jun 2026 08:52:55 -0600 Subject: [PATCH 15/25] feat(discordbot): scope session event streams to the execution (#422 port) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sync pass against slackbotv2 @ 17882c4d: thread executionId through ForwardSessionInput into the events URL so streams only carry the turn they're rendering (set in-stream after execute returns, and from the stored obligation on recovery — the obligation already tracked it). Also adopt the fixed-point truncation helper. Not ported (deliberate delta): the oversized-render plain-text fallback — the Discord adapter hard-truncates content at the 2000-char limit on every outgoing payload, so Slack's msg_too_long failure mode cannot occur here. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- services/discordbot/src/index.ts | 35 +++++++++++++++++++++----- services/discordbot/src/session-api.ts | 19 ++++++++------ services/discordbot/src/types.ts | 1 + 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/services/discordbot/src/index.ts b/services/discordbot/src/index.ts index 18ec2ebd..bd5aa1d0 100644 --- a/services/discordbot/src/index.ts +++ b/services/discordbot/src/index.ts @@ -564,6 +564,7 @@ async function recoverRenderObligation( ); const input: ForwardSessionInput = { afterEventId: lastEventId, + executionId: obligation.executionId, messages: [], onEventId: (eventId) => { lastEventId = Math.max(lastEventId, eventId); @@ -730,11 +731,28 @@ function discordSafeChatSdkChunk( } function truncateDiscordTaskField(value: string): string { - if (value.length <= DISCORD_TASK_DETAILS_MAX_CHARS) return value; - const omitted = value.length - DISCORD_TASK_DETAILS_MAX_CHARS; - const suffix = `\n[truncated ${omitted} chars from Discord task details]`; - const keep = Math.max(0, DISCORD_TASK_DETAILS_MAX_CHARS - suffix.length); - return `${value.slice(0, keep).trimEnd()}${suffix}`; + return truncateDiscordText( + value, + DISCORD_TASK_DETAILS_MAX_CHARS, + "Discord task details", + ); +} + +function truncateDiscordText( + value: string, + maxChars: number, + label: string, +): string { + if (value.length <= maxChars) return value; + let omitted = value.length - maxChars; + while (true) { + const suffix = `\n[truncated ${omitted} chars from ${label}]`; + const keep = Math.max(0, maxChars - suffix.length); + const actualOmitted = value.length - keep; + if (actualOmitted === omitted) + return `${value.slice(0, keep).trimEnd()}${suffix}`; + omitted = actualOmitted; + } } async function* streamSessionAfterHandoff( @@ -755,7 +773,12 @@ async function* streamSessionAfterHandoff( if (input.executeMessage) { try { const execution = await executeSessionTurn(options, input); - if (execution) await 
onExecutionStarted?.(execution); + if (execution) { + // Scope the event stream we open below to this execution (upstream + // #422 sets this where execute returns; for us that's in-stream). + input.executionId = execution.execution_id; + await onExecutionStarted?.(execution); + } } catch (error) { traceLog(options, "discordbot_forward_failed", input.trace, { error: errorMessage(error), diff --git a/services/discordbot/src/session-api.ts b/services/discordbot/src/session-api.ts index 6d2687a3..ea4fe208 100644 --- a/services/discordbot/src/session-api.ts +++ b/services/discordbot/src/session-api.ts @@ -203,7 +203,7 @@ export async function openSessionEventStream( options: DiscordbotOptions, input: Pick< ForwardSessionInput, - "afterEventId" | "onEventId" | "threadId" | "trace" + "afterEventId" | "executionId" | "onEventId" | "threadId" | "trace" >, ): Promise> { const streamStartedAtMs = nowMs(); @@ -211,10 +211,12 @@ export async function openSessionEventStream( options, input.threadId, input.afterEventId, + input.executionId, input.onEventId, ); traceLog(options, "discordbot_session_events_opened", input.trace, { after_event_id: input.afterEventId, + execution_id: input.executionId, phase_ms: elapsedMs(streamStartedAtMs), }); return stream; @@ -391,16 +393,17 @@ async function streamSessionNotifications( options: DiscordbotOptions, threadId: string, afterEventId: number, + executionId: string | undefined, onEventId: (eventId: number) => void, ): Promise> { const fetchFn = options.fetch ?? 
fetch; - const response = await fetchFn( - `${apiSessionUrl(options.apiUrl, threadId, "events")}?after_event_id=${afterEventId}`, - { - method: "GET", - headers: apiHeaders(options, false), - }, - ); + const url = new URL(apiSessionUrl(options.apiUrl, threadId, "events")); + url.searchParams.set("after_event_id", String(afterEventId)); + if (executionId) url.searchParams.set("execution_id", executionId); + const response = await fetchFn(url.toString(), { + method: "GET", + headers: apiHeaders(options, false), + }); await ensureApiOk(response, "stream events", options); if (!response.body) return toAsyncIterable([]); return parseSessionEventStream(response.body, onEventId); diff --git a/services/discordbot/src/types.ts b/services/discordbot/src/types.ts index 327480af..dd9ecbb9 100644 --- a/services/discordbot/src/types.ts +++ b/services/discordbot/src/types.ts @@ -144,6 +144,7 @@ export type DiscordbotTrace = { export type ForwardSessionInput = { afterEventId: number; + executionId?: string; executeMessage?: DiscordbotApiMessage; messages: DiscordbotApiMessage[]; onEventId(eventId: number): void; From ef1cd55efc95580134c5c60a87776f8c07f85c90 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Fri, 5 Jun 2026 10:50:57 -0600 Subject: [PATCH 16/25] feat(discordbot): absorb slackbotv2's Postgres resilience hardening (#404 port) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Own the pg pool so an error handler can swallow idle-client drops (Postgres restart / startup network races) instead of crashing the process, and block obligation recovery on a backoff-retried first connect — same rationale as upstream, Discord trace names. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- pnpm-lock.yaml | 6 ++++ services/discordbot/package.json | 4 ++- services/discordbot/src/index.ts | 55 ++++++++++++++++++++++++++++++-- 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e1e538d7..3accd873 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -78,6 +78,9 @@ importers: hono: specifier: ^4.12.18 version: 4.12.19 + pg: + specifier: ^8.21.0 + version: 8.21.0 devDependencies: '@chat-adapter/state-memory': specifier: ^4.30.0 @@ -88,6 +91,9 @@ importers: '@types/node': specifier: ^25.7.0 version: 25.9.0 + '@types/pg': + specifier: ^8.15.5 + version: 8.20.0 '@typescript/native-preview': specifier: ^7.0.0-dev.20260512.1 version: 7.0.0-dev.20260518.1 diff --git a/services/discordbot/package.json b/services/discordbot/package.json index 2b5fac39..0377d57d 100644 --- a/services/discordbot/package.json +++ b/services/discordbot/package.json @@ -16,10 +16,12 @@ "@chat-adapter/state-pg": "^4.30.0", "chat": "^4.30.0", "discord.js": "^14.25.1", - "hono": "^4.12.18" + "hono": "^4.12.18", + "pg": "^8.21.0" }, "devDependencies": { "@chat-adapter/state-memory": "^4.30.0", + "@types/pg": "^8.15.5", "@types/bun": "^1.3.13", "@types/node": "^25.7.0", "@typescript/native-preview": "^7.0.0-dev.20260512.1", diff --git a/services/discordbot/src/index.ts b/services/discordbot/src/index.ts index bd5aa1d0..6795db35 100644 --- a/services/discordbot/src/index.ts +++ b/services/discordbot/src/index.ts @@ -17,6 +17,7 @@ import { type Thread, } from "chat"; import { Hono } from "hono"; +import pg from "pg"; import { isAllowedDiscordMessage, isGuildAllowlistEmpty, @@ -70,6 +71,8 @@ const RENDER_RECOVERY_LEASE_TTL_MS = 2 * 60 * 1000; const RENDER_RETRY_INITIAL_DELAY_MS = 250; const RENDER_RETRY_MAX_DELAY_MS = 5_000; const DISCORD_TASK_DETAILS_MAX_CHARS = 500; +const POSTGRES_CONNECT_INITIAL_DELAY_MS = 250; +const POSTGRES_CONNECT_MAX_DELAY_MS = 10_000; export function 
createDiscordbot(options: DiscordbotOptions): Discordbot { const userName = options.userName ?? "centaur"; @@ -150,13 +153,58 @@ function createDefaultState( options: DiscordbotOptions, logger: Logger, ): StateAdapter { + const stateLogger = logger.child("postgres-state"); + // Own the pool so we can attach an error handler. pg.Pool emits 'error' for + // idle clients whose connection drops (Postgres restart, or a transient blip + // while the pod's network is still being programmed at startup). With no + // listener, node-postgres rethrows it as an uncaught exception and the process + // crashes/spews. Logging and swallowing lets the pool reconnect on the next query. + const pool = new pg.Pool({ connectionString: options.postgresUrl }); + pool.on("error", (error) => { + stateLogger.warn("postgres pool error", { error: errorMessage(error) }); + }); return createPostgresState({ - url: options.postgresUrl, + client: pool, keyPrefix: options.stateKeyPrefix ?? "centaur-discordbot", - logger: logger.child("postgres-state"), + logger: stateLogger, }); } +/** + * Blocks until the state backend accepts a connection, retrying with exponential + * backoff. The first DB connection fires within milliseconds of process start and + * can lose a race with the pod's network programming (a one-off ECONNREFUSED). + * Retrying instead of throwing absorbs that race; the first successful connect + * also flips the adapter's `connected` flag, so the message path comes alive too. 
+ */ +async function ensureStateConnected( + state: StateAdapter, + options: DiscordbotOptions, +): Promise { + for (let attempt = 0; ; attempt++) { + try { + await state.connect(); + if (attempt > 0) { + traceLog(options, "discordbot_postgres_connected", undefined, { + attempts: attempt + 1, + }); + } + return; + } catch (error) { + const delayMs = Math.min( + POSTGRES_CONNECT_INITIAL_DELAY_MS * 2 ** attempt, + POSTGRES_CONNECT_MAX_DELAY_MS, + ); + traceLog(options, "discordbot_postgres_connect_retry", undefined, { + attempt: attempt + 1, + delay_ms: delayMs, + error: errorMessage(error), + }); + await sleep(delayMs); + } + } +} + /** * Persists a Discord thread update into the session API. In execute mode the create/append/execute * handoff completes before the handler returns; SSE rendering continues in background. @@ -450,6 +498,9 @@ async function recoverRenderObligationsWithRetry( state: StateAdapter, options: DiscordbotOptions, ): Promise { + // Wait for Postgres before scanning for obligations. This is also what warms the + // shared pool at startup, so transient connect failures don't wedge the bot. + await ensureStateConnected(state, options); let attempt = 0; while (true) { try { From a8918c7f9845a1aef06199b57d47af800504c1e8 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Fri, 5 Jun 2026 11:48:22 -0600 Subject: [PATCH 17/25] fix(discordbot): drop details from command-execution task chunks (port) Sync with slackbotv2 @ 02db3ebc: command-execution tasks omit their details from the stream entirely instead of truncating them. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- services/discordbot/src/index.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/services/discordbot/src/index.ts b/services/discordbot/src/index.ts index 6795db35..7e82b5c0 100644 --- a/services/discordbot/src/index.ts +++ b/services/discordbot/src/index.ts @@ -775,12 +775,22 @@ function discordSafeChatSdkChunk( if (chunk.type !== "task_update") return chunk; const { output: _output, details, ...safeChunk } = chunk; void _output; + if (isCommandExecutionTask(chunk)) return safeChunk; return { ...safeChunk, ...(details ? { details: truncateDiscordTaskField(details) } : {}), }; } +function isCommandExecutionTask( + chunk: Extract, +): boolean { + return ( + chunk.id.startsWith("call_") || + chunk.title.toLowerCase().includes("command execution") + ); +} + function truncateDiscordTaskField(value: string): string { return truncateDiscordText( value, From 5d14bb4fc3ef8df358a9fc2bd6500be2ed38b5ad Mon Sep 17 00:00:00 2001 From: Will Drach Date: Fri, 5 Jun 2026 13:48:02 -0600 Subject: [PATCH 18/25] feat(discordbot): honor plain text render requests (port) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sync with slackbotv2 @ f9fcb5ee: messages asking for plain text ("plain text only", "no interactive blocks", "no dashboards") drain the stream silently and post one final text message — captured terminal result text first, accumulated markdown as fallback. Discord-flavored: typing keepalive instead of assistant status, and the final text pre-truncates to fit Discord's 2000-char content cap with an honest suffix. Brings in the render collector class this needed (the msg_too_long fallback path it was built for remains unported — the Discord adapter hard-truncates, so that failure mode can't occur). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- services/discordbot/src/index.ts | 123 ++++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) diff --git a/services/discordbot/src/index.ts b/services/discordbot/src/index.ts index 7e82b5c0..c66ef080 100644 --- a/services/discordbot/src/index.ts +++ b/services/discordbot/src/index.ts @@ -71,6 +71,9 @@ const RENDER_RECOVERY_LEASE_TTL_MS = 2 * 60 * 1000; const RENDER_RETRY_INITIAL_DELAY_MS = 250; const RENDER_RETRY_MAX_DELAY_MS = 5_000; const DISCORD_TASK_DETAILS_MAX_CHARS = 500; +// Discord caps message content at 2000 chars; leave headroom so the honest +// "[truncated ...]" suffix lands instead of the adapter's silent "..." cut. +const DISCORD_FALLBACK_TEXT_MAX_CHARS = 1_900; const POSTGRES_CONNECT_INITIAL_DELAY_MS = 250; const POSTGRES_CONNECT_MAX_DELAY_MS = 10_000; @@ -734,6 +737,11 @@ async function renderExecutionStream( traceLog(options, "discordbot_thread_named", trace); } + if (isPlainTextOnlyRequest(message.text)) { + await renderPlainTextExecutionStream(thread, stream, options, trace); + return; + } + const stopTyping = startTypingKeepalive(thread, logger); try { // Deliberate delta from slackbotv2: no streamAfterFirstChunk deferral. @@ -757,10 +765,123 @@ async function renderRecoveredExecutionStream( trace?: DiscordbotTrace, ): Promise { // Recovered renders never rename the thread; naming happens on the initial execution. - // The discordSafe stream wrapping comes via renderExecutionStream. + // The discordSafe stream wrapping (and the plain-text-only branch) comes via + // renderExecutionStream. await renderExecutionStream(thread, stream, message, options, false, trace); } +async function renderPlainTextExecutionStream( + thread: Thread, + stream: AsyncIterable, + options: DiscordbotOptions, + trace?: DiscordbotTrace, +): Promise { + const logger = options.logger ?? 
noopLogger; + const fallback = new DiscordRenderFallback(); + const stopTyping = startTypingKeepalive(thread, logger); + try { + const chatStream = fallback.collectChatSdk( + discordSafeChatSdkStream( + codexAppServerToChatSdkStream( + fallback.collectSource(stream), + rendererOptions(options), + ), + ), + ); + for await (const _chunk of chatStream) { + void _chunk; + } + const text = truncateDiscordText( + fallback.text() || "Execution completed, but no final text was captured.", + DISCORD_FALLBACK_TEXT_MAX_CHARS, + "Discord final answer", + ); + traceLog(options, "discordbot_render_plain_text_final", trace, { + chars: text.length, + }); + await thread.post(text); + } finally { + stopTyping(); + } +} + +class DiscordRenderFallback { + private markdownText = ""; + private terminalText = ""; + + async *collectSource( + stream: AsyncIterable, + ): AsyncIterable { + for await (const event of stream) { + this.captureTerminalText(event); + yield event; + } + } + + async *collectChatSdk( + stream: AsyncIterable, + ): AsyncIterable { + for await (const chunk of stream) { + if (chunk.type === "markdown_text") this.markdownText += chunk.text; + yield chunk; + } + } + + text(): string { + return (this.terminalText || this.markdownText).trim(); + } + + private captureTerminalText(event: DiscordbotRendererSource): void { + if (!event || typeof event !== "object") return; + const eventKind = String( + "eventKind" in event + ? event.eventKind + : "event" in event + ? event.event + : "", + ); + if ( + eventKind !== "session.execution_completed" && + eventKind !== "session.execution_cancelled" && + !isTerminalCodexAppServerEvent(event) + ) { + return; + } + const data = + "data" in event && event.data && typeof event.data === "object" + ? 
event.data + : event; + const text = terminalResultText(data); + if (text) this.terminalText = text; + } +} + +function isTerminalCodexAppServerEvent(event: unknown): boolean { + if (!event || typeof event !== "object") return false; + const type = (event as { type?: unknown }).type; + return type === "result" || type === "turn.done" || type === "turn.completed"; +} + +function terminalResultText(event: unknown): string { + if (!event || typeof event !== "object") return ""; + for (const key of ["result", "result_text", "text", "final_text"]) { + const value = (event as Record)[key]; + if (typeof value !== "string") continue; + const resultText = value.trim(); + if (resultText) return resultText; + } + return ""; +} + +function isPlainTextOnlyRequest(text: string): boolean { + const normalized = text.toLowerCase(); + return ( + /\bplain\s+text\s+only\b/.test(normalized) || + /\bno\s+interactive\s+blocks?\b/.test(normalized) || + /\bno\s+dashboards?\b/.test(normalized) + ); +} + async function* discordSafeChatSdkStream( stream: AsyncIterable, ): AsyncIterable { From 6497c7c76d328b6b221ad8dc95556c4ea14a370a Mon Sep 17 00:00:00 2001 From: Will Drach Date: Fri, 5 Jun 2026 21:52:02 -0600 Subject: [PATCH 19/25] feat(discordbot): surface thread starter messages and embeds to the agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A thread created from a message keeps that message in the parent channel (the thread shares its ID), so the thread-history walk in collectInitialContext never sees it — Slack has no analog because conversations.replies returns the parent as the first reply. Fetch the starter from the parent channel (mirroring discord.js ThreadChannel#fetchStarterMessage) and prepend it to the initial context. Webhook-style messages (Sentry alerts) carry their payload entirely in embeds with empty content, which the chat adapter drops; flatten embed text into the forwarded message so the agent can read them. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- services/discordbot/src/discord-starter.ts | 169 ++++++++++++++++ services/discordbot/src/discord-threading.ts | 2 +- services/discordbot/src/index.ts | 14 ++ services/discordbot/src/session-api.ts | 5 +- .../discordbot/test/discord-starter.test.ts | 191 ++++++++++++++++++ 5 files changed, 379 insertions(+), 2 deletions(-) create mode 100644 services/discordbot/src/discord-starter.ts create mode 100644 services/discordbot/test/discord-starter.test.ts diff --git a/services/discordbot/src/discord-starter.ts b/services/discordbot/src/discord-starter.ts new file mode 100644 index 00000000..d53660c0 --- /dev/null +++ b/services/discordbot/src/discord-starter.ts @@ -0,0 +1,169 @@ +import type { Attachment, Logger } from "chat"; +import { parseDiscordThreadKey } from "./discord-allowlist"; +import { DEFAULT_DISCORD_API_URL } from "./discord-threading"; +import type { + DiscordbotApiAttachment, + DiscordbotApiMessage, + DiscordbotOptions, + JsonObject, +} from "./types"; +import { isJsonObject } from "./utils"; + +/** + * Discord delta with no slackbotv2 analog: a thread created from a message keeps + * that starter message in the **parent channel** (the thread shares its ID), so it + * never appears in the thread's own history — Slack's conversations.replies + * returns the parent as the first reply, but Discord requires this extra fetch + * (mirrors discord.js `ThreadChannel#fetchStarterMessage`). + * + * Returns null when the key has no thread segment, the thread was not created + * from a message (404), or on any failure — context enrichment must never block + * execution. + */ +export async function fetchThreadStarterMessage( + options: DiscordbotOptions, + threadKey: string, + logger: Logger, +): Promise { + const { channelId, threadId } = parseDiscordThreadKey(threadKey); + if (!channelId || !threadId) return null; + + const fetchFn = options.fetch ?? fetch; + const apiBase = (options.discordApiUrl ?? 
DEFAULT_DISCORD_API_URL).replace( + /\/$/, + "", + ); + try { + const response = await fetchFn( + `${apiBase}/channels/${channelId}/messages/${threadId}`, + { headers: { authorization: `Bot ${options.botToken}` } }, + ); + if (!response.ok) { + // 404 = the thread was created standalone ("+ New Thread") or the + // starter message was deleted; both are normal, not errors. + if (response.status !== 404) { + logger.warn("discordbot_thread_starter_fetch_failed", { + status: response.status, + thread_id: threadId, + }); + } + return null; + } + return rawMessageToApiMessage( + await response.json(), + threadKey, + options.applicationId, + ); + } catch (error) { + logger.warn("discordbot_thread_starter_fetch_error", { + error: error instanceof Error ? error.message : String(error), + thread_id: threadId, + }); + return null; + } +} + +/** + * Append flattened embed content to a message's text. Webhook-style messages + * (Sentry alerts, GitHub notifications) carry their payload entirely in + * `embeds` with empty `content`; the chat adapter only surfaces `content`, so + * without this the agent sees an empty message. + */ +export function withDiscordEmbedText(text: string, raw: unknown): string { + if (!isJsonObject(raw)) return text; + const embeds = Array.isArray(raw.embeds) ? raw.embeds : []; + const embedText = embeds + .map((embed) => embedToText(embed)) + .filter(Boolean) + .join("\n\n"); + if (!embedText) return text; + return text.trim() ? `${text}\n\n${embedText}` : embedText; +} + +function embedToText(embed: unknown): string { + if (!isJsonObject(embed)) return ""; + const lines: string[] = []; + const author = isJsonObject(embed.author) + ? nonEmptyString(embed.author.name) + : undefined; + if (author) lines.push(author); + const title = nonEmptyString(embed.title); + const url = nonEmptyString(embed.url); + if (title) { + lines.push(url ? 
`${title} (${url})` : title); + } else if (url) { + lines.push(url); + } + const description = nonEmptyString(embed.description); + if (description) lines.push(description); + const fields = Array.isArray(embed.fields) ? embed.fields : []; + for (const field of fields) { + if (!isJsonObject(field)) continue; + const name = nonEmptyString(field.name); + const value = nonEmptyString(field.value); + if (name && value) lines.push(`${name}: ${value}`); + else if (value) lines.push(value); + } + const footer = isJsonObject(embed.footer) + ? nonEmptyString(embed.footer.text) + : undefined; + if (footer) lines.push(footer); + if (lines.length === 0) return ""; + return `[embed] ${lines.join("\n")}`; +} + +function rawMessageToApiMessage( + raw: unknown, + threadKey: string, + botUserId: string, +): DiscordbotApiMessage | null { + if (!isJsonObject(raw) || typeof raw.id !== "string") return null; + const author = isJsonObject(raw.author) ? raw.author : {}; + const userId = nonEmptyString(author.id) ?? "unknown"; + const userName = nonEmptyString(author.username) ?? "unknown"; + return { + attachments: rawAttachments(raw), + author: { + fullName: nonEmptyString(author.global_name) ?? userName, + isBot: author.bot === true, + isMe: userId === botUserId, + userId, + userName, + }, + id: raw.id, + isMention: false, + raw, + text: withDiscordEmbedText(nonEmptyString(raw.content) ?? "", raw), + threadId: threadKey, + timestamp: nonEmptyString(raw.timestamp) ?? "", + }; +} + +function rawAttachments(raw: JsonObject): DiscordbotApiAttachment[] { + const attachments = Array.isArray(raw.attachments) ? 
raw.attachments : []; + return attachments.filter(isJsonObject).map((attachment) => ({ + height: numberValue(attachment.height), + mimeType: nonEmptyString(attachment.content_type), + name: nonEmptyString(attachment.filename), + size: numberValue(attachment.size), + type: attachmentType(nonEmptyString(attachment.content_type)), + url: nonEmptyString(attachment.url), + width: numberValue(attachment.width), + })); +} + +// Mirrors the chat adapter's getAttachmentType MIME mapping. +function attachmentType(mimeType: string | undefined): Attachment["type"] { + if (mimeType?.startsWith("image/")) return "image"; + if (mimeType?.startsWith("video/")) return "video"; + if (mimeType?.startsWith("audio/")) return "audio"; + return "file"; +} + +function nonEmptyString(value: unknown): string | undefined { + return typeof value === "string" && value ? value : undefined; +} + +function numberValue(value: unknown): number | undefined { + return typeof value === "number" ? value : undefined; +} diff --git a/services/discordbot/src/discord-threading.ts b/services/discordbot/src/discord-threading.ts index 71dfc0b1..f448266e 100644 --- a/services/discordbot/src/discord-threading.ts +++ b/services/discordbot/src/discord-threading.ts @@ -3,7 +3,7 @@ import { parseDiscordThreadKey } from "./discord-allowlist"; import type { DiscordbotOptions } from "./types"; const DISCORD_THREAD_NAME_LIMIT = 100; -const DEFAULT_DISCORD_API_URL = "https://discord.com/api/v10"; +export const DEFAULT_DISCORD_API_URL = "https://discord.com/api/v10"; /** * Derive a Discord thread name from the triggering message text. 
The `@chat-adapter/discord` diff --git a/services/discordbot/src/index.ts b/services/discordbot/src/index.ts index c66ef080..23cb7c1a 100644 --- a/services/discordbot/src/index.ts +++ b/services/discordbot/src/index.ts @@ -22,6 +22,7 @@ import { isAllowedDiscordMessage, isGuildAllowlistEmpty, } from "./discord-allowlist"; +import { fetchThreadStarterMessage } from "./discord-starter"; import { deriveThreadName, renameThreadFromMessage } from "./discord-threading"; import { collectInitialContext, @@ -263,9 +264,22 @@ async function syncThreadMessageToSession( if (shouldIncludeContext && !state.historyForwarded) { const contextStartedAtMs = nowMs(); context = await collectInitialContext(thread, message); + // Discord delta: a thread created from a message keeps that starter message + // in the parent channel, so thread history alone misses it (Slack's + // conversations.replies includes the parent). Prefer the fetched starter + // over any thread-starter stub already in the history. + const starter = await fetchThreadStarterMessage( + input.options, + thread.id, + input.options.logger ?? 
noopLogger, + ); + if (starter) { + context = [starter, ...context.filter((item) => item.id !== starter.id)]; + } traceLog(input.options, "discordbot_forward_context_collected", trace, { message_count: context.length, phase_ms: elapsedMs(contextStartedAtMs), + starter_included: starter !== null, }); } else { traceLog(input.options, "discordbot_forward_context_skipped", trace, { diff --git a/services/discordbot/src/session-api.ts b/services/discordbot/src/session-api.ts index ea4fe208..3c404735 100644 --- a/services/discordbot/src/session-api.ts +++ b/services/discordbot/src/session-api.ts @@ -1,5 +1,6 @@ import type { RustSessionStreamEvent } from "@centaur/harness-events"; import type { Attachment, Message } from "chat"; +import { withDiscordEmbedText } from "./discord-starter"; import type { DiscordbotApiAttachment, DiscordbotApiMessage, @@ -127,7 +128,9 @@ export async function serializeMessage( id: message.id, isMention: message.isMention === true, raw: message.raw, - text: message.text, + // Discord delta: webhook-style messages (Sentry alerts etc.) carry their + // payload in embeds, which the chat adapter drops from `text`. 
+ text: withDiscordEmbedText(message.text, message.raw), threadId: message.threadId, timestamp: message.metadata.dateSent.toISOString(), }; diff --git a/services/discordbot/test/discord-starter.test.ts b/services/discordbot/test/discord-starter.test.ts new file mode 100644 index 00000000..70efe258 --- /dev/null +++ b/services/discordbot/test/discord-starter.test.ts @@ -0,0 +1,191 @@ +import { describe, expect, it } from "bun:test"; +import type { Logger } from "chat"; +import { + fetchThreadStarterMessage, + withDiscordEmbedText, +} from "../src/discord-starter"; +import type { DiscordbotFetch, DiscordbotOptions } from "../src/types"; + +const silentLogger: Logger = { + debug: () => undefined, + info: () => undefined, + warn: () => undefined, + error: () => undefined, + child: () => silentLogger, +}; + +function options(fetchFn: DiscordbotFetch): DiscordbotOptions { + return { + apiUrl: "http://localhost", + applicationId: "bot-user", + botToken: "bot-token", + publicKey: "key", + discordApiUrl: "https://discord.com/api/v10", + fetch: fetchFn, + }; +} + +// Shape of a Sentry-style webhook alert: empty content, payload in embeds. 
+const sentryStarter = { + id: "T9", + channel_id: "C1", + content: "", + timestamp: "2026-06-05T12:00:00.000000+00:00", + author: { id: "webhook-1", username: "Sentry", bot: true }, + attachments: [], + embeds: [ + { + title: "TypeError: cannot read pair", + url: "https://sentry.io/issues/123", + description: "ingest.pairs in processSwap", + fields: [{ name: "events", value: "14" }], + footer: { text: "prod | ingest" }, + }, + ], +}; + +describe("fetchThreadStarterMessage", () => { + it("fetches the starter from the parent channel and flattens embeds", async () => { + const calls: Array<{ url: string; init?: RequestInit }> = []; + const fetchFn = (async (url: RequestInfo | URL, init?: RequestInit) => { + calls.push({ url: String(url), init }); + return new Response(JSON.stringify(sentryStarter), { status: 200 }); + }) as DiscordbotFetch; + + const starter = await fetchThreadStarterMessage( + options(fetchFn), + "discord:G1:C1:T9", + silentLogger, + ); + + expect(calls).toHaveLength(1); + expect(calls[0]?.url).toBe( + "https://discord.com/api/v10/channels/C1/messages/T9", + ); + const headers = calls[0]?.init?.headers as Record; + expect(headers.authorization).toBe("Bot bot-token"); + + expect(starter).not.toBeNull(); + expect(starter?.id).toBe("T9"); + expect(starter?.threadId).toBe("discord:G1:C1:T9"); + expect(starter?.author.userName).toBe("Sentry"); + expect(starter?.author.isBot).toBe(true); + expect(starter?.author.isMe).toBe(false); + expect(starter?.text).toContain( + "TypeError: cannot read pair (https://sentry.io/issues/123)", + ); + expect(starter?.text).toContain("ingest.pairs in processSwap"); + expect(starter?.text).toContain("events: 14"); + expect(starter?.text).toContain("prod | ingest"); + }); + + it("keeps plain content and maps attachments", async () => { + const fetchFn = (async () => + new Response( + JSON.stringify({ + id: "T9", + content: "look at this", + timestamp: "2026-06-05T12:00:00.000000+00:00", + author: { id: "U1", username: 
"will", global_name: "Will" }, + attachments: [ + { + filename: "chart.png", + content_type: "image/png", + url: "https://cdn.discordapp.com/chart.png", + size: 123, + width: 800, + height: 600, + }, + ], + embeds: [], + }), + { status: 200 }, + )) as DiscordbotFetch; + + const starter = await fetchThreadStarterMessage( + options(fetchFn), + "discord:G1:C1:T9", + silentLogger, + ); + + expect(starter?.text).toBe("look at this"); + expect(starter?.author.fullName).toBe("Will"); + expect(starter?.attachments).toEqual([ + { + height: 600, + mimeType: "image/png", + name: "chart.png", + size: 123, + type: "image", + url: "https://cdn.discordapp.com/chart.png", + width: 800, + }, + ]); + }); + + it("returns null on 404 (thread not created from a message)", async () => { + const fetchFn = (async () => + new Response('{"message": "Unknown Message", "code": 10008}', { + status: 404, + })) as DiscordbotFetch; + + const starter = await fetchThreadStarterMessage( + options(fetchFn), + "discord:G1:C1:T9", + silentLogger, + ); + expect(starter).toBeNull(); + }); + + it("no-ops when the key has no thread segment", async () => { + let called = false; + const fetchFn = (async () => { + called = true; + return new Response("{}"); + }) as DiscordbotFetch; + + const starter = await fetchThreadStarterMessage( + options(fetchFn), + "discord:G1:C1", + silentLogger, + ); + expect(starter).toBeNull(); + expect(called).toBe(false); + }); + + it("swallows fetch errors", async () => { + const fetchFn = (async () => { + throw new Error("network down"); + }) as DiscordbotFetch; + + await expect( + fetchThreadStarterMessage( + options(fetchFn), + "discord:G1:C1:T9", + silentLogger, + ), + ).resolves.toBeNull(); + }); +}); + +describe("withDiscordEmbedText", () => { + it("appends embed text after existing content", () => { + const text = withDiscordEmbedText("heads up", { + embeds: [{ title: "Alert", description: "something broke" }], + }); + expect(text).toBe("heads up\n\n[embed] 
Alert\nsomething broke"); + }); + + it("returns the text unchanged without embeds", () => { + expect(withDiscordEmbedText("plain", { embeds: [] })).toBe("plain"); + expect(withDiscordEmbedText("plain", undefined)).toBe("plain"); + expect(withDiscordEmbedText("plain", "not-an-object")).toBe("plain"); + }); + + it("skips malformed embeds and fields", () => { + const text = withDiscordEmbedText("", { + embeds: [null, {}, { fields: [null, { value: "lonely value" }] }], + }); + expect(text).toBe("[embed] lonely value"); + }); +}); From 5e5adfd9c2b2c779029cf90d1dba06940c299612 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Sun, 7 Jun 2026 07:03:36 -0600 Subject: [PATCH 20/25] feat(discordbot): split renders into a progress timeline + answer message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Discord's post+edit streaming dropped every task chunk, so runs showed a bare placeholder that was later overwritten by the answer — no chain of thought, and the answer sorted above messages sent mid-run. Runs now post a progress message that's edited in place with a step timeline (reasoning excerpts, commands, tool calls) and finalized as a permanent record, while the answer streams into a separate message created on first visible text. Drops the now-subsumed task-chunk stripping (the renderer truncates its own previews instead). 
Co-Authored-By: Claude Opus 4.8 --- services/discordbot/README.md | 5 + services/discordbot/src/discord-progress.ts | 321 ++++++++++++++++++ services/discordbot/src/index.ts | 152 +++++---- services/discordbot/src/session-api.ts | 9 +- services/discordbot/src/types.ts | 2 +- services/discordbot/src/utils.ts | 38 +++ .../discordbot/test/discord-progress.test.ts | 284 ++++++++++++++++ 7 files changed, 741 insertions(+), 70 deletions(-) create mode 100644 services/discordbot/src/discord-progress.ts create mode 100644 services/discordbot/test/discord-progress.test.ts diff --git a/services/discordbot/README.md b/services/discordbot/README.md index 595c7823..df4d44df 100644 --- a/services/discordbot/README.md +++ b/services/discordbot/README.md @@ -12,6 +12,11 @@ unchanged (`discord:…` thread keys flow through identically). keyed by the new thread (`discord:{guild}:{channel}:{threadId}`). - **`@`-mention inside an existing thread** → the bot answers in that thread. - **Follow-ups inside an active thread** append to the same session without a re-mention. +- **Two-message replies**: a run posts an instant "✨ thinking..." **progress message** that is + edited in place with a step timeline (reasoning excerpts, commands, tool calls) and finalized + (`✅ Done · 42s`) — never overwritten or deleted. The **answer** streams into a separate message + created when the first answer text arrives, so it lands at the bottom of the thread even when + users chime in mid-run. 
## Ingress model diff --git a/services/discordbot/src/discord-progress.ts b/services/discordbot/src/discord-progress.ts new file mode 100644 index 00000000..bd042800 --- /dev/null +++ b/services/discordbot/src/discord-progress.ts @@ -0,0 +1,321 @@ +import type { ChatSDKStreamChunk } from "@centaur/rendering"; +import type { Logger, Thread } from "chat"; +import { elapsedMs, errorMessage, nowMs } from "./utils"; + +export type DiscordProgressChunk = Exclude< + ChatSDKStreamChunk, + { type: "markdown_text" } +>; +type DiscordTaskChunk = Extract; +type DiscordTaskStatus = DiscordTaskChunk["status"]; + +/** Terminal state the progress message settles into. */ +export type DiscordProgressOutcome = "done" | "failed" | "retrying"; + +// Discord caps message content at 2000 chars; headroom keeps every edit safe. +const PROGRESS_MESSAGE_MAX_CHARS = 1_900; +const PROGRESS_SNIPPET_MAX_CHARS = 280; +const PROGRESS_COMMAND_MAX_CHARS = 64; +const PROGRESS_TITLE_MAX_CHARS = 80; +// Steps beyond this fall off the front of the model and render as "… earlier steps". +const PROGRESS_MAX_TRACKED_STEPS = 120; +// Stay well below Discord's ~5 edits/5s per-channel budget; the answer message +// streams its own post+edit cycle in parallel with these edits. +const PROGRESS_EDIT_INTERVAL_MS = 1_500; + +type ProgressStep = { + command?: string; + snippetParts: Map; + status: DiscordTaskStatus; + title: string; +}; + +/** + * Pure model + renderer for the progress message: an ordered step timeline built + * from the renderer's task/plan updates, with the latest reasoning excerpt quoted + * under the current "Thinking" step. 
+ */ +export class DiscordProgressTimeline { + private readonly placeholderText: string; + private readonly steps: ProgressStep[] = []; + private readonly stepById = new Map(); + private planTitle: string | null = null; + private droppedSteps = 0; + private outcome: DiscordProgressOutcome | null = null; + private elapsedLabel = ""; + + constructor(placeholderText: string) { + this.placeholderText = placeholderText; + } + + update(chunk: DiscordProgressChunk): void { + if (this.outcome) return; + if (chunk.type === "plan_update") { + this.planTitle = oneLine(chunk.title, PROGRESS_TITLE_MAX_CHARS); + return; + } + const existing = this.stepById.get(chunk.id); + if (existing) { + existing.status = chunk.status; + captureDetails(existing, chunk); + return; + } + const last = this.steps.at(-1); + if (chunk.title === "Thinking" && last?.title === "Thinking") { + // Reasoning deltas arrive as one task per delta; consecutive Thinking + // updates merge into a single step so the timeline reads as one thought. + // A command or tool call in between starts a fresh Thinking step. + this.stepById.set(chunk.id, last); + last.status = chunk.status; + captureDetails(last, chunk); + return; + } + const step: ProgressStep = { + snippetParts: new Map(), + status: chunk.status, + title: oneLine(chunk.title, PROGRESS_TITLE_MAX_CHARS), + }; + captureDetails(step, chunk); + this.steps.push(step); + this.stepById.set(chunk.id, step); + if (this.steps.length > PROGRESS_MAX_TRACKED_STEPS) { + this.steps.shift(); + this.droppedSteps += 1; + } + } + + /** + * Settles the timeline. A "done" outcome downgrades to "failed" when any step + * errored (the renderer surfaces in-stream failures as error tasks, not throws). + * "retrying" leaves step statuses alone — a fresh progress message follows. 
+ */ + finish(outcome: DiscordProgressOutcome, runElapsedMs: number): void { + if (this.outcome) return; + const failed = + outcome === "failed" || (outcome === "done" && this.hasError()); + this.outcome = failed ? "failed" : outcome; + this.elapsedLabel = formatDuration(runElapsedMs); + if (this.outcome === "retrying") return; + const settledStatus: DiscordTaskStatus = + this.outcome === "failed" ? "error" : "complete"; + for (const step of this.steps) { + if (step.status === "in_progress" || step.status === "pending") { + step.status = settledStatus; + } + } + } + + render(): string { + const headLines = [this.headerLine()]; + if (this.planTitle) headLines.push(`*${this.planTitle}*`); + const blocks = this.steps.map((step, index) => + stepBlock(step, index === this.steps.length - 1), + ); + let omitted = this.droppedSteps; + const compose = (): string => { + const lines = [...headLines]; + if (blocks.length || omitted) lines.push(""); + if (omitted) { + lines.push(`*… ${omitted} earlier step${omitted === 1 ? "" : "s"}*`); + } + lines.push(...blocks); + return lines.join("\n"); + }; + let content = compose(); + while (content.length > PROGRESS_MESSAGE_MAX_CHARS && blocks.length > 1) { + blocks.shift(); + omitted += 1; + content = compose(); + } + return content.length > PROGRESS_MESSAGE_MAX_CHARS + ? 
content.slice(0, PROGRESS_MESSAGE_MAX_CHARS) + : content; + } + + private headerLine(): string { + if (this.outcome === "done") return `✅ **Done** · ${this.elapsedLabel}`; + if (this.outcome === "failed") + return `❌ **Failed** · ${this.elapsedLabel}`; + if (this.outcome === "retrying") { + return "🔁 **Stream interrupted — retrying...**"; + } + return this.placeholderText; + } + + private hasError(): boolean { + return this.steps.some((step) => step.status === "error"); + } +} + +export type DiscordProgressMessageOptions = { + editIntervalMs?: number; + logger: Logger; + placeholderText: string; +}; + +/** + * The Discord-side chain-of-thought surface: a message posted the instant a run + * starts, edited in place (throttled) as task/reasoning updates arrive, and + * finalized — never deleted — when the run settles. The final answer streams into + * a separate message, so this one stays put in the timeline as a record even when + * new user messages arrive mid-run. + */ +export class DiscordProgressMessage { + private readonly thread: Thread; + private readonly timeline: DiscordProgressTimeline; + private readonly intervalMs: number; + private readonly logger: Logger; + private readonly messageId: string; + private readonly editThreadId: string; + private readonly startedAtMs = nowMs(); + private lastContent: string; + private timer: ReturnType | null = null; + private edits: Promise = Promise.resolve(); + private finished = false; + + private constructor( + thread: Thread, + messageId: string, + editThreadId: string, + options: DiscordProgressMessageOptions, + ) { + this.thread = thread; + this.timeline = new DiscordProgressTimeline(options.placeholderText); + this.intervalMs = options.editIntervalMs ?? 
PROGRESS_EDIT_INTERVAL_MS; + this.logger = options.logger; + this.messageId = messageId; + this.editThreadId = editThreadId; + this.lastContent = options.placeholderText; + } + + static async post( + thread: Thread, + options: DiscordProgressMessageOptions, + ): Promise { + const raw = await thread.adapter.postMessage( + thread.id, + options.placeholderText, + ); + return new DiscordProgressMessage( + thread, + raw.id, + raw.threadId || thread.id, + options, + ); + } + + update(chunk: DiscordProgressChunk): void { + if (this.finished) return; + this.timeline.update(chunk); + if (this.timer) return; + this.timer = setTimeout(() => { + this.timer = null; + this.enqueueEdit(); + }, this.intervalMs); + } + + /** Applies the final edit. Never throws — progress rendering is cosmetic. */ + async finish(outcome: DiscordProgressOutcome): Promise { + if (this.finished) return; + this.finished = true; + if (this.timer) { + clearTimeout(this.timer); + this.timer = null; + } + this.timeline.finish(outcome, elapsedMs(this.startedAtMs)); + this.enqueueEdit(); + await this.edits; + } + + private enqueueEdit(): void { + this.edits = this.edits.then(() => this.editNow()); + } + + private async editNow(): Promise { + const content = this.timeline.render(); + if (content === this.lastContent) return; + try { + await this.thread.adapter.editMessage(this.editThreadId, this.messageId, { + markdown: content, + }); + this.lastContent = content; + } catch (error) { + this.logger.warn("discordbot_progress_edit_failed", { + error: errorMessage(error), + }); + } + } +} + +export function isCommandExecutionTask(chunk: { + id: string; + title: string; +}): boolean { + return ( + chunk.id.startsWith("call_") || + chunk.title.toLowerCase().includes("command execution") + ); +} + +function captureDetails(step: ProgressStep, chunk: DiscordTaskChunk): void { + if (!chunk.details) return; + if (step.title === "Thinking") { + step.snippetParts.set(chunk.id, chunk.details); + return; + } + if 
(!step.command && isCommandExecutionTask(chunk)) { + step.command = commandPreview(chunk.details); + } +} + +function stepBlock(step: ProgressStep, isLatest: boolean): string { + const command = step.command ? `: \`${step.command}\`` : ""; + const line = `${statusEmoji(step.status)} ${step.title}${command}`; + if (!isLatest || step.title !== "Thinking") return line; + const snippet = snippetText(step); + if (!snippet) return line; + const quoted = snippet + .split("\n") + .map((snippetLine) => `> ${snippetLine}`) + .join("\n"); + return `${line}\n${quoted}`; +} + +function snippetText(step: ProgressStep): string { + const text = Array.from(step.snippetParts.values()) + .join("") + .replace(/\n{3,}/g, "\n\n") + .trim(); + if (!text) return ""; + if (text.length <= PROGRESS_SNIPPET_MAX_CHARS) return text; + return `…${text.slice(-PROGRESS_SNIPPET_MAX_CHARS)}`; +} + +function statusEmoji(status: DiscordTaskStatus): string { + if (status === "complete") return "✅"; + if (status === "error") return "❌"; + return "⏳"; +} + +/** First command line out of the renderer's fenced details block, one-lined. */ +function commandPreview(details: string): string | undefined { + const line = details + .split("\n") + .map((detailsLine) => detailsLine.trim()) + .find((detailsLine) => detailsLine && !detailsLine.startsWith("```")); + if (!line) return undefined; + return oneLine(line.replaceAll("`", "'"), PROGRESS_COMMAND_MAX_CHARS); +} + +function oneLine(value: string, maxChars: number): string { + const collapsed = value.replace(/\s+/g, " ").trim(); + if (collapsed.length <= maxChars) return collapsed; + return `${collapsed.slice(0, maxChars - 1).trimEnd()}…`; +} + +function formatDuration(ms: number): string { + const totalSeconds = Math.max(1, Math.round(ms / 1000)); + const minutes = Math.floor(totalSeconds / 60); + const seconds = totalSeconds % 60; + return minutes ? 
`${minutes}m ${seconds}s` : `${seconds}s`; +} diff --git a/services/discordbot/src/index.ts b/services/discordbot/src/index.ts index 23cb7c1a..012f7bc1 100644 --- a/services/discordbot/src/index.ts +++ b/services/discordbot/src/index.ts @@ -22,6 +22,7 @@ import { isAllowedDiscordMessage, isGuildAllowlistEmpty, } from "./discord-allowlist"; +import { DiscordProgressMessage } from "./discord-progress"; import { fetchThreadStarterMessage } from "./discord-starter"; import { deriveThreadName, renameThreadFromMessage } from "./discord-threading"; import { @@ -47,7 +48,14 @@ import type { ForwardSessionInput, TypingCapableAdapter, } from "./types"; -import { elapsedMs, errorMessage, noopLogger, nowMs, traceLog } from "./utils"; +import { + AsyncTextQueue, + elapsedMs, + errorMessage, + noopLogger, + nowMs, + traceLog, +} from "./utils"; export type { Discordbot, @@ -71,7 +79,7 @@ const RENDER_INDEX_TTL_MS = 30 * 24 * 60 * 60 * 1000; const RENDER_RECOVERY_LEASE_TTL_MS = 2 * 60 * 1000; const RENDER_RETRY_INITIAL_DELAY_MS = 250; const RENDER_RETRY_MAX_DELAY_MS = 5_000; -const DISCORD_TASK_DETAILS_MAX_CHARS = 500; +const PROGRESS_PLACEHOLDER_TEXT = "✨ thinking..."; // Discord caps message content at 2000 chars; leave headroom so the honest // "[truncated ...]" suffix lands instead of the adapter's silent "..." cut. const DISCORD_FALLBACK_TEXT_MAX_CHARS = 1_900; @@ -102,10 +110,12 @@ export function createDiscordbot(options: DiscordbotOptions): Discordbot { userName, adapters: { discord }, state, - // Initial placeholder posted while the agent works, before any streamed - // content (the chat SDK default is a bare "..."). Overridable via options. - fallbackStreamingPlaceholderText: - options.streamingPlaceholderText ?? 
"✨ thinking...", + // No SDK-level streaming placeholder: instant feedback comes from the + // progress message renderExecutionStream posts itself, and the final answer + // must land as a NEW message at the bottom of the timeline — with null, the + // SDK's post+edit fallback only creates the answer message once the first + // visible answer text arrives. + fallbackStreamingPlaceholderText: null, // Serialize handlers per thread via the SDK's per-thread lock. The deprecated // `onLockConflict: 'force'` force-released the lock so two handlers ran concurrently on one // thread — two near-simultaneous mentions could both pass the `activeExecution` check and @@ -366,8 +376,8 @@ async function syncThreadMessageToSession( ); // Create + append the session message only (fast). The execute call blocks // ~9s on cold sandbox spin-up (incl. the tool-server sidecar), so it's run - // inside the render stream below — after the "✨ thinking..." placeholder is - // posted — instead of before it. executeSession is idempotent + // inside the render stream below — after the "✨ thinking..." progress + // message is posted — instead of before it. executeSession is idempotent // (idempotency_key = message id), so a render retry won't re-spawn. await forwardToSessionApi( input.options, @@ -723,7 +733,8 @@ async function* streamOpenedSession( stream: AsyncIterable, ): AsyncIterable { // Deliberate delta from slackbotv2 (which removed its synthetic starting - // task): the synthetic item drives the instant "✨ thinking..." placeholder. + // task): the synthetic item seeds the first "⏳ Thinking" step in the + // progress message. 
yield startingStreamNotification(input.threadId); for await (const event of stream) yield event; } @@ -756,21 +767,71 @@ async function renderExecutionStream( return; } + // Two-message UX: a progress message posts instantly and is edited in place + // with the step timeline (chain of thought), while the final answer streams + // into a separate message created on first answer text. The progress message + // stays in the timeline as a record instead of being overwritten, and the + // answer lands at the bottom even when users chime in mid-run. + const progress = await DiscordProgressMessage.post(thread, { + logger, + placeholderText: + options.streamingPlaceholderText ?? PROGRESS_PLACEHOLDER_TEXT, + }); const stopTyping = startTypingKeepalive(thread, logger); try { - // Deliberate delta from slackbotv2: no streamAfterFirstChunk deferral. - // The instant "✨ thinking..." placeholder covers the no-visible-output - // window, so the stream posts immediately instead of waiting for the - // first visible chunk. - const visibleStream = discordSafeChatSdkStream( - codexAppServerToChatSdkStream(stream, rendererOptions(options)), + await renderSplitExecutionStreams(thread, stream, options, progress); + await progress.finish("done"); + } catch (error) { + await progress.finish( + isRetryableSessionApiError(error) ? "retrying" : "failed", ); - await thread.post(new StreamingPlan(visibleStream, {})); + throw error; } finally { stopTyping(); } } +/** + * Consumes the renderer's chunk stream, routing task/plan updates to the + * progress message and answer text to a separate streamed message. The answer + * post is created lazily on the first visible answer chunk; the chat SDK's + * post+edit fallback then streams the rest into it. 
+ */ +async function renderSplitExecutionStreams( + thread: Thread, + stream: AsyncIterable, + options: DiscordbotOptions, + progress: DiscordProgressMessage, +): Promise { + const answerText = new AsyncTextQueue(); + let answerPost: Promise | null = null; + let sourceFailed = false; + try { + for await (const chunk of codexAppServerToChatSdkStream( + stream, + rendererOptions(options), + )) { + if (chunk.type === "markdown_text") { + answerPost ??= thread.post(new StreamingPlan(answerText, {})); + answerText.push(chunk.text); + continue; + } + progress.update(chunk); + } + } catch (error) { + sourceFailed = true; + throw error; + } finally { + answerText.end(); + if (answerPost) { + // Settle the answer post either way, but when the source stream failed + // that error is the one worth propagating. + if (sourceFailed) await answerPost.catch(() => undefined); + else await answerPost; + } + } +} + async function renderRecoveredExecutionStream( thread: Thread, stream: AsyncIterable, @@ -779,7 +840,7 @@ async function renderRecoveredExecutionStream( trace?: DiscordbotTrace, ): Promise { // Recovered renders never rename the thread; naming happens on the initial execution. - // The discordSafe stream wrapping (and the plain-text-only branch) comes via + // The progress/answer message split (and the plain-text-only branch) comes via // renderExecutionStream. 
await renderExecutionStream(thread, stream, message, options, false, trace); } @@ -795,11 +856,9 @@ async function renderPlainTextExecutionStream( const stopTyping = startTypingKeepalive(thread, logger); try { const chatStream = fallback.collectChatSdk( - discordSafeChatSdkStream( - codexAppServerToChatSdkStream( - fallback.collectSource(stream), - rendererOptions(options), - ), + codexAppServerToChatSdkStream( + fallback.collectSource(stream), + rendererOptions(options), ), ); for await (const _chunk of chatStream) { @@ -896,44 +955,6 @@ function isPlainTextOnlyRequest(text: string): boolean { ); } -async function* discordSafeChatSdkStream( - stream: AsyncIterable, -): AsyncIterable { - for await (const chunk of stream) { - yield discordSafeChatSdkChunk(chunk); - } -} - -function discordSafeChatSdkChunk( - chunk: ChatSDKStreamChunk, -): ChatSDKStreamChunk { - if (chunk.type !== "task_update") return chunk; - const { output: _output, details, ...safeChunk } = chunk; - void _output; - if (isCommandExecutionTask(chunk)) return safeChunk; - return { - ...safeChunk, - ...(details ? { details: truncateDiscordTaskField(details) } : {}), - }; -} - -function isCommandExecutionTask( - chunk: Extract, -): boolean { - return ( - chunk.id.startsWith("call_") || - chunk.title.toLowerCase().includes("command execution") - ); -} - -function truncateDiscordTaskField(value: string): string { - return truncateDiscordText( - value, - DISCORD_TASK_DETAILS_MAX_CHARS, - "Discord task details", - ); -} - function truncateDiscordText( value: string, maxChars: number, @@ -958,11 +979,12 @@ async function* streamSessionAfterHandoff( execution: DiscordbotExecuteSessionResponse, ) => Promise, ): AsyncIterable { - // Post the placeholder BEFORE executing so the user sees "✨ thinking..." - // immediately, instead of waiting ~9s for the cold sandbox (incl. tool-server - // sidecar) to spin up. 
Execute runs here, inside the render stream, so a - // sandbox-spawn failure surfaces in the same message rather than hanging the - // placeholder (api-rs writes no event if the spawn itself fails). + // The progress message is already posted before this generator is consumed, + // so the user has instant feedback while the cold sandbox (incl. tool-server + // sidecar) spends ~9s spinning up. Execute runs here, inside the render + // stream, so a sandbox-spawn failure surfaces in the same render rather than + // hanging the progress message (api-rs writes no event if the spawn itself + // fails). The synthetic starting item seeds the first "⏳ Thinking" step. yield startingStreamNotification(input.threadId); traceLog(options, "discordbot_stream_heartbeat_emitted", input.trace); diff --git a/services/discordbot/src/session-api.ts b/services/discordbot/src/session-api.ts index 3c404735..8c899bc7 100644 --- a/services/discordbot/src/session-api.ts +++ b/services/discordbot/src/session-api.ts @@ -180,9 +180,9 @@ export async function forwardToSessionApi( /** * Execute the session turn on its own (start the agent run), returning the * execution. Split out of forwardToSessionApi so the render stream can run it - * AFTER posting the placeholder — the execute call blocks on cold sandbox - * spin-up. Idempotent via the request's idempotency_key, so a render retry - * won't re-spawn the sandbox. + * AFTER the progress message is posted — the execute call blocks on cold + * sandbox spin-up. Idempotent via the request's idempotency_key, so a render + * retry won't re-spawn the sandbox. */ export async function executeSessionTurn( options: DiscordbotOptions, @@ -226,7 +226,8 @@ export async function openSessionEventStream( } // Deliberate delta from slackbotv2 (which removed this entirely): the -// synthetic starting item drives the instant "✨ thinking..." placeholder. +// synthetic starting item seeds the first "⏳ Thinking" step in the progress +// message. 
export function startingStreamNotification(threadId: string): JsonObject { return { method: "item/started", diff --git a/services/discordbot/src/types.ts b/services/discordbot/src/types.ts index dd9ecbb9..d490d51f 100644 --- a/services/discordbot/src/types.ts +++ b/services/discordbot/src/types.ts @@ -103,7 +103,7 @@ export type DiscordbotOptions = { recoverRenderObligationsOnStart?: boolean; state?: StateAdapter; stateKeyPrefix?: string; - /** Placeholder posted while the agent works, before streamed content. Defaults to "✨ thinking...". */ + /** Initial text of the progress message posted while the agent works. Defaults to "✨ thinking...". */ streamingPlaceholderText?: string; userName?: string; }; diff --git a/services/discordbot/src/utils.ts b/services/discordbot/src/utils.ts index 20d5c8b9..d7f6815a 100644 --- a/services/discordbot/src/utils.ts +++ b/services/discordbot/src/utils.ts @@ -59,3 +59,41 @@ export async function* toAsyncIterable( yield item; } } + +/** + * Single-consumer async queue bridging a producer loop to an AsyncIterable + * consumer (e.g. the chat SDK's streaming post). push() never blocks; end() + * lets the consumer drain the remaining items and finish. 
+ */ +export class AsyncTextQueue implements AsyncIterable { + private readonly values: string[] = []; + private done = false; + private wake: (() => void) | null = null; + + push(value: string): void { + this.values.push(value); + this.wake?.(); + } + + end(): void { + this.done = true; + this.wake?.(); + } + + async *[Symbol.asyncIterator](): AsyncIterator { + while (true) { + const value = this.values.shift(); + if (value !== undefined) { + yield value; + continue; + } + if (this.done) return; + await new Promise((resolve) => { + this.wake = () => { + this.wake = null; + resolve(); + }; + }); + } + } +} diff --git a/services/discordbot/test/discord-progress.test.ts b/services/discordbot/test/discord-progress.test.ts new file mode 100644 index 00000000..75bbe03a --- /dev/null +++ b/services/discordbot/test/discord-progress.test.ts @@ -0,0 +1,284 @@ +import { describe, expect, it } from "bun:test"; +import type { Logger, Thread } from "chat"; +import { + DiscordProgressMessage, + DiscordProgressTimeline, + isCommandExecutionTask, +} from "../src/discord-progress"; + +const silentLogger: Logger = { + debug: () => undefined, + info: () => undefined, + warn: () => undefined, + error: () => undefined, + child: () => silentLogger, +}; + +function task(input: { + id: string; + title: string; + status?: "pending" | "in_progress" | "complete" | "error"; + details?: string; +}): { + type: "task_update"; + id: string; + title: string; + status: "pending" | "in_progress" | "complete" | "error"; + details?: string; +} { + return { + type: "task_update", + id: input.id, + title: input.title, + status: input.status ?? "in_progress", + ...(input.details ? 
{ details: input.details } : {}), + }; +} + +describe("DiscordProgressTimeline", () => { + it("renders just the placeholder before any steps", () => { + const timeline = new DiscordProgressTimeline("✨ thinking..."); + expect(timeline.render()).toBe("✨ thinking..."); + }); + + it("renders steps with status emoji and updates them in place by id", () => { + const timeline = new DiscordProgressTimeline("✨ thinking..."); + timeline.update(task({ id: "t1", title: "Searching documents" })); + expect(timeline.render()).toBe("✨ thinking...\n\n⏳ Searching documents"); + + timeline.update( + task({ id: "t1", title: "Searching documents", status: "complete" }), + ); + expect(timeline.render()).toBe("✨ thinking...\n\n✅ Searching documents"); + }); + + it("merges consecutive Thinking updates into one step with a quoted snippet", () => { + const timeline = new DiscordProgressTimeline("✨ thinking..."); + timeline.update( + task({ id: "reasoning-1", title: "Thinking", details: "Comparing the " }), + ); + timeline.update( + task({ + id: "reasoning-2", + title: "Thinking", + details: "deploy manifests", + }), + ); + + expect(timeline.render()).toBe( + "✨ thinking...\n\n⏳ Thinking\n> Comparing the deploy manifests", + ); + }); + + it("starts a fresh Thinking step after another task and quotes only the latest", () => { + const timeline = new DiscordProgressTimeline("✨ thinking..."); + timeline.update( + task({ + id: "reasoning-1", + title: "Thinking", + status: "complete", + details: "First thought", + }), + ); + timeline.update( + task({ id: "cmd-1", title: "Command execution", status: "complete" }), + ); + timeline.update( + task({ id: "reasoning-2", title: "Thinking", details: "Second thought" }), + ); + + expect(timeline.render()).toBe( + [ + "✨ thinking...", + "", + "✅ Thinking", + "✅ Command execution", + "⏳ Thinking", + "> Second thought", + ].join("\n"), + ); + }); + + it("shows a one-line command preview from fenced details", () => { + const timeline = new 
DiscordProgressTimeline("✨ thinking..."); + timeline.update( + task({ + id: "cmd-1", + title: "Command execution (1)", + details: "```sh\ngit log --oneline\n```", + }), + ); + expect(timeline.render()).toBe( + "✨ thinking...\n\n⏳ Command execution (1): `git log --oneline`", + ); + }); + + it("renders the latest plan title under the header", () => { + const timeline = new DiscordProgressTimeline("✨ thinking..."); + timeline.update({ type: "plan_update", title: "Investigate the bug" }); + timeline.update(task({ id: "t1", title: "Thinking" })); + expect(timeline.render()).toBe( + "✨ thinking...\n*Investigate the bug*\n\n⏳ Thinking", + ); + }); + + it("drops oldest steps and counts them once the message would overflow", () => { + const timeline = new DiscordProgressTimeline("✨ thinking..."); + for (let index = 0; index < 60; index++) { + timeline.update( + task({ + id: `t${index}`, + title: `Step ${index} ${"x".repeat(60)}`, + status: "complete", + }), + ); + } + const content = timeline.render(); + expect(content.length).toBeLessThanOrEqual(1_900); + expect(content).toMatch(/\*… \d+ earlier steps\*/); + expect(content).toContain("Step 59"); + expect(content).not.toContain("Step 0 "); + }); + + it("finish('done') completes open steps and flips the header", () => { + const timeline = new DiscordProgressTimeline("✨ thinking..."); + timeline.update(task({ id: "t1", title: "Thinking" })); + timeline.finish("done", 42_000); + expect(timeline.render()).toBe("✅ **Done** · 42s\n\n✅ Thinking"); + }); + + it("finish('done') downgrades to failed when a step errored", () => { + const timeline = new DiscordProgressTimeline("✨ thinking..."); + timeline.update( + task({ id: "t1", title: "Execution failed", status: "error" }), + ); + timeline.finish("done", 90_500); + expect(timeline.render()).toBe( + "❌ **Failed** · 1m 31s\n\n❌ Execution failed", + ); + }); + + it("finish('retrying') keeps step statuses as they were", () => { + const timeline = new DiscordProgressTimeline("✨ 
thinking..."); + timeline.update(task({ id: "t1", title: "Thinking" })); + timeline.finish("retrying", 5_000); + expect(timeline.render()).toBe( + "🔁 **Stream interrupted — retrying...**\n\n⏳ Thinking", + ); + }); + + it("ignores updates after finish", () => { + const timeline = new DiscordProgressTimeline("✨ thinking..."); + timeline.finish("done", 1_000); + timeline.update(task({ id: "t1", title: "Thinking" })); + expect(timeline.render()).toBe("✅ **Done** · 1s"); + }); +}); + +type FakeCall = + | { kind: "post"; threadId: string; message: unknown } + | { kind: "edit"; threadId: string; messageId: string; message: unknown }; + +function fakeThread(input?: { failEdits?: boolean }): { + thread: Thread; + calls: FakeCall[]; +} { + const calls: FakeCall[] = []; + const adapter = { + postMessage: async (threadId: string, message: unknown) => { + calls.push({ kind: "post", threadId, message }); + return { id: "m1", raw: {}, threadId }; + }, + editMessage: async ( + threadId: string, + messageId: string, + message: unknown, + ) => { + if (input?.failEdits) throw new Error("edit failed"); + calls.push({ kind: "edit", threadId, messageId, message }); + return { id: messageId, raw: {}, threadId }; + }, + }; + return { + thread: { id: "thread-1", adapter } as unknown as Thread, + calls, + }; +} + +describe("DiscordProgressMessage", () => { + it("posts the placeholder immediately and finalizes via edit", async () => { + const { thread, calls } = fakeThread(); + const progress = await DiscordProgressMessage.post(thread, { + editIntervalMs: 1, + logger: silentLogger, + placeholderText: "✨ thinking...", + }); + expect(calls).toEqual([ + { kind: "post", threadId: "thread-1", message: "✨ thinking..." 
}, + ]); + + progress.update(task({ id: "t1", title: "Thinking" })); + await progress.finish("done"); + + const edits = calls.filter((call) => call.kind === "edit"); + expect(edits.length).toBeGreaterThanOrEqual(1); + const lastEdit = edits.at(-1); + expect(lastEdit?.messageId).toBe("m1"); + expect(lastEdit?.message).toEqual({ + markdown: expect.stringContaining("✅ **Done**"), + }); + }); + + it("throttles edits to one per interval", async () => { + const { thread, calls } = fakeThread(); + const progress = await DiscordProgressMessage.post(thread, { + editIntervalMs: 30, + logger: silentLogger, + placeholderText: "✨ thinking...", + }); + for (let index = 0; index < 10; index++) { + progress.update(task({ id: `t${index}`, title: `Step ${index}` })); + } + await new Promise((resolve) => setTimeout(resolve, 45)); + const editsBeforeFinish = calls.filter((call) => call.kind === "edit"); + expect(editsBeforeFinish).toHaveLength(1); + await progress.finish("done"); + }); + + it("swallows edit failures", async () => { + const { thread } = fakeThread({ failEdits: true }); + const progress = await DiscordProgressMessage.post(thread, { + editIntervalMs: 1, + logger: silentLogger, + placeholderText: "✨ thinking...", + }); + progress.update(task({ id: "t1", title: "Thinking" })); + await expect(progress.finish("done")).resolves.toBeUndefined(); + }); + + it("ignores updates after finish", async () => { + const { thread, calls } = fakeThread(); + const progress = await DiscordProgressMessage.post(thread, { + editIntervalMs: 1, + logger: silentLogger, + placeholderText: "✨ thinking...", + }); + await progress.finish("done"); + const editCount = calls.filter((call) => call.kind === "edit").length; + progress.update(task({ id: "t1", title: "Thinking" })); + await new Promise((resolve) => setTimeout(resolve, 10)); + expect(calls.filter((call) => call.kind === "edit")).toHaveLength( + editCount, + ); + }); +}); + +describe("isCommandExecutionTask", () => { + it("matches call_ ids 
and command execution titles", () => { + expect(isCommandExecutionTask({ id: "call_1", title: "Tool" })).toBe(true); + expect( + isCommandExecutionTask({ id: "x", title: "Command execution (2)" }), + ).toBe(true); + expect(isCommandExecutionTask({ id: "x", title: "Thinking" })).toBe(false); + }); +}); From 1e1b20363cb2e77423b29dd1f17bb966e6d79749 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Sun, 7 Jun 2026 08:17:43 -0600 Subject: [PATCH 21/25] feat(discordbot): narrate runs with reactions + reasoning messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The edited step timeline felt heavy-handed in practice. Replace it with fully append-only narration: the triggering message gets an instant 👀 reaction (flipped to ✅/❌ on settle), and the agent's reasoning posts as its own italic messages as each thought completes — commands and tools render nothing. Reactions go through raw Discord REST since the adapter can't reach thread-starter messages, which live in the parent channel. 
Co-Authored-By: Claude Opus 4.8 --- services/discordbot/README.md | 11 +- services/discordbot/src/discord-narrator.ts | 260 ++++++++++++ services/discordbot/src/discord-progress.ts | 321 -------------- services/discordbot/src/index.ts | 66 ++- services/discordbot/src/session-api.ts | 6 +- services/discordbot/src/types.ts | 2 - .../discordbot/test/discord-narrator.test.ts | 400 ++++++++++++++++++ .../discordbot/test/discord-progress.test.ts | 284 ------------- 8 files changed, 701 insertions(+), 649 deletions(-) create mode 100644 services/discordbot/src/discord-narrator.ts delete mode 100644 services/discordbot/src/discord-progress.ts create mode 100644 services/discordbot/test/discord-narrator.test.ts delete mode 100644 services/discordbot/test/discord-progress.test.ts diff --git a/services/discordbot/README.md b/services/discordbot/README.md index df4d44df..539b7a4a 100644 --- a/services/discordbot/README.md +++ b/services/discordbot/README.md @@ -12,11 +12,12 @@ unchanged (`discord:…` thread keys flow through identically). keyed by the new thread (`discord:{guild}:{channel}:{threadId}`). - **`@`-mention inside an existing thread** → the bot answers in that thread. - **Follow-ups inside an active thread** append to the same session without a re-mention. -- **Two-message replies**: a run posts an instant "✨ thinking..." **progress message** that is - edited in place with a step timeline (reasoning excerpts, commands, tool calls) and finalized - (`✅ Done · 42s`) — never overwritten or deleted. The **answer** streams into a separate message +- **Append-only narration**: a run instantly reacts 👀 on the triggering message, then posts the + agent's reasoning blurbs as their own *italic* messages as each thought completes (commands/tools + are not rendered — they just end a thought). The **answer** streams into a separate message created when the first answer text arrives, so it lands at the bottom of the thread even when - users chime in mid-run. 
+ users chime in mid-run. On settle the 👀 flips to ✅ (or ❌); no bot message is ever edited or + deleted. ## Ingress model @@ -60,7 +61,7 @@ DMs are denied unconditionally (DM intents are not requested). apply for it; below that it's a toggle.) 4. **Invite the bot** (OAuth2 → URL Generator) with scope `bot` and permissions: _View Channels_, _Send Messages_, _Send Messages in Threads_, **Create Public Threads**, - _Embed Links_, _Read Message History_. + _Embed Links_, _Read Message History_, _Add Reactions_ (the 👀/✅ run-status indicator). 5. Set `DISCORDBOT_GUILD_ALLOWLIST` to the server(s) you invited it to — the bot is **inert** until this is set. diff --git a/services/discordbot/src/discord-narrator.ts b/services/discordbot/src/discord-narrator.ts new file mode 100644 index 00000000..ccfebb4c --- /dev/null +++ b/services/discordbot/src/discord-narrator.ts @@ -0,0 +1,260 @@ +import type { ChatSDKStreamChunk } from "@centaur/rendering"; +import type { Logger, Thread } from "chat"; +import { parseDiscordThreadKey } from "./discord-allowlist"; +import { DEFAULT_DISCORD_API_URL } from "./discord-threading"; +import type { DiscordbotApiMessage, DiscordbotOptions } from "./types"; +import { errorMessage, nowMs } from "./utils"; + +export type DiscordNarratorChunk = Exclude< + ChatSDKStreamChunk, + { type: "markdown_text" } +>; +type DiscordTaskChunk = Extract; + +/** Terminal state the run's reaction settles into. */ +export type DiscordNarratorOutcome = "done" | "failed" | "retrying"; + +const REACTION_WORKING = "👀"; +const REACTION_DONE = "✅"; +const REACTION_FAILED = "❌"; + +// Discord caps message content at 2000 chars; headroom keeps every post safe. +const NARRATOR_MESSAGE_MAX_CHARS = 1_900; +// A single blurb is truncated to this, and a thought still pending at this size +// is flushed early so long reasoning doesn't sit invisible for the whole run. 
+const NARRATOR_BLURB_MAX_CHARS = 600; +// Thoughts that complete within this window merge into one message; also keeps +// posts well under Discord's per-channel message budget. +const NARRATOR_MIN_POST_GAP_MS = 1_500; +// Runaway runs stop narrating past this many posted messages. +const NARRATOR_MAX_POSTS = 12; +// Fragments shorter than this aren't worth a message of their own. +const NARRATOR_MIN_BLURB_CHARS = 12; + +export type DiscordNarratorOptions = { + logger: Logger; + maxPosts?: number; + minPostGapMs?: number; +}; + +/** + * The Discord-side chain-of-thought surface, fully append-only: the triggering + * message gets an instant 👀 reaction while the agent works, the agent's + * reasoning blurbs post as their own italic messages as each thought completes, + * and on settle the 👀 is swapped for ✅ (or ❌). No bot message is ever edited + * or deleted. Commands, tools, and plan updates are not rendered; they just + * mark where a thought ends. + * + * Reactions go through the raw Discord REST API rather than the adapter: a + * thread-starter message lives in the PARENT channel (same delta that + * motivates discord-starter.ts), while the adapter always routes reactions to + * the thread. + */ +export class DiscordNarrator { + private readonly thread: Thread; + private readonly botOptions: DiscordbotOptions; + private readonly logger: Logger; + private readonly minPostGapMs: number; + private readonly maxPosts: number; + private readonly reactionChannelId: string | undefined; + private readonly reactionMessageId: string; + // Current thought, keyed by chunk id: reasoning deltas have unique ids and + // concatenate; a commentary item re-uses its id and replaces its body. 
+ private pendingParts = new Map(); + private queuedBlurbs: string[] = []; + private postedCount = 0; + private droppedBlurbs = 0; + private lastPostAtMs = 0; + private sawError = false; + private timer: ReturnType | null = null; + private chain: Promise = Promise.resolve(); + private finished = false; + + private constructor( + thread: Thread, + message: DiscordbotApiMessage, + botOptions: DiscordbotOptions, + options: DiscordNarratorOptions, + ) { + this.thread = thread; + this.botOptions = botOptions; + this.logger = options.logger; + this.minPostGapMs = options.minPostGapMs ?? NARRATOR_MIN_POST_GAP_MS; + this.maxPosts = options.maxPosts ?? NARRATOR_MAX_POSTS; + const { channelId, threadId } = parseDiscordThreadKey(thread.id); + // A thread-starter message (id == thread id) lives in the parent channel; + // anything else lives in the thread itself. + this.reactionChannelId = + message.id === threadId ? channelId : (threadId ?? channelId); + this.reactionMessageId = message.id; + } + + /** Adds the 👀 working reaction (best-effort) and returns the narrator. */ + static start( + thread: Thread, + message: DiscordbotApiMessage, + botOptions: DiscordbotOptions, + options: DiscordNarratorOptions, + ): DiscordNarrator { + const narrator = new DiscordNarrator(thread, message, botOptions, options); + narrator.enqueueReaction("PUT", REACTION_WORKING); + return narrator; + } + + update(chunk: DiscordNarratorChunk): void { + if (this.finished) return; + if (chunk.type !== "task_update") return; + if (chunk.status === "error") this.sawError = true; + if (chunk.title === "Thinking") { + if (chunk.details) this.pendingParts.set(chunk.id, chunk.details); + if ( + chunk.status === "complete" || + this.pendingText().length >= NARRATOR_BLURB_MAX_CHARS + ) { + this.flushPending(); + } + return; + } + // Any other task means the model moved on — the current thought is over. 
+ this.flushPending(); + } + + /** + * Posts any remaining thought, then settles the reaction: ✅ on success, + * ❌ on failure, and 👀 stays put for "retrying" (the retry attempt re-adds + * it; the PUT is idempotent). Never throws — narration is cosmetic. A "done" + * outcome downgrades to "failed" when an error task was seen (the renderer + * surfaces in-stream failures as error tasks, not throws). + */ + async finish(outcome: DiscordNarratorOutcome): Promise { + if (this.finished) return; + this.finished = true; + if (this.timer) { + clearTimeout(this.timer); + this.timer = null; + } + this.flushPendingText(); + this.enqueueBlurbPost(); + const failed = + outcome === "failed" || (outcome === "done" && this.sawError); + if (outcome !== "retrying") { + // Add the settled reaction before clearing 👀 so the message always + // carries an indicator. + this.enqueueReaction("PUT", failed ? REACTION_FAILED : REACTION_DONE); + this.enqueueReaction("DELETE", REACTION_WORKING); + } + await this.chain; + if (this.droppedBlurbs) { + this.logger.debug("discordbot_narrator_blurbs_dropped", { + dropped: this.droppedBlurbs, + }); + } + } + + private pendingText(): string { + return Array.from(this.pendingParts.values()).join("").trim(); + } + + private flushPending(): void { + this.flushPendingText(); + this.schedulePost(); + } + + private flushPendingText(): void { + const text = this.pendingText(); + this.pendingParts = new Map(); + if (text.length < NARRATOR_MIN_BLURB_CHARS) return; + this.queuedBlurbs.push(truncateBlurb(text)); + } + + private schedulePost(): void { + if (this.timer || !this.queuedBlurbs.length) return; + const delayMs = Math.max( + 0, + this.minPostGapMs - (nowMs() - this.lastPostAtMs), + ); + this.timer = setTimeout(() => { + this.timer = null; + this.enqueueBlurbPost(); + }, delayMs); + } + + private enqueueBlurbPost(): void { + if (!this.queuedBlurbs.length) return; + if (this.postedCount >= this.maxPosts) { + this.droppedBlurbs += this.queuedBlurbs.length; 
+ this.queuedBlurbs = []; + return; + } + const blurbs = this.queuedBlurbs; + this.queuedBlurbs = []; + this.postedCount += 1; + this.lastPostAtMs = nowMs(); + const content = clipMessage( + blurbs.map((blurb) => italicize(blurb)).join("\n\n"), + ); + this.chain = this.chain.then(async () => { + try { + await this.thread.adapter.postMessage(this.thread.id, { + markdown: content, + }); + } catch (error) { + this.logger.warn("discordbot_narrator_post_failed", { + error: errorMessage(error), + }); + } + }); + } + + private enqueueReaction(method: "PUT" | "DELETE", emoji: string): void { + const channelId = this.reactionChannelId; + if (!channelId) return; + this.chain = this.chain.then(async () => { + try { + const fetchFn = this.botOptions.fetch ?? fetch; + const apiBase = ( + this.botOptions.discordApiUrl ?? DEFAULT_DISCORD_API_URL + ).replace(/\/$/, ""); + const response = await fetchFn( + `${apiBase}/channels/${channelId}/messages/${this.reactionMessageId}/reactions/${encodeURIComponent(emoji)}/@me`, + { + method, + headers: { authorization: `Bot ${this.botOptions.botToken}` }, + }, + ); + if (!response.ok) { + this.logger.warn("discordbot_narrator_reaction_failed", { + emoji, + method, + status: response.status, + }); + } + } catch (error) { + this.logger.warn("discordbot_narrator_reaction_error", { + emoji, + method, + error: errorMessage(error), + }); + } + }); + } +} + +/** Discord italics don't span newlines, so wrap each non-empty line. */ +function italicize(text: string): string { + return text + .split("\n") + .map((line) => (line.trim() ? 
`*${line.trim()}*` : "")) + .join("\n") + .replace(/\n{3,}/g, "\n\n"); +} + +function truncateBlurb(text: string): string { + if (text.length <= NARRATOR_BLURB_MAX_CHARS) return text; + return `${text.slice(0, NARRATOR_BLURB_MAX_CHARS - 1).trimEnd()}…`; +} + +function clipMessage(content: string): string { + if (content.length <= NARRATOR_MESSAGE_MAX_CHARS) return content; + return content.slice(0, NARRATOR_MESSAGE_MAX_CHARS); +} diff --git a/services/discordbot/src/discord-progress.ts b/services/discordbot/src/discord-progress.ts deleted file mode 100644 index bd042800..00000000 --- a/services/discordbot/src/discord-progress.ts +++ /dev/null @@ -1,321 +0,0 @@ -import type { ChatSDKStreamChunk } from "@centaur/rendering"; -import type { Logger, Thread } from "chat"; -import { elapsedMs, errorMessage, nowMs } from "./utils"; - -export type DiscordProgressChunk = Exclude< - ChatSDKStreamChunk, - { type: "markdown_text" } ->; -type DiscordTaskChunk = Extract; -type DiscordTaskStatus = DiscordTaskChunk["status"]; - -/** Terminal state the progress message settles into. */ -export type DiscordProgressOutcome = "done" | "failed" | "retrying"; - -// Discord caps message content at 2000 chars; headroom keeps every edit safe. -const PROGRESS_MESSAGE_MAX_CHARS = 1_900; -const PROGRESS_SNIPPET_MAX_CHARS = 280; -const PROGRESS_COMMAND_MAX_CHARS = 64; -const PROGRESS_TITLE_MAX_CHARS = 80; -// Steps beyond this fall off the front of the model and render as "… earlier steps". -const PROGRESS_MAX_TRACKED_STEPS = 120; -// Stay well below Discord's ~5 edits/5s per-channel budget; the answer message -// streams its own post+edit cycle in parallel with these edits. 
-const PROGRESS_EDIT_INTERVAL_MS = 1_500; - -type ProgressStep = { - command?: string; - snippetParts: Map; - status: DiscordTaskStatus; - title: string; -}; - -/** - * Pure model + renderer for the progress message: an ordered step timeline built - * from the renderer's task/plan updates, with the latest reasoning excerpt quoted - * under the current "Thinking" step. - */ -export class DiscordProgressTimeline { - private readonly placeholderText: string; - private readonly steps: ProgressStep[] = []; - private readonly stepById = new Map(); - private planTitle: string | null = null; - private droppedSteps = 0; - private outcome: DiscordProgressOutcome | null = null; - private elapsedLabel = ""; - - constructor(placeholderText: string) { - this.placeholderText = placeholderText; - } - - update(chunk: DiscordProgressChunk): void { - if (this.outcome) return; - if (chunk.type === "plan_update") { - this.planTitle = oneLine(chunk.title, PROGRESS_TITLE_MAX_CHARS); - return; - } - const existing = this.stepById.get(chunk.id); - if (existing) { - existing.status = chunk.status; - captureDetails(existing, chunk); - return; - } - const last = this.steps.at(-1); - if (chunk.title === "Thinking" && last?.title === "Thinking") { - // Reasoning deltas arrive as one task per delta; consecutive Thinking - // updates merge into a single step so the timeline reads as one thought. - // A command or tool call in between starts a fresh Thinking step. - this.stepById.set(chunk.id, last); - last.status = chunk.status; - captureDetails(last, chunk); - return; - } - const step: ProgressStep = { - snippetParts: new Map(), - status: chunk.status, - title: oneLine(chunk.title, PROGRESS_TITLE_MAX_CHARS), - }; - captureDetails(step, chunk); - this.steps.push(step); - this.stepById.set(chunk.id, step); - if (this.steps.length > PROGRESS_MAX_TRACKED_STEPS) { - this.steps.shift(); - this.droppedSteps += 1; - } - } - - /** - * Settles the timeline. 
A "done" outcome downgrades to "failed" when any step - * errored (the renderer surfaces in-stream failures as error tasks, not throws). - * "retrying" leaves step statuses alone — a fresh progress message follows. - */ - finish(outcome: DiscordProgressOutcome, runElapsedMs: number): void { - if (this.outcome) return; - const failed = - outcome === "failed" || (outcome === "done" && this.hasError()); - this.outcome = failed ? "failed" : outcome; - this.elapsedLabel = formatDuration(runElapsedMs); - if (this.outcome === "retrying") return; - const settledStatus: DiscordTaskStatus = - this.outcome === "failed" ? "error" : "complete"; - for (const step of this.steps) { - if (step.status === "in_progress" || step.status === "pending") { - step.status = settledStatus; - } - } - } - - render(): string { - const headLines = [this.headerLine()]; - if (this.planTitle) headLines.push(`*${this.planTitle}*`); - const blocks = this.steps.map((step, index) => - stepBlock(step, index === this.steps.length - 1), - ); - let omitted = this.droppedSteps; - const compose = (): string => { - const lines = [...headLines]; - if (blocks.length || omitted) lines.push(""); - if (omitted) { - lines.push(`*… ${omitted} earlier step${omitted === 1 ? "" : "s"}*`); - } - lines.push(...blocks); - return lines.join("\n"); - }; - let content = compose(); - while (content.length > PROGRESS_MESSAGE_MAX_CHARS && blocks.length > 1) { - blocks.shift(); - omitted += 1; - content = compose(); - } - return content.length > PROGRESS_MESSAGE_MAX_CHARS - ? 
content.slice(0, PROGRESS_MESSAGE_MAX_CHARS) - : content; - } - - private headerLine(): string { - if (this.outcome === "done") return `✅ **Done** · ${this.elapsedLabel}`; - if (this.outcome === "failed") - return `❌ **Failed** · ${this.elapsedLabel}`; - if (this.outcome === "retrying") { - return "🔁 **Stream interrupted — retrying...**"; - } - return this.placeholderText; - } - - private hasError(): boolean { - return this.steps.some((step) => step.status === "error"); - } -} - -export type DiscordProgressMessageOptions = { - editIntervalMs?: number; - logger: Logger; - placeholderText: string; -}; - -/** - * The Discord-side chain-of-thought surface: a message posted the instant a run - * starts, edited in place (throttled) as task/reasoning updates arrive, and - * finalized — never deleted — when the run settles. The final answer streams into - * a separate message, so this one stays put in the timeline as a record even when - * new user messages arrive mid-run. - */ -export class DiscordProgressMessage { - private readonly thread: Thread; - private readonly timeline: DiscordProgressTimeline; - private readonly intervalMs: number; - private readonly logger: Logger; - private readonly messageId: string; - private readonly editThreadId: string; - private readonly startedAtMs = nowMs(); - private lastContent: string; - private timer: ReturnType | null = null; - private edits: Promise = Promise.resolve(); - private finished = false; - - private constructor( - thread: Thread, - messageId: string, - editThreadId: string, - options: DiscordProgressMessageOptions, - ) { - this.thread = thread; - this.timeline = new DiscordProgressTimeline(options.placeholderText); - this.intervalMs = options.editIntervalMs ?? 
PROGRESS_EDIT_INTERVAL_MS; - this.logger = options.logger; - this.messageId = messageId; - this.editThreadId = editThreadId; - this.lastContent = options.placeholderText; - } - - static async post( - thread: Thread, - options: DiscordProgressMessageOptions, - ): Promise { - const raw = await thread.adapter.postMessage( - thread.id, - options.placeholderText, - ); - return new DiscordProgressMessage( - thread, - raw.id, - raw.threadId || thread.id, - options, - ); - } - - update(chunk: DiscordProgressChunk): void { - if (this.finished) return; - this.timeline.update(chunk); - if (this.timer) return; - this.timer = setTimeout(() => { - this.timer = null; - this.enqueueEdit(); - }, this.intervalMs); - } - - /** Applies the final edit. Never throws — progress rendering is cosmetic. */ - async finish(outcome: DiscordProgressOutcome): Promise { - if (this.finished) return; - this.finished = true; - if (this.timer) { - clearTimeout(this.timer); - this.timer = null; - } - this.timeline.finish(outcome, elapsedMs(this.startedAtMs)); - this.enqueueEdit(); - await this.edits; - } - - private enqueueEdit(): void { - this.edits = this.edits.then(() => this.editNow()); - } - - private async editNow(): Promise { - const content = this.timeline.render(); - if (content === this.lastContent) return; - try { - await this.thread.adapter.editMessage(this.editThreadId, this.messageId, { - markdown: content, - }); - this.lastContent = content; - } catch (error) { - this.logger.warn("discordbot_progress_edit_failed", { - error: errorMessage(error), - }); - } - } -} - -export function isCommandExecutionTask(chunk: { - id: string; - title: string; -}): boolean { - return ( - chunk.id.startsWith("call_") || - chunk.title.toLowerCase().includes("command execution") - ); -} - -function captureDetails(step: ProgressStep, chunk: DiscordTaskChunk): void { - if (!chunk.details) return; - if (step.title === "Thinking") { - step.snippetParts.set(chunk.id, chunk.details); - return; - } - if 
(!step.command && isCommandExecutionTask(chunk)) { - step.command = commandPreview(chunk.details); - } -} - -function stepBlock(step: ProgressStep, isLatest: boolean): string { - const command = step.command ? `: \`${step.command}\`` : ""; - const line = `${statusEmoji(step.status)} ${step.title}${command}`; - if (!isLatest || step.title !== "Thinking") return line; - const snippet = snippetText(step); - if (!snippet) return line; - const quoted = snippet - .split("\n") - .map((snippetLine) => `> ${snippetLine}`) - .join("\n"); - return `${line}\n${quoted}`; -} - -function snippetText(step: ProgressStep): string { - const text = Array.from(step.snippetParts.values()) - .join("") - .replace(/\n{3,}/g, "\n\n") - .trim(); - if (!text) return ""; - if (text.length <= PROGRESS_SNIPPET_MAX_CHARS) return text; - return `…${text.slice(-PROGRESS_SNIPPET_MAX_CHARS)}`; -} - -function statusEmoji(status: DiscordTaskStatus): string { - if (status === "complete") return "✅"; - if (status === "error") return "❌"; - return "⏳"; -} - -/** First command line out of the renderer's fenced details block, one-lined. */ -function commandPreview(details: string): string | undefined { - const line = details - .split("\n") - .map((detailsLine) => detailsLine.trim()) - .find((detailsLine) => detailsLine && !detailsLine.startsWith("```")); - if (!line) return undefined; - return oneLine(line.replaceAll("`", "'"), PROGRESS_COMMAND_MAX_CHARS); -} - -function oneLine(value: string, maxChars: number): string { - const collapsed = value.replace(/\s+/g, " ").trim(); - if (collapsed.length <= maxChars) return collapsed; - return `${collapsed.slice(0, maxChars - 1).trimEnd()}…`; -} - -function formatDuration(ms: number): string { - const totalSeconds = Math.max(1, Math.round(ms / 1000)); - const minutes = Math.floor(totalSeconds / 60); - const seconds = totalSeconds % 60; - return minutes ? 
`${minutes}m ${seconds}s` : `${seconds}s`; -} diff --git a/services/discordbot/src/index.ts b/services/discordbot/src/index.ts index 012f7bc1..e4d52cb6 100644 --- a/services/discordbot/src/index.ts +++ b/services/discordbot/src/index.ts @@ -22,7 +22,7 @@ import { isAllowedDiscordMessage, isGuildAllowlistEmpty, } from "./discord-allowlist"; -import { DiscordProgressMessage } from "./discord-progress"; +import { DiscordNarrator } from "./discord-narrator"; import { fetchThreadStarterMessage } from "./discord-starter"; import { deriveThreadName, renameThreadFromMessage } from "./discord-threading"; import { @@ -79,7 +79,6 @@ const RENDER_INDEX_TTL_MS = 30 * 24 * 60 * 60 * 1000; const RENDER_RECOVERY_LEASE_TTL_MS = 2 * 60 * 1000; const RENDER_RETRY_INITIAL_DELAY_MS = 250; const RENDER_RETRY_MAX_DELAY_MS = 5_000; -const PROGRESS_PLACEHOLDER_TEXT = "✨ thinking..."; // Discord caps message content at 2000 chars; leave headroom so the honest // "[truncated ...]" suffix lands instead of the adapter's silent "..." cut. const DISCORD_FALLBACK_TEXT_MAX_CHARS = 1_900; @@ -110,9 +109,9 @@ export function createDiscordbot(options: DiscordbotOptions): Discordbot { userName, adapters: { discord }, state, - // No SDK-level streaming placeholder: instant feedback comes from the - // progress message renderExecutionStream posts itself, and the final answer - // must land as a NEW message at the bottom of the timeline — with null, the + // No SDK-level streaming placeholder: instant feedback is the 👀 reaction + // the narrator puts on the triggering message, and the final answer must + // land as a NEW message at the bottom of the timeline — with null, the // SDK's post+edit fallback only creates the answer message once the first // visible answer text arrives. fallbackStreamingPlaceholderText: null, @@ -376,8 +375,8 @@ async function syncThreadMessageToSession( ); // Create + append the session message only (fast). The execute call blocks // ~9s on cold sandbox spin-up (incl. 
the tool-server sidecar), so it's run - // inside the render stream below — after the "✨ thinking..." progress - // message is posted — instead of before it. executeSession is idempotent + // inside the render stream below — after the 👀 working reaction lands — + // instead of before it. executeSession is idempotent // (idempotency_key = message id), so a render retry won't re-spawn. await forwardToSessionApi( input.options, @@ -733,8 +732,8 @@ async function* streamOpenedSession( stream: AsyncIterable, ): AsyncIterable { // Deliberate delta from slackbotv2 (which removed its synthetic starting - // task): the synthetic item seeds the first "⏳ Thinking" step in the - // progress message. + // task): the synthetic item primes the mapper's task state so answer deltas + // stream immediately instead of waiting out the pre-stream grace period. yield startingStreamNotification(input.threadId); for await (const event of stream) yield event; } @@ -767,22 +766,19 @@ async function renderExecutionStream( return; } - // Two-message UX: a progress message posts instantly and is edited in place - // with the step timeline (chain of thought), while the final answer streams - // into a separate message created on first answer text. The progress message - // stays in the timeline as a record instead of being overwritten, and the - // answer lands at the bottom even when users chime in mid-run. - const progress = await DiscordProgressMessage.post(thread, { - logger, - placeholderText: - options.streamingPlaceholderText ?? PROGRESS_PLACEHOLDER_TEXT, - }); + // Append-only narration: an instant 👀 reaction on the triggering message, + // then the agent's reasoning blurbs posted as their own italic messages as + // each thought completes, then the answer streamed into a separate message + // created on first answer text. On settle the 👀 flips to ✅/❌. No bot + // message is ever edited or deleted, so messages keep their place in the + // timeline even when users chime in mid-run. 
+ const narrator = DiscordNarrator.start(thread, message, options, { logger }); const stopTyping = startTypingKeepalive(thread, logger); try { - await renderSplitExecutionStreams(thread, stream, options, progress); - await progress.finish("done"); + await renderSplitExecutionStreams(thread, stream, options, narrator); + await narrator.finish("done"); } catch (error) { - await progress.finish( + await narrator.finish( isRetryableSessionApiError(error) ? "retrying" : "failed", ); throw error; @@ -792,8 +788,8 @@ async function renderExecutionStream( } /** - * Consumes the renderer's chunk stream, routing task/plan updates to the - * progress message and answer text to a separate streamed message. The answer + * Consumes the renderer's chunk stream, routing task updates to the narrator + * (reasoning blurbs) and answer text to a separate streamed message. The answer * post is created lazily on the first visible answer chunk; the chat SDK's * post+edit fallback then streams the rest into it. */ @@ -801,7 +797,7 @@ async function renderSplitExecutionStreams( thread: Thread, stream: AsyncIterable, options: DiscordbotOptions, - progress: DiscordProgressMessage, + narrator: DiscordNarrator, ): Promise { const answerText = new AsyncTextQueue(); let answerPost: Promise | null = null; @@ -816,7 +812,7 @@ async function renderSplitExecutionStreams( answerText.push(chunk.text); continue; } - progress.update(chunk); + narrator.update(chunk); } } catch (error) { sourceFailed = true; @@ -840,8 +836,8 @@ async function renderRecoveredExecutionStream( trace?: DiscordbotTrace, ): Promise { // Recovered renders never rename the thread; naming happens on the initial execution. - // The progress/answer message split (and the plain-text-only branch) comes via - // renderExecutionStream. + // The narration/answer message split (and the plain-text-only branch) comes + // via renderExecutionStream. 
await renderExecutionStream(thread, stream, message, options, false, trace); } @@ -979,12 +975,14 @@ async function* streamSessionAfterHandoff( execution: DiscordbotExecuteSessionResponse, ) => Promise, ): AsyncIterable { - // The progress message is already posted before this generator is consumed, - // so the user has instant feedback while the cold sandbox (incl. tool-server - // sidecar) spends ~9s spinning up. Execute runs here, inside the render - // stream, so a sandbox-spawn failure surfaces in the same render rather than - // hanging the progress message (api-rs writes no event if the spawn itself - // fails). The synthetic starting item seeds the first "⏳ Thinking" step. + // The 👀 working reaction is already queued before this generator is + // consumed, so the user has instant feedback while the cold sandbox (incl. + // tool-server sidecar) spends ~9s spinning up. Execute runs here, inside the + // render stream, so a sandbox-spawn failure surfaces in the same render + // rather than leaving the run looking alive forever (api-rs writes no event + // if the spawn itself fails). The synthetic starting item primes the + // mapper's task state so answer deltas stream without the pre-stream grace + // delay. yield startingStreamNotification(input.threadId); traceLog(options, "discordbot_stream_heartbeat_emitted", input.trace); diff --git a/services/discordbot/src/session-api.ts b/services/discordbot/src/session-api.ts index 8c899bc7..d1248e93 100644 --- a/services/discordbot/src/session-api.ts +++ b/services/discordbot/src/session-api.ts @@ -180,7 +180,7 @@ export async function forwardToSessionApi( /** * Execute the session turn on its own (start the agent run), returning the * execution. Split out of forwardToSessionApi so the render stream can run it - * AFTER the progress message is posted — the execute call blocks on cold + * AFTER the 👀 working reaction lands — the execute call blocks on cold * sandbox spin-up. 
Idempotent via the request's idempotency_key, so a render * retry won't re-spawn the sandbox. */ @@ -226,8 +226,8 @@ export async function openSessionEventStream( } // Deliberate delta from slackbotv2 (which removed this entirely): the -// synthetic starting item seeds the first "⏳ Thinking" step in the progress -// message. +// synthetic starting item primes the mapper's task state so answer deltas +// stream immediately instead of waiting out the pre-stream grace period. export function startingStreamNotification(threadId: string): JsonObject { return { method: "item/started", diff --git a/services/discordbot/src/types.ts b/services/discordbot/src/types.ts index d490d51f..6aad350a 100644 --- a/services/discordbot/src/types.ts +++ b/services/discordbot/src/types.ts @@ -103,8 +103,6 @@ export type DiscordbotOptions = { recoverRenderObligationsOnStart?: boolean; state?: StateAdapter; stateKeyPrefix?: string; - /** Initial text of the progress message posted while the agent works. Defaults to "✨ thinking...". 
*/ - streamingPlaceholderText?: string; userName?: string; }; diff --git a/services/discordbot/test/discord-narrator.test.ts b/services/discordbot/test/discord-narrator.test.ts new file mode 100644 index 00000000..187c6194 --- /dev/null +++ b/services/discordbot/test/discord-narrator.test.ts @@ -0,0 +1,400 @@ +import { describe, expect, it } from "bun:test"; +import type { Logger, Thread } from "chat"; +import { DiscordNarrator } from "../src/discord-narrator"; +import type { + DiscordbotApiMessage, + DiscordbotFetch, + DiscordbotOptions, +} from "../src/types"; + +const silentLogger: Logger = { + debug: () => undefined, + info: () => undefined, + warn: () => undefined, + error: () => undefined, + child: () => silentLogger, +}; + +const EYES = encodeURIComponent("👀"); +const CHECK = encodeURIComponent("✅"); +const CROSS = encodeURIComponent("❌"); + +function task(input: { + id: string; + title: string; + status?: "pending" | "in_progress" | "complete" | "error"; + details?: string; +}): { + type: "task_update"; + id: string; + title: string; + status: "pending" | "in_progress" | "complete" | "error"; + details?: string; +} { + return { + type: "task_update", + id: input.id, + title: input.title, + status: input.status ?? "in_progress", + ...(input.details ? 
{ details: input.details } : {}), + }; +} + +function apiMessage( + input?: Partial, +): DiscordbotApiMessage { + return { + attachments: [], + author: { + fullName: "User", + isBot: false, + isMe: false, + userId: "U1", + userName: "user", + }, + id: "M1", + isMention: true, + raw: {}, + text: "hello", + threadId: "discord:G1:C1:T9", + timestamp: "2026-06-07T00:00:00.000Z", + ...input, + }; +} + +type Harness = { + thread: Thread; + message: DiscordbotApiMessage; + botOptions: DiscordbotOptions; + posts: string[]; + reactions: Array<{ method: string; url: string }>; +}; + +function harness(input?: { + threadKey?: string; + messageId?: string; + failPosts?: boolean; + failReactions?: boolean; +}): Harness { + const posts: string[] = []; + const reactions: Array<{ method: string; url: string }> = []; + const threadKey = input?.threadKey ?? "discord:G1:C1:T9"; + const adapter = { + postMessage: async (_threadId: string, message: unknown) => { + if (input?.failPosts) throw new Error("post failed"); + posts.push( + typeof message === "string" + ? message + : String((message as { markdown?: string }).markdown ?? ""), + ); + return { id: `m${posts.length}`, raw: {}, threadId: threadKey }; + }, + }; + const fetchFn = (async (url: RequestInfo | URL, init?: RequestInit) => { + if (input?.failReactions) throw new Error("network down"); + reactions.push({ method: init?.method ?? "GET", url: String(url) }); + return new Response(null, { status: 204 }); + }) as DiscordbotFetch; + return { + thread: { id: threadKey, adapter } as unknown as Thread, + message: apiMessage({ id: input?.messageId ?? 
"M1", threadId: threadKey }), + botOptions: { + apiUrl: "http://localhost", + applicationId: "app", + botToken: "bot-token", + publicKey: "key", + discordApiUrl: "https://discord.com/api/v10", + fetch: fetchFn, + }, + posts, + reactions, + }; +} + +function startNarrator( + h: Harness, + options?: { minPostGapMs?: number; maxPosts?: number }, +): DiscordNarrator { + return DiscordNarrator.start(h.thread, h.message, h.botOptions, { + logger: silentLogger, + minPostGapMs: options?.minPostGapMs ?? 1, + maxPosts: options?.maxPosts, + }); +} + +describe("DiscordNarrator reactions", () => { + it("adds 👀 to a message inside the thread via the thread channel", async () => { + const h = harness(); + const narrator = startNarrator(h); + await narrator.finish("done"); + + expect(h.reactions[0]).toEqual({ + method: "PUT", + url: `https://discord.com/api/v10/channels/T9/messages/M1/reactions/${EYES}/@me`, + }); + }); + + it("routes a thread-starter message's reaction to the parent channel", async () => { + const h = harness({ messageId: "T9" }); + const narrator = startNarrator(h); + await narrator.finish("done"); + + expect(h.reactions[0]?.url).toBe( + `https://discord.com/api/v10/channels/C1/messages/T9/reactions/${EYES}/@me`, + ); + }); + + it("settles done as ✅ added before 👀 is removed", async () => { + const h = harness(); + const narrator = startNarrator(h); + await narrator.finish("done"); + + expect(h.reactions.map((r) => `${r.method} ${reactionOf(r.url)}`)).toEqual([ + `PUT ${EYES}`, + `PUT ${CHECK}`, + `DELETE ${EYES}`, + ]); + }); + + it("settles as ❌ when an error task was seen", async () => { + const h = harness(); + const narrator = startNarrator(h); + narrator.update( + task({ id: "err-1", title: "Execution failed", status: "error" }), + ); + await narrator.finish("done"); + + expect(h.reactions.map((r) => `${r.method} ${reactionOf(r.url)}`)).toEqual([ + `PUT ${EYES}`, + `PUT ${CROSS}`, + `DELETE ${EYES}`, + ]); + }); + + it("leaves 👀 in place for a retrying 
outcome", async () => { + const h = harness(); + const narrator = startNarrator(h); + await narrator.finish("retrying"); + + expect(h.reactions.map((r) => `${r.method} ${reactionOf(r.url)}`)).toEqual([ + `PUT ${EYES}`, + ]); + }); + + it("swallows reaction failures", async () => { + const h = harness({ failReactions: true }); + const narrator = startNarrator(h); + await expect(narrator.finish("done")).resolves.toBeUndefined(); + }); +}); + +describe("DiscordNarrator blurbs", () => { + it("coalesces reasoning deltas and posts one italic blurb when the thought completes", async () => { + const h = harness(); + const narrator = startNarrator(h); + narrator.update( + task({ id: "reasoning-1", title: "Thinking", details: "Comparing the " }), + ); + narrator.update( + task({ + id: "reasoning-2", + title: "Thinking", + status: "complete", + details: "deploy manifests against the defaults", + }), + ); + await narrator.finish("done"); + + expect(h.posts).toEqual([ + "*Comparing the deploy manifests against the defaults*", + ]); + }); + + it("flushes the pending thought when the model moves on to a command", async () => { + const h = harness(); + const narrator = startNarrator(h); + narrator.update( + task({ + id: "reasoning-1", + title: "Thinking", + details: "Need to check the recent deploy history first", + }), + ); + narrator.update(task({ id: "cmd-1", title: "Command execution (1)" })); + await narrator.finish("done"); + + expect(h.posts).toEqual([ + "*Need to check the recent deploy history first*", + ]); + }); + + it("never renders commands, tools, or plan updates", async () => { + const h = harness(); + const narrator = startNarrator(h); + narrator.update({ type: "plan_update", title: "Investigate" }); + narrator.update( + task({ id: "cmd-1", title: "Command execution (1)", details: "ls" }), + ); + narrator.update(task({ id: "tool-1", title: "Web search" })); + await narrator.finish("done"); + + expect(h.posts).toEqual([]); + }); + + it("italicizes each line of a 
multi-line blurb", async () => { + const h = harness(); + const narrator = startNarrator(h); + narrator.update( + task({ + id: "thinking-1", + title: "Thinking", + status: "complete", + details: "First line of thought\n\nSecond line of thought", + }), + ); + await narrator.finish("done"); + + expect(h.posts).toEqual([ + "*First line of thought*\n\n*Second line of thought*", + ]); + }); + + it("skips fragments too short to be worth a message", async () => { + const h = harness(); + const narrator = startNarrator(h); + narrator.update( + task({ + id: "thinking-1", + title: "Thinking", + status: "complete", + details: "Hmm.", + }), + ); + await narrator.finish("done"); + + expect(h.posts).toEqual([]); + }); + + it("merges thoughts that complete within the min post gap into one message", async () => { + const h = harness(); + const narrator = startNarrator(h, { minPostGapMs: 50 }); + narrator.update( + task({ + id: "thinking-1", + title: "Thinking", + status: "complete", + details: "First completed thought here", + }), + ); + narrator.update( + task({ + id: "thinking-2", + title: "Thinking", + status: "complete", + details: "Second completed thought here", + }), + ); + await new Promise((resolve) => setTimeout(resolve, 80)); + await narrator.finish("done"); + + expect(h.posts).toEqual([ + "*First completed thought here*\n\n*Second completed thought here*", + ]); + }); + + it("flushes an oversized pending thought early and truncates it", async () => { + const h = harness(); + const narrator = startNarrator(h); + narrator.update( + task({ id: "reasoning-1", title: "Thinking", details: "x".repeat(700) }), + ); + await narrator.finish("done"); + + expect(h.posts).toHaveLength(1); + expect(h.posts[0]?.length).toBeLessThanOrEqual(610); + expect(h.posts[0]).toEndWith("…*"); + }); + + it("stops posting past the max post cap", async () => { + const h = harness(); + const narrator = startNarrator(h, { maxPosts: 2 }); + for (let index = 0; index < 5; index++) { + narrator.update( 
+ task({ + id: `thinking-${index}`, + title: "Thinking", + status: "complete", + details: `Completed thought number ${index}`, + }), + ); + await new Promise((resolve) => setTimeout(resolve, 5)); + } + await narrator.finish("done"); + + expect(h.posts.length).toBeLessThanOrEqual(2); + }); + + it("posts the pending thought during finish, before settling reactions", async () => { + const h = harness(); + const order: string[] = []; + const originalPost = (h.thread.adapter as { postMessage: unknown }) + .postMessage as (t: string, m: unknown) => Promise; + (h.thread.adapter as { postMessage: unknown }).postMessage = async ( + t: string, + m: unknown, + ) => { + order.push("post"); + return originalPost(t, m); + }; + const narrator = startNarrator(h, { minPostGapMs: 10_000 }); + narrator.update( + task({ + id: "reasoning-1", + title: "Thinking", + details: "A final trailing thought", + }), + ); + await narrator.finish("done"); + + expect(h.posts).toEqual(["*A final trailing thought*"]); + // ✅ lands after the trailing blurb (reactions chain behind posts). 
+ const checkIndex = h.reactions.findIndex((r) => r.url.includes(CHECK)); + expect(checkIndex).toBeGreaterThan(-1); + expect(order).toEqual(["post"]); + }); + + it("ignores updates after finish", async () => { + const h = harness(); + const narrator = startNarrator(h); + await narrator.finish("done"); + narrator.update( + task({ + id: "thinking-1", + title: "Thinking", + status: "complete", + details: "Posthumous thought that should not post", + }), + ); + await new Promise((resolve) => setTimeout(resolve, 10)); + expect(h.posts).toEqual([]); + }); + + it("swallows blurb post failures", async () => { + const h = harness({ failPosts: true }); + const narrator = startNarrator(h); + narrator.update( + task({ + id: "thinking-1", + title: "Thinking", + status: "complete", + details: "A thought that will fail to post", + }), + ); + await expect(narrator.finish("done")).resolves.toBeUndefined(); + }); +}); + +function reactionOf(url: string): string { + const match = url.match(/reactions\/([^/]+)\/@me$/); + return match?.[1] ?? 
""; +} diff --git a/services/discordbot/test/discord-progress.test.ts b/services/discordbot/test/discord-progress.test.ts deleted file mode 100644 index 75bbe03a..00000000 --- a/services/discordbot/test/discord-progress.test.ts +++ /dev/null @@ -1,284 +0,0 @@ -import { describe, expect, it } from "bun:test"; -import type { Logger, Thread } from "chat"; -import { - DiscordProgressMessage, - DiscordProgressTimeline, - isCommandExecutionTask, -} from "../src/discord-progress"; - -const silentLogger: Logger = { - debug: () => undefined, - info: () => undefined, - warn: () => undefined, - error: () => undefined, - child: () => silentLogger, -}; - -function task(input: { - id: string; - title: string; - status?: "pending" | "in_progress" | "complete" | "error"; - details?: string; -}): { - type: "task_update"; - id: string; - title: string; - status: "pending" | "in_progress" | "complete" | "error"; - details?: string; -} { - return { - type: "task_update", - id: input.id, - title: input.title, - status: input.status ?? "in_progress", - ...(input.details ? 
{ details: input.details } : {}), - }; -} - -describe("DiscordProgressTimeline", () => { - it("renders just the placeholder before any steps", () => { - const timeline = new DiscordProgressTimeline("✨ thinking..."); - expect(timeline.render()).toBe("✨ thinking..."); - }); - - it("renders steps with status emoji and updates them in place by id", () => { - const timeline = new DiscordProgressTimeline("✨ thinking..."); - timeline.update(task({ id: "t1", title: "Searching documents" })); - expect(timeline.render()).toBe("✨ thinking...\n\n⏳ Searching documents"); - - timeline.update( - task({ id: "t1", title: "Searching documents", status: "complete" }), - ); - expect(timeline.render()).toBe("✨ thinking...\n\n✅ Searching documents"); - }); - - it("merges consecutive Thinking updates into one step with a quoted snippet", () => { - const timeline = new DiscordProgressTimeline("✨ thinking..."); - timeline.update( - task({ id: "reasoning-1", title: "Thinking", details: "Comparing the " }), - ); - timeline.update( - task({ - id: "reasoning-2", - title: "Thinking", - details: "deploy manifests", - }), - ); - - expect(timeline.render()).toBe( - "✨ thinking...\n\n⏳ Thinking\n> Comparing the deploy manifests", - ); - }); - - it("starts a fresh Thinking step after another task and quotes only the latest", () => { - const timeline = new DiscordProgressTimeline("✨ thinking..."); - timeline.update( - task({ - id: "reasoning-1", - title: "Thinking", - status: "complete", - details: "First thought", - }), - ); - timeline.update( - task({ id: "cmd-1", title: "Command execution", status: "complete" }), - ); - timeline.update( - task({ id: "reasoning-2", title: "Thinking", details: "Second thought" }), - ); - - expect(timeline.render()).toBe( - [ - "✨ thinking...", - "", - "✅ Thinking", - "✅ Command execution", - "⏳ Thinking", - "> Second thought", - ].join("\n"), - ); - }); - - it("shows a one-line command preview from fenced details", () => { - const timeline = new 
DiscordProgressTimeline("✨ thinking..."); - timeline.update( - task({ - id: "cmd-1", - title: "Command execution (1)", - details: "```sh\ngit log --oneline\n```", - }), - ); - expect(timeline.render()).toBe( - "✨ thinking...\n\n⏳ Command execution (1): `git log --oneline`", - ); - }); - - it("renders the latest plan title under the header", () => { - const timeline = new DiscordProgressTimeline("✨ thinking..."); - timeline.update({ type: "plan_update", title: "Investigate the bug" }); - timeline.update(task({ id: "t1", title: "Thinking" })); - expect(timeline.render()).toBe( - "✨ thinking...\n*Investigate the bug*\n\n⏳ Thinking", - ); - }); - - it("drops oldest steps and counts them once the message would overflow", () => { - const timeline = new DiscordProgressTimeline("✨ thinking..."); - for (let index = 0; index < 60; index++) { - timeline.update( - task({ - id: `t${index}`, - title: `Step ${index} ${"x".repeat(60)}`, - status: "complete", - }), - ); - } - const content = timeline.render(); - expect(content.length).toBeLessThanOrEqual(1_900); - expect(content).toMatch(/\*… \d+ earlier steps\*/); - expect(content).toContain("Step 59"); - expect(content).not.toContain("Step 0 "); - }); - - it("finish('done') completes open steps and flips the header", () => { - const timeline = new DiscordProgressTimeline("✨ thinking..."); - timeline.update(task({ id: "t1", title: "Thinking" })); - timeline.finish("done", 42_000); - expect(timeline.render()).toBe("✅ **Done** · 42s\n\n✅ Thinking"); - }); - - it("finish('done') downgrades to failed when a step errored", () => { - const timeline = new DiscordProgressTimeline("✨ thinking..."); - timeline.update( - task({ id: "t1", title: "Execution failed", status: "error" }), - ); - timeline.finish("done", 90_500); - expect(timeline.render()).toBe( - "❌ **Failed** · 1m 31s\n\n❌ Execution failed", - ); - }); - - it("finish('retrying') keeps step statuses as they were", () => { - const timeline = new DiscordProgressTimeline("✨ 
thinking..."); - timeline.update(task({ id: "t1", title: "Thinking" })); - timeline.finish("retrying", 5_000); - expect(timeline.render()).toBe( - "🔁 **Stream interrupted — retrying...**\n\n⏳ Thinking", - ); - }); - - it("ignores updates after finish", () => { - const timeline = new DiscordProgressTimeline("✨ thinking..."); - timeline.finish("done", 1_000); - timeline.update(task({ id: "t1", title: "Thinking" })); - expect(timeline.render()).toBe("✅ **Done** · 1s"); - }); -}); - -type FakeCall = - | { kind: "post"; threadId: string; message: unknown } - | { kind: "edit"; threadId: string; messageId: string; message: unknown }; - -function fakeThread(input?: { failEdits?: boolean }): { - thread: Thread; - calls: FakeCall[]; -} { - const calls: FakeCall[] = []; - const adapter = { - postMessage: async (threadId: string, message: unknown) => { - calls.push({ kind: "post", threadId, message }); - return { id: "m1", raw: {}, threadId }; - }, - editMessage: async ( - threadId: string, - messageId: string, - message: unknown, - ) => { - if (input?.failEdits) throw new Error("edit failed"); - calls.push({ kind: "edit", threadId, messageId, message }); - return { id: messageId, raw: {}, threadId }; - }, - }; - return { - thread: { id: "thread-1", adapter } as unknown as Thread, - calls, - }; -} - -describe("DiscordProgressMessage", () => { - it("posts the placeholder immediately and finalizes via edit", async () => { - const { thread, calls } = fakeThread(); - const progress = await DiscordProgressMessage.post(thread, { - editIntervalMs: 1, - logger: silentLogger, - placeholderText: "✨ thinking...", - }); - expect(calls).toEqual([ - { kind: "post", threadId: "thread-1", message: "✨ thinking..." 
}, - ]); - - progress.update(task({ id: "t1", title: "Thinking" })); - await progress.finish("done"); - - const edits = calls.filter((call) => call.kind === "edit"); - expect(edits.length).toBeGreaterThanOrEqual(1); - const lastEdit = edits.at(-1); - expect(lastEdit?.messageId).toBe("m1"); - expect(lastEdit?.message).toEqual({ - markdown: expect.stringContaining("✅ **Done**"), - }); - }); - - it("throttles edits to one per interval", async () => { - const { thread, calls } = fakeThread(); - const progress = await DiscordProgressMessage.post(thread, { - editIntervalMs: 30, - logger: silentLogger, - placeholderText: "✨ thinking...", - }); - for (let index = 0; index < 10; index++) { - progress.update(task({ id: `t${index}`, title: `Step ${index}` })); - } - await new Promise((resolve) => setTimeout(resolve, 45)); - const editsBeforeFinish = calls.filter((call) => call.kind === "edit"); - expect(editsBeforeFinish).toHaveLength(1); - await progress.finish("done"); - }); - - it("swallows edit failures", async () => { - const { thread } = fakeThread({ failEdits: true }); - const progress = await DiscordProgressMessage.post(thread, { - editIntervalMs: 1, - logger: silentLogger, - placeholderText: "✨ thinking...", - }); - progress.update(task({ id: "t1", title: "Thinking" })); - await expect(progress.finish("done")).resolves.toBeUndefined(); - }); - - it("ignores updates after finish", async () => { - const { thread, calls } = fakeThread(); - const progress = await DiscordProgressMessage.post(thread, { - editIntervalMs: 1, - logger: silentLogger, - placeholderText: "✨ thinking...", - }); - await progress.finish("done"); - const editCount = calls.filter((call) => call.kind === "edit").length; - progress.update(task({ id: "t1", title: "Thinking" })); - await new Promise((resolve) => setTimeout(resolve, 10)); - expect(calls.filter((call) => call.kind === "edit")).toHaveLength( - editCount, - ); - }); -}); - -describe("isCommandExecutionTask", () => { - it("matches call_ ids 
and command execution titles", () => { - expect(isCommandExecutionTask({ id: "call_1", title: "Tool" })).toBe(true); - expect( - isCommandExecutionTask({ id: "x", title: "Command execution (2)" }), - ).toBe(true); - expect(isCommandExecutionTask({ id: "x", title: "Thinking" })).toBe(false); - }); -}); From 670c7d03913a6f84acef369db2dba8357918579e Mon Sep 17 00:00:00 2001 From: diid <123841842+0xdiid@users.noreply.github.com> Date: Wed, 3 Jun 2026 13:44:14 -0600 Subject: [PATCH 22/25] feat(tools): add read-only CloudWatch tool via iron-proxy aws_auth (#287) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(tools): add read-only CloudWatch tool via iron-proxy aws_auth Add a `cloudwatch` infra tool mirroring the AWS CloudWatch MCP's read-only surface: log groups, filter log events, Logs Insights queries, metrics, and alarms (boto3-backed, JSON-safe responses, lazy client so discovery needs no credentials or network). AWS auth rides iron-proxy's `aws_auth` transform rather than holding real credentials in the tool process. SigV4 can't be swapped on the wire like a bearer token, but iron-proxy re-signs: boto3 signs each request with throwaway placeholder credentials, and iron-proxy reads the region/service from the signature scope and re-signs with the real read-only IAM keys it resolves from the secrets backend. The keys never enter the workload — the SigV4 analogue of the `secrets` placeholder swap. (aws_auth landed in iron-proxy v0.40.0; Centaur pins 0.42.0-rc.2, which includes it.) 
- tool_manager: AwsAuthSecret type + parser (access_key_id/secret_access_key/ session_token refs, allowed_regions/services, hosts) - proxy_config: render the aws_auth transform; add to _MANAGED_TRANSFORMS - iron-proxy base configs: allowlist x-amz-* so x-amz-target (the CloudWatch operation header) survives egress filtering - cloudwatch tool: declare the aws_auth secret; sign with placeholders; region is the only real value (non-secret, read from env, defaults us-east-1) - kubernetes: expose only AWS_REGION (non-secret, optional) to the tool-server sidecar — no AWS credentials in-process - tests: cloudwatch client, aws_auth parser + renderer, sidecar (creds absent) Co-Authored-By: Claude Opus 4.8 (1M context) * fix(cloudwatch): allow AWS SDK signed headers through the egress filter aws_auth re-signs CloudWatch requests with the AWS SDK v4 signer, whose signed-headers set includes the SDK's amz-sdk-request, amz-sdk-invocation-id, and (for CloudWatch's query-JSON protocol) x-amzn-query-mode headers. header_allowlist runs after aws_auth and was stripping them, so AWS rebuilt the canonical request without them and rejected every call with InvalidSignatureException. Allow /^amz-sdk-.*$/ and /^x-amzn-.*$/ so the signed headers reach AWS. Pairs with the /^x-amz-.*$/ allowance already added for the SigV4 headers. Co-Authored-By: Claude Opus 4.7 * chore(iron-proxy): bump base image to 0.42.0-rc.4 for the awsauth CONNECT fix 0.42.0-rc.4 is the first release containing ironsh/iron-proxy#167, which lets the synthetic CONNECT through the tunnel transform-policy check so aws_auth signs the post-MITM inner request instead of rejecting the CONNECT. Required for the CloudWatch tool's aws_auth path. 
--------- Co-authored-by: Claude Opus 4.8 (1M context) --- services/api/api/proxy_config.py | 52 ++- services/api/api/sandbox/kubernetes.py | 18 + services/api/api/tool_manager.py | 88 +++- services/api/tests/test_proxy_config.py | 98 +++++ .../tests/test_sandbox_kubernetes_backend.py | 25 ++ services/iron-proxy/iron-proxy.yaml | 9 + tools/infra/cloudwatch/.env.example | 16 + tools/infra/cloudwatch/client.py | 376 ++++++++++++++++++ tools/infra/cloudwatch/pyproject.toml | 32 ++ tools/infra/cloudwatch/test_client.py | 164 ++++++++ 10 files changed, 873 insertions(+), 5 deletions(-) create mode 100644 tools/infra/cloudwatch/.env.example create mode 100644 tools/infra/cloudwatch/client.py create mode 100644 tools/infra/cloudwatch/pyproject.toml create mode 100644 tools/infra/cloudwatch/test_client.py diff --git a/services/api/api/proxy_config.py b/services/api/api/proxy_config.py index 34965da7..bdfec9b9 100644 --- a/services/api/api/proxy_config.py +++ b/services/api/api/proxy_config.py @@ -12,6 +12,9 @@ minting OAuth2 access tokens for the declared grant. - ``hmac_sign`` transforms — one per unique ``HmacSignSecret`` signing scheme, HMAC-signing each request and injecting the configured headers. +- ``aws_auth`` transforms — one per unique ``AwsAuthSecret`` credential set, + re-signing AWS SigV4 requests (the tool signs with placeholder credentials) + with real keys resolved from the secret source. - top-level ``postgres:`` — one listener per ``PgDsnSecret`` on sequential ports starting at 5432, ordered by name. """ @@ -26,6 +29,7 @@ import yaml from api.tool_manager import ( + AwsAuthSecret, BrokeredTokenSecret, GcpAuthSecret, HmacSignSecret, @@ -52,7 +56,7 @@ def load_base_config() -> str: PG_LISTEN_PORT_BASE = 5432 _MANAGED_TRANSFORMS: frozenset[str] = frozenset( - {"secrets", "gcp_auth", "oauth_token", "hmac_sign"} + {"secrets", "gcp_auth", "oauth_token", "hmac_sign", "aws_auth"} ) # Iron-proxy ``source`` schema for resolving secret values. 
``env`` reads the @@ -435,6 +439,51 @@ def _build_hmac_sign_transforms( return transforms +def _build_aws_auth_transforms( + secrets: list[SecretDef], +) -> list[dict[str, Any]]: + """``aws_auth`` transforms: one per unique credential set + scope. + + Entries that share the same credential refs, session token, and + allowed regions/services are merged — their host rules are unioned so the + same signing config covers every upstream that opted in. iron-proxy re-signs + requests the tool signed with placeholder credentials, drawing the real keys + from the resolved secret sources. + """ + by_scheme: dict[ + tuple[str, str, str | None, tuple[str, ...], tuple[str, ...]], + set[str], + ] = {} + for secret in secrets: + if not isinstance(secret, AwsAuthSecret): + continue + key = ( + secret.access_key_id_ref, + secret.secret_access_key_ref, + secret.session_token_ref, + secret.allowed_regions, + secret.allowed_services, + ) + by_scheme.setdefault(key, set()).update(secret.hosts) + + transforms: list[dict[str, Any]] = [] + for key in sorted(by_scheme, key=lambda k: (k[0], k[1])): + access_key_id_ref, secret_access_key_ref, session_token_ref, regions, services = key + config: dict[str, Any] = { + "access_key_id": _build_source(access_key_id_ref), + "secret_access_key": _build_source(secret_access_key_ref), + } + if session_token_ref: + config["session_token"] = _build_source(session_token_ref) + if regions: + config["allowed_regions"] = list(regions) + if services: + config["allowed_services"] = list(services) + config["rules"] = [{"host": h} for h in sorted(by_scheme[key])] + transforms.append({"name": "aws_auth", "config": config}) + return transforms + + def _build_postgres_listeners( secrets: list[SecretDef], pg_listen_ports: dict[str, int], @@ -500,6 +549,7 @@ def render_proxy_yaml( if oauth_token is not None: new_transforms.append(oauth_token) new_transforms.extend(_build_hmac_sign_transforms(secrets)) + new_transforms.extend(_build_aws_auth_transforms(secrets)) if 
new_transforms: for index, transform in enumerate(transforms): if (transform or {}).get("name") == "header_allowlist": diff --git a/services/api/api/sandbox/kubernetes.py b/services/api/api/sandbox/kubernetes.py index 7caceb00..7ee39133 100644 --- a/services/api/api/sandbox/kubernetes.py +++ b/services/api/api/sandbox/kubernetes.py @@ -522,6 +522,24 @@ def _build_tool_server_container( env.append({"name": name, "value": dsn}) _apply_tool_server_extra_env(env, no_proxy) + # AWS region for the cloudwatch tool (non-secret). The tool signs with + # placeholder credentials and iron-proxy re-signs with the real keys, so no + # AWS credentials belong in this process — only the region, which boto3 + # needs to pick the endpoint host and signing scope. Optional: the tool + # defaults to us-east-1 when unset. + env.append( + { + "name": "AWS_REGION", + "valueFrom": { + "secretKeyRef": { + "name": secret_name, + "key": _secret_env_key("AWS_REGION"), + "optional": True, + } + }, + } + ) + volume_mounts: list[dict[str, Any]] = [ { "name": "firewall-ca", diff --git a/services/api/api/tool_manager.py b/services/api/api/tool_manager.py index 8b03ae88..aeeadd9e 100644 --- a/services/api/api/tool_manager.py +++ b/services/api/api/tool_manager.py @@ -290,6 +290,31 @@ class HmacSignSecret: allow_chunked_body: bool = False +@dataclass(frozen=True) +class AwsAuthSecret: + """AWS SigV4 re-signing handled by iron-proxy's ``aws_auth`` transform. + + The tool's AWS SDK signs each request with throwaway *placeholder* + credentials; iron-proxy reads the region and service from the inbound + signature's credential scope, strips that signature, and re-signs with the + real credentials resolved from ``access_key_id``/``secret_access_key`` (and + optional ``session_token``). The real keys never reach the sandbox — this is + the SigV4 analogue of the ``secrets`` transform's placeholder swap. 
+ + ``allowed_services``/``allowed_regions`` scope which AWS services/regions the + proxy will sign for; ``hosts`` becomes the iron-proxy ``rules``. Credential + refs resolve like every other secret (env var or 1Password item). + """ + + name: str + hosts: tuple[str, ...] + access_key_id_ref: str + secret_access_key_ref: str + session_token_ref: str | None = None + allowed_regions: tuple[str, ...] = () + allowed_services: tuple[str, ...] = () + + SecretDef = ( HttpSecret | GcpAuthSecret @@ -297,6 +322,7 @@ class HmacSignSecret: | OAuthTokenSecret | BrokeredTokenSecret | HmacSignSecret + | AwsAuthSecret ) @@ -1022,6 +1048,59 @@ def _parse_secret(entry: Any, *, default_hosts: tuple[str, ...] = ()) -> SecretD timestamp_format=timestamp_format, allow_chunked_body=allow_chunked_body, ) + if secret_type == "aws_auth": + hosts = entry.get("hosts", []) + if ( + not isinstance(hosts, list) + or not hosts + or not all(isinstance(h, str) and h for h in hosts) + ): + raise ValueError( + f"aws_auth entry {name!r} 'hosts' must be a non-empty array " + f"of non-empty strings" + ) + access_key_id_ref = entry.get("access_key_id") + if not isinstance(access_key_id_ref, str) or not access_key_id_ref: + raise ValueError( + f"aws_auth entry {name!r} requires a non-empty 'access_key_id'" + ) + secret_access_key_ref = entry.get("secret_access_key") + if not isinstance(secret_access_key_ref, str) or not secret_access_key_ref: + raise ValueError( + f"aws_auth entry {name!r} requires a non-empty 'secret_access_key'" + ) + session_token_ref = entry.get("session_token") + if session_token_ref is not None and ( + not isinstance(session_token_ref, str) or not session_token_ref + ): + raise ValueError( + f"aws_auth entry {name!r} 'session_token' must be a non-empty string" + ) + allowed_regions = entry.get("allowed_regions", []) + if not isinstance(allowed_regions, list) or not all( + isinstance(r, str) and r for r in allowed_regions + ): + raise ValueError( + f"aws_auth entry {name!r} 
'allowed_regions' must be an array of " + f"non-empty strings" + ) + allowed_services = entry.get("allowed_services", []) + if not isinstance(allowed_services, list) or not all( + isinstance(s, str) and s for s in allowed_services + ): + raise ValueError( + f"aws_auth entry {name!r} 'allowed_services' must be an array of " + f"non-empty strings" + ) + return AwsAuthSecret( + name=name, + hosts=tuple(hosts), + access_key_id_ref=access_key_id_ref, + secret_access_key_ref=secret_access_key_ref, + session_token_ref=session_token_ref, + allowed_regions=tuple(allowed_regions), + allowed_services=tuple(allowed_services), + ) raise ValueError(f"unknown secret type {secret_type!r}") @@ -1047,10 +1126,11 @@ async def _resolve_secrets(secrets: list[SecretDef]) -> dict[str, str]: ``ToolContext`` — the tool gets back the ``replacer`` token, which iron-proxy swaps for the real credential at the network boundary. Inject-mode HTTP secrets are applied entirely by iron-proxy and never reach the tool. - ``GcpAuthSecret``, ``OAuthTokenSecret`` and ``PgDsnSecret`` are likewise not - exposed via context: gcp_auth and oauth_token are minted and injected on the - wire by iron-proxy, and pg_dsn reaches the tool as an environment variable - set on the sandbox by the kubernetes backend. + ``GcpAuthSecret``, ``OAuthTokenSecret``, ``AwsAuthSecret`` and ``PgDsnSecret`` + are likewise not exposed via context: gcp_auth, oauth_token and aws_auth are + minted/re-signed and injected on the wire by iron-proxy (the tool signs AWS + requests with placeholder credentials), and pg_dsn reaches the tool as an + environment variable set on the sandbox by the kubernetes backend. 
""" return {s.name: s.replacer for s in secrets if _is_replace_secret(s)} diff --git a/services/api/tests/test_proxy_config.py b/services/api/tests/test_proxy_config.py index 35d65fe2..48ab308d 100644 --- a/services/api/tests/test_proxy_config.py +++ b/services/api/tests/test_proxy_config.py @@ -12,6 +12,7 @@ ) from api.tool_manager import ( DEFAULT_MATCH_HEADERS, + AwsAuthSecret, GcpAuthSecret, HmacHeader, HmacSignSecret, @@ -1802,3 +1803,100 @@ def test_render_brokered_token_merges_hosts_across_duplicate_names( "api.anthropic.com", "console.anthropic.com", } + + +# ── aws_auth parser ────────────────────────────────────────────────────────── + + +def _aws_entry(**overrides): + entry = { + "type": "aws_auth", + "name": "cloudwatch", + "access_key_id": "AWS_ACCESS_KEY_ID", + "secret_access_key": "AWS_SECRET_ACCESS_KEY", + "hosts": ["logs.*.amazonaws.com", "monitoring.*.amazonaws.com"], + "allowed_services": ["logs", "monitoring"], + } + entry.update(overrides) + return entry + + +def test_parser_typed_aws_auth_full_example() -> None: + secret = _parse_secret(_aws_entry()) + assert isinstance(secret, AwsAuthSecret) + assert secret.access_key_id_ref == "AWS_ACCESS_KEY_ID" + assert secret.secret_access_key_ref == "AWS_SECRET_ACCESS_KEY" + assert secret.session_token_ref is None + assert secret.hosts == ("logs.*.amazonaws.com", "monitoring.*.amazonaws.com") + assert secret.allowed_services == ("logs", "monitoring") + assert secret.allowed_regions == () + + +def test_parser_aws_auth_accepts_session_token_and_regions() -> None: + secret = _parse_secret( + _aws_entry(session_token="AWS_SESSION_TOKEN", allowed_regions=["us-east-1"]) + ) + assert isinstance(secret, AwsAuthSecret) + assert secret.session_token_ref == "AWS_SESSION_TOKEN" + assert secret.allowed_regions == ("us-east-1",) + + +def test_parser_aws_auth_requires_access_key_id() -> None: + entry = _aws_entry() + del entry["access_key_id"] + with pytest.raises(ValueError, match="requires a non-empty 
'access_key_id'"): + _parse_secret(entry) + + +def test_parser_aws_auth_requires_secret_access_key() -> None: + entry = _aws_entry() + del entry["secret_access_key"] + with pytest.raises(ValueError, match="requires a non-empty 'secret_access_key'"): + _parse_secret(entry) + + +def test_parser_aws_auth_requires_hosts() -> None: + with pytest.raises(ValueError, match="'hosts' must be a non-empty array"): + _parse_secret(_aws_entry(hosts=[])) + + +# ── aws_auth renderer ──────────────────────────────────────────────────────── + + +def test_render_emits_aws_auth_transform() -> None: + secrets = [ + AwsAuthSecret( + name="cloudwatch", + hosts=("logs.*.amazonaws.com", "monitoring.*.amazonaws.com"), + access_key_id_ref="AWS_ACCESS_KEY_ID", + secret_access_key_ref="AWS_SECRET_ACCESS_KEY", + allowed_services=("logs", "monitoring"), + ) + ] + cfg = yaml.safe_load(render_proxy_yaml(secrets)) + aws = next(t for t in cfg["transforms"] if t["name"] == "aws_auth") + assert aws["config"]["access_key_id"] == {"type": "env", "var": "AWS_ACCESS_KEY_ID"} + assert aws["config"]["secret_access_key"] == { + "type": "env", + "var": "AWS_SECRET_ACCESS_KEY", + } + assert aws["config"]["allowed_services"] == ["logs", "monitoring"] + assert "session_token" not in aws["config"] + assert {r["host"] for r in aws["config"]["rules"]} == { + "logs.*.amazonaws.com", + "monitoring.*.amazonaws.com", + } + + +def test_render_merges_aws_auth_hosts_for_shared_credentials() -> None: + secrets = [ + AwsAuthSecret("a", ("logs.*.amazonaws.com",), "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"), + AwsAuthSecret("b", ("monitoring.*.amazonaws.com",), "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"), + ] + cfg = yaml.safe_load(render_proxy_yaml(secrets)) + blocks = [t for t in cfg["transforms"] if t["name"] == "aws_auth"] + assert len(blocks) == 1 + assert {r["host"] for r in blocks[0]["config"]["rules"]} == { + "logs.*.amazonaws.com", + "monitoring.*.amazonaws.com", + } diff --git 
a/services/api/tests/test_sandbox_kubernetes_backend.py b/services/api/tests/test_sandbox_kubernetes_backend.py index b7023c59..b3a9bd18 100644 --- a/services/api/tests/test_sandbox_kubernetes_backend.py +++ b/services/api/tests/test_sandbox_kubernetes_backend.py @@ -1214,6 +1214,31 @@ def test_proxy_iron_env_injects_broker_when_url_set( } +def test_tool_server_container_exposes_aws_region_not_credentials( + monkeypatch: pytest.MonkeyPatch, +) -> None: + from api.sandbox.kubernetes import _build_tool_server_container + + monkeypatch.setenv("KUBERNETES_TOOL_SERVER_IMAGE", "centaur-api:latest") + monkeypatch.setenv("KUBERNETES_SECRET_ENV_NAME", "centaur-infra-env") + + container = _build_tool_server_container( + firewall_host="fw", api_url="http://api:8000", overlay_mount=None + ) + by_name = {e["name"]: e for e in container["env"]} + + # Region (non-secret) is exposed, optional so sandboxes start without it. + assert by_name["AWS_REGION"]["valueFrom"]["secretKeyRef"] == { + "name": "centaur-infra-env", + "key": "AWS_REGION", + "optional": True, + } + # Credentials must NOT be here — iron-proxy's aws_auth transform holds them + # and re-signs on the wire; they never enter the tool process. + assert "AWS_ACCESS_KEY_ID" not in by_name + assert "AWS_SECRET_ACCESS_KEY" not in by_name + + @pytest.mark.asyncio async def test_per_sandbox_proxy_uses_bootstrap_secret_for_onepassword( monkeypatch: pytest.MonkeyPatch, diff --git a/services/iron-proxy/iron-proxy.yaml b/services/iron-proxy/iron-proxy.yaml index 00a8eb38..be9b8edb 100644 --- a/services/iron-proxy/iron-proxy.yaml +++ b/services/iron-proxy/iron-proxy.yaml @@ -78,6 +78,15 @@ transforms: - "x-as-user-email" - "/^x-codex-.*$/" - "/^x-openai-.*$/" + # AWS SigV4 request headers (x-amz-target, x-amz-date, + # x-amz-content-sha256, x-amz-security-token) for the aws_auth transform. 
+ - "/^x-amz-.*$/" + # AWS SDK headers that the SDK signer folds into the SigV4 signed-headers + # set (amz-sdk-request, amz-sdk-invocation-id, and x-amzn-query-mode for + # CloudWatch's query-JSON protocol). aws_auth signs them, so they must + # survive egress filtering or AWS rejects with InvalidSignatureException. + - "/^amz-sdk-.*$/" + - "/^x-amzn-.*$/" - "/^x-[a-z0-9-]*(api-key|apikey|secret|token|auth|key)$/" log: diff --git a/tools/infra/cloudwatch/.env.example b/tools/infra/cloudwatch/.env.example new file mode 100644 index 00000000..ad555dc1 --- /dev/null +++ b/tools/infra/cloudwatch/.env.example @@ -0,0 +1,16 @@ +# The real AWS credentials are resolved by iron-proxy (via the secrets backend +# / 1Password), NOT by this tool — boto3 signs with placeholders and iron-proxy +# re-signs on the wire. Store a read-only CloudWatch IAM user's keys under the +# names below in your secrets backend, the same way as other tool credentials. +# Scope the IAM policy tightly — read-only CloudWatch Logs + Metrics only, e.g.: +# logs:DescribeLogGroups, logs:FilterLogEvents, logs:GetLogEvents, +# logs:StartQuery, logs:GetQueryResults, logs:StopQuery, +# cloudwatch:ListMetrics, cloudwatch:GetMetricData, +# cloudwatch:DescribeAlarms, cloudwatch:DescribeAlarmHistory +AWS_ACCESS_KEY_ID=your-access-key-id +AWS_SECRET_ACCESS_KEY=your-secret-access-key + +# Region the log groups / metrics live in. NOT a secret — this is the only AWS +# value the tool process itself reads (to pick the endpoint + signing scope). +# Defaults to us-east-1 if unset. +AWS_REGION=us-east-1 diff --git a/tools/infra/cloudwatch/client.py b/tools/infra/cloudwatch/client.py new file mode 100644 index 00000000..5ede3ff9 --- /dev/null +++ b/tools/infra/cloudwatch/client.py @@ -0,0 +1,376 @@ +"""AWS CloudWatch client for read-only logs, metrics, and alarms. 
+ +Mirrors the useful read-only surface of the AWS CloudWatch MCP server using +boto3: browse log groups, tail/filter log events, run CloudWatch Logs Insights +queries, list metrics and pull metric data, and inspect alarms. + +AWS auth rides iron-proxy's ``aws_auth`` transform (declared in pyproject.toml): +boto3 signs each request with throwaway *placeholder* credentials, and iron-proxy +reads the region/service from the signature scope, strips it, and re-signs with +the real read-only IAM keys it resolves from the secrets backend. The real keys +never enter this process — the SigV4 analogue of the ``secrets`` placeholder +swap. Only the region is a real value: boto3 needs it to pick the endpoint host +and credential scope, and it isn't a secret. +""" + +from __future__ import annotations + +import os +from datetime import UTC, datetime, timedelta +from typing import Any + +_DEFAULT_REGION = "us-east-1" + +# boto3 must sign with *some* credentials; iron-proxy's aws_auth transform +# discards this signature and re-signs with the real keys, so the value is +# irrelevant beyond being non-empty. +_PLACEHOLDER_CREDENTIAL = "iron-proxy-resigns-this" + + +class CloudWatchClient: + """Read-only client for CloudWatch Logs and Metrics (boto3, SigV4). + + boto3 signs with placeholder credentials; iron-proxy's ``aws_auth`` transform + re-signs with the real read-only IAM keys (resolved from the secrets backend), + so credentials never reach this process. The region comes from ``AWS_REGION`` + (a non-secret), defaulting to ``us-east-1``. boto3 clients are built lazily on + first use so tool discovery never needs network access. 
+ """ + + def __init__(self, region: str | None = None): + self._region = region + self.__logs: Any = None + self.__cw: Any = None + + # -- boto3 plumbing (lazy) ---------------------------------------------- + + @property + def region(self) -> str: + # Region is non-secret config, read straight from the env (not secret(), + # whose server-mode StubBackend returns the key name as a placeholder + # rather than the default). Defaults to us-east-1 when unset. + return self._region or os.getenv("AWS_REGION") or _DEFAULT_REGION # noqa: TID251 + + def _session(self) -> Any: + import boto3 # lazy: keeps import cheap and tests boto3-free + + # Placeholder credentials — iron-proxy re-signs on the wire. Passed + # explicitly so boto3 never reaches for IMDS / ambient AWS config. + return boto3.session.Session( + aws_access_key_id=_PLACEHOLDER_CREDENTIAL, + aws_secret_access_key=_PLACEHOLDER_CREDENTIAL, + region_name=self.region, + ) + + def _logs(self) -> Any: + if self.__logs is None: + self.__logs = self._session().client("logs") + return self.__logs + + def _cw(self) -> Any: + if self.__cw is None: + self.__cw = self._session().client("cloudwatch") + return self.__cw + + @staticmethod + def _call(fn: Any, **kwargs: Any) -> dict: + """Invoke a boto3 call, dropping None args and normalizing errors.""" + clean = {k: v for k, v in kwargs.items() if v is not None} + try: + return fn(**clean) + except Exception as exc: # botocore.ClientError et al. + raise RuntimeError(f"CloudWatch API error: {exc}") from exc + + # -- Logs: groups & events ---------------------------------------------- + + def list_log_groups( + self, + name_prefix: str | None = None, + limit: int = 50, + ) -> list[dict]: + """List CloudWatch log groups, optionally filtered by name prefix. + + Use this to discover log group names for filter_log_events / start_query. + + Args: + name_prefix: Only return groups whose name starts with this string. + limit: Max groups to return (1-50). 
+ """ + resp = self._call( + self._logs().describe_log_groups, + logGroupNamePrefix=name_prefix, + limit=max(1, min(limit, 50)), + ) + return _clean(resp.get("logGroups", [])) + + def filter_log_events( + self, + log_group_name: str, + filter_pattern: str | None = None, + start_time: str | None = None, + end_time: str | None = None, + limit: int = 100, + ) -> dict: + """Search log events in a group within a time window. + + The workhorse for grepping logs. ``filter_pattern`` uses CloudWatch Logs + filter syntax (e.g. 'ERROR', '"timeout"', '{ $.level = "error" }'). + + Args: + log_group_name: Exact log group name (see list_log_groups). + filter_pattern: CloudWatch Logs filter pattern. Omit to return all events. + start_time: ISO-8601 timestamp or epoch (s/ms). Defaults to 1h before end. + end_time: ISO-8601 timestamp or epoch (s/ms). Defaults to now. + limit: Max events to return (1-10000). + """ + start_ms, end_ms = _resolve_window_ms(start_time, end_time) + resp = self._call( + self._logs().filter_log_events, + logGroupName=log_group_name, + filterPattern=filter_pattern, + startTime=start_ms, + endTime=end_ms, + limit=max(1, min(limit, 10000)), + ) + return { + "events": _clean(resp.get("events", [])), + "searched_log_streams": _clean(resp.get("searchedLogStreams", [])), + } + + # -- Logs Insights ------------------------------------------------------- + + def start_query( + self, + log_group_names: list[str] | str, + query_string: str, + start_time: str | None = None, + end_time: str | None = None, + limit: int = 100, + ) -> dict: + """Start a CloudWatch Logs Insights query. Returns a query_id to poll. + + Logs Insights is asynchronous: call this to start, then poll + get_query_results with the returned query_id until status is Complete. + + Args: + log_group_names: One name or a list of log group names to query. + query_string: Logs Insights query, e.g. + 'fields @timestamp, @message | filter @message like /ERROR/ | sort @timestamp desc'. 
+ start_time: ISO-8601 timestamp or epoch (s/ms). Defaults to 1h before end. + end_time: ISO-8601 timestamp or epoch (s/ms). Defaults to now. + limit: Max rows the query may return (1-10000). + """ + names = [log_group_names] if isinstance(log_group_names, str) else list(log_group_names) + start_ms, end_ms = _resolve_window_ms(start_time, end_time) + resp = self._call( + self._logs().start_query, + logGroupNames=names, + queryString=query_string, + startTime=start_ms // 1000, # Insights wants epoch seconds + endTime=end_ms // 1000, + limit=max(1, min(limit, 10000)), + ) + return _clean(resp) + + def get_query_results(self, query_id: str) -> dict: + """Get results/status for a Logs Insights query started with start_query. + + Status is one of Scheduled, Running, Complete, Failed, Cancelled, Timeout. + Poll until status is Complete (or terminal) before trusting the results. + + Args: + query_id: The query_id returned by start_query. + """ + resp = self._call(self._logs().get_query_results, queryId=query_id) + return _clean(resp) + + def stop_query(self, query_id: str) -> dict: + """Stop a running Logs Insights query. + + Args: + query_id: The query_id returned by start_query. + """ + return _clean(self._call(self._logs().stop_query, queryId=query_id)) + + # -- Metrics ------------------------------------------------------------- + + def list_metrics( + self, + namespace: str | None = None, + metric_name: str | None = None, + limit: int = 100, + ) -> list[dict]: + """List available metrics, optionally filtered by namespace/name. + + Use to discover the namespace, metric name, and dimensions to pass to + get_metric_data. + + Args: + namespace: e.g. 'AWS/EC2', 'AWS/Lambda', or a custom namespace. + metric_name: e.g. 'CPUUtilization', 'Errors'. + limit: Max metrics to return (results are truncated client-side). 
+ """ + resp = self._call( + self._cw().list_metrics, + Namespace=namespace, + MetricName=metric_name, + ) + return _clean(resp.get("Metrics", []))[: max(1, limit)] + + def get_metric_data( + self, + namespace: str, + metric_name: str, + dimensions: dict[str, str] | None = None, + stat: str = "Average", + period: int = 300, + start_time: str | None = None, + end_time: str | None = None, + ) -> dict: + """Fetch time-series data points for a single metric. + + Args: + namespace: Metric namespace, e.g. 'AWS/Lambda'. + metric_name: Metric name, e.g. 'Errors'. + dimensions: Dimension name→value map, e.g. {'FunctionName': 'my-fn'}. + stat: Statistic — Average, Sum, Minimum, Maximum, SampleCount, or p99 etc. + period: Granularity in seconds (must be a multiple of 60). + start_time: ISO-8601 timestamp or epoch (s/ms). Defaults to 1h before end. + end_time: ISO-8601 timestamp or epoch (s/ms). Defaults to now. + """ + start_dt, end_dt = _resolve_window_dt(start_time, end_time) + dims = [{"Name": k, "Value": v} for k, v in (dimensions or {}).items()] + resp = self._call( + self._cw().get_metric_data, + MetricDataQueries=[ + { + "Id": "m1", + "MetricStat": { + "Metric": { + "Namespace": namespace, + "MetricName": metric_name, + "Dimensions": dims, + }, + "Period": period, + "Stat": stat, + }, + "ReturnData": True, + } + ], + StartTime=start_dt, + EndTime=end_dt, + ) + return _clean(resp.get("MetricDataResults", [])) + + # -- Alarms -------------------------------------------------------------- + + def describe_alarms( + self, + state_value: str | None = None, + alarm_name_prefix: str | None = None, + limit: int = 50, + ) -> list[dict]: + """List metric alarms, optionally filtered by state and name prefix. + + Pass state_value='ALARM' to see only currently-firing alarms. + + Args: + state_value: 'OK', 'ALARM', or 'INSUFFICIENT_DATA'. + alarm_name_prefix: Only alarms whose name starts with this string. + limit: Max alarms to return (1-100). 
+ """ + resp = self._call( + self._cw().describe_alarms, + StateValue=state_value, + AlarmNamePrefix=alarm_name_prefix, + MaxRecords=max(1, min(limit, 100)), + ) + return _clean(resp.get("MetricAlarms", [])) + + def get_alarm_history( + self, + alarm_name: str | None = None, + limit: int = 50, + ) -> list[dict]: + """Get state-change history for an alarm (or all alarms). + + Args: + alarm_name: Restrict to one alarm. Omit for history across all alarms. + limit: Max history items to return (1-100). + """ + resp = self._call( + self._cw().describe_alarm_history, + AlarmName=alarm_name, + MaxRecords=max(1, min(limit, 100)), + ) + return _clean(resp.get("AlarmHistoryItems", [])) + + # -- Lifecycle ----------------------------------------------------------- + + def close(self): + self.__logs = None + self.__cw = None + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + +# -- Helpers ----------------------------------------------------------------- + + +def _to_epoch_ms(value: str | int | float | None) -> int | None: + """Coerce an ISO-8601 string or epoch (seconds or millis) to epoch millis.""" + if value is None: + return None + if isinstance(value, (int, float)): + # Heuristic: values past ~2001 in seconds are < 1e12; millis are larger. 
+ return int(value if value > 1_000_000_000_000 else value * 1000) + text = str(value).strip().replace("Z", "+00:00") + dt = datetime.fromisoformat(text) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=UTC) + return int(dt.timestamp() * 1000) + + +def _resolve_window_ms(start: str | None, end: str | None) -> tuple[int, int]: + """Resolve a (start, end) window to epoch millis, defaulting to the last hour.""" + end_ms = _to_epoch_ms(end) + if end_ms is None: + end_ms = int(datetime.now(UTC).timestamp() * 1000) + start_ms = _to_epoch_ms(start) + if start_ms is None: + start_ms = end_ms - int(timedelta(hours=1).total_seconds() * 1000) + return start_ms, end_ms + + +def _resolve_window_dt(start: str | None, end: str | None) -> tuple[datetime, datetime]: + """Resolve a (start, end) window to tz-aware datetimes (CloudWatch metrics API).""" + start_ms, end_ms = _resolve_window_ms(start, end) + return ( + datetime.fromtimestamp(start_ms / 1000, tz=UTC), + datetime.fromtimestamp(end_ms / 1000, tz=UTC), + ) + + +def _clean(obj: Any) -> Any: + """Make a boto3 response JSON-serializable. + + Converts datetimes to ISO-8601, decodes bytes, and strips the boilerplate + ``ResponseMetadata`` envelope so it doesn't bloat the agent's context. 
+ """ + if isinstance(obj, dict): + return {k: _clean(v) for k, v in obj.items() if k != "ResponseMetadata"} + if isinstance(obj, (list, tuple)): + return [_clean(v) for v in obj] + if isinstance(obj, datetime): + return obj.isoformat() + if isinstance(obj, (bytes, bytearray)): + return bytes(obj).decode("utf-8", "replace") + return obj + + +def _client() -> CloudWatchClient: + return CloudWatchClient() diff --git a/tools/infra/cloudwatch/pyproject.toml b/tools/infra/cloudwatch/pyproject.toml new file mode 100644 index 00000000..b09ed8bc --- /dev/null +++ b/tools/infra/cloudwatch/pyproject.toml @@ -0,0 +1,32 @@ +[project] +name = "cloudwatch" +description = "AWS CloudWatch — Logs Insights queries, log events, metrics, and alarms (read-only)" +version = "0.1.0" +requires-python = ">=3.11" +dependencies = [ + "boto3>=1.34", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + + +# AWS SigV4 rides iron-proxy's `aws_auth` transform: boto3 signs each request +# with throwaway placeholder credentials, and iron-proxy re-signs it with the +# real read-only IAM keys it resolves from `access_key_id`/`secret_access_key` +# (stored in the secrets backend like any other tool credential). The real keys +# never enter the tool process — same placeholder-swap model as the `secrets` +# transform, just for SigV4. Scope the IAM user to read-only CloudWatch. 
+[tool.centaur] +module = "client.py" +hosts = ["logs.*.amazonaws.com", "monitoring.*.amazonaws.com"] +secrets = [ + { type = "aws_auth", name = "cloudwatch", access_key_id = "AWS_ACCESS_KEY_ID", secret_access_key = "AWS_SECRET_ACCESS_KEY", hosts = [ + "logs.*.amazonaws.com", + "monitoring.*.amazonaws.com", + ], allowed_services = [ + "logs", + "monitoring", + ] }, +] diff --git a/tools/infra/cloudwatch/test_client.py b/tools/infra/cloudwatch/test_client.py new file mode 100644 index 00000000..3be0b2dd --- /dev/null +++ b/tools/infra/cloudwatch/test_client.py @@ -0,0 +1,164 @@ +from __future__ import annotations + +import importlib.util +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +spec = importlib.util.spec_from_file_location( + "cloudwatch_client", Path(__file__).with_name("client.py") +) +assert spec and spec.loader +module = importlib.util.module_from_spec(spec) +spec.loader.exec_module(module) +CloudWatchClient = module.CloudWatchClient + + +class FakeBotoClient: + """Records boto3 calls and returns canned responses, no network or boto3.""" + + def __init__(self, responses: dict[str, Any] | None = None) -> None: + self.calls: list[dict[str, Any]] = [] + self.responses = responses or {} + + def __getattr__(self, name: str): + def _method(**kwargs: Any) -> Any: + self.calls.append({"op": name, "kwargs": kwargs}) + return self.responses.get(name, {}) + + return _method + + +class RecordingCloudWatchClient(CloudWatchClient): + """Swaps boto3 logs/cloudwatch clients for recording fakes.""" + + def __init__(self, **responses: Any) -> None: + super().__init__(region="us-west-2") + self.logs = FakeBotoClient(responses) + self.cw = FakeBotoClient(responses) + + def _logs(self) -> Any: + return self.logs + + def _cw(self) -> Any: + return self.cw + + +def test_list_log_groups_clamps_limit_and_drops_none() -> None: + client = RecordingCloudWatchClient(describe_log_groups={"logGroups": [{"logGroupName": "/x"}]}) + + out = 
client.list_log_groups(limit=999) + + assert out == [{"logGroupName": "/x"}] + call = client.logs.calls[-1] + assert call["op"] == "describe_log_groups" + assert call["kwargs"] == {"limit": 50} # clamped, name_prefix=None dropped + + +def test_filter_log_events_defaults_to_last_hour() -> None: + client = RecordingCloudWatchClient() + + client.filter_log_events("/aws/lambda/fn", end_time="2026-05-28T12:00:00Z") + + kwargs = client.logs.calls[-1]["kwargs"] + assert kwargs["logGroupName"] == "/aws/lambda/fn" + assert kwargs["endTime"] == int(datetime(2026, 5, 28, 12, tzinfo=UTC).timestamp() * 1000) + assert kwargs["startTime"] == kwargs["endTime"] - 3_600_000 + assert "filterPattern" not in kwargs # None dropped + + +def test_filter_log_events_passes_pattern_and_clamps_limit() -> None: + client = RecordingCloudWatchClient() + + client.filter_log_events("/g", filter_pattern="ERROR", limit=99999) + + kwargs = client.logs.calls[-1]["kwargs"] + assert kwargs["filterPattern"] == "ERROR" + assert kwargs["limit"] == 10000 + + +def test_start_query_normalizes_names_and_uses_epoch_seconds() -> None: + client = RecordingCloudWatchClient(start_query={"queryId": "q-1"}) + + out = client.start_query( + "/only-one", + "fields @message", + start_time="2026-05-28T11:00:00Z", + end_time="2026-05-28T12:00:00Z", + ) + + assert out == {"queryId": "q-1"} + kwargs = client.logs.calls[-1]["kwargs"] + assert kwargs["logGroupNames"] == ["/only-one"] + assert kwargs["startTime"] == int(datetime(2026, 5, 28, 11, tzinfo=UTC).timestamp()) + assert kwargs["endTime"] == int(datetime(2026, 5, 28, 12, tzinfo=UTC).timestamp()) + + +def test_get_metric_data_builds_single_query() -> None: + client = RecordingCloudWatchClient(get_metric_data={"MetricDataResults": [{"Id": "m1"}]}) + + out = client.get_metric_data( + "AWS/Lambda", + "Errors", + dimensions={"FunctionName": "fn"}, + stat="Sum", + period=60, + start_time="2026-05-28T11:00:00Z", + end_time="2026-05-28T12:00:00Z", + ) + + assert out == [{"Id": 
"m1"}] + kwargs = client.cw.calls[-1]["kwargs"] + q = kwargs["MetricDataQueries"][0] + assert q["MetricStat"]["Metric"]["Namespace"] == "AWS/Lambda" + assert q["MetricStat"]["Metric"]["Dimensions"] == [{"Name": "FunctionName", "Value": "fn"}] + assert q["MetricStat"]["Stat"] == "Sum" + assert q["MetricStat"]["Period"] == 60 + assert kwargs["StartTime"] == datetime(2026, 5, 28, 11, tzinfo=UTC) + assert kwargs["EndTime"] == datetime(2026, 5, 28, 12, tzinfo=UTC) + + +def test_describe_alarms_filters_active() -> None: + client = RecordingCloudWatchClient(describe_alarms={"MetricAlarms": [{"AlarmName": "a"}]}) + + out = client.describe_alarms(state_value="ALARM") + + assert out == [{"AlarmName": "a"}] + kwargs = client.cw.calls[-1]["kwargs"] + assert kwargs["StateValue"] == "ALARM" + assert kwargs["MaxRecords"] == 50 + assert "AlarmNamePrefix" not in kwargs + + +def test_clean_strips_metadata_and_serializes_datetimes() -> None: + cleaned = module._clean( + { + "ResponseMetadata": {"RequestId": "abc"}, + "MetricAlarms": [{"StateUpdatedTimestamp": datetime(2026, 5, 28, tzinfo=UTC)}], + } + ) + + assert "ResponseMetadata" not in cleaned + assert cleaned["MetricAlarms"][0]["StateUpdatedTimestamp"] == "2026-05-28T00:00:00+00:00" + + +def test_to_epoch_ms_handles_seconds_and_millis() -> None: + assert module._to_epoch_ms(1_700_000_000) == 1_700_000_000_000 # seconds → ms + assert module._to_epoch_ms(1_700_000_000_000) == 1_700_000_000_000 # already ms + assert module._to_epoch_ms(None) is None + + +def test_api_errors_are_wrapped() -> None: + client = RecordingCloudWatchClient() + + def boom(**_: Any): + raise ValueError("AccessDenied") + + client.logs.describe_log_groups = boom # type: ignore[assignment] + + try: + client.list_log_groups() + except RuntimeError as exc: + assert "CloudWatch API error" in str(exc) + else: + raise AssertionError("expected RuntimeError") From 98a297deeb016d9510cd130ebe7d3406882ecab3 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Mon, 8 Jun 2026 
19:24:00 +0000 Subject: [PATCH 23/25] fix(grafana): scope tool to splits.grafana.net + drop GRAFANA_URL as a secret MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The grafana tool was unusable on a NetworkPolicy/iron-proxy-enforced cluster (every call 405'd): - `hosts = []` rendered the GRAFANA_API_KEY secrets transform with empty host rules, so iron-proxy never injected the Bearer token and never allowlisted egress to Grafana. Set `hosts = ["splits.grafana.net"]`. - `GRAFANA_URL` was declared as a replace-mode secret, but the client reads it in-process (os.getenv) to build its base_url — a connection target can never be a wire-swapped placeholder, so it resolved to the literal "GRAFANA_URL" and the client dialed http://GRAFANA_URL, which the CONNECT-only proxy rejected with 405. Drop it from `secrets`; provide it as a plain https env var (sandbox.extraEnv for the Python api; apiRs.extraEnv + SESSION_SANDBOX_PASSTHROUGH_ENV for api-rs). Co-Authored-By: Claude Opus 4.8 (1M context) --- tools/infra/grafana/pyproject.toml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/infra/grafana/pyproject.toml b/tools/infra/grafana/pyproject.toml index f85bc155..d4b3ab87 100644 --- a/tools/infra/grafana/pyproject.toml +++ b/tools/infra/grafana/pyproject.toml @@ -26,8 +26,18 @@ build-backend = "hatchling.build" [tool.centaur] module = "client.py" -hosts = [] +# Egress allowlist + the host(s) the GRAFANA_API_KEY Bearer token is scoped to. +# MUST be non-empty: an empty list renders the iron-proxy `secrets` transform +# with no host rules, so the token is never injected AND egress to Grafana is +# denied (every call 405s/401s through the CONNECT-only proxy). +hosts = ["splits.grafana.net"] secrets = [ - "GRAFANA_URL", + # NOTE: GRAFANA_URL is NOT a secret — it's the base URL the client reads + # in-process (os.getenv) to pick its connection target, so it can never be + # a wire-swapped placeholder. 
Provide it as a plain env var via the chart + # (sandbox.extraEnv for the Python api, apiRs.extraEnv + passthrough for + # api-rs) and keep it https:// so requests are CONNECT-tunneled, not + # plaintext-forwarded (the latter hits the proxy's CONNECT-only listener + # and returns 405). {type = "http", name = "GRAFANA_API_KEY", match_headers = ["Authorization"]}, ] From aaed85572393b0d7d12b22dbde7ca962840ad074 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Mon, 8 Jun 2026 19:54:17 +0000 Subject: [PATCH 24/25] feat(api-rs): support the aws_auth secret type in tool discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit api-rs's iron-control tool discovery rejected `aws_auth` as an unknown secret type, so the CloudWatch tool's SigV4 re-signing credentials were never wired into the per-sandbox iron-proxy fragment — the tool worked on the Python-api path but not on api-rs (Discord). Add an AwsAuthSecret variant, a parser, and an `aws_auth` transform renderer mirroring the Python `_build_aws_auth_transforms` (merge by credential scheme + scope, union hosts; omit session_token / allowed_regions / allowed_services when empty). Credential sources use the `{placeholder: }` form like the other api-rs transforms. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../centaur-api-server/src/tool_discovery.rs | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) diff --git a/services/api-rs/crates/centaur-api-server/src/tool_discovery.rs b/services/api-rs/crates/centaur-api-server/src/tool_discovery.rs index a3122e3e..0d3fecfd 100644 --- a/services/api-rs/crates/centaur-api-server/src/tool_discovery.rs +++ b/services/api-rs/crates/centaur-api-server/src/tool_discovery.rs @@ -360,6 +360,7 @@ enum ToolSecret { Http(HttpSecret), OAuthToken(OAuthTokenSecret), GcpAuth(GcpAuthSecret), + AwsAuth(AwsAuthSecret), PgDsn(PgDsnSecret), } @@ -410,6 +411,17 @@ struct GcpAuthSecret { scopes: Vec, } +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +struct AwsAuthSecret { + name: String, + hosts: Vec, + access_key_id_ref: String, + secret_access_key_ref: String, + session_token_ref: Option, + allowed_regions: Vec, + allowed_services: Vec, +} + #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] struct PgDsnSecret { name: String, @@ -467,6 +479,7 @@ fn parse_secret( "http" | "header" => parse_http_secret(table, name, secret_ref, default_hosts), "oauth_token" => parse_oauth_token_secret(table, name), "gcp_auth" => parse_gcp_auth_secret(table, name, secret_ref), + "aws_auth" => parse_aws_auth_secret(table, name), "pg_dsn" => parse_pg_dsn_secret(table, name, secret_ref), "brokered_token" | "hmac_sign" => Err(ToolDiscoveryError::Invalid(format!( "api-rs iron-control tool discovery does not yet support secret type {:?}", @@ -591,6 +604,38 @@ fn parse_gcp_auth_secret( })) } +fn parse_aws_auth_secret( + table: &toml::Table, + name: String, +) -> Result { + // iron-proxy re-signs SigV4 requests the tool signed with placeholder + // credentials, drawing the real keys from the resolved secret refs. 
`hosts` + // becomes the iron-proxy `rules`; it must be non-empty or the transform + // would match (and try to re-sign) nothing — the SigV4 analogue of the + // empty-`hosts` trap the `secrets` transform has. + let hosts = required_string_array(table.get("hosts"), "hosts")?; + if hosts.is_empty() { + return Err(ToolDiscoveryError::Invalid(format!( + "aws_auth entry {name:?} 'hosts' must be a non-empty array" + ))); + } + let access_key_id_ref = required_str(table, "access_key_id")?.to_owned(); + let secret_access_key_ref = required_str(table, "secret_access_key")?.to_owned(); + let session_token_ref = optional_str(table, "session_token").map(ToOwned::to_owned); + let allowed_regions = optional_string_array(table.get("allowed_regions"))?.unwrap_or_default(); + let allowed_services = + optional_string_array(table.get("allowed_services"))?.unwrap_or_default(); + Ok(ToolSecret::AwsAuth(AwsAuthSecret { + name, + hosts, + access_key_id_ref, + secret_access_key_ref, + session_token_ref, + allowed_regions, + allowed_services, + })) +} + fn parse_pg_dsn_secret( table: &toml::Table, name: String, @@ -647,6 +692,7 @@ fn fragment_from_secrets(secrets: Vec) -> Result Result, ToolDis Ok(transforms) } +fn aws_auth_transforms(secrets: &[ToolSecret]) -> Result, ToolDiscoveryError> { + // One transform per unique credential set + scope; entries that share the + // same refs/session-token/allowed regions+services merge, unioning hosts — + // mirrors the Python `_build_aws_auth_transforms`. 
+ type SchemeKey = (String, String, Option, Vec, Vec); + let mut by_scheme = BTreeMap::>::new(); + for secret in secrets { + let ToolSecret::AwsAuth(secret) = secret else { + continue; + }; + let key = ( + secret.access_key_id_ref.clone(), + secret.secret_access_key_ref.clone(), + secret.session_token_ref.clone(), + secret.allowed_regions.clone(), + secret.allowed_services.clone(), + ); + by_scheme + .entry(key) + .or_default() + .extend(secret.hosts.iter().cloned()); + } + let mut transforms = Vec::new(); + for ((access_key_id_ref, secret_access_key_ref, session_token_ref, regions, services), hosts) in + by_scheme + { + let mut config = BTreeMap::new(); + config.insert( + "access_key_id".to_owned(), + yaml_map([("placeholder", yaml_string(&access_key_id_ref))])?, + ); + config.insert( + "secret_access_key".to_owned(), + yaml_map([("placeholder", yaml_string(&secret_access_key_ref))])?, + ); + if let Some(session_token_ref) = session_token_ref { + config.insert( + "session_token".to_owned(), + yaml_map([("placeholder", yaml_string(&session_token_ref))])?, + ); + } + if !regions.is_empty() { + config.insert("allowed_regions".to_owned(), yaml_value(regions)?); + } + if !services.is_empty() { + config.insert("allowed_services".to_owned(), yaml_value(services)?); + } + config.insert("rules".to_owned(), yaml_value(host_rules(hosts)?)?); + transforms.push(Transform { + name: "aws_auth".to_owned(), + config: TransformConfig { + extra: config, + ..Default::default() + }, + ..Default::default() + }); + } + Ok(transforms) +} + fn oauth_token_transform(secrets: &[ToolSecret]) -> Result, ToolDiscoveryError> { let mut tokens = Vec::new(); for secret in secrets { @@ -1091,6 +1197,53 @@ secrets = [ let _ = fs::remove_dir_all(temp); } + #[test] + fn renders_aws_auth_transform_from_secret() { + let temp = temp_dir("api-rs-aws"); + let base = temp.join("base"); + write_tool( + &base.join("cloudwatch"), + r#" +[project] +description = "cloudwatch" + +[tool.centaur] +secrets = [ + 
{type = "aws_auth", name = "cloudwatch", access_key_id = "AWS_ACCESS_KEY_ID", secret_access_key = "AWS_SECRET_ACCESS_KEY", hosts = ["logs.*.amazonaws.com", "monitoring.*.amazonaws.com"], allowed_services = ["logs", "monitoring"]}, +] +"#, + ); + + let discovered = discover_tool_proxy_fragment(&[base.clone()]).unwrap(); + assert_eq!(discovered.secret_count, 1); + + let aws = discovered + .fragment + .transforms + .iter() + .find(|transform| transform.name == "aws_auth") + .expect("aws_auth transform rendered"); + let extra = &aws.config.extra; + assert_eq!( + extra.get("access_key_id"), + Some(&yaml_map([("placeholder", yaml_string("AWS_ACCESS_KEY_ID"))]).unwrap()) + ); + assert_eq!( + extra.get("secret_access_key"), + Some(&yaml_map([("placeholder", yaml_string("AWS_SECRET_ACCESS_KEY"))]).unwrap()) + ); + assert_eq!( + extra.get("allowed_services"), + Some(&yaml_value(vec!["logs".to_owned(), "monitoring".to_owned()]).unwrap()) + ); + // No session token declared, and the host rules must be present so the + // re-signing transform actually matches the AWS upstreams. + assert!(!extra.contains_key("session_token")); + assert!(extra.contains_key("rules")); + + let _ = fs::remove_dir_all(temp); + } + fn temp_dir(prefix: &str) -> PathBuf { let suffix = SystemTime::now() .duration_since(UNIX_EPOCH) From 22f4a860cea87ea5312dff0a054c09b589da3338 Mon Sep 17 00:00:00 2001 From: Will Drach Date: Mon, 8 Jun 2026 20:10:53 +0000 Subject: [PATCH 25/25] feat(cloudwatch): add the CLI so the tool is usable by sandbox agents The CloudWatch tool shipped (#287) with only client.py and `_client()`, so it works via the tool-server's `call ` (Python api) but is invisible on the shim-based path (api-rs / Discord): `install_tool_shims.py` only installs a CLI for tools that declare `[project.scripts]`, so agents there saw grafana and vlogs but no `cloudwatch` command. 
Add a Typer `cli.py` wrapping the read-only CloudWatchClient methods (log groups, filter events, Logs Insights queries, metrics, alarms) plus the `[project.scripts]` entry and hatch wheel packaging, mirroring the grafana/vlogs tools. Verified the exact shim chain with `uvx --from tools/infra/cloudwatch cloudwatch --help`. Co-Authored-By: Claude Opus 4.8 (1M context) --- tools/infra/cloudwatch/cli.py | 169 ++++++++++++++++++++++++++ tools/infra/cloudwatch/pyproject.toml | 11 ++ 2 files changed, 180 insertions(+) create mode 100644 tools/infra/cloudwatch/cli.py diff --git a/tools/infra/cloudwatch/cli.py b/tools/infra/cloudwatch/cli.py new file mode 100644 index 00000000..5efa24ac --- /dev/null +++ b/tools/infra/cloudwatch/cli.py @@ -0,0 +1,169 @@ +"""CLI for AWS CloudWatch (read-only). + +Mirrors the other infra tools' CLIs (grafana, vlogs): a Typer app whose +commands wrap ``CloudWatchClient`` and print JSON to stdout. The agent reaches +this via the ``cloudwatch`` shim that ``install_tool_shims.py`` installs from +``[project.scripts]``; without that entry the tool is invisible on the +shim-based (api-rs) path even though its ``[tool.centaur]`` metadata loads. +""" + +import json + +import typer +from dotenv import load_dotenv + +load_dotenv() + +app = typer.Typer( + name="cloudwatch", + help="AWS CloudWatch CLI — Logs Insights, log events, metrics, and alarms (read-only)", + no_args_is_help=True, +) + + +def get_client(): + from .client import CloudWatchClient + + return CloudWatchClient() + + +def _emit(result) -> None: + # default=str keeps any stray datetime/Decimal JSON-serializable. 
+ print(json.dumps(result, indent=2, default=str)) + + +@app.command("list-log-groups") +def list_log_groups( + prefix: str = typer.Option(None, "--prefix", "-p", help="Filter by log group name prefix"), + limit: int = typer.Option(50, "--limit", "-n", help="Max groups (1-50)"), +): + """List CloudWatch log groups (discover names for filter/start-query).""" + _emit(get_client().list_log_groups(name_prefix=prefix, limit=limit)) + + +@app.command("filter-log-events") +def filter_log_events( + log_group_name: str = typer.Argument(..., help="Exact log group name"), + filter_pattern: str = typer.Option( + None, "--filter", "-f", help="CloudWatch Logs filter pattern (e.g. ERROR)" + ), + start: str = typer.Option( + None, "--start", "-s", help="Start (ISO-8601 or epoch); default 1h before end" + ), + end: str = typer.Option(None, "--end", "-e", help="End (ISO-8601 or epoch); default now"), + limit: int = typer.Option(100, "--limit", "-n", help="Max events (1-10000)"), +): + """Search log events in a group within a time window.""" + _emit( + get_client().filter_log_events( + log_group_name=log_group_name, + filter_pattern=filter_pattern, + start_time=start, + end_time=end, + limit=limit, + ) + ) + + +@app.command("start-query") +def start_query( + query_string: str = typer.Argument(..., help="Logs Insights query string"), + log_group: list[str] = typer.Option( + ..., "--log-group", "-g", help="Log group name (repeat for multiple)" + ), + start: str = typer.Option(None, "--start", "-s", help="Start (ISO-8601 or epoch)"), + end: str = typer.Option(None, "--end", "-e", help="End (ISO-8601 or epoch)"), + limit: int = typer.Option(100, "--limit", "-n", help="Max rows (1-10000)"), +): + """Start a Logs Insights query; poll get-query-results with the returned query_id.""" + _emit( + get_client().start_query( + log_group_names=log_group, + query_string=query_string, + start_time=start, + end_time=end, + limit=limit, + ) + ) + + +@app.command("get-query-results") +def 
get_query_results( + query_id: str = typer.Argument(..., help="query_id returned by start-query"), +): + """Get results/status for a Logs Insights query (poll until status is Complete).""" + _emit(get_client().get_query_results(query_id=query_id)) + + +@app.command("stop-query") +def stop_query( + query_id: str = typer.Argument(..., help="query_id returned by start-query"), +): + """Stop a running Logs Insights query.""" + _emit(get_client().stop_query(query_id=query_id)) + + +@app.command("list-metrics") +def list_metrics( + namespace: str = typer.Option(None, "--namespace", "-N", help="e.g. AWS/EC2, AWS/Lambda"), + metric: str = typer.Option(None, "--metric", "-m", help="e.g. CPUUtilization, Errors"), + limit: int = typer.Option(100, "--limit", "-n", help="Max metrics"), +): + """List available metrics (discover namespace/name/dimensions for get-metric-data).""" + _emit(get_client().list_metrics(namespace=namespace, metric_name=metric, limit=limit)) + + +@app.command("get-metric-data") +def get_metric_data( + namespace: str = typer.Argument(..., help="Metric namespace, e.g. AWS/Lambda"), + metric_name: str = typer.Argument(..., help="Metric name, e.g. Errors"), + dimensions: str = typer.Option( + None, "--dimensions", "-d", help='JSON map, e.g. {"FunctionName":"my-fn"}' + ), + stat: str = typer.Option( + "Average", "--stat", help="Average, Sum, Minimum, Maximum, SampleCount, p99, ..." 
+ ), + period: int = typer.Option(300, "--period", help="Granularity seconds (multiple of 60)"), + start: str = typer.Option(None, "--start", "-s", help="Start (ISO-8601 or epoch)"), + end: str = typer.Option(None, "--end", "-e", help="End (ISO-8601 or epoch)"), +): + """Fetch time-series data points for a single metric.""" + dims = json.loads(dimensions) if dimensions else None + _emit( + get_client().get_metric_data( + namespace=namespace, + metric_name=metric_name, + dimensions=dims, + stat=stat, + period=period, + start_time=start, + end_time=end, + ) + ) + + +@app.command("describe-alarms") +def describe_alarms( + state: str = typer.Option(None, "--state", help="OK, ALARM, or INSUFFICIENT_DATA"), + prefix: str = typer.Option(None, "--prefix", "-p", help="Filter by alarm name prefix"), + limit: int = typer.Option(50, "--limit", "-n", help="Max alarms (1-100)"), +): + """List metric alarms (pass --state ALARM for currently-firing alarms).""" + _emit( + get_client().describe_alarms( + state_value=state, alarm_name_prefix=prefix, limit=limit + ) + ) + + +@app.command("get-alarm-history") +def get_alarm_history( + alarm: str = typer.Option(None, "--alarm", "-a", help="Restrict to one alarm name"), + limit: int = typer.Option(50, "--limit", "-n", help="Max items (1-100)"), +): + """Get state-change history for an alarm (or across all alarms).""" + _emit(get_client().get_alarm_history(alarm_name=alarm, limit=limit)) + + +if __name__ == "__main__": + app() diff --git a/tools/infra/cloudwatch/pyproject.toml b/tools/infra/cloudwatch/pyproject.toml index b09ed8bc..fed5434b 100644 --- a/tools/infra/cloudwatch/pyproject.toml +++ b/tools/infra/cloudwatch/pyproject.toml @@ -5,8 +5,19 @@ version = "0.1.0" requires-python = ">=3.11" dependencies = [ "boto3>=1.34", + "typer>=0.12.0", + "python-dotenv>=1.0.0", ] +[project.scripts] +cloudwatch = "centaur_tool_cloudwatch.cli:app" + +[tool.hatch.build.targets.wheel] +packages = ["."] + +[tool.hatch.build.targets.wheel.sources] +"." 
= "centaur_tool_cloudwatch" + [build-system] requires = ["hatchling"] build-backend = "hatchling.build"