diff --git a/packages/opencode/src/cli/cmd/tui/thread.ts b/packages/opencode/src/cli/cmd/tui/thread.ts
index 8e0a7b04b..d664d1957 100644
--- a/packages/opencode/src/cli/cmd/tui/thread.ts
+++ b/packages/opencode/src/cli/cmd/tui/thread.ts
@@ -152,6 +152,36 @@ export const TuiThreadCommand = cmd({
       process.on("unhandledRejection", error)
       process.on("SIGUSR2", reload)
 
+      // altimate_change start — crash: flush worker traces on signals
+      // Bun Workers don't receive OS signals — only the main thread does.
+      // On SIGINT/SIGTERM/SIGHUP, terminate the worker so its "exit" handler
+      // fires and flushes all active session traces to disk.
+      // After cleanup, remove the handler and re-raise the signal to restore
+      // default behavior (process termination). This avoids swallowing signals
+      // which would leave the process running after a kill.
+      const makeSignalHandler = (signal: NodeJS.Signals) => {
+        const handler = () => {
+          try {
+            worker.terminate()
+            // NOTE(review): presumably blocks the main thread so the worker has time
+            // to run its "exit" handler before the signal is re-raised — confirm 50 ms.
+            Bun.sleepSync(50)
+          } catch {
+            // best-effort — crash handler must never throw
+          }
+          process.off(signal, handler)
+          process.kill(process.pid, signal)
+        }
+        return handler
+      }
+      const onSigint = makeSignalHandler("SIGINT")
+      const onSigterm = makeSignalHandler("SIGTERM")
+      const onSighup = makeSignalHandler("SIGHUP")
+      process.on("SIGINT", onSigint)
+      process.on("SIGTERM", onSigterm)
+      process.on("SIGHUP", onSighup)
+      // altimate_change end
+
       let stopped = false
       const stop = async () => {
         if (stopped) return
@@ -159,6 +189,11 @@ export const TuiThreadCommand = cmd({
         process.off("uncaughtException", error)
         process.off("unhandledRejection", error)
         process.off("SIGUSR2", reload)
+        // altimate_change start — crash: remove emergency handlers on clean shutdown
+        process.off("SIGINT", onSigint)
+        process.off("SIGTERM", onSigterm)
+        process.off("SIGHUP", onSighup)
+        // altimate_change end
         await withTimeout(client.call("shutdown", undefined), 5000).catch((error) => {
           Log.Default.warn("worker shutdown failed", {
             error: error instanceof Error ? error.message : String(error),
diff --git a/packages/opencode/src/cli/cmd/tui/worker.ts b/packages/opencode/src/cli/cmd/tui/worker.ts
index 3e2b78de1..2f9bf9f5a 100644
--- a/packages/opencode/src/cli/cmd/tui/worker.ts
+++ b/packages/opencode/src/cli/cmd/tui/worker.ts
@@ -34,6 +34,12 @@ process.on("uncaughtException", (e) => {
   Log.Default.error("exception", {
     e: e instanceof Error ? e.message : e,
   })
+  // altimate_change start — crash: flush traces on uncaught exception
+  // After logging, write all active traces to disk so crash context is preserved.
+  // The process may continue or exit depending on the exception — either way the
+  // trace snapshot will reflect the crash.
+  flushAllTracesSync(`Uncaught exception: ${e instanceof Error ? e.message : String(e)}`)
+  // altimate_change end
 })
 
 // Subscribe to global events and forward them via RPC
@@ -108,12 +114,17 @@ function getOrCreateTrace(sessionID: string): Trace | null {
 const startEventStream = (input: { directory: string; workspaceID?: string }) => {
   if (eventStream.abort) eventStream.abort.abort()
 
-  // Clear stale per-stream trace state before starting a new stream instance
+  // altimate_change start — crash: flush stale traces before clearing
+  // End any in-flight traces, then clear the per-stream trace state.
+  // NOTE(review): endTrace() is async and is not awaited, so nothing is flushed
+  // synchronously here; once the maps are cleared, flushAllTracesSync() can no
+  // longer reach these traces if a crash happens while teardown is in flight.
   for (const [, trace] of sessionTraces) {
     void trace.endTrace().catch(() => {})
   }
   sessionTraces.clear()
   sessionUserMsgIds.clear()
+  // altimate_change end
 
   const abort = new AbortController()
   eventStream.abort = abort
@@ -236,9 +247,19 @@ const startEventStream = (input: { directory: string; workspaceID?: string }) =>
         if (status === "idle" && sid) {
           const trace = sessionTraces.get(sid)
           if (trace) {
-            void trace.endTrace().catch(() => {})
-            sessionTraces.delete(sid)
-            sessionUserMsgIds.delete(sid)
+            // altimate_change start — crash: defer deletion until endTrace() completes
+            // Keep the trace in sessionTraces during async teardown so
+            // flushAllTracesSync() can still reach it if a crash occurs
+            // while endTrace() is in flight.
+            void trace.endTrace().catch(() => {}).finally(() => {
+              // Only drop the entries if this trace is still the one registered for
+              // the session — the maps may have been cleared and repopulated by a
+              // stream restart while endTrace() was in flight.
+              if (sessionTraces.get(sid) !== trace) return
+              sessionTraces.delete(sid)
+              sessionUserMsgIds.delete(sid)
+            })
+            // altimate_change end
           }
         }
       }
@@ -325,6 +346,33 @@ export const rpc = {
 
 Rpc.listen(rpc)
 
+// altimate_change start — crash: flush active traces on unexpected exit
+// When the worker is terminated (via worker.terminate() from the main thread,
+// or on uncaught exceptions), write all in-flight traces to disk synchronously.
+//
+// NOTE: Bun Workers do NOT receive OS signals (SIGINT, SIGTERM, SIGHUP) —
+// those are delivered only to the main thread. Signal-based flush is handled
+// in thread.ts by terminating the worker, which triggers the "exit" event here.
+let firstFlushReason: string | undefined
+function flushAllTracesSync(reason: string) {
+  // Preserve the most specific reason from the first flush (e.g., the uncaught
+  // exception message) even if a later handler (exit) calls again with a
+  // generic reason. Subsequent calls still flush — new traces may have been
+  // created since the first call.
+  const effectiveReason = firstFlushReason ?? reason
+  firstFlushReason ??= reason
+  for (const [, trace] of sessionTraces) {
+    try {
+      trace.flushSync(effectiveReason)
+    } catch {
+      // flushSync is best-effort — must never throw in an exit handler
+    }
+  }
+}
+
+process.once("exit", () => { flushAllTracesSync("Process exited") })
+// altimate_change end
+
 function getAuthorizationHeader(): string | undefined {
   const password = Flag.OPENCODE_SERVER_PASSWORD
   if (!password) return undefined
diff --git a/packages/opencode/src/index.ts b/packages/opencode/src/index.ts
index 9a32dd967..a051cccb4 100644
--- a/packages/opencode/src/index.ts
+++ b/packages/opencode/src/index.ts
@@ -46,6 +46,9 @@ import { Database } from "./storage/db"
 // altimate_change start - telemetry import
 import { Telemetry } from "./telemetry"
 // altimate_change end
+// altimate_change start — crash: import Trace for crash handlers
+import { Trace } from "./altimate/observability/tracing"
+// altimate_change end
 // altimate_change start - welcome banner
 import { showWelcomeBannerIfNeeded } from "./cli/welcome"
 // altimate_change end
@@ -60,12 +63,32 @@ process.on("uncaughtException", (e) => {
   Log.Default.error("exception", {
     e: e instanceof Error ? e.message : e,
   })
+  // altimate_change start — crash: flush active trace on uncaught exception
+  // Trace.active is set by run.ts (headless mode only — TUI traces live in
+  // the worker's isolated memory and are flushed via worker.terminate()).
+  // This is a safety net for the headless path where run.ts registers its
+  // own handlers but an exception could bubble past them.
+  try {
+    Trace.active?.flushSync(`Uncaught exception: ${e instanceof Error ? e.message : String(e)}`)
+  } catch {
+    // Trace module may not be initialized — best-effort
+  }
+  // altimate_change end
 })
 
 // Ensure the process exits on terminal hangup (eg. closing the terminal tab).
 // Without this, long-running commands like `serve` block on a never-resolving
 // promise and survive as orphaned processes.
-process.on("SIGHUP", () => process.exit())
+// altimate_change start — crash: flush active trace before SIGHUP exit
+process.on("SIGHUP", () => {
+  try {
+    Trace.active?.flushSync("Terminal hangup (SIGHUP)")
+  } catch {
+    // best-effort
+  }
+  process.exit()
+})
+// altimate_change end
 
 let cli = yargs(hideBin(process.argv))
   .parserConfiguration({ "populate--": true })