diff --git a/bin/pty b/bin/pty index ad5fcbb..08a9fbf 100755 --- a/bin/pty +++ b/bin/pty @@ -1,8 +1,9 @@ #!/usr/bin/env node import { existsSync } from 'node:fs'; -import { spawnSync } from 'node:child_process'; +import { spawn } from 'node:child_process'; import { fileURLToPath } from 'node:url'; import { dirname, join } from 'node:path'; +import { constants as osConstants } from 'node:os'; const __dirname = dirname(fileURLToPath(import.meta.url)); const cli = join(__dirname, '..', 'dist', 'cli.js'); @@ -12,9 +13,38 @@ if (!existsSync(cli)) { process.exit(1); } -const result = spawnSync(process.execPath, [cli, ...process.argv.slice(2)], { +// `spawn` (not `spawnSync`) so the wrapper stays event-loop-live and can +// react to signals it receives. Without this, signals delivered to the +// wrapper (e.g. systemd's SIGTERM under KillMode=process) terminated the +// wrapper without ever reaching the child supervisor, leaving an orphaned +// supervisor that kept holding `supervisor.lock` and made every subsequent +// service restart fail with "another supervisor is already running". +const child = spawn(process.execPath, [cli, ...process.argv.slice(2)], { stdio: 'inherit', env: process.env, }); -process.exit(result.status ?? 1); +const FORWARDED_SIGNALS = ['SIGTERM', 'SIGINT', 'SIGHUP', 'SIGQUIT', 'SIGUSR1', 'SIGUSR2']; +for (const sig of FORWARDED_SIGNALS) { + process.on(sig, () => { + // Forward and let the child decide when to exit. The wrapper's own + // exit is driven by the child's 'exit' event so the child has time + // to flush before we propagate its status. + try { child.kill(sig); } catch {} + }); +} + +child.on('exit', (code, signal) => { + if (signal) { + // Mirror shell convention (128 + signum) so callers can distinguish + // signal death from a nonzero exit code. + const signum = osConstants.signals[signal] ?? 0; + process.exit(128 + signum); + } + process.exit(code ?? 1); +}); + +child.on('error', (err) => { + console.error(`pty: failed to spawn cli: ${err.message}`); + process.exit(1); +}); diff --git a/tests/wrapper-signal-forwarding.test.ts b/tests/wrapper-signal-forwarding.test.ts new file mode 100644 index 0000000..7806570 --- /dev/null +++ b/tests/wrapper-signal-forwarding.test.ts @@ -0,0 +1,97 @@ +// Verifies that bin/pty forwards SIGTERM/SIGINT to the inner cli.js child. +// Without forwarding, systemd's KillMode=process leaves the inner supervisor +// orphaned and still holding supervisor.lock — the new unit invocation then +// fails with "another supervisor is already running". + +import { describe, it, expect, afterEach } from "vitest"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; +import { spawn } from "node:child_process"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const wrapperPath = path.join(__dirname, "..", "bin", "pty"); +const nodeBin = process.execPath; + +const sessionDirs: string[] = []; +const trackedPids: number[] = []; +afterEach(() => { + // Belt-and-braces: if a test failed mid-way, kill any inner cli.js it left + // behind so the next run isn't poisoned by an orphan holding the lock. + for (const pid of trackedPids) { + try { process.kill(pid, "SIGKILL"); } catch {} + } + trackedPids.length = 0; + for (const d of sessionDirs) { + try { fs.rmSync(d, { recursive: true, force: true }); } catch {} + } + sessionDirs.length = 0; +}); + +function makeSessionDir(): string { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "pty-wrap-")); + sessionDirs.push(dir); + return dir; +} + +/** Wait until predicate returns true or `timeoutMs` elapses. */ +async function waitFor(predicate: () => boolean, timeoutMs: number): Promise { + const start = Date.now(); + while (Date.now() - start < timeoutMs) { + if (predicate()) return true; + await new Promise((r) => setTimeout(r, 25)); + } + return predicate(); +} + +function isAlive(pid: number): boolean { + try { process.kill(pid, 0); return true; } catch { return false; } +} + +describe("bin/pty signal forwarding", () => { + it("propagates SIGTERM to the inner cli.js so the supervisor releases its lock", async () => { + const sessionDir = makeSessionDir(); + + // `supervisor start` is the canonical long-lived command and the one the + // dev3 regression hit. Its SIGTERM handler calls Supervisor.stop() which + // releases supervisor.lock — so a clean shutdown leaves no lock file. + const wrapper = spawn(nodeBin, [wrapperPath, "supervisor", "start"], { + env: { ...process.env, PTY_SESSION_DIR: sessionDir }, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stdout = ""; + let stderr = ""; + wrapper.stdout?.on("data", (d: Buffer) => { stdout += d.toString(); }); + wrapper.stderr?.on("data", (d: Buffer) => { stderr += d.toString(); }); + + let exitCode: number | null = null; + let exitSignal: NodeJS.Signals | null = null; + wrapper.on("exit", (c, s) => { exitCode = c; exitSignal = s; }); + + // Wait for the supervisor to fully start (it writes supervisor.pid). + const pidPath = path.join(sessionDir, "supervisor", "supervisor.pid"); + const lockPath = path.join(sessionDir, "supervisor.lock"); + const ready = await waitFor(() => fs.existsSync(pidPath) && fs.existsSync(lockPath), 5000); + expect(ready, `supervisor never started; stdout=${stdout} stderr=${stderr}`).toBe(true); + + const innerPid = parseInt(fs.readFileSync(pidPath, "utf-8").trim(), 10); + trackedPids.push(innerPid); + expect(innerPid).toBeGreaterThan(0); + expect(innerPid).not.toBe(wrapper.pid); + expect(isAlive(innerPid)).toBe(true); + + // The actual test: SIGTERM the wrapper, expect the inner cli.js to also die. + wrapper.kill("SIGTERM"); + + const wrapperExited = await waitFor(() => exitCode !== null || exitSignal !== null, 5000); + expect(wrapperExited, "wrapper did not exit after SIGTERM").toBe(true); + + const innerDied = await waitFor(() => !isAlive(innerPid), 5000); + expect(innerDied, `inner cli.js (pid ${innerPid}) survived wrapper SIGTERM — signal not forwarded`).toBe(true); + + // Clean shutdown should release the lock; a SIGKILLed supervisor would leave it. + expect(fs.existsSync(lockPath), "supervisor.lock not released — child shutdown was not graceful").toBe(false); + }, 20000); +});