diff --git a/.github/workflows/dev-scripts-smoke.yml b/.github/workflows/dev-scripts-smoke.yml
new file mode 100644
index 0000000..b8905fd
--- /dev/null
+++ b/.github/workflows/dev-scripts-smoke.yml
@@ -0,0 +1,31 @@
+name: dev-scripts-smoke
+
+# Regression guard for EVO-1609: the `kill-backend` npm script must not match
+# (and therefore kill) its own invoking shell, while still matching real backend
+# process signatures. Pure pattern assertion — no processes are started or killed.
+
+# Both triggers use the same path filter (this workflow file included) so that an
+# edit to the guard itself re-runs it on pull requests and on pushes to main/develop.
+on:
+  pull_request:
+    paths:
+      - 'package.json'
+      - 'scripts/kill-backend-pattern.test.sh'
+      - '.github/workflows/dev-scripts-smoke.yml'
+  push:
+    branches: [main, develop]
+    paths:
+      - 'package.json'
+      - 'scripts/kill-backend-pattern.test.sh'
+      - '.github/workflows/dev-scripts-smoke.yml'
+
+jobs:
+  kill-backend-pattern:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 20
+      - name: kill-backend pattern must self-exclude and match real backends
+        run: npm run test:kill-backend
diff --git a/README.md b/README.md
index 7002068..0c51ce9 100644
--- a/README.md
+++ b/README.md
@@ -204,6 +204,48 @@ npm run dev:event-receiver # event-receiver — inbound webhook receiver
 npm run dev:event-process # event-process — broker-driven event processor
 ```
 
+### `dev:single` and the `kill-backend` step
+
+`dev:single` is `npm run kill-backend && RUN_MODE=single npm run dev`. The
+`kill-backend` step clears leftover "production-style" backend processes
+(`node dist/main`, `node dist/main.js`, or the watch build `node … dist/main.js`)
+before Nest boots, using:
+
+```bash
+pkill -u "$(id -u)" -f '[n]ode .*dist/(src/)?main(\.js)?( |$)'
+```
+
+- The `[n]` class makes the pattern match a real `node …` process but **not** the
+  literal `[n]ode …` text in `pkill`'s own command line — so the script does not kill
+  its own `sh -c` parent (this was bug EVO-1609: `dev:single` aborted with `Terminated`
+  before boot). 
The self-exclusion covers command lines holding only the bracketed
+  pattern; one that spells out plain `node … dist/main` (e.g. a wrapper shell) still matches.
+- `-u "$(id -u)"` scopes the scan to your own processes — it won't kill backends owned
+  by other users or by containers sharing the host PID namespace.
+- `(src/)?` is defensive: Evo Flow's `nest build` emits `dist/main.js` (tsc strips the
+  `src/` root), but the optional branch keeps the pattern matching a `dist/src/main.js`
+  layout too (the sibling evo-campaign scaffold / a future monorepo build).
+- `main(\.js)?( |$)` blocks lookalikes such as `dist/maintenance.js` / `dist/main-old.js`.
+
+Read-only preview of what `kill-backend` would target — should list only real
+`node …dist/…main` processes, never the shell running the command:
+
+```bash
+pgrep -af '[n]ode .*dist/(src/)?main(\.js)?( |$)'
+```
+
+The ERE is regression-guarded by `npm run test:kill-backend`
+(`scripts/kill-backend-pattern.test.sh`, also run in CI via
+`.github/workflows/dev-scripts-smoke.yml`) — it asserts the pattern matches the real
+prod/Docker/dev-watch signatures and never matches its own command line.
+
+> **Caveat.** `kill-backend` targets detached `node` processes; a foreground
+> `dev:single` / `nest start --watch` session should be stopped with Ctrl-C — if it
+> keeps holding the port, the next boot can fail with `EADDRINUSE`. Separately, a
+> `RUN_MODE=single` boot may still stop further down at the missing Kafka topic
+> `journey_trigger_kafka_queue` (see EVO-1571 / EVO-1200) — that is independent of this
+> fix.
+
 ---
 
 ## Architecture
diff --git a/package.json b/package.json
index 8b65f10..bf40262 100644
--- a/package.json
+++ b/package.json
@@ -20,12 +20,13 @@
     "dev:campaign-sender": "RUN_MODE=campaign-sender npm run dev",
     "dev:event-receiver": "RUN_MODE=event-receiver npm run dev",
     "dev:event-process": "RUN_MODE=event-process npm run dev",
-    "kill-backend": "pkill -f 'node.*backend.*dist/main' 2>/dev/null || true",
+    "kill-backend": "pkill -u \"$(id -u)\" -f '[n]ode .*dist/(src/)?main(\\.js)?( |$)' 2>/dev/null || true",
     "debug": "nest start --debug --watch",
     "start:prod": "node dist/main",
     "lint": "eslint \"{src,apps,libs,test}/**/*.ts\" --fix",
     "typecheck": "tsc -b --noEmit",
     "test": "jest",
+    "test:kill-backend": "bash scripts/kill-backend-pattern.test.sh",
     "test:watch": "jest --watch",
     "test:cov": "jest --coverage",
     "test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand",
diff --git a/scripts/kill-backend-pattern.test.sh b/scripts/kill-backend-pattern.test.sh
new file mode 100755
index 0000000..bbafd9f
--- /dev/null
+++ b/scripts/kill-backend-pattern.test.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+#
+# Regression guard for EVO-1609.
+#
+# The `kill-backend` npm script runs `pkill -f '<ERE>'`. Because `pkill -f` is
+# unanchored over each process's FULL command line, a sloppy ERE matches the very
+# shell that invokes pkill (the literal pattern text sits in that shell's argv) and
+# self-terminates `dev:single` before it can boot.
+#
+# This test extracts the ERE straight from package.json and asserts, with grep (no
+# processes are killed), that it:
+#   * matches the real backend process signatures (prod + dev-watch), and
+#   * does NOT match its own `kill-backend` command line -> the deterministic,
+#     cross-platform proxy for "it will not kill its own parent shell".
+#
+# Run locally: npm run test:kill-backend (or: bash scripts/kill-backend-pattern.test.sh)
+
+set -u
+cd "$(dirname "$0")/.." || { echo "FAIL: cannot cd to the repo root"; exit 1; }
+
+# Read the `kill-backend` script value from package.json, then pull out the
+# single-quoted argument that follows -f. When sed finds no match it prints its
+# input unchanged, which the guard below treats as an extraction failure.
+KILL_CMD="$(node -e "process.stdout.write(require('./package.json').scripts['kill-backend'])")"
+PATTERN="$(printf '%s' "$KILL_CMD" | sed -E "s/.*-f '([^']*)'.*/\1/")"
+
+if [ -z "$PATTERN" ] || [ "$PATTERN" = "$KILL_CMD" ]; then
+  echo "FAIL: could not extract the -f '<ERE>' pattern from kill-backend script value:"
+  echo " $KILL_CMD"
+  exit 1
+fi
+
+echo "kill-backend script : $KILL_CMD"
+echo "extracted ERE : $PATTERN"
+echo
+
+FAIL=0
+# matches TEXT -> exit status 0 when the extracted ERE matches TEXT. `-e` keeps grep
+# from parsing a pattern that starts with `-` as an option.
+matches() { printf '%s' "$1" | grep -Eq -e "$PATTERN"; }
+
+# assert_match LABEL TEXT   -> records a failure unless the ERE matches TEXT.
+# assert_nomatch LABEL TEXT -> records a failure if the ERE matches TEXT.
+assert_match() { if matches "$2"; then echo "ok match : $1"; else echo "FAIL want match : $1 -> '$2'"; FAIL=1; fi; }
+assert_nomatch() { if matches "$2"; then echo "FAIL want NO match: $1 -> '$2'"; FAIL=1; else echo "ok no-match : $1"; fi; }
+
+# --- must match: the real backend process signatures (evo-flow emits dist/main.js;
+# tsc strips the src/ root since every input lives under src/) ---
+assert_match "start:prod" "node dist/main"
+assert_match "docker CMD" "node dist/main.js"
+assert_match "dumb-init wrapper" "dumb-init -- node dist/main.js"
+assert_match "nest start --watch" "node --enable-source-maps dist/main.js"
+assert_match "with RUN_MODE + args" "RUN_MODE=single node dist/main.js --foo"
+# defensive: the (src/)? branch also covers a dist/src/main.js layout (sibling
+# evo-campaign scaffold / a future monorepo build) so the script stays portable.
+assert_match "alt dist/src/ layout" "node --enable-source-maps dist/src/main.js"
+
+# --- must NOT match: lookalikes (no false positives) ---
+assert_nomatch "maintenance script" "node dist/maintenance.js"
+assert_nomatch "main-old artifact" "node dist/main-old.js"
+
+# --- must NOT match: its own command line (the self-kill guard for EVO-1609) ---
+assert_nomatch "self (kill-backend cmd)" "$KILL_CMD"
+assert_nomatch "self (sh -c wrapper)" "sh -c $KILL_CMD"
+
+echo
+if [ "$FAIL" -ne 0 ]; then
+  echo "RESULT: FAILED — kill-backend pattern regressed (see EVO-1609)."
+  exit 1
+fi
+echo "RESULT: PASS"