From e4fa005bd5fbebaed942adfd28b30458141066b1 Mon Sep 17 00:00:00 2001 From: 13ernkastel Date: Tue, 31 Mar 2026 13:43:53 +0800 Subject: [PATCH 1/6] ci(docs): check changed markdown links on pull requests --- .github/workflows/docs-links-pr.yaml | 55 ++++++++++++++++ test/check-docs-links.test.js | 63 +++++++++++++++++++ test/e2e/e2e-cloud-experimental/check-docs.sh | 15 ++--- 3 files changed, 126 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/docs-links-pr.yaml create mode 100644 test/check-docs-links.test.js diff --git a/.github/workflows/docs-links-pr.yaml b/.github/workflows/docs-links-pr.yaml new file mode 100644 index 000000000..f4c885588 --- /dev/null +++ b/.github/workflows/docs-links-pr.yaml @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +name: Docs Links PR + +on: + pull_request: + branches: [main] + types: [opened, reopened, synchronize] + paths: + - "**/*.md" + - ".github/workflows/docs-links-pr.yaml" + - "test/e2e/e2e-cloud-experimental/check-docs.sh" + +permissions: + contents: read + +jobs: + markdown-links: + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Determine changed markdown files + id: changed + shell: bash + run: | + set -euo pipefail + base="${{ github.event.pull_request.base.sha }}" + head="${{ github.event.pull_request.head.sha }}" + mapfile -t md_files < <(git diff --name-only --diff-filter=ACMR "$base" "$head" -- '*.md' | LC_ALL=C sort -u) + + if [[ "${#md_files[@]}" -eq 0 ]]; then + echo "has_files=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + echo "has_files=true" >> "$GITHUB_OUTPUT" + { + echo "files<<EOF" + printf '%s\n' "${md_files[@]}" + echo "EOF" + } >> "$GITHUB_OUTPUT" + + - name: Run markdown link checker + if: steps.changed.outputs.has_files == 'true' + shell: bash + run: | + set -euo pipefail + mapfile -t md_files <<< "${{ steps.changed.outputs.files }}" + bash 
test/e2e/e2e-cloud-experimental/check-docs.sh --only-links --local-only "${md_files[@]}" diff --git a/test/check-docs-links.test.js b/test/check-docs-links.test.js new file mode 100644 index 000000000..f4ba6dcbb --- /dev/null +++ b/test/check-docs-links.test.js @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect } from "vitest"; +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +const CHECK_DOCS = path.join(import.meta.dirname, "e2e", "e2e-cloud-experimental", "check-docs.sh"); + +function runCheckDocs(filePath) { + return spawnSync("bash", [CHECK_DOCS, "--only-links", "--local-only", filePath], { + encoding: "utf-8", + }); +} + +describe("check-docs link validation", () => { + it("reports broken local markdown links with source line numbers", () => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-check-docs-")); + const mdPath = path.join(tempDir, "guide.md"); + fs.writeFileSync(path.join(tempDir, "exists.md"), "# ok\n"); + fs.writeFileSync( + mdPath, + [ + "# Guide", + "", + "[working](./exists.md)", + "[broken](./missing.md)", + "```md", + "[ignored](./inside-code-fence.md)", + "```", + "", + ].join("\n"), + ); + + const result = runCheckDocs(mdPath); + + expect(result.status).toBe(1); + expect(`${result.stdout}${result.stderr}`).toContain(`broken local link in ${mdPath}:4 -> ./missing.md`); + expect(`${result.stdout}${result.stderr}`).not.toContain("inside-code-fence.md"); + }); + + it("ignores broken links inside fenced code blocks", () => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-check-docs-codefence-")); + const mdPath = path.join(tempDir, "guide.md"); + fs.writeFileSync( + mdPath, + [ + "# Guide", + "", + "```md", + "[example](./missing.md)", + "```", + "", + ].join("\n"), + ); + + const 
result = runCheckDocs(mdPath); + + expect(result.status).toBe(0); + }); +}); diff --git a/test/e2e/e2e-cloud-experimental/check-docs.sh b/test/e2e/e2e-cloud-experimental/check-docs.sh index 973476a44..623150ec0 100755 --- a/test/e2e/e2e-cloud-experimental/check-docs.sh +++ b/test/e2e/e2e-cloud-experimental/check-docs.sh @@ -224,13 +224,14 @@ extract_targets() { perl -CS -ne ' if (/^\s*```/) { $in = !$in; next; } next if $in; - while (/\!?\[[^\]]*\]\(([^)\s]+)(?:\s+["'"'"'][^)"'"'"']*["'"'"'])?\)/g) { print "$1\n"; } - while (/<(https?:[^>\s]+)>/g) { print "$1\n"; } + my $line = $.; + while (/\!?\[[^\]]*\]\(([^)\s]+)(?:\s+["'"'"'][^)"'"'"']*["'"'"'])?\)/g) { print $line . "\t" . $1 . "\n"; } + while (/<(https?:[^>\s]+)>/g) { print $line . "\t" . $1 . "\n"; } ' -- "$1" } check_local_ref() { - local md_path="$1" target="$2" + local md_path="$1" line_no="$2" target="$3" local stripped stripped="${target%%\#*}" @@ -251,7 +252,7 @@ check_local_ref() { if (cd "$(dirname "$md_path")" && [[ -e "$stripped" ]]); then return 0 fi - echo "check-docs: [links] broken local link in $md_path -> $target" >&2 + echo "check-docs: [links] broken local link in $md_path:$line_no -> $target" >&2 return 1 } @@ -367,11 +368,11 @@ run_links_check() { failures=1 continue fi - local target rc - while IFS= read -r target || [[ -n "$target" ]]; do + local line_no target rc + while IFS=$'\t' read -r line_no target || [[ -n "${target:-}" ]]; do [[ -z "$target" ]] && continue set +e - check_local_ref "$md" "$target" + check_local_ref "$md" "$line_no" "$target" rc=$? 
set -e if [[ "$rc" -eq 0 ]]; then From 113bbf44b690bc90c60d50cd1dccf29246764991 Mon Sep 17 00:00:00 2001 From: 13ernkastel Date: Tue, 31 Mar 2026 13:53:30 +0800 Subject: [PATCH 2/6] ci(docs): stabilize doc checker locale handling --- test/e2e/e2e-cloud-experimental/check-docs.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/e2e/e2e-cloud-experimental/check-docs.sh b/test/e2e/e2e-cloud-experimental/check-docs.sh index 623150ec0..0bdc41208 100755 --- a/test/e2e/e2e-cloud-experimental/check-docs.sh +++ b/test/e2e/e2e-cloud-experimental/check-docs.sh @@ -144,7 +144,7 @@ run_cli_check() { log "[cli] excluded: openshell, /nemoclaw slash, deprecated nemoclaw setup (not in --help)" log "[cli] phase 1/2: extract normalized usage lines from --help" - NO_COLOR=1 "$NODE" "$CLI_JS" --help 2>&1 | perl -CS -ne ' + NO_COLOR=1 "$NODE" "$CLI_JS" --help 2>&1 | LC_ALL=C perl -CS -ne ' s/\e\[[0-9;]*m//g; next unless /^\s*nemoclaw\s+/; if (/^\s*nemoclaw\s+(.+)/) { @@ -168,7 +168,7 @@ run_cli_check() { # log text: backticks are documentation markers, not command substitution log '[cli] phase 2/2: extract ### `nemoclaw …` headings from commands reference' # Allow optional MyST suffix on the same line, e.g. 
### `nemoclaw onboard` {#anchor} - grep -E '^### `nemoclaw ' "$COMMANDS_MD" | perl -CS -ne ' + grep -E '^### `nemoclaw ' "$COMMANDS_MD" | LC_ALL=C perl -CS -ne ' if (/^### `([^`]+)`\s*(?:\{[^}]+\})?\s*$/) { print "$1\n"; } ' | LC_ALL=C sort -u >"$_tmp/doc.txt" @@ -221,7 +221,7 @@ collect_default_docs() { } extract_targets() { - perl -CS -ne ' + LC_ALL=C perl -CS -ne ' if (/^\s*```/) { $in = !$in; next; } next if $in; my $line = $.; From 1ff2346accfe9ad0530ba344e85e49521248ebc7 Mon Sep 17 00:00:00 2001 From: 13ernkastel Date: Tue, 31 Mar 2026 14:16:35 +0800 Subject: [PATCH 3/6] ci(docs): tighten markdown link filtering --- .github/workflows/docs-links-pr.yaml | 10 +++++++++- test/check-docs-links.test.js | 20 +++++++++++++++++++ test/e2e/e2e-cloud-experimental/check-docs.sh | 2 +- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs-links-pr.yaml b/.github/workflows/docs-links-pr.yaml index f4c885588..3d43f062e 100644 --- a/.github/workflows/docs-links-pr.yaml +++ b/.github/workflows/docs-links-pr.yaml @@ -32,7 +32,15 @@ jobs: set -euo pipefail base="${{ github.event.pull_request.base.sha }}" head="${{ github.event.pull_request.head.sha }}" - mapfile -t md_files < <(git diff --name-only --diff-filter=ACMR "$base" "$head" -- '*.md' | LC_ALL=C sort -u) + mapfile -t md_files < <( + git diff --name-only --diff-filter=ACMR "$base" "$head" -- \ + '*.md' \ + ':(exclude)node_modules/**' \ + ':(exclude)dist/**' \ + ':(exclude)vendor/**' \ + ':(exclude)build/**' \ + | LC_ALL=C sort -u + ) if [[ "${#md_files[@]}" -eq 0 ]]; then echo "has_files=false" >> "$GITHUB_OUTPUT" diff --git a/test/check-docs-links.test.js b/test/check-docs-links.test.js index f4ba6dcbb..3acc18f2b 100644 --- a/test/check-docs-links.test.js +++ b/test/check-docs-links.test.js @@ -60,4 +60,24 @@ describe("check-docs link validation", () => { expect(result.status).toBe(0); }); + + it("ignores broken links inside tilde-fenced code blocks", () => { + const tempDir = 
fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-check-docs-tildefence-")); + const mdPath = path.join(tempDir, "guide.md"); + fs.writeFileSync( + mdPath, + [ + "# Guide", + "", + "~~~md", + "[example](./missing.md)", + "~~~", + "", + ].join("\n"), + ); + + const result = runCheckDocs(mdPath); + + expect(result.status).toBe(0); + }); }); diff --git a/test/e2e/e2e-cloud-experimental/check-docs.sh b/test/e2e/e2e-cloud-experimental/check-docs.sh index 0bdc41208..9fbb31647 100755 --- a/test/e2e/e2e-cloud-experimental/check-docs.sh +++ b/test/e2e/e2e-cloud-experimental/check-docs.sh @@ -222,7 +222,7 @@ collect_default_docs() { extract_targets() { LC_ALL=C perl -CS -ne ' - if (/^\s*```/) { $in = !$in; next; } + if (/^\s*(```|~~~)/) { $in = !$in; next; } next if $in; my $line = $.; while (/\!?\[[^\]]*\]\(([^)\s]+)(?:\s+["'"'"'][^)"'"'"']*["'"'"'])?\)/g) { print $line . "\t" . $1 . "\n"; } From 8af07c859fcd7822f214cc3be0e782dd17e8a2f4 Mon Sep 17 00:00:00 2001 From: 13ernkastel Date: Tue, 31 Mar 2026 14:26:21 +0800 Subject: [PATCH 4/6] test(docs): handle mixed markdown fence delimiters --- test/check-docs-links.test.js | 24 +++++++++++++++++++ test/e2e/e2e-cloud-experimental/check-docs.sh | 14 ++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/test/check-docs-links.test.js b/test/check-docs-links.test.js index 3acc18f2b..c4dec2be3 100644 --- a/test/check-docs-links.test.js +++ b/test/check-docs-links.test.js @@ -80,4 +80,28 @@ describe("check-docs link validation", () => { expect(result.status).toBe(0); }); + + it("keeps scanning disabled for mismatched or shorter fence closers", () => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-check-docs-mixedfence-")); + const mdPath = path.join(tempDir, "guide.md"); + fs.writeFileSync( + mdPath, + [ + "# Guide", + "", + "~~~~md", + "[still-ignored](./inside-code-fence.md)", + "```", + "[also-ignored](./inside-shorter-fence.md)", + "~~~~", + "", + ].join("\n"), + ); + + const result = 
runCheckDocs(mdPath); + + expect(result.status).toBe(0); + expect(`${result.stdout}${result.stderr}`).not.toContain("inside-code-fence.md"); + expect(`${result.stdout}${result.stderr}`).not.toContain("inside-shorter-fence.md"); + }); }); diff --git a/test/e2e/e2e-cloud-experimental/check-docs.sh b/test/e2e/e2e-cloud-experimental/check-docs.sh index 9fbb31647..731acf54e 100755 --- a/test/e2e/e2e-cloud-experimental/check-docs.sh +++ b/test/e2e/e2e-cloud-experimental/check-docs.sh @@ -222,7 +222,19 @@ collect_default_docs() { extract_targets() { LC_ALL=C perl -CS -ne ' - if (/^\s*(```|~~~)/) { $in = !$in; next; } + if (/^\s*(`{3,}|~{3,})/) { + my $fence = $1; + my $char = substr($fence, 0, 1); + my $length = length($fence); + if (!$in) { + ($in, $fch, $flen) = (1, $char, $length); + next; + } + if ($char eq $fch && $length >= $flen) { + ($in, $fch, $flen) = (0, "", 0); + next; + } + } next if $in; my $line = $.; while (/\!?\[[^\]]*\]\(([^)\s]+)(?:\s+["'"'"'][^)"'"'"']*["'"'"'])?\)/g) { print $line . "\t" . $1 . 
"\n"; } From 8d724c5525a2dbdb21ac491a9b6e04db095ba211 Mon Sep 17 00:00:00 2001 From: 13ernkastel Date: Tue, 31 Mar 2026 14:35:05 +0800 Subject: [PATCH 5/6] test(docs): reject trailing text on fence closers --- test/check-docs-links.test.js | 22 +++++++++++++++++++ test/e2e/e2e-cloud-experimental/check-docs.sh | 5 +++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/test/check-docs-links.test.js b/test/check-docs-links.test.js index c4dec2be3..31242ae4d 100644 --- a/test/check-docs-links.test.js +++ b/test/check-docs-links.test.js @@ -104,4 +104,26 @@ describe("check-docs link validation", () => { expect(`${result.stdout}${result.stderr}`).not.toContain("inside-code-fence.md"); expect(`${result.stdout}${result.stderr}`).not.toContain("inside-shorter-fence.md"); }); + + it("does not treat fence markers with trailing text as closing fences", () => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-check-docs-fenceclose-")); + const mdPath = path.join(tempDir, "guide.md"); + fs.writeFileSync( + mdPath, + [ + "# Guide", + "", + "```md", + "```not-a-close", + "[still-ignored](./inside-code-fence.md)", + "```", + "", + ].join("\n"), + ); + + const result = runCheckDocs(mdPath); + + expect(result.status).toBe(0); + expect(`${result.stdout}${result.stderr}`).not.toContain("inside-code-fence.md"); + }); }); diff --git a/test/e2e/e2e-cloud-experimental/check-docs.sh b/test/e2e/e2e-cloud-experimental/check-docs.sh index 731acf54e..71c1a1190 100755 --- a/test/e2e/e2e-cloud-experimental/check-docs.sh +++ b/test/e2e/e2e-cloud-experimental/check-docs.sh @@ -222,15 +222,16 @@ collect_default_docs() { extract_targets() { LC_ALL=C perl -CS -ne ' - if (/^\s*(`{3,}|~{3,})/) { + if (/^\s*(`{3,}|~{3,})(.*)$/) { my $fence = $1; + my $rest = $2; my $char = substr($fence, 0, 1); my $length = length($fence); if (!$in) { ($in, $fch, $flen) = (1, $char, $length); next; } - if ($char eq $fch && $length >= $flen) { + if ($char eq $fch && $length >= $flen && $rest 
=~ /^\s*$/) { ($in, $fch, $flen) = (0, "", 0); next; } From be3c9654e45bbb4a9f13a606ae14e0ad9d4434d5 Mon Sep 17 00:00:00 2001 From: 13ernkastel Date: Tue, 31 Mar 2026 15:36:57 +0800 Subject: [PATCH 6/6] chore(ci): rerun PR checks after DCO sign-off update Signed-off-by: 13ernkastel