diff --git a/.github/workflows/docs-links-pr.yaml b/.github/workflows/docs-links-pr.yaml new file mode 100644 index 000000000..3d43f062e --- /dev/null +++ b/.github/workflows/docs-links-pr.yaml @@ -0,0 +1,63 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +name: Docs Links PR + +on: + pull_request: + branches: [main] + types: [opened, reopened, synchronize] + paths: + - "**/*.md" + - ".github/workflows/docs-links-pr.yaml" + - "test/e2e/e2e-cloud-experimental/check-docs.sh" + +permissions: + contents: read + +jobs: + markdown-links: + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Determine changed markdown files + id: changed + shell: bash + run: | + set -euo pipefail + base="${{ github.event.pull_request.base.sha }}" + head="${{ github.event.pull_request.head.sha }}" + mapfile -t md_files < <( + git diff --name-only --diff-filter=ACMR "$base" "$head" -- \ + '*.md' \ + ':(exclude)node_modules/**' \ + ':(exclude)dist/**' \ + ':(exclude)vendor/**' \ + ':(exclude)build/**' \ + | LC_ALL=C sort -u + ) + + if [[ "${#md_files[@]}" -eq 0 ]]; then + echo "has_files=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + echo "has_files=true" >> "$GITHUB_OUTPUT" + { + echo "files<<EOF" + printf '%s\n' "${md_files[@]}" + echo "EOF" + } >> "$GITHUB_OUTPUT" + + - name: Run markdown link checker + if: steps.changed.outputs.has_files == 'true' + shell: bash + run: | + set -euo pipefail + mapfile -t md_files <<< "${{ steps.changed.outputs.files }}" + bash test/e2e/e2e-cloud-experimental/check-docs.sh --only-links --local-only "${md_files[@]}" diff --git a/test/check-docs-links.test.js b/test/check-docs-links.test.js new file mode 100644 index 000000000..31242ae4d --- /dev/null +++ b/test/check-docs-links.test.js @@ -0,0 +1,129 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect } from "vitest"; +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +const CHECK_DOCS = path.join(import.meta.dirname, "e2e", "e2e-cloud-experimental", "check-docs.sh"); + +function runCheckDocs(filePath) { + return spawnSync("bash", [CHECK_DOCS, "--only-links", "--local-only", filePath], { + encoding: "utf-8", + }); +} + +describe("check-docs link validation", () => { + it("reports broken local markdown links with source line numbers", () => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-check-docs-")); + const mdPath = path.join(tempDir, "guide.md"); + fs.writeFileSync(path.join(tempDir, "exists.md"), "# ok\n"); + fs.writeFileSync( + mdPath, + [ + "# Guide", + "", + "[working](./exists.md)", + "[broken](./missing.md)", + "```md", + "[ignored](./inside-code-fence.md)", + "```", + "", + ].join("\n"), + ); + + const result = runCheckDocs(mdPath); + + expect(result.status).toBe(1); + expect(`${result.stdout}${result.stderr}`).toContain(`broken local link in ${mdPath}:4 -> ./missing.md`); + expect(`${result.stdout}${result.stderr}`).not.toContain("inside-code-fence.md"); + }); + + it("ignores broken links inside fenced code blocks", () => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-check-docs-codefence-")); + const mdPath = path.join(tempDir, "guide.md"); + fs.writeFileSync( + mdPath, + [ + "# Guide", + "", + "```md", + "[example](./missing.md)", + "```", + "", + ].join("\n"), + ); + + const result = runCheckDocs(mdPath); + + expect(result.status).toBe(0); + }); + + it("ignores broken links inside tilde-fenced code blocks", () => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-check-docs-tildefence-")); + const mdPath = path.join(tempDir, "guide.md"); + fs.writeFileSync( + mdPath, + [ + "# Guide", + "", + "~~~md", + "[example](./missing.md)", + 
"~~~", + "", + ].join("\n"), + ); + + const result = runCheckDocs(mdPath); + + expect(result.status).toBe(0); + }); + + it("keeps scanning disabled for mismatched or shorter fence closers", () => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-check-docs-mixedfence-")); + const mdPath = path.join(tempDir, "guide.md"); + fs.writeFileSync( + mdPath, + [ + "# Guide", + "", + "~~~~md", + "[still-ignored](./inside-code-fence.md)", + "```", + "[also-ignored](./inside-shorter-fence.md)", + "~~~~", + "", + ].join("\n"), + ); + + const result = runCheckDocs(mdPath); + + expect(result.status).toBe(0); + expect(`${result.stdout}${result.stderr}`).not.toContain("inside-code-fence.md"); + expect(`${result.stdout}${result.stderr}`).not.toContain("inside-shorter-fence.md"); + }); + + it("does not treat fence markers with trailing text as closing fences", () => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-check-docs-fenceclose-")); + const mdPath = path.join(tempDir, "guide.md"); + fs.writeFileSync( + mdPath, + [ + "# Guide", + "", + "```md", + "```not-a-close", + "[still-ignored](./inside-code-fence.md)", + "```", + "", + ].join("\n"), + ); + + const result = runCheckDocs(mdPath); + + expect(result.status).toBe(0); + expect(`${result.stdout}${result.stderr}`).not.toContain("inside-code-fence.md"); + }); +}); diff --git a/test/e2e/e2e-cloud-experimental/check-docs.sh b/test/e2e/e2e-cloud-experimental/check-docs.sh index 973476a44..71c1a1190 100755 --- a/test/e2e/e2e-cloud-experimental/check-docs.sh +++ b/test/e2e/e2e-cloud-experimental/check-docs.sh @@ -144,7 +144,7 @@ run_cli_check() { log "[cli] excluded: openshell, /nemoclaw slash, deprecated nemoclaw setup (not in --help)" log "[cli] phase 1/2: extract normalized usage lines from --help" - NO_COLOR=1 "$NODE" "$CLI_JS" --help 2>&1 | perl -CS -ne ' + NO_COLOR=1 "$NODE" "$CLI_JS" --help 2>&1 | LC_ALL=C perl -CS -ne ' s/\e\[[0-9;]*m//g; next unless /^\s*nemoclaw\s+/; if 
(/^\s*nemoclaw\s+(.+)/) { @@ -168,7 +168,7 @@ run_cli_check() { # log text: backticks are documentation markers, not command substitution log '[cli] phase 2/2: extract ### `nemoclaw …` headings from commands reference' # Allow optional MyST suffix on the same line, e.g. ### `nemoclaw onboard` {#anchor} - grep -E '^### `nemoclaw ' "$COMMANDS_MD" | perl -CS -ne ' + grep -E '^### `nemoclaw ' "$COMMANDS_MD" | LC_ALL=C perl -CS -ne ' if (/^### `([^`]+)`\s*(?:\{[^}]+\})?\s*$/) { print "$1\n"; } ' | LC_ALL=C sort -u >"$_tmp/doc.txt" @@ -221,16 +221,30 @@ collect_default_docs() { } extract_targets() { - perl -CS -ne ' - if (/^\s*```/) { $in = !$in; next; } + LC_ALL=C perl -CS -ne ' + if (/^\s*(`{3,}|~{3,})(.*)$/) { + my $fence = $1; + my $rest = $2; + my $char = substr($fence, 0, 1); + my $length = length($fence); + if (!$in) { + ($in, $fch, $flen) = (1, $char, $length); + next; + } + if ($char eq $fch && $length >= $flen && $rest =~ /^\s*$/) { + ($in, $fch, $flen) = (0, "", 0); + next; + } + } next if $in; - while (/\!?\[[^\]]*\]\(([^)\s]+)(?:\s+["'"'"'][^)"'"'"']*["'"'"'])?\)/g) { print "$1\n"; } - while (/<(https?:[^>\s]+)>/g) { print "$1\n"; } + my $line = $.; + while (/\!?\[[^\]]*\]\(([^)\s]+)(?:\s+["'"'"'][^)"'"'"']*["'"'"'])?\)/g) { print $line . "\t" . $1 . "\n"; } + while (/<(https?:[^>\s]+)>/g) { print $line . "\t" . $1 . 
"\n"; } ' -- "$1" } check_local_ref() { - local md_path="$1" target="$2" + local md_path="$1" line_no="$2" target="$3" local stripped stripped="${target%%\#*}" @@ -251,7 +265,7 @@ check_local_ref() { if (cd "$(dirname "$md_path")" && [[ -e "$stripped" ]]); then return 0 fi - echo "check-docs: [links] broken local link in $md_path -> $target" >&2 + echo "check-docs: [links] broken local link in $md_path:$line_no -> $target" >&2 return 1 } @@ -367,11 +381,11 @@ run_links_check() { failures=1 continue fi - local target rc - while IFS= read -r target || [[ -n "$target" ]]; do + local line_no target rc + while IFS=$'\t' read -r line_no target || [[ -n "${target:-}" ]]; do [[ -z "$target" ]] && continue set +e - check_local_ref "$md" "$target" + check_local_ref "$md" "$line_no" "$target" rc=$? set -e if [[ "$rc" -eq 0 ]]; then