Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions .github/workflows/docs-links-pr.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Checks local markdown links in the markdown files touched by a pull request.
name: Docs Links PR

on:
  pull_request:
    branches: [main]
    types: [opened, reopened, synchronize]
    # Only run when markdown, this workflow, or the checker script itself changes.
    paths:
      - "**/*.md"
      - ".github/workflows/docs-links-pr.yaml"
      - "test/e2e/e2e-cloud-experimental/check-docs.sh"

permissions:
  contents: read

jobs:
  markdown-links:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          # Full history so both the base and head commits are available to `git diff`.
          fetch-depth: 0

      - name: Determine changed markdown files
        id: changed
        shell: bash
        run: |
          set -euo pipefail
          base="${{ github.event.pull_request.base.sha }}"
          head="${{ github.event.pull_request.head.sha }}"
          # ACMR = added, copied, modified, renamed; deleted files have nothing to check.
          mapfile -t md_files < <(
            git diff --name-only --diff-filter=ACMR "$base" "$head" -- \
              '*.md' \
              ':(exclude)node_modules/**' \
              ':(exclude)dist/**' \
              ':(exclude)vendor/**' \
              ':(exclude)build/**' \
              | LC_ALL=C sort -u
          )

          if [[ "${#md_files[@]}" -eq 0 ]]; then
            echo "has_files=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          echo "has_files=true" >> "$GITHUB_OUTPUT"
          # Multi-line output: one changed file per line between the EOF delimiters.
          {
            echo "files<<EOF"
            printf '%s\n' "${md_files[@]}"
            echo "EOF"
          } >> "$GITHUB_OUTPUT"

      - name: Run markdown link checker
        if: steps.changed.outputs.has_files == 'true'
        shell: bash
        env:
          # File names come from the pull request and are untrusted. Pass them through
          # the environment instead of expanding the expression inside the script text,
          # so a name containing $(...) or backticks is never evaluated by bash.
          CHANGED_MD_FILES: ${{ steps.changed.outputs.files }}
        run: |
          set -euo pipefail
          mapfile -t md_files <<< "$CHANGED_MD_FILES"
          bash test/e2e/e2e-cloud-experimental/check-docs.sh --only-links --local-only "${md_files[@]}"
129 changes: 129 additions & 0 deletions test/check-docs-links.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import { describe, it, expect } from "vitest";
import { spawnSync } from "node:child_process";
import fs from "node:fs";
import os from "node:os";
import path from "node:path";

const CHECK_DOCS = path.join(import.meta.dirname, "e2e", "e2e-cloud-experimental", "check-docs.sh");

/**
 * Invoke check-docs.sh through bash with `--only-links --local-only` for a
 * single markdown file.
 *
 * @param {string} filePath - Path of the markdown file handed to the script.
 * @returns {import("node:child_process").SpawnSyncReturns<string>} The
 *   synchronous spawn result; stdout and stderr are decoded as UTF-8 strings.
 */
function runCheckDocs(filePath) {
  const scriptArgs = [CHECK_DOCS, "--only-links", "--local-only", filePath];
  const spawnOptions = { encoding: "utf-8" };
  return spawnSync("bash", scriptArgs, spawnOptions);
}

/**
 * Build a throwaway fixture directory containing `guide.md`, run the link
 * checker against it, and always remove the directory afterwards.
 *
 * @param {string} prefix - Prefix for the temp directory name (passed to mkdtemp).
 * @param {string[]} guideLines - Lines of `guide.md`; joined with "\n".
 * @param {Record<string, string>} [siblingFiles] - Extra files (name -> content)
 *   written next to `guide.md`, e.g. valid link targets.
 * @returns {{ mdPath: string, status: number | null, output: string }} The path
 *   that was checked, the checker's exit status, and stdout followed by stderr.
 */
function checkGuide(prefix, guideLines, siblingFiles = {}) {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), prefix));
  try {
    const mdPath = path.join(tempDir, "guide.md");
    for (const [name, content] of Object.entries(siblingFiles)) {
      fs.writeFileSync(path.join(tempDir, name), content);
    }
    fs.writeFileSync(mdPath, guideLines.join("\n"));

    const result = runCheckDocs(mdPath);
    return {
      mdPath,
      status: result.status,
      output: `${result.stdout}${result.stderr}`,
    };
  } finally {
    // The checker runs synchronously, so the fixture is no longer needed here;
    // remove it even when setup or the run throws, so temp dirs do not pile up.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
}

describe("check-docs link validation", () => {
  it("reports broken local markdown links with source line numbers", () => {
    const { mdPath, status, output } = checkGuide(
      "nemoclaw-check-docs-",
      [
        "# Guide",
        "",
        "[working](./exists.md)",
        "[broken](./missing.md)",
        "```md",
        "[ignored](./inside-code-fence.md)",
        "```",
        "",
      ],
      { "exists.md": "# ok\n" },
    );

    expect(status).toBe(1);
    expect(output).toContain(`broken local link in ${mdPath}:4 -> ./missing.md`);
    expect(output).not.toContain("inside-code-fence.md");
  });

  it("ignores broken links inside fenced code blocks", () => {
    const { status } = checkGuide("nemoclaw-check-docs-codefence-", [
      "# Guide",
      "",
      "```md",
      "[example](./missing.md)",
      "```",
      "",
    ]);

    expect(status).toBe(0);
  });

  it("ignores broken links inside tilde-fenced code blocks", () => {
    const { status } = checkGuide("nemoclaw-check-docs-tildefence-", [
      "# Guide",
      "",
      "~~~md",
      "[example](./missing.md)",
      "~~~",
      "",
    ]);

    expect(status).toBe(0);
  });

  it("keeps scanning disabled for mismatched or shorter fence closers", () => {
    const { status, output } = checkGuide("nemoclaw-check-docs-mixedfence-", [
      "# Guide",
      "",
      "~~~~md",
      "[still-ignored](./inside-code-fence.md)",
      "```",
      "[also-ignored](./inside-shorter-fence.md)",
      "~~~~",
      "",
    ]);

    expect(status).toBe(0);
    expect(output).not.toContain("inside-code-fence.md");
    expect(output).not.toContain("inside-shorter-fence.md");
  });

  it("does not treat fence markers with trailing text as closing fences", () => {
    const { status, output } = checkGuide("nemoclaw-check-docs-fenceclose-", [
      "# Guide",
      "",
      "```md",
      "```not-a-close",
      "[still-ignored](./inside-code-fence.md)",
      "```",
      "",
    ]);

    expect(status).toBe(0);
    expect(output).not.toContain("inside-code-fence.md");
  });
});
36 changes: 25 additions & 11 deletions test/e2e/e2e-cloud-experimental/check-docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ run_cli_check() {
log "[cli] excluded: openshell, /nemoclaw slash, deprecated nemoclaw setup (not in --help)"

log "[cli] phase 1/2: extract normalized usage lines from --help"
NO_COLOR=1 "$NODE" "$CLI_JS" --help 2>&1 | perl -CS -ne '
NO_COLOR=1 "$NODE" "$CLI_JS" --help 2>&1 | LC_ALL=C perl -CS -ne '
s/\e\[[0-9;]*m//g;
next unless /^\s*nemoclaw\s+/;
if (/^\s*nemoclaw\s+(.+)/) {
Expand All @@ -168,7 +168,7 @@ run_cli_check() {
# log text: backticks are documentation markers, not command substitution
log '[cli] phase 2/2: extract ### `nemoclaw …` headings from commands reference'
# Allow optional MyST suffix on the same line, e.g. ### `nemoclaw onboard` {#anchor}
grep -E '^### `nemoclaw ' "$COMMANDS_MD" | perl -CS -ne '
grep -E '^### `nemoclaw ' "$COMMANDS_MD" | LC_ALL=C perl -CS -ne '
if (/^### `([^`]+)`\s*(?:\{[^}]+\})?\s*$/) { print "$1\n"; }
' | LC_ALL=C sort -u >"$_tmp/doc.txt"

Expand Down Expand Up @@ -221,16 +221,30 @@ collect_default_docs() {
}

# extract_targets FILE
# Scans FILE ($1) line by line with perl and prints the link targets found
# outside fenced code blocks:
#   - inline/image markdown links: [text](target) with an optional quoted title
#   - angle-bracket autolinks: <http://...> / <https://...>
# Fence tracking: a line starting (after optional whitespace) with 3+ backticks
# or 3+ tildes opens a fence. While inside, only a run of the same character
# that is at least as long and followed by nothing but whitespace closes it.
# Output: one "<line number><TAB><target>" record per match.
# NOTE(review): this span comes from a rendered diff with no +/- markers. The
# first `perl -CS -ne` opener with the simple `$in = !$in` toggle, and the two
# print loops that emit only "$1\n", look like the removed side of the hunk.
# Confirm against the actual file.
extract_targets() {
perl -CS -ne '
if (/^\s*```/) { $in = !$in; next; }
LC_ALL=C perl -CS -ne '
if (/^\s*(`{3,}|~{3,})(.*)$/) {
my $fence = $1;
my $rest = $2;
my $char = substr($fence, 0, 1);
my $length = length($fence);
if (!$in) {
($in, $fch, $flen) = (1, $char, $length);
next;
}
if ($char eq $fch && $length >= $flen && $rest =~ /^\s*$/) {
($in, $fch, $flen) = (0, "", 0);
next;
}
}
next if $in;
while (/\!?\[[^\]]*\]\(([^)\s]+)(?:\s+["'"'"'][^)"'"'"']*["'"'"'])?\)/g) { print "$1\n"; }
while (/<(https?:[^>\s]+)>/g) { print "$1\n"; }
my $line = $.;
while (/\!?\[[^\]]*\]\(([^)\s]+)(?:\s+["'"'"'][^)"'"'"']*["'"'"'])?\)/g) { print $line . "\t" . $1 . "\n"; }
while (/<(https?:[^>\s]+)>/g) { print $line . "\t" . $1 . "\n"; }
' -- "$1"
}

check_local_ref() {
local md_path="$1" target="$2"
local md_path="$1" line_no="$2" target="$3"
local stripped

stripped="${target%%\#*}"
Expand All @@ -251,7 +265,7 @@ check_local_ref() {
if (cd "$(dirname "$md_path")" && [[ -e "$stripped" ]]); then
return 0
fi
echo "check-docs: [links] broken local link in $md_path -> $target" >&2
echo "check-docs: [links] broken local link in $md_path:$line_no -> $target" >&2
return 1
}

Expand Down Expand Up @@ -367,11 +381,11 @@ run_links_check() {
failures=1
continue
fi
local target rc
while IFS= read -r target || [[ -n "$target" ]]; do
local line_no target rc
while IFS=$'\t' read -r line_no target || [[ -n "${target:-}" ]]; do
[[ -z "$target" ]] && continue
set +e
check_local_ref "$md" "$target"
check_local_ref "$md" "$line_no" "$target"
rc=$?
set -e
if [[ "$rc" -eq 0 ]]; then
Expand Down
Loading