Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@ainyc/aeo-audit",
"version": "1.4.0",
"version": "1.5.0",
"description": "The most comprehensive open-source Answer Engine Optimization (AEO) audit tool. Scores websites across 13 ranking factors that determine AI citation.",
"type": "module",
"main": "./dist/index.js",
Expand Down
4 changes: 3 additions & 1 deletion skills/aeo/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,11 @@ npx @ainyc/aeo-audit@1 "<url>" --sitemap --top-issues --format json

Flags:
- `--sitemap [url]` — auto-discover `/sitemap.xml` or provide an explicit URL
- `--limit <n>` — cap pages audited (sorted by sitemap priority)
- `--limit <n>` — cap pages audited (default 200, sorted by sitemap priority)
- `--top-issues` — skip per-page output, show only cross-cutting patterns

Pages are audited with bounded concurrency (5 in flight) to avoid hammering the target origin.

Returns:
- Per-page scores and grades
- Cross-cutting issues (factors failing across multiple pages)
Expand Down
9 changes: 8 additions & 1 deletion src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ Options:
--include-geo Include optional geographic signals factor
--include-agent-skills Include optional agent skill exposure factor (Schema.org Action, MCP, form affordances)
--sitemap [url] Audit all pages from sitemap (auto-discovers /sitemap.xml or use explicit URL)
--limit <n> Max pages to audit in sitemap mode (sorted by sitemap priority)
--limit <n> Max pages to audit in sitemap mode (default 200, sorted by sitemap priority)
--top-issues In sitemap mode, skip per-page output and show only cross-cutting issues
-h, --help Show this help message

Expand Down Expand Up @@ -142,6 +142,13 @@ export async function main(argv: string[] = process.argv): Promise<number> {
sitemapUrl: args.sitemapUrl ?? undefined,
limit: args.limit ?? undefined,
topIssuesOnly: args.topIssues,
onPlan: (plan) => {
if (plan.truncated > 0) {
console.error(
`Notice: sitemap has ${plan.discovered} URLs; auditing top ${plan.willAudit} by priority (--limit ${plan.effectiveLimit}). ${plan.truncated} pages skipped. Pass --limit ${Math.max(plan.discovered, 9999)} to audit all.`,
)
}
},
}

const report = await runSitemapAudit(args.url, options)
Expand Down
6 changes: 5 additions & 1 deletion src/formatters/markdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@ export function formatSitemapMarkdown(report: SitemapAuditReport, topIssuesOnly
lines.push(``)
lines.push(`**Sitemap:** ${report.sitemapUrl}`)
lines.push(`**Aggregate Grade:** ${report.aggregateGrade} (${report.aggregateScore}/100)`)
lines.push(`**Pages:** ${report.pagesAudited} audited, ${report.pagesSkipped} skipped, ${report.pagesDiscovered} discovered`)
lines.push(`**Pages:** ${report.pagesAudited} audited of ${report.pagesDiscovered} discovered (${report.pagesFiltered} filtered as non-HTML, ${report.pagesTruncated} truncated by --limit ${report.effectiveLimit})`)
if (report.pagesTruncated > 0) {
lines.push(``)
lines.push(`> **Note:** ${report.pagesTruncated} additional pages were skipped because of the page limit. Pass \`--limit ${Math.max(report.pagesDiscovered, 9999)}\` to audit them all.`)
}
lines.push(`**Audited:** ${report.auditedAt}`)
lines.push(``)

Expand Down
5 changes: 4 additions & 1 deletion src/formatters/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@ export function formatSitemapText(report: SitemapAuditReport, topIssuesOnly = fa
lines.push(`${DIM}${report.sitemapUrl}${RESET}`)
lines.push(``)
lines.push(` ${BOLD}Aggregate Grade:${RESET} ${gc}${BOLD}${report.aggregateGrade}${RESET} ${bar(report.aggregateScore, 30)} ${report.aggregateScore}/100`)
lines.push(` ${DIM}${report.pagesAudited} pages audited, ${report.pagesSkipped} skipped, ${report.pagesDiscovered} discovered${RESET}`)
lines.push(` ${DIM}${report.pagesAudited} pages audited of ${report.pagesDiscovered} discovered (${report.pagesFiltered} filtered, ${report.pagesTruncated} truncated by --limit ${report.effectiveLimit})${RESET}`)
if (report.pagesTruncated > 0) {
lines.push(` ${DIM}Note: ${report.pagesTruncated} additional pages skipped by --limit. Pass --limit ${Math.max(report.pagesDiscovered, 9999)} to audit them all.${RESET}`)
}
lines.push(``)

if (!topIssuesOnly) {
Expand Down
124 changes: 85 additions & 39 deletions src/sitemap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import type {
const USER_AGENT = 'AINYC-AEO-Audit/1.0'
const SITEMAP_TIMEOUT_MS = 10_000
const SITEMAP_MAX_BYTES = 5 * 1024 * 1024
const DEFAULT_LIMIT = 200
const DEFAULT_CONCURRENCY = 5

const SKIP_EXTENSIONS = new Set(['.pdf', '.txt', '.xml', '.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.mp4', '.mp3', '.zip', '.gz', '.css', '.js'])

Expand Down Expand Up @@ -131,6 +133,26 @@ async function resolveSitemapUrls(sitemapUrl: string): Promise<SitemapEntry[]> {
return entries
}

/**
 * Map `worker` over `items` with at most `concurrency` calls in flight,
 * returning results in input order.
 *
 * Each runner repeatedly claims the next unprocessed index from a shared
 * cursor; the claim (`cursor++`) is synchronous, so two runners can never
 * take the same slot. If any `worker` call rejects, the returned promise
 * rejects with that error.
 */
async function mapWithConcurrency<T, R>(
  items: T[],
  concurrency: number,
  worker: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length)
  let cursor = 0

  const runner = async (): Promise<void> => {
    for (let i = cursor++; i < items.length; i = cursor++) {
      results[i] = await worker(items[i], i)
    }
  }

  // Never spawn more runners than items, and always at least one.
  const laneCount = Math.max(1, Math.min(concurrency, items.length))
  const lanes: Promise<void>[] = []
  for (let lane = 0; lane < laneCount; lane += 1) {
    lanes.push(runner())
  }
  await Promise.all(lanes)

  return results
}

function buildCrossCuttingIssues(successPages: AuditReport[]): CrossCuttingIssue[] {
if (successPages.length === 0) return []

Expand Down Expand Up @@ -209,57 +231,78 @@ export async function runSitemapAudit(rawUrl: string, options: SitemapAuditOptio
const sitemapUrl = options.sitemapUrl || `${origin}/sitemap.xml`

// Fetch and parse sitemap
let entries = await resolveSitemapUrls(sitemapUrl)
const allEntries = await resolveSitemapUrls(sitemapUrl)
const discovered = allEntries.length

// Filter to HTML content pages
const allCount = entries.length
entries = entries.filter((e) => !shouldSkipUrl(e.loc))
const eligible = allEntries.filter((e) => !shouldSkipUrl(e.loc))
const filtered = discovered - eligible.length

// Sort by priority (highest first) if priorities exist
entries.sort((a, b) => (b.priority ?? 0.5) - (a.priority ?? 0.5))
eligible.sort((a, b) => (b.priority ?? 0.5) - (a.priority ?? 0.5))

// Apply limit
if (options.limit && options.limit > 0) {
entries = entries.slice(0, options.limit)
}
// Apply limit (default 200 when not specified — large sitemaps are common and
// a full sweep is rarely what the user wants).
const effectiveLimit = options.limit && options.limit > 0 ? options.limit : DEFAULT_LIMIT
const entries = eligible.slice(0, effectiveLimit)
const truncated = eligible.length - entries.length

if (entries.length === 0) {
throw new AeoAuditError('BAD_INPUT', 'No auditable URLs found in sitemap.')
}

const skipped = allCount - entries.length
options.onPlan?.({
discovered,
filtered,
truncated,
willAudit: entries.length,
effectiveLimit,
})

const auditOptions: RunAeoAuditOptions = {
factors: options.factors,
includeGeo: options.includeGeo,
}

// Audit each page (sequentially to avoid hammering the target)
const pageResults: SitemapPageResult[] = []
const successReports: AuditReport[] = []

for (const entry of entries) {
try {
const report = await runAeoAudit(entry.loc, auditOptions)
successReports.push(report)
pageResults.push({
url: report.finalUrl,
overallScore: report.overallScore,
overallGrade: report.overallGrade,
status: 'success',
factors: report.factors,
metadata: report.metadata,
})
} catch (error) {
const message = error instanceof Error ? error.message : String(error)
pageResults.push({
url: entry.loc,
overallScore: 0,
overallGrade: 'F',
status: 'error',
error: message,
})
}
}
// Audit pages with bounded concurrency: 5 workers is a polite ceiling for one
// origin while giving a meaningful speedup over fully sequential.
const settled = await mapWithConcurrency(
entries,
DEFAULT_CONCURRENCY,
async (entry): Promise<{ pageResult: SitemapPageResult; report: AuditReport | null }> => {
try {
const report = await runAeoAudit(entry.loc, auditOptions)
return {
pageResult: {
url: report.finalUrl,
overallScore: report.overallScore,
overallGrade: report.overallGrade,
status: 'success',
factors: report.factors,
metadata: report.metadata,
},
report,
}
} catch (error) {
const message = error instanceof Error ? error.message : String(error)
return {
pageResult: {
url: entry.loc,
overallScore: 0,
overallGrade: 'F',
status: 'error',
error: message,
},
report: null,
}
}
},
)

const pageResults: SitemapPageResult[] = settled.map((s) => s.pageResult)
const successReports: AuditReport[] = settled
.map((s) => s.report)
.filter((r): r is AuditReport => r !== null)

// Calculate aggregate score from successful audits
const successScores = pageResults.filter((p) => p.status === 'success').map((p) => p.overallScore)
Expand All @@ -273,9 +316,12 @@ export async function runSitemapAudit(rawUrl: string, options: SitemapAuditOptio
return {
sitemapUrl,
auditedAt: new Date().toISOString(),
pagesDiscovered: allCount,
pagesDiscovered: discovered,
pagesAudited: entries.length,
pagesSkipped: skipped,
pagesSkipped: filtered + truncated,
pagesFiltered: filtered,
pagesTruncated: truncated,
effectiveLimit,
aggregateScore,
aggregateGrade: scoreToGrade(aggregateScore),
pages: pageResults,
Expand All @@ -284,4 +330,4 @@ export async function runSitemapAudit(rawUrl: string, options: SitemapAuditOptio
}
}

export { parseSitemapXml, shouldSkipUrl, buildCrossCuttingIssues }
export { parseSitemapXml, shouldSkipUrl, buildCrossCuttingIssues, mapWithConcurrency }
12 changes: 12 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -174,15 +174,27 @@ export interface SitemapAuditReport {
pagesDiscovered: number
pagesAudited: number
pagesSkipped: number
pagesFiltered: number
pagesTruncated: number
effectiveLimit: number
aggregateScore: number
aggregateGrade: string
pages: SitemapPageResult[]
crossCuttingIssues: CrossCuttingIssue[]
prioritizedFixes: string[]
}

/**
 * Snapshot of the audit plan, computed after sitemap discovery but before
 * any pages are fetched. Delivered via `SitemapAuditOptions.onPlan` so
 * callers (e.g. the CLI) can warn about truncation up front.
 */
export interface SitemapAuditPlan {
  // Total URLs found in the sitemap, before any filtering.
  discovered: number
  // URLs dropped as non-auditable by the skip filter (non-HTML assets etc.).
  filtered: number
  // Eligible URLs dropped because they exceeded the effective page limit.
  truncated: number
  // Pages that will actually be audited (eligible, capped at the limit).
  willAudit: number
  // The page cap in effect: the explicit `limit` when positive, else the default.
  effectiveLimit: number
}

/** Options for a sitemap-wide audit, extending the per-page audit options. */
export interface SitemapAuditOptions extends RunAeoAuditOptions {
  // Explicit sitemap URL; when omitted, `<origin>/sitemap.xml` is used.
  sitemapUrl?: string
  // Max pages to audit; undefined or non-positive values fall back to the default limit.
  limit?: number
  // When true, formatters skip per-page output and show only cross-cutting issues.
  topIssuesOnly?: boolean
  // Invoked once with the computed plan before any pages are fetched.
  onPlan?: (plan: SitemapAuditPlan) => void
}
40 changes: 39 additions & 1 deletion test/sitemap.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { test, expect } from 'vitest'

import { parseSitemapXml, shouldSkipUrl } from '../src/sitemap.js'
import { mapWithConcurrency, parseSitemapXml, shouldSkipUrl } from '../src/sitemap.js'

test('parseSitemapXml extracts loc and priority from url blocks', () => {
const xml = `<?xml version="1.0" encoding="UTF-8"?>
Expand Down Expand Up @@ -61,3 +61,41 @@ test('shouldSkipUrl allows HTML content pages', () => {
expect(shouldSkipUrl('https://example.com/page.html')).toBe(false)
expect(shouldSkipUrl('https://example.com/page.htm')).toBe(false)
})

test('mapWithConcurrency preserves input order and caps in-flight workers', async () => {
  const inputs = Array.from({ length: 20 }, (_, i) => i)
  let active = 0
  let maxActive = 0

  const doubled = await mapWithConcurrency(inputs, 5, async (value) => {
    active += 1
    if (active > maxActive) maxActive = active
    // Yield to the event loop a few times so workers actually overlap.
    await new Promise((resolve) => setTimeout(resolve, 1))
    active -= 1
    return value * 2
  })

  // Results come back in input order despite concurrent execution.
  expect(doubled).toEqual(inputs.map((i) => i * 2))
  // Concurrency stays within the requested bound, and work really overlapped.
  expect(maxActive).toBeLessThanOrEqual(5)
  expect(maxActive).toBeGreaterThan(1)
})

test('mapWithConcurrency handles empty input', async () => {
  const output = await mapWithConcurrency<number, number>([], 5, async (value) => value)
  expect(output).toEqual([])
})

test('mapWithConcurrency caps workers to item count when items < concurrency', async () => {
  let active = 0
  let maxActive = 0

  const output = await mapWithConcurrency([1, 2], 10, async (value) => {
    active += 1
    maxActive = Math.max(maxActive, active)
    await new Promise((resolve) => setTimeout(resolve, 1))
    active -= 1
    return value
  })

  expect(output).toEqual([1, 2])
  // With only two items, no more than two workers should ever be in flight.
  expect(maxActive).toBeLessThanOrEqual(2)
})
Loading