diff --git a/CHANGELOG.md b/CHANGELOG.md index f88da46..62c0072 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## 4.0.0 (2026-06-09) + +### Breaking +- **Renamed the `ai-readable-content` factor to `ai-access-files` ("AI Access Files (llms.txt, sitemap)").** The factor that scores root-level AI access files (`/llms.txt`, `/llms-full.txt`, `/robots.txt`, `/sitemap.xml`, and per-page Markdown source endpoints) now uses the id `ai-access-files` and the display name **AI Access Files (llms.txt, sitemap)**. Breaking for anything keyed on the old identifier: + - `--factors ai-readable-content` is now **`--factors ai-access-files`**. + - All 20 finding codes are renamed from `ai-readable-content.*` to `ai-access-files.*` (e.g. `ai-readable-content.llms-txt.strong` → `ai-access-files.llms-txt.strong`). Full registry in [docs/finding-codes.md](docs/finding-codes.md). + - The analyzer export is renamed `analyzeAiReadableContent` → **`analyzeAiAccessFiles`** (subpath `@ainyc/aeo-audit/analyzers/ai-readable-content` → `…/ai-access-files`), and the `FACTOR_SPEC_RULES` key changes to match. + - Scores and the 5% weight are unchanged, and the report JSON **shape** is identical — but `schemaVersion` is bumped **`2.1` → `3.0`** to flag the breaking identifier rename, so agent parsers pinned to the old factor id or finding codes detect the drift via the major bump (per the documented "treat a major bump as breaking" contract). 
+ ## 3.1.0 (2026-06-09) ### Added diff --git a/docs/api.md b/docs/api.md index 758559d..60e230b 100644 --- a/docs/api.md +++ b/docs/api.md @@ -32,7 +32,7 @@ const report = await runSitemapAudit('https://example.com', { factors: ['schema-validity', 'structured-data'], // Optional subset }) -console.log(report.schemaVersion) // '2.1', JSON shape version (see "Machine-readable output") +console.log(report.schemaVersion) // '3.0', JSON shape version (see "Machine-readable output") console.log(report.aggregateScore) // 84 console.log(report.pagesAudited) // 22 console.log(report.criticalDefects) // Binary per-page defects (multiple/missing H1, missing title/meta), grouped by defect diff --git a/docs/cli.md b/docs/cli.md index 135b255..f7e4316 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -187,7 +187,7 @@ The relaxation is **scoped to the single host you named on the CLI, and only tha ## Auxiliary file diagnostics -When fetching `/llms.txt`, `/llms-full.txt`, `/robots.txt`, and `/sitemap.xml` the audit runs a **content-negotiation probe** that surfaces as a finding on the **AI-Readable Content** factor: if a file returns OK to a bare request but a non-2xx response under `Accept: text/markdown`, the audit reports a content-negotiation trap. This catches Astro / Vercel / Starlight setups that redirect `.txt` → non-existent `.md` for markdown-accepting clients, which makes the file invisible to AI content-extraction tools, even though the file is "present" by every other measure. +When fetching `/llms.txt`, `/llms-full.txt`, `/robots.txt`, and `/sitemap.xml` the audit runs a **content-negotiation probe** that surfaces as a finding on the **AI Access Files (llms.txt, sitemap)** factor: if a file returns OK to a bare request but a non-2xx response under `Accept: text/markdown`, the audit reports a content-negotiation trap. 
This catches Astro / Vercel / Starlight setups that redirect `.txt` → non-existent `.md` for markdown-accepting clients, which makes the file invisible to AI content-extraction tools, even though the file is "present" by every other measure. ## Flag reference diff --git a/docs/finding-codes.md b/docs/finding-codes.md index 0eec9a9..8ff46e9 100644 --- a/docs/finding-codes.md +++ b/docs/finding-codes.md @@ -35,28 +35,28 @@ Every `AuditFinding` carries a stable `code` so integrations can key on a machin - `content-depth.lists.present` - `content-depth.lists.none` -### AI-Readable Content - -- `ai-readable-content.content-negotiation.found` -- `ai-readable-content.aux-resource.missing` -- `ai-readable-content.aux-resource.timeout` -- `ai-readable-content.aux-resource.unreachable` -- `ai-readable-content.aux-resource.not-html` -- `ai-readable-content.aux-resource.found` -- `ai-readable-content.llms-txt.strong` -- `ai-readable-content.llms-txt.short` -- `ai-readable-content.llms-full-txt.strong` -- `ai-readable-content.llms-full-txt.short` -- `ai-readable-content.robots-txt.found` -- `ai-readable-content.robots-txt.unreachable` -- `ai-readable-content.robots-txt.missing` -- `ai-readable-content.sitemap.found` -- `ai-readable-content.sitemap.unreachable` -- `ai-readable-content.sitemap.missing` -- `ai-readable-content.llms-txt-link.found` -- `ai-readable-content.llms-txt-link.missing` -- `ai-readable-content.markdown-endpoint.found` -- `ai-readable-content.markdown-endpoint.missing` +### AI Access Files (llms.txt, sitemap) + +- `ai-access-files.content-negotiation.found` +- `ai-access-files.aux-resource.missing` +- `ai-access-files.aux-resource.timeout` +- `ai-access-files.aux-resource.unreachable` +- `ai-access-files.aux-resource.not-html` +- `ai-access-files.aux-resource.found` +- `ai-access-files.llms-txt.strong` +- `ai-access-files.llms-txt.short` +- `ai-access-files.llms-full-txt.strong` +- `ai-access-files.llms-full-txt.short` +- `ai-access-files.robots-txt.found` 
+- `ai-access-files.robots-txt.unreachable` +- `ai-access-files.robots-txt.missing` +- `ai-access-files.sitemap.found` +- `ai-access-files.sitemap.unreachable` +- `ai-access-files.sitemap.missing` +- `ai-access-files.llms-txt-link.found` +- `ai-access-files.llms-txt-link.missing` +- `ai-access-files.markdown-endpoint.found` +- `ai-access-files.markdown-endpoint.missing` ### E-E-A-T Signals diff --git a/docs/scoring.md b/docs/scoring.md index 4206fd6..6aba8f7 100644 --- a/docs/scoring.md +++ b/docs/scoring.md @@ -15,7 +15,7 @@ AI answer engines are replacing traditional search for millions of queries. Gett |--------|--------|---------------| | Structured Data (JSON-LD) | 12% | Presence of LocalBusiness, FAQPage, Service, HowTo schemas | | Content Depth | 10% | Word count, heading hierarchy, paragraph structure, lists | -| AI-Readable Content | 5% | llms.txt, llms-full.txt, robots.txt, sitemap.xml availability, per-page Markdown source endpoints | +| AI Access Files (llms.txt, sitemap) | 5% | llms.txt, llms-full.txt, robots.txt, sitemap.xml availability, per-page Markdown source endpoints | | E-E-A-T Signals | 8% | Author meta, Person schema credentials, trust pages, reviews | | FAQ Content | 8% | FAQPage schema, details/summary blocks, question-style headings | | Citations & Authority | 8% | External links, authoritative domains, sameAs references | diff --git a/package.json b/package.json index 493a522..c39b67a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@ainyc/aeo-audit", - "version": "3.1.0", + "version": "4.0.0", "description": "The most comprehensive open-source Answer Engine Optimization (AEO) audit tool. 
Scores websites across 16 ranking factors that determine AI citation.", "type": "module", "main": "./dist/index.js", diff --git a/skills/aeo/SKILL.md b/skills/aeo/SKILL.md index 1696d19..270c6a5 100644 --- a/skills/aeo/SKILL.md +++ b/skills/aeo/SKILL.md @@ -132,7 +132,7 @@ Use `--format json` for the full report, or **`--format agent`** for just the de #### Auxiliary File Diagnostics -When the audit fetches `/llms.txt`, `/llms-full.txt`, `/robots.txt`, and `/sitemap.xml`, it probes once with `Accept: text/markdown` to detect a **content-negotiation** trap: file responds OK to a bare request but returns a non-2xx response when the client prefers markdown. This catches Astro / Vercel / Starlight setups that 307-redirect `.txt` → non-existent `.md` for markdown-accepting clients, making the file invisible to AI content-extraction tools even though the file exists. The diagnostic surfaces as a finding on the **AI-Readable Content** factor. +When the audit fetches `/llms.txt`, `/llms-full.txt`, `/robots.txt`, and `/sitemap.xml`, it probes once with `Accept: text/markdown` to detect a **content-negotiation** trap: file responds OK to a bare request but returns a non-2xx response when the client prefers markdown. This catches Astro / Vercel / Starlight setups that 307-redirect `.txt` → non-existent `.md` for markdown-accepting clients, making the file invisible to AI content-extraction tools even though the file exists. The diagnostic surfaces as a finding on the **AI Access Files (llms.txt, sitemap)** factor. ### Local Dev / Staging Targets @@ -280,7 +280,7 @@ Use when the user wants `llms.txt` or `llms-full.txt` created or improved. If a URL is provided: 1. Run: ```bash - npx @ainyc/aeo-audit@1 "" [flags] --format json --factors ai-readable-content + npx @ainyc/aeo-audit@4 "" [flags] --format json --factors ai-access-files ``` 2. Inspect existing AI-readable files if present. 3. Extract key content from the site. 
diff --git a/src/analyzers/ai-readable-content.ts b/src/analyzers/ai-access-files.ts similarity index 64% rename from src/analyzers/ai-readable-content.ts rename to src/analyzers/ai-access-files.ts index 0535530..64bb24c 100644 --- a/src/analyzers/ai-readable-content.ts +++ b/src/analyzers/ai-access-files.ts @@ -26,7 +26,7 @@ function pushDiagnosticFindings( if (diagnostics.contentNegotiation) { findings.push({ type: 'info', - code: 'ai-readable-content.content-negotiation.found', + code: 'ai-access-files.content-negotiation.found', message: `${label} returns a non-2xx response when fetched with \`Accept: text/markdown\` — content negotiation hides it from AI content extraction tools that prefer markdown.`, }) recommendations.push( @@ -43,31 +43,31 @@ function scoreAuxState( recommendations: string[], ): number { if (!auxEntry || auxEntry.state === 'missing') { - findings.push({ type: 'missing', code: 'ai-readable-content.aux-resource.missing', message: missingMessage }) + findings.push({ type: 'missing', code: 'ai-access-files.aux-resource.missing', message: missingMessage }) recommendations.push(`Create ${missingMessage.split(' ')[0]} at your site root.`) return 0 } if (auxEntry.state === 'timeout') { - findings.push({ type: 'timeout', code: 'ai-readable-content.aux-resource.timeout', message: unavailableMessage }) + findings.push({ type: 'timeout', code: 'ai-access-files.aux-resource.timeout', message: unavailableMessage }) return 8 } if (auxEntry.state === 'unreachable') { - findings.push({ type: 'unreachable', code: 'ai-readable-content.aux-resource.unreachable', message: unavailableMessage }) + findings.push({ type: 'unreachable', code: 'ai-access-files.aux-resource.unreachable', message: unavailableMessage }) return 8 } if (auxEntry.state === 'not-html') { - findings.push({ type: 'info', code: 'ai-readable-content.aux-resource.not-html', message: `${missingMessage.split(' ')[0]} returned an unexpected content type.` }) + findings.push({ type: 'info', code: 
'ai-access-files.aux-resource.not-html', message: `${missingMessage.split(' ')[0]} returned an unexpected content type.` }) return 10 } - findings.push({ type: 'found', code: 'ai-readable-content.aux-resource.found', message: `${missingMessage.split(' ')[0]} is available.` }) + findings.push({ type: 'found', code: 'ai-access-files.aux-resource.found', message: `${missingMessage.split(' ')[0]} is available.` }) return 24 } -export function analyzeAiReadableContent(context: AuditContext): AnalysisResult { +export function analyzeAiAccessFiles(context: AuditContext): AnalysisResult { const findings: AnalysisResult['findings'] = [] const recommendations: string[] = [] const auxiliary = context.auxiliary || {} @@ -87,9 +87,9 @@ export function analyzeAiReadableContent(context: AuditContext): AnalysisResult const wordCount = countWords(auxiliary.llmsTxt.body || '') if (wordCount >= 100) { score += 8 - findings.push({ type: 'found', code: 'ai-readable-content.llms-txt.strong', message: '/llms.txt has useful content depth.' }) + findings.push({ type: 'found', code: 'ai-access-files.llms-txt.strong', message: '/llms.txt has useful content depth.' }) } else { - findings.push({ type: 'info', code: 'ai-readable-content.llms-txt.short', message: '/llms.txt is present but short.' }) + findings.push({ type: 'info', code: 'ai-access-files.llms-txt.short', message: '/llms.txt is present but short.' }) recommendations.push('Expand /llms.txt with concise service and entity context.') } } @@ -108,9 +108,9 @@ export function analyzeAiReadableContent(context: AuditContext): AnalysisResult const wordCount = countWords(auxiliary.llmsFullTxt.body || '') if (wordCount >= 200) { score += 10 - findings.push({ type: 'found', code: 'ai-readable-content.llms-full-txt.strong', message: '/llms-full.txt has strong long-form coverage.' }) + findings.push({ type: 'found', code: 'ai-access-files.llms-full-txt.strong', message: '/llms-full.txt has strong long-form coverage.' 
}) } else { - findings.push({ type: 'info', code: 'ai-readable-content.llms-full-txt.short', message: '/llms-full.txt exists but lacks detail.' }) + findings.push({ type: 'info', code: 'ai-access-files.llms-full-txt.short', message: '/llms-full.txt exists but lacks detail.' }) recommendations.push('Add complete offerings, FAQ, and service-area coverage to /llms-full.txt.') } } @@ -119,12 +119,12 @@ export function analyzeAiReadableContent(context: AuditContext): AnalysisResult const robotsState = auxiliary.robotsTxt?.state if (robotsState === 'ok') { score += 16 - findings.push({ type: 'found', code: 'ai-readable-content.robots-txt.found', message: 'robots.txt is accessible.' }) + findings.push({ type: 'found', code: 'ai-access-files.robots-txt.found', message: 'robots.txt is accessible.' }) } else if (robotsState === 'timeout' || robotsState === 'unreachable') { score += 6 - findings.push({ type: robotsState, code: 'ai-readable-content.robots-txt.unreachable', message: 'Could not reliably fetch /robots.txt.' }) + findings.push({ type: robotsState, code: 'ai-access-files.robots-txt.unreachable', message: 'Could not reliably fetch /robots.txt.' }) } else { - findings.push({ type: 'missing', code: 'ai-readable-content.robots-txt.missing', message: '/robots.txt is missing.' }) + findings.push({ type: 'missing', code: 'ai-access-files.robots-txt.missing', message: '/robots.txt is missing.' }) recommendations.push('Add a robots.txt file.') } pushDiagnosticFindings('/robots.txt', auxiliary.robotsTxt, findings, recommendations) @@ -133,12 +133,12 @@ export function analyzeAiReadableContent(context: AuditContext): AnalysisResult const sitemapState = auxiliary.sitemapXml?.state if (sitemapState === 'ok') { score += 16 - findings.push({ type: 'found', code: 'ai-readable-content.sitemap.found', message: 'sitemap.xml is accessible.' }) + findings.push({ type: 'found', code: 'ai-access-files.sitemap.found', message: 'sitemap.xml is accessible.' 
}) } else if (sitemapState === 'timeout' || sitemapState === 'unreachable') { score += 6 - findings.push({ type: sitemapState, code: 'ai-readable-content.sitemap.unreachable', message: 'Could not reliably fetch /sitemap.xml.' }) + findings.push({ type: sitemapState, code: 'ai-access-files.sitemap.unreachable', message: 'Could not reliably fetch /sitemap.xml.' }) } else { - findings.push({ type: 'missing', code: 'ai-readable-content.sitemap.missing', message: '/sitemap.xml is missing.' }) + findings.push({ type: 'missing', code: 'ai-access-files.sitemap.missing', message: '/sitemap.xml is missing.' }) recommendations.push('Add a sitemap.xml file.') } pushDiagnosticFindings('/sitemap.xml', auxiliary.sitemapXml, findings, recommendations) @@ -147,9 +147,9 @@ export function analyzeAiReadableContent(context: AuditContext): AnalysisResult const llmsLink = context.$('link[href*="llms.txt"]').length > 0 if (llmsLink) { score += 10 - findings.push({ type: 'found', code: 'ai-readable-content.llms-txt-link.found', message: 'HTML head links to llms.txt.' }) + findings.push({ type: 'found', code: 'ai-access-files.llms-txt-link.found', message: 'HTML head links to llms.txt.' }) } else { - findings.push({ type: 'info', code: 'ai-readable-content.llms-txt-link.missing', message: 'No llms.txt link detected in .' }) + findings.push({ type: 'info', code: 'ai-access-files.llms-txt-link.missing', message: 'No llms.txt link detected in .' }) recommendations.push('Add a reference to /llms.txt in your document head.') } @@ -161,9 +161,9 @@ export function analyzeAiReadableContent(context: AuditContext): AnalysisResult const markdownLinkHeader = /type="?text\/markdown"?/i.test(linkHeader) if (markdownLinkTag || markdownLinkHeader) { score += 10 - findings.push({ type: 'found', code: 'ai-readable-content.markdown-endpoint.found', message: 'Per-page Markdown source endpoint advertised (text/markdown alternate) — agents can fetch unrendered source.' 
}) + findings.push({ type: 'found', code: 'ai-access-files.markdown-endpoint.found', message: 'Per-page Markdown source endpoint advertised (text/markdown alternate) — agents can fetch unrendered source.' }) } else { - findings.push({ type: 'info', code: 'ai-readable-content.markdown-endpoint.missing', message: 'No per-page Markdown source endpoint advertised (text/markdown alternate link or Link header).' }) + findings.push({ type: 'info', code: 'ai-access-files.markdown-endpoint.missing', message: 'No per-page Markdown source endpoint advertised (text/markdown alternate link or Link header).' }) recommendations.push( `Expose a Markdown version of each page (a .md URL or content negotiation) and advertise it via . ${specCitation('markdown-source-endpoints')}`, ) diff --git a/src/index.ts b/src/index.ts index e35d5d3..049b7ff 100644 --- a/src/index.ts +++ b/src/index.ts @@ -2,7 +2,7 @@ import { load } from 'cheerio' import { fetchPage, normalizeTargetUrl } from './fetch-page.js' import { AeoAuditError } from './errors.js' import { analyzeStructuredData } from './analyzers/structured-data.js' -import { analyzeAiReadableContent } from './analyzers/ai-readable-content.js' +import { analyzeAiAccessFiles } from './analyzers/ai-access-files.js' import { analyzeEntityConsistency } from './analyzers/entity-consistency.js' import { analyzeContentDepth } from './analyzers/content-depth.js' import { analyzeDefinitionBlocks } from './analyzers/definition-blocks.js' @@ -64,7 +64,7 @@ export type { const ANALYZER_BY_ID: Record = { 'structured-data': analyzeStructuredData, - 'ai-readable-content': analyzeAiReadableContent, + 'ai-access-files': analyzeAiAccessFiles, 'entity-consistency': analyzeEntityConsistency, 'content-depth': analyzeContentDepth, 'definition-blocks': analyzeDefinitionBlocks, diff --git a/src/schema.ts b/src/schema.ts index 72d26db..b6a26b5 100644 --- a/src/schema.ts +++ b/src/schema.ts @@ -6,4 +6,4 @@ * Lives in its own module (not `index.ts`) so report 
builders can read it without * importing the audit entry points — which test suites routinely mock. */ -export const SCHEMA_VERSION = '2.1' +export const SCHEMA_VERSION = '3.0' diff --git a/src/scoring.ts b/src/scoring.ts index f2f41fa..42206a3 100644 --- a/src/scoring.ts +++ b/src/scoring.ts @@ -4,7 +4,7 @@ import type { FactorDefinition, RawFactorResult, ScoredFactorSummary } from './t export const FACTOR_DEFINITIONS: FactorDefinition[] = [ { id: 'structured-data', name: 'Structured Data (JSON-LD)', weight: 12 }, { id: 'content-depth', name: 'Content Depth', weight: 10 }, - { id: 'ai-readable-content', name: 'AI-Readable Content', weight: 5 }, + { id: 'ai-access-files', name: 'AI Access Files (llms.txt, sitemap)', weight: 5 }, { id: 'eeat-signals', name: 'E-E-A-T Signals', weight: 8 }, { id: 'faq-content', name: 'FAQ Content', weight: 8 }, { id: 'citations', name: 'Citations & Authority Signals', weight: 8 }, diff --git a/src/spec-references.ts b/src/spec-references.ts index 6995273..8f43583 100644 --- a/src/spec-references.ts +++ b/src/spec-references.ts @@ -57,7 +57,7 @@ export type SpecRuleId = keyof typeof SPEC_RULES */ export const FACTOR_SPEC_RULES: Record = { 'structured-data': ['structured-data-for-agents'], - 'ai-readable-content': ['llms-txt', 'llms-full-txt', 'markdown-source-endpoints', 'link-headers'], + 'ai-access-files': ['llms-txt', 'llms-full-txt', 'markdown-source-endpoints', 'link-headers'], 'ai-crawler-access': ['robots-for-ai-crawlers', 'content-signals'], 'agent-skill-exposure': ['mcp-and-tool-discovery', 'agent-skills-discovery', 'a2a-agent-cards', 'web-bot-auth'], } diff --git a/test/analyzers/ai-readable-content.test.ts b/test/analyzers/ai-access-files.test.ts similarity index 81% rename from test/analyzers/ai-readable-content.test.ts rename to test/analyzers/ai-access-files.test.ts index 0e31336..a731252 100644 --- a/test/analyzers/ai-readable-content.test.ts +++ b/test/analyzers/ai-access-files.test.ts @@ -1,7 +1,7 @@ import { describe, 
it, expect } from 'vitest' import { load } from 'cheerio' -import { analyzeAiReadableContent } from '../../src/analyzers/ai-readable-content.js' +import { analyzeAiAccessFiles } from '../../src/analyzers/ai-access-files.js' import { getVisibleText, parseJsonLdScripts } from '../../src/analyzers/helpers.js' import type { AuditContext, AuxiliaryResource, AuxiliaryResources } from '../../src/types.js' @@ -38,7 +38,7 @@ function buildContext( // ─── Baseline ───────────────────────────────────────────────────────────────── describe('baseline with everything missing', () => { it('scores 0 when all auxiliary resources and the head link are absent', () => { - const result = analyzeAiReadableContent(buildContext()) + const result = analyzeAiAccessFiles(buildContext()) expect(result.score).toBe(0) for (const target of ['/llms.txt', '/llms-full.txt', '/robots.txt', '/sitemap.xml']) { expect(result.findings.some((f) => f.type === 'missing' && f.message.includes(target))).toBe(true) @@ -53,33 +53,33 @@ describe('/llms.txt state handling', () => { const shortBody: AuxiliaryResource = { state: 'ok', body: 'word '.repeat(10) } it('credits +24 + depth bonus for an ok llms.txt with >=100 words', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ llmsTxt: longBody }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ llmsTxt: longBody }))) expect(result.score).toBe(24 + 8) // base + depth; everything else is missing expect(result.findings.some((f) => f.type === 'found' && f.message.includes('/llms.txt is available'))).toBe(true) expect(result.findings.some((f) => f.type === 'found' && f.message.includes('useful content depth'))).toBe(true) }) it('credits +24 only (no depth) when llms.txt is present but short', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ llmsTxt: shortBody }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ llmsTxt: shortBody }))) expect(result.score).toBe(24) 
expect(result.findings.some((f) => f.type === 'info' && f.message.includes('present but short'))).toBe(true) expect(result.recommendations.some((r) => r.includes('Expand /llms.txt'))).toBe(true) }) it('credits +8 on timeout instead of failing hard', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ llmsTxt: { state: 'timeout', body: '' } }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ llmsTxt: { state: 'timeout', body: '' } }))) expect(result.score).toBe(8) expect(result.findings.some((f) => f.type === 'timeout' && f.message.includes('/llms.txt'))).toBe(true) }) it('credits +8 on unreachable', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ llmsTxt: { state: 'unreachable', body: '' } }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ llmsTxt: { state: 'unreachable', body: '' } }))) expect(result.score).toBe(8) expect(result.findings.some((f) => f.type === 'unreachable' && f.message.includes('/llms.txt'))).toBe(true) }) it('credits +10 on not-html', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ llmsTxt: { state: 'not-html', body: 'html' } }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ llmsTxt: { state: 'not-html', body: 'html' } }))) expect(result.score).toBe(10) expect(result.findings.some((f) => f.type === 'info' && f.message.includes('unexpected content type'))).toBe(true) }) @@ -91,20 +91,20 @@ describe('/llms-full.txt state handling', () => { const shortBody: AuxiliaryResource = { state: 'ok', body: 'word '.repeat(50) } it('credits +24 + depth bonus for an ok llms-full.txt with >=200 words', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ llmsFullTxt: longBody }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ llmsFullTxt: longBody }))) expect(result.score).toBe(24 + 10) expect(result.findings.some((f) => f.type === 'found' && 
f.message.includes('strong long-form coverage'))).toBe(true) }) it('credits +24 only (no depth) when llms-full.txt is present but short', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ llmsFullTxt: shortBody }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ llmsFullTxt: shortBody }))) expect(result.score).toBe(24) expect(result.findings.some((f) => f.type === 'info' && f.message.includes('exists but lacks detail'))).toBe(true) expect(result.recommendations.some((r) => r.includes('complete offerings'))).toBe(true) }) it('credits +8 on timeout', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ llmsFullTxt: { state: 'timeout', body: '' } }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ llmsFullTxt: { state: 'timeout', body: '' } }))) expect(result.score).toBe(8) expect(result.findings.some((f) => f.type === 'timeout' && f.message.includes('/llms-full.txt'))).toBe(true) }) @@ -113,25 +113,25 @@ describe('/llms-full.txt state handling', () => { // ─── /robots.txt state handling ────────────────────────────────────────────── describe('/robots.txt state handling', () => { it('credits +16 when robots.txt is ok', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ robotsTxt: { state: 'ok', body: 'User-agent: *' } }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ robotsTxt: { state: 'ok', body: 'User-agent: *' } }))) expect(result.score).toBe(16) expect(result.findings.some((f) => f.type === 'found' && f.message.includes('robots.txt is accessible'))).toBe(true) }) it('credits +6 on timeout', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ robotsTxt: { state: 'timeout', body: '' } }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ robotsTxt: { state: 'timeout', body: '' } }))) expect(result.score).toBe(6) expect(result.findings.some((f) => f.type === 'timeout' && 
f.message.includes('robots.txt'))).toBe(true) }) it('credits +6 on unreachable', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ robotsTxt: { state: 'unreachable', body: '' } }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ robotsTxt: { state: 'unreachable', body: '' } }))) expect(result.score).toBe(6) expect(result.findings.some((f) => f.type === 'unreachable')).toBe(true) }) it('flags missing robots.txt and recommends adding one', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ robotsTxt: { state: 'missing', body: '' } }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ robotsTxt: { state: 'missing', body: '' } }))) expect(result.findings.some((f) => f.type === 'missing' && f.message.includes('/robots.txt'))).toBe(true) expect(result.recommendations.some((r) => r.includes('robots.txt'))).toBe(true) }) @@ -140,19 +140,19 @@ describe('/robots.txt state handling', () => { // ─── /sitemap.xml state handling ───────────────────────────────────────────── describe('/sitemap.xml state handling', () => { it('credits +16 when sitemap is ok', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ sitemapXml: { state: 'ok', body: '' } }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ sitemapXml: { state: 'ok', body: '' } }))) expect(result.score).toBe(16) expect(result.findings.some((f) => f.type === 'found' && f.message.includes('sitemap.xml is accessible'))).toBe(true) }) it('credits +6 on timeout', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ sitemapXml: { state: 'timeout', body: '' } }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ sitemapXml: { state: 'timeout', body: '' } }))) expect(result.score).toBe(6) expect(result.findings.some((f) => f.type === 'timeout' && f.message.includes('sitemap.xml'))).toBe(true) }) it('flags missing sitemap and recommends adding one', 
() => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux({ sitemapXml: { state: 'missing', body: '' } }))) + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux({ sitemapXml: { state: 'missing', body: '' } }))) expect(result.findings.some((f) => f.type === 'missing' && f.message.includes('/sitemap.xml'))).toBe(true) expect(result.recommendations.some((r) => r.includes('sitemap.xml'))).toBe(true) }) @@ -162,7 +162,7 @@ describe('/sitemap.xml state handling', () => { describe('head link to llms.txt', () => { it('credits +10 when the links to llms.txt', () => { const html = 'T' - const result = analyzeAiReadableContent(buildContext(html)) + const result = analyzeAiAccessFiles(buildContext(html)) expect(result.score).toBe(10) // only the link; all aux is missing expect(result.findings.some((f) => f.type === 'found' && f.message.includes('HTML head links to llms.txt'))).toBe(true) }) @@ -171,13 +171,13 @@ describe('head link to llms.txt', () => { // The current matcher is substring-based on href*="llms.txt" so any link with llms.txt triggers. // This test pins that behavior; if we tighten it later we should update. 
const html = 'T' - const result = analyzeAiReadableContent(buildContext(html)) + const result = analyzeAiAccessFiles(buildContext(html)) expect(result.score).toBe(10) expect(result.findings.some((f) => f.message.includes('HTML head links to llms.txt'))).toBe(true) }) it('flags info + recommendation when no head link is present', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml)) + const result = analyzeAiAccessFiles(buildContext(bareHtml)) expect(result.findings.some((f) => f.type === 'info' && f.message.includes('No llms.txt link'))).toBe(true) expect(result.recommendations.some((r) => r.includes(''))).toBe(true) }) @@ -187,13 +187,13 @@ describe('head link to llms.txt', () => { describe('per-page Markdown source endpoint', () => { it('credits +10 and a found finding for a text/markdown alternate link', () => { const html = 'T' - const result = analyzeAiReadableContent(buildContext(html)) + const result = analyzeAiAccessFiles(buildContext(html)) expect(result.score).toBe(10) // only the markdown endpoint; all aux missing, no llms.txt link expect(result.findings.some((f) => f.type === 'found' && f.message.includes('Markdown source endpoint'))).toBe(true) }) it('credits +10 when the endpoint is advertised via a Link response header', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml, aux(), { + const result = analyzeAiAccessFiles(buildContext(bareHtml, aux(), { link: '; rel="alternate"; type="text/markdown"', })) expect(result.score).toBe(10) @@ -201,7 +201,7 @@ describe('per-page Markdown source endpoint', () => { }) it('flags info + a spec-cited recommendation when no markdown endpoint is advertised', () => { - const result = analyzeAiReadableContent(buildContext(bareHtml)) + const result = analyzeAiAccessFiles(buildContext(bareHtml)) expect(result.findings.some((f) => f.type === 'info' && f.message.includes('Markdown source endpoint'))).toBe(true) expect(result.recommendations.some((r) => r.includes('text/markdown') && 
r.includes('specification.website'), @@ -213,7 +213,7 @@ describe('per-page Markdown source endpoint', () => { describe('fully ai-readable page', () => { it('reaches near-full score with every signal satisfied', () => { const html = 'T' - const result = analyzeAiReadableContent(buildContext(html, { + const result = analyzeAiAccessFiles(buildContext(html, { llmsTxt: { state: 'ok', body: 'word '.repeat(120) }, llmsFullTxt: { state: 'ok', body: 'word '.repeat(220) }, robotsTxt: { state: 'ok', body: 'User-agent: *' }, diff --git a/test/analyzers/legacy.test.ts b/test/analyzers/legacy.test.ts index 786db7c..8681401 100644 --- a/test/analyzers/legacy.test.ts +++ b/test/analyzers/legacy.test.ts @@ -3,7 +3,7 @@ import { load } from 'cheerio' import { parseJsonLdScripts, getVisibleText, extractSchemaTypes } from '../../src/analyzers/helpers.js' import { analyzeStructuredData } from '../../src/analyzers/structured-data.js' -import { analyzeAiReadableContent } from '../../src/analyzers/ai-readable-content.js' +import { analyzeAiAccessFiles } from '../../src/analyzers/ai-access-files.js' import { analyzeContentDepth } from '../../src/analyzers/content-depth.js' import { analyzeContentFreshness } from '../../src/analyzers/content-freshness.js' import { scoreFactors } from '../../src/scoring.js' @@ -42,7 +42,7 @@ test('ai-readable analyzer handles timeout as uncertain instead of hard-missing' }, }) - const result = analyzeAiReadableContent(context) + const result = analyzeAiAccessFiles(context) expect(result.score).toBeGreaterThan(0) expect(result.findings.some((finding) => finding.type === 'timeout')).toBe(true) }) @@ -145,8 +145,8 @@ test('scoring engine clamps factor scores and computes a weighted overall', () = recommendations: [], }, { - id: 'ai-readable-content', - name: 'AI-Readable Content', + id: 'ai-access-files', + name: 'AI Access Files (llms.txt, sitemap)', weight: 12, score: 20, findings: [], diff --git a/test/e2e/cli.test.ts b/test/e2e/cli.test.ts index 
7482b43..1506e6b 100644 --- a/test/e2e/cli.test.ts +++ b/test/e2e/cli.test.ts @@ -181,7 +181,7 @@ test('compiled CLI returns the expected JSON report for the fixture site', async assert.equal(report.overallScore, 76) assert.equal( report.summary, - 'Overall score 76/100. Strongest signals: AI-Readable Content, Schema Validity. Biggest opportunities: Schema Completeness, E-E-A-T Signals.', + 'Overall score 76/100. Strongest signals: AI Access Files (llms.txt, sitemap), Schema Validity. Biggest opportunities: Schema Completeness, E-E-A-T Signals.', ) assert.deepEqual(report.metadata, { fetchTimeMs: 0, @@ -203,7 +203,7 @@ test('compiled CLI returns the expected JSON report for the fixture site', async [ { id: 'structured-data', score: 78 }, { id: 'content-depth', score: 59 }, - { id: 'ai-readable-content', score: 100 }, + { id: 'ai-access-files', score: 100 }, { id: 'eeat-signals', score: 25 }, { id: 'faq-content', score: 82 }, { id: 'citations', score: 66 }, diff --git a/test/fetch-auxiliary.test.ts b/test/fetch-auxiliary.test.ts index d0c9aeb..e471e90 100644 --- a/test/fetch-auxiliary.test.ts +++ b/test/fetch-auxiliary.test.ts @@ -2,7 +2,7 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest' import { load } from 'cheerio' import { fetchPage } from '../src/fetch-page.js' -import { analyzeAiReadableContent } from '../src/analyzers/ai-readable-content.js' +import { analyzeAiAccessFiles } from '../src/analyzers/ai-access-files.js' import type { AuditContext } from '../src/types.js' // The fetch tests need to bypass the SSRF guard (which blocks loopback IPs). 
@@ -69,7 +69,7 @@ describe('fetchPage auxiliary diagnostics', () => { expect(page.auxiliary.llmsTxt?.state).toBe('ok') expect(page.auxiliary.llmsTxt?.diagnostics?.contentNegotiation).toBe(true) - const result = analyzeAiReadableContent(makeContext(page.auxiliary)) + const result = analyzeAiAccessFiles(makeContext(page.auxiliary)) expect(result.findings.some((f) => f.message.includes('content negotiation'))).toBe(true) expect(result.recommendations.some((r) => r.includes('Accept'))).toBe(true) }) @@ -137,7 +137,7 @@ describe('fetchPage auxiliary diagnostics', () => { expect(page.auxiliary.sitemapXml?.url).toBe(`${ORIGIN}/sitemap-index.xml`) expect(page.auxiliary.sitemapXml?.diagnostics?.contentNegotiation).toBe(true) - const result = analyzeAiReadableContent(makeContext(page.auxiliary)) + const result = analyzeAiAccessFiles(makeContext(page.auxiliary)) const negotiationFinding = result.findings.find((f) => f.message.includes('content negotiation')) expect(negotiationFinding?.message).toContain('/sitemap-index.xml') expect(negotiationFinding?.message).not.toContain('/sitemap.xml ') diff --git a/test/finding-codes.test.ts b/test/finding-codes.test.ts index 584dcf7..4b27818 100644 --- a/test/finding-codes.test.ts +++ b/test/finding-codes.test.ts @@ -5,7 +5,7 @@ import { readFileSync } from 'node:fs' // namespaced `<factor>.<signal>[.<state>]` (issue: agent-native finding codes). 
const ANALYZERS: Record = { 'structured-data.ts': 'structured-data', - 'ai-readable-content.ts': 'ai-readable-content', + 'ai-access-files.ts': 'ai-access-files', 'entity-consistency.ts': 'entity-consistency', 'content-depth.ts': 'content-depth', 'definition-blocks.ts': 'definition-blocks', diff --git a/test/static-audit.test.ts b/test/static-audit.test.ts index dd1e5ae..4164ecd 100644 --- a/test/static-audit.test.ts +++ b/test/static-audit.test.ts @@ -134,7 +134,7 @@ describe('runStaticAudit critical defects (issue #42)', () => { expect(topFix.affectedPages).toContain('https://example.com/') // The report carries a schema version so agent parsers can detect shape drift. - expect(result.report.schemaVersion).toBe('2.1') + expect(result.report.schemaVersion).toBe('3.0') }) })