Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions src/core/ai-index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ const baseConfig: ResolvedAeoConfig = {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down Expand Up @@ -137,6 +141,59 @@ describe('generateAIIndex', () => {
expect(entry?.keywords).not.toContain('ux');
});

it('should use configured max chunk length', () => {
  // Minimal view of the parsed index entries this test inspects.
  type ChunkEntry = { url: string; metadata: { chunkIndex: number } };

  // Every paragraph is longer than the 20-character limit configured below;
  // the test expects one index entry per paragraph.
  const paragraphs = [
    'First paragraph content.',
    'Second paragraph content.',
    'Third paragraph content.',
  ];

  const config: ResolvedAeoConfig = {
    ...baseConfig,
    aiIndex: { ...baseConfig.aiIndex, maxChunkLength: 20 },
    pages: [
      {
        pathname: '/chunked',
        title: 'Chunked',
        content: paragraphs.join('\n\n'),
      },
    ],
  };

  const allEntries: ChunkEntry[] = JSON.parse(generateAIIndex(config)).entries;

  // Keep only this page's entries and order them by chunk position so the
  // assertion does not depend on the order in which entries were emitted.
  const pageEntries = allEntries.filter(
    (candidate) => candidate.url === 'https://example.com/chunked',
  );
  pageEntries.sort(
    (left, right) => left.metadata.chunkIndex - right.metadata.chunkIndex,
  );
  const chunkIndexes = pageEntries.map((item) => item.metadata.chunkIndex);

  expect(pageEntries).toHaveLength(3);
  expect(chunkIndexes).toEqual([0, 1, 2]);
});

it('should use configured max keywords', () => {
  // Minimal view of the parsed index entries this test inspects.
  type KeywordEntry = { url: string; keywords: string[] };

  // "alpha" appears three times and "beta" twice; every other word appears
  // once. With the limit set to 2, only the two most frequent are expected.
  const keywordPage = {
    pathname: '/keywords',
    title: 'Keywords',
    content: 'alpha alpha alpha beta beta gamma delta epsilon',
  };

  const config: ResolvedAeoConfig = {
    ...baseConfig,
    aiIndex: { ...baseConfig.aiIndex, maxKeywords: 2 },
    pages: [keywordPage],
  };

  const allEntries: KeywordEntry[] = JSON.parse(generateAIIndex(config)).entries;
  const match = allEntries.find(
    (candidate) => candidate.url === 'https://example.com/keywords',
  );

  expect(match?.keywords).toEqual(['alpha', 'beta']);
});

it('should handle pages without content', () => {
const result = generateAIIndex(baseConfig);
const index = JSON.parse(result);
Expand Down
16 changes: 9 additions & 7 deletions src/core/ai-index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import { createHash } from 'crypto';
import type { ResolvedAeoConfig, AIIndexEntry } from '../types';
import { parseFrontmatter, extractTitle } from './utils';

function extractKeywords(content: string): string[] {
function extractKeywords(content: string, maxKeywords: number): string[] {
if (maxKeywords < 1) return [];

const words = content
.normalize('NFC')
.toLowerCase()
Expand All @@ -22,11 +24,11 @@ function extractKeywords(content: string): string[] {

return Object.entries(wordCount)
.sort((a, b) => b[1] - a[1])
.slice(0, 10)
.slice(0, maxKeywords)
.map(([word]) => word);
}

function chunkContent(content: string, maxLength: number = 2000): string[] {
function chunkContent(content: string, maxLength: number): string[] {
const chunks: string[] = [];
const paragraphs = content.split('\n\n');

Expand Down Expand Up @@ -66,9 +68,9 @@ function collectAIIndexEntries(dir: string, config: ResolvedAeoConfig, base: str
const urlPath = relativePath.replace(/\.mdx?$/, '');
const url = `${config.url}/${urlPath}`;

const chunks = chunkContent(mainContent);
const chunks = chunkContent(mainContent, config.aiIndex.maxChunkLength);
const title = frontmatter.title || extractTitle(mainContent);
const keywords = extractKeywords(mainContent);
const keywords = extractKeywords(mainContent, config.aiIndex.maxKeywords);

chunks.forEach((chunk, index) => {
const id = createHash('sha256')
Expand Down Expand Up @@ -115,8 +117,8 @@ export function generateAIIndex(config: ResolvedAeoConfig): string {
const content = page.content || '';

if (content) {
const chunks = chunkContent(content);
const keywords = extractKeywords(content);
const chunks = chunkContent(content, config.aiIndex.maxChunkLength);
const keywords = extractKeywords(content, config.aiIndex.maxKeywords);

chunks.forEach((chunk, index) => {
const id = createHash('sha256')
Expand Down
1 change: 1 addition & 0 deletions src/core/audit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ function makeConfig(overrides: Partial<ResolvedAeoConfig> = {}): ResolvedAeoConf
outDir: './out',
contentDir: '',
generators: { robotsTxt: true, llmsTxt: true, llmsFullTxt: true, rawMarkdown: true, manifest: true, sitemap: true, aiIndex: true, schema: true },
aiIndex: { maxChunkLength: 2000, maxKeywords: 10 },
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '/sitemap.xml' },
schema: {
enabled: true,
Expand Down
4 changes: 4 additions & 0 deletions src/core/generate-wrapper.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ const baseConfig: ResolvedAeoConfig = {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down
4 changes: 4 additions & 0 deletions src/core/llms-full.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ const baseConfig: ResolvedAeoConfig = {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down
4 changes: 4 additions & 0 deletions src/core/llms-txt.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ const baseConfig: ResolvedAeoConfig = {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down
4 changes: 4 additions & 0 deletions src/core/manifest.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ const baseConfig: ResolvedAeoConfig = {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down
4 changes: 4 additions & 0 deletions src/core/raw-markdown.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ const createConfig = (overrides = {}): ResolvedAeoConfig => ({
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down
1 change: 1 addition & 0 deletions src/core/report.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ function makeConfig(): ResolvedAeoConfig {
outDir: './out',
contentDir: '',
generators: { robotsTxt: true, llmsTxt: true, llmsFullTxt: true, rawMarkdown: true, manifest: true, sitemap: true, aiIndex: true, schema: true },
aiIndex: { maxChunkLength: 2000, maxKeywords: 10 },
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '/sitemap.xml' },
schema: { enabled: true, organization: { name: 'Test Co', url: 'https://test.com', logo: '', sameAs: [] }, defaultType: 'WebPage' },
og: { enabled: true, image: '', twitterHandle: '', type: 'website' },
Expand Down
6 changes: 5 additions & 1 deletion src/core/robots.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ describe('generateRobotsTxt', () => {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down Expand Up @@ -104,4 +108,4 @@ describe('generateRobotsTxt', () => {
expect(bingbotMatches.length).toBe(1)
expect(semrushMatches.length).toBe(1)
})
})
})
6 changes: 5 additions & 1 deletion src/core/sitemap.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ describe('generateSitemap', () => {
aiIndex: true,
schema: true,
},
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},
robots: { allow: ['/'], disallow: [], crawlDelay: 0, sitemap: '' },
widget: {
enabled: true,
Expand Down Expand Up @@ -281,4 +285,4 @@ describe('generateSitemap', () => {
expect(sitemap).toContain('<loc>https://example.com/docs/guide</loc>');
expect(sitemap).toContain('<loc>https://example.com/docs/api/reference</loc>');
});
});
});
15 changes: 15 additions & 0 deletions src/core/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ describe('utils', () => {
expect(result.url).toBe('https://example.com');
expect(result.generators.robotsTxt).toBe(true);
expect(result.generators.llmsTxt).toBe(true);
expect(result.aiIndex).toEqual({
maxChunkLength: 2000,
maxKeywords: 10,
});
expect(result.widget.enabled).toBe(true);
expect(result.widget.position).toBe('bottom-right');
});
Expand Down Expand Up @@ -54,6 +58,17 @@ describe('utils', () => {
expect(result.widget.theme.background).toBe('rgba(18, 18, 24, 0.9)');
});

it('should handle partial aiIndex config', () => {
  // Only maxKeywords is supplied; maxChunkLength must fall back to its default.
  const { aiIndex } = resolveConfig({ aiIndex: { maxKeywords: 5 } });

  expect(aiIndex.maxKeywords).toBe(5);
  expect(aiIndex.maxChunkLength).toBe(2000);
});

it('should resolve robots config', () => {
const result = resolveConfig({
robots: { disallow: ['/admin'], crawlDelay: 5 },
Expand Down
6 changes: 5 additions & 1 deletion src/core/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ export function resolveConfig(config: AeoConfig = {}): ResolvedAeoConfig {
aiIndex: config.generators?.aiIndex !== false,
schema: config.generators?.schema !== false,
},
aiIndex: {
maxChunkLength: config.aiIndex?.maxChunkLength ?? 2000,
maxKeywords: config.aiIndex?.maxKeywords ?? 10,
},
robots: {
allow: config.robots?.allow || ['/'],
disallow: config.robots?.disallow || [],
Expand Down Expand Up @@ -199,4 +203,4 @@ export function getAllMarkdownFiles(

scanDirectory(projectRoot);
return files;
}
}
10 changes: 9 additions & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ export interface AeoConfig {
aiIndex?: boolean;
schema?: boolean;
};
aiIndex?: {
maxChunkLength?: number;
maxKeywords?: number;
};
robots?: {
allow?: string[];
disallow?: string[];
Expand Down Expand Up @@ -77,6 +81,10 @@ export interface ResolvedAeoConfig {
aiIndex: boolean;
schema: boolean;
};
aiIndex: {
maxChunkLength: number;
maxKeywords: number;
};
robots: {
allow: string[];
disallow: string[];
Expand Down Expand Up @@ -172,4 +180,4 @@ export interface FrameworkInfo {
framework: FrameworkType;
contentDir: string;
outDir: string;
}
}
13 changes: 13 additions & 0 deletions website/src/content/docs/reference/configuration.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ export default defineConfig({
schema: true,
},

// Configure ai-index.json generation
aiIndex: {
maxChunkLength: 2000,
maxKeywords: 10,
},

// Customize robots.txt
robots: {
allow: ['/'],
Expand Down Expand Up @@ -103,6 +109,13 @@ export default defineConfig({
| `aiIndex` | `boolean` | `true` | Generate `ai-index.json` |
| `schema` | `boolean` | `true` | Generate JSON-LD structured data |

### `aiIndex`

| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `maxChunkLength` | `number` | `2000` | Maximum content length per `ai-index.json` chunk |
| `maxKeywords` | `number` | `10` | Maximum keywords extracted for each `ai-index.json` entry |

### `robots`

| Option | Type | Default | Description |
Expand Down
Loading