diff --git a/packages/lighthouse/NotFound.astro b/packages/lighthouse/NotFound.astro new file mode 100644 index 0000000..5b201f6 --- /dev/null +++ b/packages/lighthouse/NotFound.astro @@ -0,0 +1,233 @@ +--- +/** + * NotFound - Drop-in 404 page component with fuzzy matching + * + * @example + * ```astro + * --- + * import NotFound from '@sailkit/lighthouse/NotFound.astro'; + * import { getAllCollectionsSorted } from '../utils/collections'; + * + * const collections = await getAllCollectionsSorted(); + * const pages = collections.flatMap(c => + * c.entries.map(e => ({ + * url: `/${c.name}/${e.slug}/`, + * title: e.data.title, + * section: c.displayName + * })) + * ); + * --- + * <NotFound pages={pages} /> + * ``` + */ + +export interface Props { + /** List of valid pages to match against */ + pages: Array<{ url: string; title: string; section?: string }>; + /** Score threshold for auto-redirect (default: 0.6) */ + autoRedirectThreshold?: number; + /** Maximum suggestions to show (default: 5) */ + maxSuggestions?: number; + /** Delay before auto-redirect in ms (default: 1500) */ + redirectDelay?: number; + /** Custom class for container */ + class?: string; +} + +const { + pages, + autoRedirectThreshold = 0.6, + maxSuggestions = 5, + redirectDelay = 1500, + class: className = '' +} = Astro.props; + +// Serialize config for client-side script +const clientConfig = { + pages, + autoRedirectThreshold, + maxSuggestions, + redirectDelay, +}; +--- +
+
404
+

Page Not Found

+

+ The page you're looking for doesn't exist or may have been moved. +

+ +
+

+ + Found a match! Redirecting to ... +

+
+ +
+

Did you mean one of these?

+
    + +
+
+ +
+ Looking for similar pages... +
+ +
+ + Go Home + +
+
+ + diff --git a/packages/lighthouse/README.md b/packages/lighthouse/README.md index a4da5cf..c2ab2f3 100644 --- a/packages/lighthouse/README.md +++ b/packages/lighthouse/README.md @@ -55,7 +55,8 @@ const defaultMatcher = createCompositeMatcher([ ```astro // src/pages/404.astro --- -import { NotFound } from 'astro-lighthouse'; +import NotFound from '@sailkit/lighthouse/NotFound.astro'; +import Layout from '../layouts/Layout.astro'; const pages = posts.map(p => ({ url: `/posts/${p.slug}/`, @@ -63,11 +64,18 @@ const pages = posts.map(p => ({ section: 'Posts' })); --- - + + + + Go Home + Browse Posts + + + ``` ## Behavior diff --git a/packages/lighthouse/package.json b/packages/lighthouse/package.json new file mode 100644 index 0000000..c9e4274 --- /dev/null +++ b/packages/lighthouse/package.json @@ -0,0 +1,39 @@ +{ + "name": "@sailkit/lighthouse", + "version": "0.1.0", + "description": "404 recovery with fuzzy matching and auto-redirect", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.js", + "types": "./dist/index.d.ts" + }, + "./NotFound.astro": "./NotFound.astro", + "./styles.css": "./styles.css" + }, + "files": [ + "dist", + "NotFound.astro", + "styles.css" + ], + "scripts": { + "build": "tsc", + "test": "vitest run", + "test:watch": "vitest", + "typecheck": "tsc --noEmit" + }, + "devDependencies": { + "typescript": "^5.3.0", + "vitest": "^2.0.0" + }, + "keywords": [ + "404", + "fuzzy-matching", + "levenshtein", + "redirect", + "astro" + ], + "license": "MIT" +} diff --git a/packages/lighthouse/src/core.test.ts b/packages/lighthouse/src/core.test.ts new file mode 100644 index 0000000..180ebb8 --- /dev/null +++ b/packages/lighthouse/src/core.test.ts @@ -0,0 +1,79 @@ +import { describe, it, expect } from 'vitest'; +import { findMatches, shouldAutoRedirect } from './core.js'; +import type { Page, ScoredPage } from './types.js'; + +const pages: Page[] = [ + { url: 
'/concepts/hallucination/', title: 'Hallucination', section: 'Concepts' }, + { url: '/concepts/context-collapse/', title: 'Context Collapse', section: 'Concepts' }, + { url: '/patterns/context-management/', title: 'Context Management', section: 'Patterns' }, + { url: '/failure-modes/lost-in-middle/', title: 'Lost in the Middle', section: 'Failure Modes' }, + { url: '/', title: 'Home' }, +]; + +describe('findMatches', () => { + it('returns matches sorted by score', () => { + const matches = findMatches('/concepts/hallucination/', pages); + expect(matches.length).toBeGreaterThan(0); + expect(matches[0].url).toBe('/concepts/hallucination/'); + expect(matches[0].score).toBe(1); + }); + + it('finds content that moved sections', () => { + // Same slug, different section + const matches = findMatches('/old-section/hallucination/', pages); + expect(matches.length).toBeGreaterThan(0); + expect(matches[0].url).toBe('/concepts/hallucination/'); + }); + + it('filters by threshold', () => { + const matches = findMatches('/xyz123/', pages, { threshold: 0.5 }); + // Very different path should have low scores + expect(matches.length).toBe(0); + }); + + it('respects maxResults', () => { + const matches = findMatches('/context/', pages, { maxResults: 2 }); + expect(matches.length).toBeLessThanOrEqual(2); + }); + + it('uses custom matcher', () => { + const customMatcher = { score: () => 0.99 }; + const matches = findMatches('/anything/', pages, { matcher: customMatcher }); + expect(matches.every((m) => m.score === 0.99)).toBe(true); + }); +}); + +describe('shouldAutoRedirect', () => { + it('returns false for no matches', () => { + expect(shouldAutoRedirect([])).toBe(false); + }); + + it('returns true for single match', () => { + const matches: ScoredPage[] = [{ url: '/test/', title: 'Test', score: 0.3 }]; + expect(shouldAutoRedirect(matches)).toBe(true); + }); + + it('returns true for high score clear winner', () => { + const matches: ScoredPage[] = [ + { url: '/test1/', title: 'Test 
/**
 * Find the pages that best match a requested (missing) path.
 *
 * Every page is scored with `config.matcher` (default: `defaultMatcher`).
 * Pages whose score is not strictly greater than `config.threshold`
 * (default: 0.15) are discarded, and the survivors are returned best-first,
 * capped at `config.maxResults` (default: 5).
 *
 * @param requestedPath - The path the visitor asked for.
 * @param pages - Candidate pages; neither the array nor its items are mutated.
 * @param config - Optional matcher, threshold and result-limit overrides.
 * @returns New objects (page fields plus `score`), highest score first.
 */
export function findMatches(
  requestedPath: string,
  pages: Page[],
  config: FindMatchesConfig = {}
): ScoredPage[] {
  const { matcher = defaultMatcher, threshold = 0.15, maxResults = 5 } = config;

  // A negative limit would make `slice` trim from the END of the list
  // instead of returning nothing, so clamp it at zero.
  const limit = Math.max(0, maxResults);

  return (
    pages
      .map((page): ScoredPage => ({ ...page, score: matcher.score(requestedPath, page) }))
      // Filter BEFORE sorting: `NaN > threshold` is false, so a matcher that
      // returns NaN is dropped here and can never reach the comparator, where
      // it would make the ordering of the valid results unpredictable.
      .filter((candidate) => candidate.score > threshold)
      // Array.prototype.sort is stable, so ties keep their input order.
      .sort((a, b) => b.score - a.score)
      .slice(0, limit)
  );
}
/**
 * Decide whether the visitor should be redirected automatically.
 *
 * `matches` is read positionally: `matches[0]` is treated as the best
 * candidate and `matches[1]` as the runner-up, i.e. the list is expected to be
 * sorted by score, highest first.
 *
 * Redirect when either:
 * 1. there is exactly one match (regardless of its score), or
 * 2. the best match scores above `autoRedirectThreshold` AND beats the
 *    runner-up by more than `clearWinnerMargin`.
 *
 * @param matches - Scored candidates, best first.
 * @param autoRedirectThreshold - Minimum (exclusive) score for a "strong" match. Default 0.6.
 * @param clearWinnerMargin - How far (exclusive) the best score must exceed the
 *   runner-up's to count as a clear winner. Default 0.2.
 * @returns `true` if an automatic redirect to `matches[0]` is warranted.
 */
export function shouldAutoRedirect(
  matches: ScoredPage[],
  autoRedirectThreshold: number = 0.6,
  clearWinnerMargin: number = 0.2
): boolean {
  if (matches.length === 0) return false;

  // A lone candidate is unambiguous, so it is accepted without a score check.
  if (matches.length === 1) return true;

  const [best, runnerUp] = matches;

  const isStrong = best.score > autoRedirectThreshold;
  const isClearWinner = best.score > runnerUp.score + clearWinnerMargin;

  return isStrong && isClearWinner;
}
strings', () => { + expect(levenshteinDistance('abc', 'xyz')).toBe(3); + }); + + it('handles empty strings', () => { + expect(levenshteinDistance('', 'hello')).toBe(5); + expect(levenshteinDistance('hello', '')).toBe(5); + expect(levenshteinDistance('', '')).toBe(0); + }); +}); + +describe('levenshteinMatcher', () => { + const page: Page = { url: '/concepts/context-collapse/', title: 'Context Collapse' }; + + it('scores identical paths as 1', () => { + expect(levenshteinMatcher.score('/concepts/context-collapse/', page)).toBe(1); + }); + + it('scores similar paths highly', () => { + const score = levenshteinMatcher.score('/concepts/context-collaps/', page); + expect(score).toBeGreaterThan(0.9); + }); + + it('scores very different paths low', () => { + const score = levenshteinMatcher.score('/foo/bar/', page); + expect(score).toBeLessThan(0.5); + }); +}); + +describe('exactSlugMatcher', () => { + const page: Page = { url: '/concepts/hallucination/', title: 'Hallucination' }; + + it('scores exact slug match as 1', () => { + const score = exactSlugMatcher.score('/old-section/hallucination/', page); + expect(score).toBe(1); + }); + + it('scores partial slug match as 0.5', () => { + const score = exactSlugMatcher.score('/concepts/hallucinations/', page); + expect(score).toBe(0.5); + }); + + it('scores no slug match as 0', () => { + const score = exactSlugMatcher.score('/concepts/context/', page); + expect(score).toBe(0); + }); +}); + +describe('tokenOverlapMatcher', () => { + const page: Page = { url: '/concepts/context-collapse/', title: 'Context Collapse' }; + + it('scores high when tokens overlap', () => { + const score = tokenOverlapMatcher.score('/context-collapse/', page); + expect(score).toBeGreaterThan(0.5); + }); + + it('scores partial token matches', () => { + const score = tokenOverlapMatcher.score('/collapse/', page); + expect(score).toBeGreaterThan(0); + }); + + it('scores zero for no overlap', () => { + const score = tokenOverlapMatcher.score('/foo/bar/', 
page); + expect(score).toBe(0); + }); +}); + +describe('createCompositeMatcher', () => { + it('combines matchers with weights', () => { + const page: Page = { url: '/concepts/test/', title: 'Test' }; + + const composite = createCompositeMatcher([ + { matcher: { score: () => 1 }, weight: 0.5 }, + { matcher: { score: () => 0 }, weight: 0.5 }, + ]); + + expect(composite.score('/test/', page)).toBe(0.5); + }); + + it('normalizes weights', () => { + const page: Page = { url: '/test/', title: 'Test' }; + + const composite = createCompositeMatcher([ + { matcher: { score: () => 1 }, weight: 2 }, + { matcher: { score: () => 0 }, weight: 2 }, + ]); + + expect(composite.score('/test/', page)).toBe(0.5); + }); +}); + +describe('defaultMatcher', () => { + const pages: Page[] = [ + { url: '/concepts/hallucination/', title: 'Hallucination' }, + { url: '/patterns/context-management/', title: 'Context Management' }, + ]; + + it('prioritizes exact slug matches', () => { + // Slug matches exactly but section different + const score = defaultMatcher.score('/old/hallucination/', pages[0]); + expect(score).toBeGreaterThan(0.5); + }); + + it('scores similar URLs higher than different ones', () => { + const similar = defaultMatcher.score('/concepts/hallucinations/', pages[0]); + const different = defaultMatcher.score('/foo/bar/', pages[0]); + expect(similar).toBeGreaterThan(different); + }); +}); diff --git a/packages/lighthouse/src/matchers.ts b/packages/lighthouse/src/matchers.ts new file mode 100644 index 0000000..fa103f8 --- /dev/null +++ b/packages/lighthouse/src/matchers.ts @@ -0,0 +1,135 @@ +import type { Matcher, Page, CompositeMatcherConfig } from './types.js'; + +/** + * Calculate Levenshtein distance between two strings. + * Returns the number of single-character edits (insertions, deletions, substitutions) needed. 
/**
 * Calculate the Levenshtein distance between two strings: the minimum number
 * of single-character insertions, deletions and substitutions that turn `a`
 * into `b`. Characters are compared as UTF-16 code units.
 *
 * Only two rows of the dynamic-programming table are kept, so memory use is
 * proportional to the shorter string rather than to the product of both
 * lengths.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The edit distance (0 for identical strings).
 */
export function levenshteinDistance(a: string, b: string): number {
  // Fast paths: nothing to edit, or the answer is simply the other length.
  if (a === b) return 0;
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // The distance is symmetric, so lay the shorter string along the row to
  // keep the two working rows as small as possible.
  const [shorter, longer] = a.length <= b.length ? [a, b] : [b, a];
  const width = shorter.length;

  // previous[col] = distance between the first `col` chars of `shorter`
  // and the prefix of `longer` handled so far (initially the empty prefix).
  let previous = Array.from({ length: width + 1 }, (_, col) => col);
  let current = new Array<number>(width + 1).fill(0);

  for (let row = 1; row <= longer.length; row++) {
    current[0] = row;
    for (let col = 1; col <= width; col++) {
      const cost = shorter[col - 1] === longer[row - 1] ? 0 : 1;
      current[col] = Math.min(
        current[col - 1] + 1, // skip a char of `shorter`
        previous[col] + 1, // skip a char of `longer`
        previous[col - 1] + cost // substitute (or keep when equal)
      );
    }
    // Reuse the old row as scratch space for the next iteration.
    [previous, current] = [current, previous];
  }

  return previous[width];
}

/**
 * Levenshtein-based similarity matcher.
 *
 * Compares the requested path with the page URL case-insensitively and maps
 * the edit distance to a similarity: `1 - distance / longerLength`, so
 * identical strings score 1. Two empty strings also score 1.
 */
export const levenshteinMatcher: Matcher = {
  score(requestedPath: string, page: Page): number {
    const wanted = requestedPath.toLowerCase();
    const actual = page.url.toLowerCase();

    const longest = Math.max(wanted.length, actual.length);
    if (longest === 0) return 1;

    return 1 - levenshteinDistance(wanted, actual) / longest;
  },
};
/** Token separator shared by paths and titles: any run of non-alphanumerics. */
const TOKEN_SEPARATOR = /[^a-z0-9]+/;

/** Lowercase `text` and split it into non-empty alphanumeric tokens. */
function tokenize(text: string): string[] {
  return text.toLowerCase().split(TOKEN_SEPARATOR).filter(Boolean);
}

/** Last non-empty `/`-separated segment of `path`, lowercased (undefined if none). */
function lastSegment(path: string): string | undefined {
  return path.toLowerCase().split('/').filter(Boolean).pop();
}

/**
 * Exact slug matcher.
 *
 * Compares the last path segment (slug) of the requested path with that of
 * the page URL, case-insensitively:
 * - 1   when the slugs are identical (e.g. content moved to another section),
 * - 0.5 when one slug contains the other,
 * - 0   otherwise, including when either path has no segments (e.g. `/`).
 */
export const exactSlugMatcher: Matcher = {
  score(requestedPath: string, page: Page): number {
    const requestedSlug = lastSegment(requestedPath);
    const pageSlug = lastSegment(page.url);

    if (!requestedSlug || !pageSlug) return 0;
    if (requestedSlug === pageSlug) return 1;

    const overlaps = pageSlug.includes(requestedSlug) || requestedSlug.includes(pageSlug);
    return overlaps ? 0.5 : 0;
  },
};

/**
 * Token overlap matcher.
 *
 * Returns the fraction of tokens in the requested path that match some token
 * of the page's URL or title. Two tokens match when either contains the other.
 * A request with no tokens (e.g. `/`) scores 0.
 *
 * The title is tokenized exactly like the paths (split on non-alphanumerics),
 * so punctuation such as "FAQ:" or "(beta)" does not stay glued to a word and
 * block the substring comparison. Request tokens are ASCII alphanumerics by
 * construction, so stripping other characters from the title cannot remove a
 * match that was previously possible.
 *
 * NOTE(review): because containment is checked in both directions, a very
 * short page token (e.g. "a", "in") matches any request token containing it.
 */
export const tokenOverlapMatcher: Matcher = {
  score(requestedPath: string, page: Page): number {
    const requestedTokens = tokenize(requestedPath);
    if (requestedTokens.length === 0) return 0;

    const pageTokens = [...tokenize(page.url), ...tokenize(page.title)];

    const matched = requestedTokens.filter((wanted) =>
      pageTokens.some((have) => have.includes(wanted) || wanted.includes(have))
    );

    return matched.length / requestedTokens.length;
  },
};
/**
 * Create a composite matcher that blends several matchers by weight.
 *
 * Weights are relative: they are divided by their sum, so `[2, 2]` behaves
 * like `[0.5, 0.5]`. The combined score is clamped to the range 0..1.
 *
 * If the weights do not add up to a positive, finite number (all zero, an
 * empty list, NaN, ...), no meaningful normalization exists. Every strategy
 * then contributes nothing and the composite scores 0, instead of dividing by
 * zero and producing NaN for every page.
 *
 * @param strategies - Matchers paired with their relative weights.
 * @returns A matcher whose score is the weighted average of the given matchers.
 */
export function createCompositeMatcher(strategies: CompositeMatcherConfig[]): Matcher {
  const totalWeight = strategies.reduce((sum, strategy) => sum + strategy.weight, 0);
  const canNormalize = Number.isFinite(totalWeight) && totalWeight > 0;

  // Pre-compute the normalized weights once; scoring runs once per page.
  const normalized = strategies.map((strategy) => ({
    ...strategy,
    weight: canNormalize ? strategy.weight / totalWeight : 0,
  }));

  return {
    score(requestedPath: string, page: Page): number {
      let blended = 0;
      for (const { matcher, weight } of normalized) {
        blended += matcher.score(requestedPath, page) * weight;
      }
      // Upper clamp absorbs floating-point overshoot; lower clamp keeps a
      // negative weight from producing a score below the 0..1 range.
      return Math.max(0, Math.min(blended, 1));
    },
  };
}

/**
 * Default matcher optimized for 404 recovery.
 * Prioritizes exact slug matches (content moved), then Levenshtein similarity
 * and token overlap in equal measure.
 */
export const defaultMatcher = createCompositeMatcher([
  { matcher: exactSlugMatcher, weight: 0.6 },
  { matcher: levenshteinMatcher, weight: 0.2 },
  { matcher: tokenOverlapMatcher, weight: 0.2 },
]);
+ */ +export interface FindMatchesConfig { + /** Custom matcher to use (default: defaultMatcher) */ + matcher?: Matcher; + /** Minimum score threshold (default: 0.15) */ + threshold?: number; + /** Maximum number of matches to return (default: 5) */ + maxResults?: number; +} + +/** + * Configuration for creating a composite matcher. + */ +export interface CompositeMatcherConfig { + /** The matcher to use */ + matcher: Matcher; + /** Relative weight of this matcher's contribution to the final score; createCompositeMatcher divides each weight by the sum of all weights, so they need not add up to 1 */ + weight: number; +} + +/** + * Configuration for the NotFound component. + */ +export interface NotFoundConfig { + /** List of valid pages to match against */ + pages: Page[]; + /** Score threshold for auto-redirect (default: 0.6) */ + autoRedirectThreshold?: number; + /** Maximum suggestions to show (default: 5) */ + maxSuggestions?: number; + /** Custom matcher (default: defaultMatcher) */ + matcher?: Matcher; + /** Delay before auto-redirect in ms (default: 1500) */ + redirectDelay?: number; +} diff --git a/packages/lighthouse/styles.css b/packages/lighthouse/styles.css new file mode 100644 index 0000000..11024ed --- /dev/null +++ b/packages/lighthouse/styles.css @@ -0,0 +1,147 @@ +/* Default styles for lighthouse 404 page */ +/* Can be overridden by importing site's design system */ + +.lighthouse-container { + text-align: center; + padding: 2rem 0; + max-width: 600px; + margin: 0 auto; +} + +.lighthouse-code { + font-size: 4rem; + font-weight: bold; + color: var(--lighthouse-accent, #6366f1); + margin-bottom: 0.5rem; +} + +.lighthouse-title { + font-size: 1.5rem; + color: var(--lighthouse-text, #1f2937); + margin-bottom: 1rem; +} + +.lighthouse-description { + color: var(--lighthouse-text-muted, #6b7280); + margin-bottom: 2rem; + line-height: 1.6; +} + +/* Redirect notice */ +.lighthouse-redirect { + background-color: var(--lighthouse-surface, #f3f4f6); + border: 1px solid var(--lighthouse-accent, #6366f1); + border-radius: 0.5rem; + padding: 1rem; + margin: 1.5rem 
auto; + max-width: 400px; +} + +.lighthouse-redirect p { + margin: 0; + color: var(--lighthouse-text, #1f2937); +} + +.lighthouse-redirect a { + color: var(--lighthouse-accent, #6366f1); +} + +/* Suggestions */ +.lighthouse-suggestions { + margin: 2rem auto; + max-width: 400px; + text-align: left; +} + +.lighthouse-suggestions-title { + font-size: 1rem; + color: var(--lighthouse-text, #1f2937); + margin-bottom: 1rem; + text-align: center; +} + +.lighthouse-suggestion-list { + list-style: none; + padding: 0; + margin: 0; +} + +.lighthouse-suggestion-item { + margin-bottom: 0.75rem; +} + +.lighthouse-suggestion-link { + display: block; + padding: 0.75rem 1rem; + background-color: var(--lighthouse-surface, #f3f4f6); + border: 1px solid var(--lighthouse-border, #e5e7eb); + border-radius: 0.5rem; + text-decoration: none; + transition: all 0.2s ease; +} + +.lighthouse-suggestion-link:hover { + border-color: var(--lighthouse-accent, #6366f1); + background-color: var(--lighthouse-accent-dim, #eef2ff); +} + +.lighthouse-suggestion-title { + color: var(--lighthouse-accent, #6366f1); + font-weight: 500; + display: block; +} + +.lighthouse-suggestion-section { + font-size: 0.75rem; + color: var(--lighthouse-text-muted, #6b7280); + margin-top: 0.25rem; + display: block; +} + +.lighthouse-suggestion-url { + font-size: 0.75rem; + color: var(--lighthouse-text-muted, #6b7280); + font-family: monospace; + opacity: 0.7; + display: block; + margin-top: 0.25rem; +} + +/* Spinner */ +.lighthouse-spinner { + display: inline-block; + width: 1rem; + height: 1rem; + border: 2px solid var(--lighthouse-border, #e5e7eb); + border-top-color: var(--lighthouse-accent, #6366f1); + border-radius: 50%; + animation: lighthouse-spin 0.8s linear infinite; + margin-right: 0.5rem; + vertical-align: middle; +} + +@keyframes lighthouse-spin { + to { + transform: rotate(360deg); + } +} + +/* Actions */ +.lighthouse-actions { + margin-top: 2rem; + display: flex; + gap: 1rem; + justify-content: center; + 
flex-wrap: wrap; +} + +/* Utility classes */ +.lighthouse-hidden { + display: none; +} + +.lighthouse-searching { + color: var(--lighthouse-text-muted, #6b7280); + text-align: center; + padding: 1rem; +} diff --git a/packages/lighthouse/tsconfig.json b/packages/lighthouse/tsconfig.json new file mode 100644 index 0000000..1a26890 --- /dev/null +++ b/packages/lighthouse/tsconfig.json @@ -0,0 +1,17 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "bundler", + "declaration": true, + "declarationMap": true, + "outDir": "./dist", + "rootDir": "./src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "**/*.test.ts"] +} diff --git a/packages/lighthouse/vitest.config.ts b/packages/lighthouse/vitest.config.ts new file mode 100644 index 0000000..6ec74ee --- /dev/null +++ b/packages/lighthouse/vitest.config.ts @@ -0,0 +1,7 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + include: ['src/**/*.test.ts'], + }, +});