joshribakoff · joshribakoff · Dec 14, 2025
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,7 @@
 node_modules/
 dist/
 .astro/
+.DS_Store
+
+# Generated audio files (large, regenerate locally)
+public/audio/
diff --git a/package.json b/package.json
@@ -9,6 +9,8 @@
     "test": "vitest run",
     "test:watch": "vitest",
     "lint:assets": "npx tsx scripts/lint-assets.ts",
+    "audio:generate": "npx tsx scripts/generate-audio.ts",
+    "audio:clean": "rm -rf public/audio",
     "prepare": "husky"
   },
   "lint-staged": {

diff --git a/scripts/audio/content-hash.ts b/scripts/audio/content-hash.ts
@@ -0,0 +1,37 @@
+/**
+ * Content hashing for smart audio regeneration.
+ * Hash is designed to ignore formatting changes but catch text changes.
+ */
+
+import { createHash } from 'crypto';
+
+/**
+ * Derive a short, stable fingerprint of speakable text for change detection.
+ *
+ * The input is canonicalized before hashing, so these differences yield the
+ * same hash:
+ * - Letter case ("Hello" vs "hello")
+ * - The amount or kind of whitespace between words (spaces, tabs, newlines)
+ * - Leading/trailing whitespace
+ *
+ * Any other change to the text is expected to change the hash. Markdown
+ * formatting is not interpreted here; presumably the caller strips it first.
+ *
+ * @returns The first 16 hex characters of the SHA-256 digest
+ */
+export function computeContentHash(speakableText: string): string {
+  const HEX_LENGTH = 16; // truncated for readability
+
+  // Canonical form: lowercase, whitespace runs -> one space, no outer whitespace
+  const lowered = speakableText.toLowerCase();
+  const singleSpaced = lowered.replace(/\s+/g, ' ');
+  const canonical = singleSpaced.trim();
+
+  // SHA-256 over the canonical text, hex-encoded
+  const hasher = createHash('sha256');
+  hasher.update(canonical);
+  const hexDigest = hasher.digest('hex');
+
+  return hexDigest.slice(0, HEX_LENGTH);
+}
diff --git a/scripts/audio/extract-text.ts b/scripts/audio/extract-text.ts
@@ -0,0 +1,82 @@
+/**
+ * Extract speakable text from markdown content.
+ * Strips formatting while preserving text that should be read aloud.
+ */
+
+export interface ExtractionResult {
+  title: string;          // frontmatter `title:` value without surrounding quotes; '' when absent
+  speakableText: string;  // document body with markup stripped
+}
+
+/**
+ * Extract speakable text from markdown/MDX content.
+ *
+ * Includes: paragraphs, headings, lists, bold/italic text, image alt text, link text
+ * Excludes: frontmatter, code blocks, inline code, HTML/JSX tags, MDX imports, URLs
+ * LF and CRLF input give the same result; underscores inside words are kept.
+ */
+export function extractSpeakableText(markdown: string): ExtractionResult {
+  // 0. Normalize line endings: every line-anchored pattern below assumes '\n'
+  let text = markdown.replace(/\r\n?/g, '\n');
+
+  // 1. Extract and remove frontmatter, capturing title
+  let title = '';
+  const frontmatterMatch = text.match(/^---\n([\s\S]*?)\n---/);
+  if (frontmatterMatch) {
+    const titleMatch = frontmatterMatch[1].match(/^title:\s*(.+)$/m);
+    if (titleMatch) {
+      // A YAML-quoted value ("..." or '...') keeps its text but loses the quotes
+      title = titleMatch[1].trim().replace(/^(["'])(.*)\1$/, '$2');
+    }
+    text = text.replace(/^---\n[\s\S]*?\n---\n?/, '');
+  }
+
+  // 2. Remove MDX import statements
+  text = text.replace(/^import\s+.*$/gm, '');
+
+  // 3. Remove fenced code blocks (```...```)
+  text = text.replace(/```[\s\S]*?```/g, '');
+
+  // 4. Remove inline code (`...`) entirely, as it's usually technical
+  text = text.replace(/`[^`]+`/g, '');
+
+  // 5. Extract image alt text: ![alt](url) -> alt
+  text = text.replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1');
+
+  // 6. Extract link text: [text](url) -> text
+  text = text.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
+
+  // 7. Remove MDX/JSX component tags but try to keep text content
+  // Remove self-closing tags: <Component ... />
+  text = text.replace(/<[A-Z][a-zA-Z]*\s[^>]*\/>/g, '');
+  // Remove opening/closing tags: <Component>...</Component>
+  text = text.replace(/<\/?[A-Z][a-zA-Z]*[^>]*>/g, '');
+
+  // 8. Remove HTML comments
+  text = text.replace(/<!--[\s\S]*?-->/g, '');
+
+  // 9. Remove remaining HTML tags
+  text = text.replace(/<[^>]+>/g, '');
+
+  // 10. Remove markdown heading markers (##, ###, etc.) but keep text
+  text = text.replace(/^#{1,6}\s+/gm, '');
+
+  // 11. Remove list markers (before emphasis, so "* " bullets aren't read as italics)
+  text = text.replace(/^[\s]*[-*+]\s+/gm, '');   // Unordered lists
+  text = text.replace(/^[\s]*\d+\.\s+/gm, '');   // Ordered lists
+
+  // 12. Remove bold/italic markers but keep text; "_" inside a word is literal
+  text = text.replace(/\*\*([^*]+)\*\*/g, '$1'); // **bold**
+  text = text.replace(/\*([^*]+)\*/g, '$1');     // *italic*
+  text = text.replace(/(?<![A-Za-z0-9])__([^_]+)__(?![A-Za-z0-9])/g, '$1'); // __bold__
+  text = text.replace(/(?<![A-Za-z0-9])_([^_]+)_(?![A-Za-z0-9])/g, '$1');   // _italic_
+
+  // 13. Remove blockquote markers
+  text = text.replace(/^>\s*/gm, '');
+
+  // 14. Normalize whitespace
+  text = text.replace(/\n{3,}/g, '\n\n');      // Max 2 newlines
+  text = text.replace(/[ \t]+/g, ' ').trim();  // Collapse spaces, trim ends
+
+  return { title, speakableText: text };
+}
diff --git a/scripts/audio/manifest.ts b/scripts/audio/manifest.ts
@@ -0,0 +1,116 @@
+/**
+ * Manifest for tracking generated audio files and their content hashes.
+ * Enables incremental regeneration - only regenerate when content changes.
+ */
+
+import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
+import { dirname } from 'path';
+import { getVoiceName } from './openai-wrapper.js';
+
+export interface ManifestEntry {
+  hash: string;      // content hash; needsRegeneration compares it with the current hash
+  duration: number;  // seconds
+  size: number;      // bytes
+  generatedAt: string;  // ISO-8601 timestamp, set by updateEntry
+  filename: string;  // e.g., "01-context.mp3"
+}
+
+export interface Manifest {
+  version: number;  // loadManifest returns an empty manifest when this differs from CURRENT_VERSION
+  voice: string;    // voice id; loadManifest returns an empty manifest when it differs from getVoiceName()
+  entries: Record<string, ManifestEntry>;  // keyed by the `key` passed to updateEntry
+}
+
+const MANIFEST_PATH = 'public/audio/manifest.json';  // relative path, so it resolves against the process cwd
+const CURRENT_VERSION = 1;  // compared with Manifest.version in loadManifest
+
+/**
+ * Load manifest from disk, or create empty one if it doesn't exist.
+ *
+ * An empty manifest is also returned (so every entry regenerates) when the file
+ * cannot be read or parsed, or was written for a different version or voice.
+ */
+export function loadManifest(): Manifest {
+  const currentVoice = getVoiceName();
+  const emptyManifest = (): Manifest => ({
+    version: CURRENT_VERSION,
+    voice: currentVoice,
+    entries: {},
+  });
+
+  if (!existsSync(MANIFEST_PATH)) {
+    return emptyManifest();
+  }
+
+  try {
+    const content = readFileSync(MANIFEST_PATH, 'utf-8');
+    const manifest = JSON.parse(content) as Manifest;
+
+    // Handle version migrations if needed in the future
+    if (manifest.version !== CURRENT_VERSION) {
+      console.log(`Manifest version mismatch (${manifest.version} -> ${CURRENT_VERSION}), regenerating all`);
+      return emptyManifest();
+    }
+
+    // If voice changed, regenerate all
+    if (manifest.voice !== currentVoice) {
+      console.log(`Voice changed (${manifest.voice} -> ${currentVoice}), regenerating all`);
+      return emptyManifest();
+    }
+
+    // JSON.parse output is unchecked: reject a manifest whose `entries` is not a
+    // plain object, otherwise later `manifest.entries[key]` accesses would fail.
+    const { entries } = manifest;
+    if (typeof entries !== 'object' || entries === null || Array.isArray(entries)) {
+      throw new Error('Manifest "entries" must be an object');
+    }
+
+    return manifest;
+  } catch (error) {
+    // Reached for read errors, invalid JSON, and the shape check above
+    console.warn('Failed to parse manifest, starting fresh:', error);
+    return emptyManifest();
+  }
+}
+
+/**
+ * Persist the manifest as pretty-printed JSON at MANIFEST_PATH.
+ * The parent directory is created first when it is missing.
+ */
+export function saveManifest(manifest: Manifest): void {
+  const targetDir = dirname(MANIFEST_PATH);
+  const dirMissing = !existsSync(targetDir);
+  if (dirMissing) mkdirSync(targetDir, { recursive: true });
+
+  const serialized = JSON.stringify(manifest, null, 2);
+  writeFileSync(MANIFEST_PATH, serialized);
+}
+
+/**
+ * Report whether audio for `key` is stale: true when the manifest has no entry
+ * for `key`, or when the entry's stored hash differs from `hash`.
+ */
+export function needsRegeneration(manifest: Manifest, key: string, hash: string): boolean {
+  const known = manifest.entries[key];
+  return !known || known.hash !== hash;
+}
+
+/**
+ * Store the result of a generation run under `key`, replacing any existing
+ * entry for that key. The manifest object is mutated in place; nothing is
+ * written to disk here. `generatedAt` is set to the current time (ISO-8601).
+ */
+export function updateEntry(
+  manifest: Manifest,
+  key: string,
+  hash: string,
+  duration: number,
+  size: number,
+  filename: string
+): void {
+  const generatedAt = new Date().toISOString();
+
+  // Key order here is the order the fields appear in the saved JSON
+  const record = { hash, duration, size, generatedAt, filename };
+  manifest.entries[key] = record;
+}
diff --git a/scripts/audio/openai-wrapper.ts b/scripts/audio/openai-wrapper.ts
@@ -0,0 +1,124 @@
+/**
+ * Wrapper for calling OpenAI TTS API from Node.js.
+ * Uses the tts-1 model with the 'alloy' voice.
+ *
+ * Requires OPENAI_API_KEY environment variable.
+ */
+
+import { mkdirSync, statSync, existsSync, writeFileSync } from 'fs';
+import { dirname, resolve } from 'path';
+
+// OpenAI TTS configuration
+const OPENAI_API_URL = 'https://api.openai.com/v1/audio/speech';  // endpoint generateAudio POSTs to
+const MODEL = 'tts-1';  // sent as `model`; also part of getVoiceName()
+const VOICE = 'alloy';  // sent as `voice`; also part of getVoiceName()
+const RESPONSE_FORMAT = 'mp3';  // sent as `response_format`
+
+export interface AudioResult {
+  duration: number;  // seconds (estimated from the file size, assuming 128 kbps)
+  size: number;      // bytes (size of the written file)
+}
+
+/**
+ * Read the OpenAI API key from the OPENAI_API_KEY environment variable.
+ * @returns the key when it is set to a non-empty value
+ * @throws Error with setup instructions when the variable is unset or empty
+ */
+function getApiKey(): string {
+  const { OPENAI_API_KEY: key } = process.env;
+  if (key) return key;
+
+  const help =
+    'OPENAI_API_KEY environment variable is not set.\n' +
+    'Set it with: export OPENAI_API_KEY=your-api-key';
+  throw new Error(help);
+}
+
+/**
+ * Generate audio file from text using OpenAI TTS API.
+ * Text longer than 4096 UTF-16 code units is truncated, with a console warning.
+ *
+ * @param text - The text to convert to speech
+ * @param outputPath - Where to save the audio file
+ * @returns Audio metadata (duration estimate, size)
+ * @throws Error if OPENAI_API_KEY is not set or the API answers with a non-OK status
+ */
+export async function generateAudio(text: string, outputPath: string): Promise<AudioResult> {
+  const apiKey = getApiKey();
+
+  // Ensure output directory exists
+  const outDir = dirname(outputPath);
+  if (!existsSync(outDir)) {
+    mkdirSync(outDir, { recursive: true });
+  }
+
+  // Resolve to absolute path for output
+  const absoluteOutputPath = resolve(process.cwd(), outputPath);
+
+  // OpenAI TTS has a limit of 4096 characters per request
+  // For longer texts, we'd need to chunk - but for now, truncate with warning
+  const MAX_CHARS = 4096;
+  let inputText = text;
+  if (text.length > MAX_CHARS) {
+    // Back off one code unit when the cut would land between the halves of a
+    // surrogate pair (e.g. an emoji); a dangling high surrogate is not valid text.
+    const lastKept = text.charCodeAt(MAX_CHARS - 1);
+    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff;
+    inputText = text.substring(0, splitsPair ? MAX_CHARS - 1 : MAX_CHARS);
+    console.warn(`  Warning: Text truncated from ${text.length} to ${inputText.length} chars`);
+  }
+
+  // Call OpenAI TTS API
+  const response = await fetch(OPENAI_API_URL, {
+    method: 'POST',
+    headers: {
+      'Authorization': `Bearer ${apiKey}`,
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({
+      model: MODEL,
+      voice: VOICE,
+      input: inputText,
+      response_format: RESPONSE_FORMAT,
+    }),
+  });
+
+  if (!response.ok) {
+    const errorBody = await response.text();
+    throw new Error(`OpenAI API error (${response.status}): ${errorBody}`);
+  }
+
+  // Buffer the audio bytes, write them to disk, then read the file size back
+  const buffer = Buffer.from(await response.arrayBuffer());
+  writeFileSync(absoluteOutputPath, buffer);
+  const stats = statSync(absoluteOutputPath);
+
+  // Estimate duration from MP3 file size, assuming a typical 128kbps stream:
+  // duration = size / (128000 / 8) = size / 16000. Only a rough estimate.
+  const estimatedDuration = stats.size / 16000;
+
+  return {
+    duration: Math.round(estimatedDuration * 100) / 100,
+    size: stats.size,
+  };
+}
+
+/**
+ * Report whether the OpenAI API can be used, i.e. OPENAI_API_KEY is set.
+ * When it is not, setup instructions are logged via console.error.
+ */
+export function checkOpenAIAvailable(): boolean {
+  const hasKey = Boolean(process.env.OPENAI_API_KEY);
+  if (!hasKey) {
+    console.error('OPENAI_API_KEY environment variable is not set.');
+    console.error('Set it with: export OPENAI_API_KEY=your-api-key');
+  }
+  return hasKey;
+}
+
+/**
+ * Build the voice identifier stored in the manifest: "openai-<MODEL>-<VOICE>".
+ */
+export function getVoiceName(): string {
+  return ['openai', MODEL, VOICE].join('-');
+}