joshribakoff · joshribakoff · Dec 14, 2025
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,10 @@
 node_modules/
 dist/
 .astro/
+.DS_Store
+
+# Generated audio files (large, regenerate locally)
+public/audio/
+
+# Piper TTS proof of concept (large model files)
+piper-poc/
diff --git a/package.json b/package.json
@@ -9,6 +9,8 @@
     "test": "vitest run",
     "test:watch": "vitest",
     "lint:assets": "npx tsx scripts/lint-assets.ts",
+    "audio:generate": "npx tsx scripts/generate-audio.ts",
+    "audio:clean": "rm -rf public/audio",
     "prepare": "husky"
   },
   "lint-staged": {

diff --git a/scripts/audio/content-hash.ts b/scripts/audio/content-hash.ts
@@ -0,0 +1,37 @@
+/**
+ * Content hashing for smart audio regeneration.
+ * Hash is designed to ignore formatting changes but catch text changes.
+ */
+
+import { createHash } from 'crypto';
+
+/**
+ * Compute a short, stable hash of already-extracted speakable text.
+ *
+ * This function itself only lowercases the input, collapses every whitespace
+ * run to one space and trims it. Markdown-level changes (heading level,
+ * bold/italic markers, images with empty alt text) leave the hash unchanged
+ * only if the caller has already stripped them from `speakableText`.
+ * Any other change to the text, including alt text and link text that the
+ * caller kept, produces a different hash; case-only edits do not.
+ *
+ * @param speakableText - Plain text to fingerprint.
+ * @returns The first 16 hex characters (64 bits) of the SHA-256 digest of
+ *   the normalized text.
+ */
+export function computeContentHash(speakableText: string): string {
+  // Normalize so cosmetic edits hash identically:
+  // - lowercase (case-only edits are treated as the same audio)
+  // - collapse every whitespace run, newlines included, to one space
+  // - trim leading/trailing whitespace
+  const normalized = speakableText
+    .toLowerCase()
+    .replace(/\s+/g, ' ')
+    .trim();
+
+  // Use SHA-256 in hex, truncated to 16 chars for readability
+  return createHash('sha256')
+    .update(normalized)
+    .digest('hex')
+    .substring(0, 16);
+}
diff --git a/scripts/audio/extract-text.ts b/scripts/audio/extract-text.ts
@@ -0,0 +1,82 @@
+/**
+ * Extract speakable text from markdown content.
+ * Strips formatting while preserving text that should be read aloud.
+ */
+
+export interface ExtractionResult {  // value returned by extractSpeakableText
+  title: string;  // frontmatter `title:` value, or '' when none was found
+  speakableText: string;  // markdown body with formatting stripped
+}
+
+/**
+ * Extract speakable text from markdown/MDX using ordered regex passes; returns the
+ * frontmatter title ('' if absent) plus the stripped body text.
+ * Keeps: paragraph, heading, list and blockquote text, bold/italic text, image alt text, link text.
+ * Drops: frontmatter, import lines, fenced and inline code, HTML/JSX tags, HTML comments, link/image URLs.
+ */
+export function extractSpeakableText(markdown: string): ExtractionResult {
+  let text = markdown;
+
+  // 1. Capture `title:` from a leading frontmatter block, then strip the block (LF line endings only; quotes around the title are kept)
+  let title = '';
+  const frontmatterMatch = text.match(/^---\n([\s\S]*?)\n---/);
+  if (frontmatterMatch) {
+    const frontmatter = frontmatterMatch[1];
+    const titleMatch = frontmatter.match(/^title:\s*(.+)$/m);
+    if (titleMatch) {
+      title = titleMatch[1].trim();
+    }
+    text = text.replace(/^---\n[\s\S]*?\n---\n?/, '');
+  }
+
+  // 2. Remove every line that starts with lowercase "import" followed by whitespace (MDX imports)
+  text = text.replace(/^import\s+.*$/gm, '');
+
+  // 3. Remove fenced code blocks (```...```), non-greedy so each fence pair is removed separately
+  text = text.replace(/```[\s\S]*?```/g, '');
+
+  // 4. Remove inline code (`...`) entirely, backticks and text alike,
+  // since inline code is usually technical
+  text = text.replace(/`[^`]+`/g, '');
+
+  // 5. Replace images with their alt text: ![alt](url) -> alt (an empty alt leaves nothing)
+  text = text.replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1');
+
+  // 6. Replace links with their text: [text](url) -> text
+  text = text.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
+
+  // 7. Remove MDX/JSX component tags (capitalized names); text between the tags is kept
+  // Self-closing tags that have whitespace after the name: <Component ... />
+  text = text.replace(/<[A-Z][a-zA-Z]*\s[^>]*\/>/g, '');
+  // Opening and closing tags only (<Component>, </Component>); this also catches <Component/>
+  text = text.replace(/<\/?[A-Z][a-zA-Z]*[^>]*>/g, '');
+
+  // 8. Remove HTML comments, including multi-line ones
+  text = text.replace(/<!--[\s\S]*?-->/g, '');
+
+  // 9. Remove any remaining <...> span (HTML tags, but also any other text sitting between '<' and '>')
+  text = text.replace(/<[^>]+>/g, '');
+
+  // 10. Strip leading heading markers (# through ######) but keep the heading text
+  text = text.replace(/^#{1,6}\s+/gm, '');
+
+  // 11. Remove bold/italic markers but keep text. NOTE(review): no word-boundary checks, so `_` pairs inside identifiers (snake_case_name) and `*` list bullets can be consumed too
+  text = text.replace(/\*\*([^*]+)\*\*/g, '$1'); // **bold**
+  text = text.replace(/\*([^*]+)\*/g, '$1');     // *italic*
+  text = text.replace(/__([^_]+)__/g, '$1');     // __bold__
+  text = text.replace(/_([^_]+)_/g, '$1');       // _italic_
+
+  // 12. Remove list markers. NOTE(review): the leading [\s]* also matches newlines, so blank lines before a list item are swallowed
+  text = text.replace(/^[\s]*[-*+]\s+/gm, '');   // Unordered lists
+  text = text.replace(/^[\s]*\d+\.\s+/gm, '');   // Ordered lists
+
+  // 13. Strip one leading '>' blockquote marker per line
+  text = text.replace(/^>\s*/gm, '');
+
+  // 14. Normalize whitespace
+  text = text.replace(/\n{3,}/g, '\n\n');  // Max 2 newlines
+  text = text.replace(/[ \t]+/g, ' ');      // Collapse spaces
+  text = text.trim();
+
+  return { title, speakableText: text };
+}
diff --git a/scripts/audio/manifest.ts b/scripts/audio/manifest.ts
@@ -0,0 +1,104 @@
+/**
+ * Manifest for tracking generated audio files and their content hashes.
+ * Enables incremental regeneration - only regenerate when content changes.
+ */
+
+import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
+import { dirname } from 'path';
+
+export interface ManifestEntry {  // one generated audio file
+  hash: string;  // content hash that needsRegeneration compares against
+  duration: number;  // seconds
+  size: number;      // bytes
+  generatedAt: string;  // ISO-8601 timestamp written by updateEntry
+  filename: string;  // e.g., "01-context.wav"
+}
+
+export interface Manifest {  // shape of the manifest.json document
+  version: number;  // schema version; a mismatch makes loadManifest discard all entries
+  voice: string;  // voice identifier (fresh manifests use DEFAULT_VOICE)
+  entries: Record<string, ManifestEntry>;  // looked up by the `key` passed to needsRegeneration/updateEntry
+}
+
+const MANIFEST_PATH = 'public/audio/manifest.json';  // relative path, so it resolves against the process working directory
+const CURRENT_VERSION = 1;  // manifests with any other version are discarded on load
+const DEFAULT_VOICE = 'en_US-lessac-medium';  // voice recorded in freshly created manifests
+
+/**
+ * Load the manifest from MANIFEST_PATH; a missing file, an unreadable or unparsable file, or a version mismatch each yield a fresh empty manifest.
+ */
+export function loadManifest(): Manifest {
+  if (!existsSync(MANIFEST_PATH)) {
+    return {
+      version: CURRENT_VERSION,
+      voice: DEFAULT_VOICE,
+      entries: {},
+    };
+  }
+
+  try {
+    const content = readFileSync(MANIFEST_PATH, 'utf-8');
+    const manifest = JSON.parse(content) as Manifest;  // NOTE(review): unchecked cast - the parsed shape (e.g. a missing `entries`) is not validated
+
+    // No migration logic yet: any version other than CURRENT_VERSION drops every entry so all audio regenerates
+    if (manifest.version !== CURRENT_VERSION) {
+      console.log(`Manifest version mismatch (${manifest.version} -> ${CURRENT_VERSION}), regenerating all`);
+      return {
+        version: CURRENT_VERSION,
+        voice: DEFAULT_VOICE,
+        entries: {},
+      };
+    }
+
+    return manifest;
+  } catch (error) {  // covers read errors as well as JSON.parse failures
+    console.warn('Failed to parse manifest, starting fresh:', error);
+    return {
+      version: CURRENT_VERSION,
+      voice: DEFAULT_VOICE,
+      entries: {},
+    };
+  }
+}
+
+/**
+ * Save the manifest to MANIFEST_PATH as 2-space-indented JSON, overwriting any existing file.
+ */
+export function saveManifest(manifest: Manifest): void {
+  // Create the parent directory (and any missing ancestors) if it does not exist yet
+  const dir = dirname(MANIFEST_PATH);
+  if (!existsSync(dir)) {
+    mkdirSync(dir, { recursive: true });
+  }
+
+  writeFileSync(MANIFEST_PATH, JSON.stringify(manifest, null, 2));
+}
+
+/**
+ * Check if content needs regeneration: true when `key` has no manifest entry or its stored hash differs from `hash`.
+ */
+export function needsRegeneration(manifest: Manifest, key: string, hash: string): boolean {
+  const entry = manifest.entries[key];
+  if (!entry) return true;  // no entry recorded for this key
+  return entry.hash !== hash;
+}
+
+/**
+ * Update manifest entry after generation: overwrites `manifest.entries[key]` in place (nothing is written to disk here) and stamps it with the current time.
+ */
+export function updateEntry(
+  manifest: Manifest,
+  key: string,
+  hash: string,
+  duration: number,
+  size: number,
+  filename: string
+): void {
+  manifest.entries[key] = {
+    hash,
+    duration,
+    size,
+    generatedAt: new Date().toISOString(),  // UTC, ISO-8601
+    filename,
+  };
+}
diff --git a/scripts/audio/piper-wrapper.ts b/scripts/audio/piper-wrapper.ts
@@ -0,0 +1,91 @@
+/**
+ * Wrapper for calling Piper TTS from Node.js.
+ * Uses the existing piper-poc setup with its venv and model.
+ */
+
+import { execSync } from 'child_process';
+import { mkdirSync, statSync, writeFileSync, unlinkSync, existsSync } from 'fs';
+import { dirname, resolve } from 'path';
+
+// Absolute path to piper-poc, resolved against the working directory at module load (assumed to be the project root)
+const PIPER_DIR = resolve(process.cwd(), 'piper-poc');
+
+export interface AudioResult {  // metadata returned by generateAudio
+  duration: number;  // seconds, rounded to 2 decimals
+  size: number;      // bytes, as reported by statSync
+}
+
+/**
+ * Generate a WAV file from text by running the tts.sh script in PIPER_DIR synchronously (despite the async signature).
+ * The text is handed over via a temp file under /tmp that is deleted afterwards, even on failure; a failing script makes execSync throw.
+ * @param text - The text to convert to speech
+ * @param outputPath - Where to save the WAV file; resolved against the working directory
+ * @returns File size in bytes and a duration estimated from that size (not read from the WAV header)
+ */
+export async function generateAudio(text: string, outputPath: string): Promise<AudioResult> {
+  // Ensure output directory exists (created recursively when missing)
+  const outDir = dirname(outputPath);
+  if (!existsSync(outDir)) {
+    mkdirSync(outDir, { recursive: true });
+  }
+
+  // Pass the text through a temp file so its content never goes through shell quoting
+  const tempTextFile = `/tmp/tts-input-${Date.now()}-${Math.random().toString(36).slice(2)}.txt`;
+
+  try {
+    writeFileSync(tempTextFile, text, 'utf-8');
+
+    // Absolute output path is required because the script runs with cwd set to PIPER_DIR
+    const absoluteOutputPath = resolve(process.cwd(), outputPath);
+
+    // Call the tts.sh script. NOTE(review): both paths are interpolated into a shell string, so a quote or $ in outputPath would break or inject into the command
+    execSync(`./tts.sh "${tempTextFile}" "${absoluteOutputPath}"`, {
+      cwd: PIPER_DIR,
+      stdio: 'pipe',  // script output is piped to this process rather than inherited by the terminal
+    });
+
+    // Get file stats (throws if the script did not produce the file)
+    const stats = statSync(absoluteOutputPath);
+
+    // Estimate duration from the file size instead of parsing the WAV header
+    // Assumes 22050 Hz, 16-bit mono: bytes = samples * 2, duration = samples / 22050 - TODO confirm against the model's output format
+    // Subtract 44 bytes, assuming a canonical 44-byte WAV header
+    const dataBytes = stats.size - 44;
+    const samples = dataBytes / 2;
+    const duration = samples / 22050;
+
+    return {
+      duration: Math.round(duration * 100) / 100,
+      size: stats.size,
+    };
+  } finally {
+    // Cleanup temp file on success and on failure alike
+    try {
+      unlinkSync(tempTextFile);
+    } catch {
+      // Ignore cleanup errors (best effort; the file may never have been written)
+    }
+  }
+}
+
+/**
+ * Check if Piper is set up: verifies that the venv directory and the en_US-lessac-medium.onnx model exist under PIPER_DIR, logging what is missing (tts.sh itself is not checked).
+ */
+export function checkPiperAvailable(): boolean {
+  const venvPath = resolve(PIPER_DIR, 'venv');
+  const modelPath = resolve(PIPER_DIR, 'en_US-lessac-medium.onnx');
+
+  if (!existsSync(venvPath)) {
+    console.error('Piper venv not found at:', venvPath);
+    console.error('Run the piper-poc setup first.');
+    return false;
+  }
+
+  if (!existsSync(modelPath)) {
+    console.error('Piper model not found at:', modelPath);
+    console.error('Download the model first.');
+    return false;
+  }
+
+  return true;
+}