Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
node_modules/
dist/
.astro/
.DS_Store

# Generated audio files (large, regenerate locally)
public/audio/

# Piper TTS proof of concept (large model files)
piper-poc/
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
"test": "vitest run",
"test:watch": "vitest",
"lint:assets": "npx tsx scripts/lint-assets.ts",
"audio:generate": "npx tsx scripts/generate-audio.ts",
"audio:clean": "rm -rf public/audio",
"prepare": "husky"
},
"lint-staged": {
Expand Down
37 changes: 37 additions & 0 deletions scripts/audio/content-hash.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/**
* Content hashing for smart audio regeneration.
* Hash is designed to ignore formatting changes but catch text changes.
*/

import { createHash } from 'crypto';

/**
 * Produce a short, stable fingerprint of speakable text.
 *
 * The input is canonicalized before hashing so that purely cosmetic
 * differences do not change the fingerprint:
 * - letter case is ignored (everything is lowercased)
 * - every run of whitespace (spaces, tabs, newlines) counts as one space
 * - leading and trailing whitespace is ignored
 *
 * Any other difference in the text produces a different fingerprint.
 * Markdown formatting is not handled here; callers are expected to pass
 * text that has already been reduced to what should be spoken.
 *
 * @param speakableText - Plain text that will be read aloud.
 * @returns The first 16 hex characters of the SHA-256 digest of the
 *   canonicalized text.
 */
export function computeContentHash(speakableText: string): string {
  // 16 hex chars keeps manifest entries readable
  const HASH_LENGTH = 16;

  // Canonical form: lowercase words separated by exactly one space
  const words = speakableText.toLowerCase().trim().split(/\s+/);
  const canonical = words.join(' ');

  const digest = createHash('sha256').update(canonical).digest('hex');
  return digest.slice(0, HASH_LENGTH);
}
82 changes: 82 additions & 0 deletions scripts/audio/extract-text.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/**
* Extract speakable text from markdown content.
* Strips formatting while preserving text that should be read aloud.
*/

export interface ExtractionResult {
  /** Value of the frontmatter `title:` field (surrounding quotes removed), or '' when absent. */
  title: string;
  /** The document body reduced to plain text suitable for text-to-speech. */
  speakableText: string;
}

/**
 * Extract speakable text from markdown/MDX content.
 *
 * Includes: paragraphs, headings, lists, bold/italic text, image alt text, link text
 * Excludes: frontmatter, fenced and inline code, HTML/JSX tags, HTML comments,
 *           MDX imports, URLs, thematic breaks (`---`, `***`, `___`)
 *
 * Line endings are normalized to `\n`, so files saved with CRLF are handled
 * the same way as LF files.
 *
 * @param markdown - Raw markdown/MDX source, optionally starting with a
 *   `---` delimited frontmatter block.
 * @returns The frontmatter title (empty string if none) and the plain text
 *   to be read aloud.
 */
export function extractSpeakableText(markdown: string): ExtractionResult {
  // 0. Drop a leading BOM and normalize CRLF/CR to LF so the line-anchored
  //    patterns below (frontmatter in particular) match on every platform.
  let text = markdown.replace(/^\uFEFF/, '').replace(/\r\n?/g, '\n');

  // 1. Extract and remove frontmatter, capturing title
  let title = '';
  const frontmatterMatch = text.match(/^---\n([\s\S]*?)\n---/);
  if (frontmatterMatch) {
    const frontmatter = frontmatterMatch[1];
    // [ \t]* (not \s*) so an empty `title:` cannot capture the next line
    const titleMatch = frontmatter.match(/^title:[ \t]*(.+)$/m);
    if (titleMatch) {
      // YAML titles are commonly quoted; the quotes are not part of the title
      title = titleMatch[1].trim().replace(/^(["'])(.*)\1$/, '$2');
    }
    text = text.replace(/^---\n[\s\S]*?\n---\n?/, '');
  }

  // 2. Remove MDX import statements
  text = text.replace(/^import\s+.*$/gm, '');

  // 3. Remove fenced code blocks (```...```)
  text = text.replace(/```[\s\S]*?```/g, '');

  // 4. Remove inline code (`...`) entirely, as it is usually technical
  text = text.replace(/`[^`]+`/g, '');

  // 5. Extract image alt text: ![alt](url) -> alt
  text = text.replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1');

  // 6. Extract link text: [text](url) -> text
  text = text.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');

  // 7. Remove MDX/JSX component tags but keep the text between them
  text = text.replace(/<[A-Z][a-zA-Z]*\s[^>]*\/>/g, ''); // <Component ... />
  text = text.replace(/<\/?[A-Z][a-zA-Z]*[^>]*>/g, ''); // <Component> / </Component>

  // 8. Remove HTML comments
  text = text.replace(/<!--[\s\S]*?-->/g, '');

  // 9. Remove remaining HTML tags
  text = text.replace(/<[^>]+>/g, '');

  // 10. Remove blockquote markers (including nested `> >`). This runs before
  //     heading/list stripping so `> ## Title` and `> - item` are fully cleaned.
  text = text.replace(/^[ \t]*(?:>[ \t]*)+/gm, '');

  // 11. Remove thematic breaks (---, ***, ___, optionally spaced) so they are
  //     neither spoken nor mistaken for emphasis/list markers.
  text = text.replace(/^[ \t]*([-*_])(?:[ \t]*\1){2,}[ \t]*$/gm, '');

  // 12. Remove markdown heading markers (##, ###, etc.) but keep text
  text = text.replace(/^#{1,6}\s+/gm, '');

  // 13. Remove list markers. This must precede emphasis stripping: otherwise a
  //     `* ` bullet pairs with a later `*` and leaves a stray asterisk behind.
  //     [ \t] (not \s) keeps the blank line that separates a list from the
  //     preceding paragraph.
  text = text.replace(/^[ \t]*[-*+][ \t]+/gm, ''); // Unordered lists
  text = text.replace(/^[ \t]*\d+\.[ \t]+/gm, ''); // Ordered lists

  // 14. Remove bold/italic markers but keep text
  text = text.replace(/\*\*([^*]+)\*\*/g, '$1'); // **bold**
  text = text.replace(/\*([^*]+)\*/g, '$1'); // *italic*
  text = text.replace(/__([^_]+)__/g, '$1'); // __bold__
  // _italic_ only when the underscores are not inside a word, so identifiers
  // such as snake_case_name are left intact.
  text = text.replace(/(?<![\p{L}\p{N}])_([^_]+)_(?![\p{L}\p{N}])/gu, '$1');

  // 15. Normalize whitespace
  text = text.replace(/\n{3,}/g, '\n\n'); // Max 2 newlines
  text = text.replace(/[ \t]+/g, ' '); // Collapse spaces
  text = text.trim();

  return { title, speakableText: text };
}
104 changes: 104 additions & 0 deletions scripts/audio/manifest.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
* Manifest for tracking generated audio files and their content hashes.
* Enables incremental regeneration - only regenerate when content changes.
*/

import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
import { dirname } from 'path';

/** Metadata recorded for one generated audio file. */
export interface ManifestEntry {
// Content hash the audio was generated from; needsRegeneration compares against it.
hash: string;
duration: number; // seconds
size: number; // bytes
// Timestamp string; updateEntry writes `new Date().toISOString()` here.
generatedAt: string;
filename: string; // e.g., "01-context.wav"
}

/** On-disk structure of the audio manifest (serialized as JSON by saveManifest). */
export interface Manifest {
// Schema version; loadManifest discards manifests whose version differs from CURRENT_VERSION.
version: number;
// Voice identifier; new manifests are created with DEFAULT_VOICE.
voice: string;
// Entries indexed by a caller-chosen string key.
entries: Record<string, ManifestEntry>;
}

// Manifest location as a relative path (resolved by `fs` against the current working directory).
const MANIFEST_PATH = 'public/audio/manifest.json';
// Version written into newly created manifests and required of loaded ones.
const CURRENT_VERSION = 1;
// Voice name recorded in newly created manifests.
const DEFAULT_VOICE = 'en_US-lessac-medium';

/**
 * Build a fresh manifest with no entries, using the current version and
 * the default voice.
 */
function createEmptyManifest(): Manifest {
  return {
    version: CURRENT_VERSION,
    voice: DEFAULT_VOICE,
    entries: {},
  };
}

/**
 * Load manifest from disk, or create an empty one if it cannot be used.
 *
 * An empty manifest is returned when:
 * - the manifest file does not exist
 * - the file cannot be read or is not valid JSON
 * - the JSON is not an object, or its `entries` field is missing or not an
 *   object (so callers can always index `manifest.entries` safely)
 * - the stored `version` differs from `CURRENT_VERSION`
 *
 * This function does not throw for any of the cases above; problems are
 * logged and treated as "regenerate everything".
 *
 * @returns The manifest read from `MANIFEST_PATH`, or a fresh empty manifest.
 */
export function loadManifest(): Manifest {
  if (!existsSync(MANIFEST_PATH)) {
    return createEmptyManifest();
  }

  try {
    const content = readFileSync(MANIFEST_PATH, 'utf-8');
    // Treat parsed JSON as unknown until its shape has been checked
    const parsed: unknown = JSON.parse(content);

    if (typeof parsed !== 'object' || parsed === null) {
      throw new Error('manifest root is not a JSON object');
    }
    const manifest = parsed as Partial<Manifest>;

    // Handle version migrations if needed in the future
    if (manifest.version !== CURRENT_VERSION) {
      console.log(`Manifest version mismatch (${manifest.version} -> ${CURRENT_VERSION}), regenerating all`);
      return createEmptyManifest();
    }

    // Without a usable `entries` map, lookups in the manifest would crash
    const entries: unknown = manifest.entries;
    if (typeof entries !== 'object' || entries === null || Array.isArray(entries)) {
      throw new Error('manifest "entries" is missing or not an object');
    }

    return manifest as Manifest;
  } catch (error) {
    console.warn('Failed to parse manifest, starting fresh:', error);
    return createEmptyManifest();
  }
}

/**
 * Persist the manifest as pretty-printed JSON at `MANIFEST_PATH`.
 *
 * The parent directory is created (recursively) first when it is missing.
 *
 * @param manifest - The manifest to write.
 */
export function saveManifest(manifest: Manifest): void {
  const targetDir = dirname(MANIFEST_PATH);
  const dirMissing = !existsSync(targetDir);
  if (dirMissing) {
    mkdirSync(targetDir, { recursive: true });
  }

  // Two-space indentation keeps the file easy to inspect by hand
  const serialized = JSON.stringify(manifest, null, 2);
  writeFileSync(MANIFEST_PATH, serialized);
}

/**
 * Decide whether audio for `key` has to be generated again.
 *
 * @param manifest - Manifest to look the key up in.
 * @param key - Entry key.
 * @param hash - Content hash of the current text.
 * @returns `true` when the manifest has no entry for `key` or the stored
 *   hash differs from `hash`; `false` when the stored hash matches.
 */
export function needsRegeneration(manifest: Manifest, key: string, hash: string): boolean {
  const recorded = manifest.entries[key];
  const upToDate = recorded && recorded.hash === hash;
  return !upToDate;
}

/**
 * Record the result of a generation run in the manifest.
 *
 * Mutates `manifest` in place: the entry stored under `key` is replaced
 * (or created) and stamped with the current time as an ISO string.
 *
 * @param manifest - Manifest to mutate.
 * @param key - Entry key.
 * @param hash - Content hash the audio was generated from.
 * @param duration - Audio length in seconds.
 * @param size - File size in bytes.
 * @param filename - Name of the generated audio file.
 */
export function updateEntry(
  manifest: Manifest,
  key: string,
  hash: string,
  duration: number,
  size: number,
  filename: string
): void {
  const generatedAt = new Date().toISOString();
  const record = { hash, duration, size, generatedAt, filename };
  manifest.entries[key] = record;
}
91 changes: 91 additions & 0 deletions scripts/audio/piper-wrapper.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/**
* Wrapper for calling Piper TTS from Node.js.
* Uses the existing piper-poc setup with its venv and model.
*/

import { execFileSync, execSync } from 'child_process';
import { mkdirSync, statSync, writeFileSync, unlinkSync, existsSync } from 'fs';
import { tmpdir } from 'os';
import { dirname, resolve } from 'path';

// Absolute path to the piper-poc directory, resolved against the current
// working directory (so scripts are expected to run from the project root).
const PIPER_DIR = resolve(process.cwd(), 'piper-poc');

/** Metadata about a generated audio file, as returned by generateAudio. */
export interface AudioResult {
duration: number; // seconds
size: number; // bytes
}

/**
 * Generate audio file from text using Piper TTS.
 *
 * The text is written to a temporary file and handed to `tts.sh` inside
 * `PIPER_DIR`. The script is invoked directly with an argument array (no
 * shell), so paths containing quotes, spaces, `$` or backticks are passed
 * through verbatim. The temporary file is removed afterwards, whether or
 * not generation succeeded.
 *
 * Although declared `async`, the work is done with synchronous calls.
 *
 * @param text - The text to convert to speech
 * @param outputPath - Where to save the WAV file (resolved against the
 *   current working directory when relative)
 * @returns Audio metadata (duration, size)
 * @throws If the temp file cannot be written, `tts.sh` fails, or the output
 *   file cannot be read afterwards.
 */
export async function generateAudio(text: string, outputPath: string): Promise<AudioResult> {
  // Assumed output format of the TTS script: canonical 44-byte WAV header
  // followed by 16-bit mono PCM at 22050 Hz — confirm if the voice changes.
  const WAV_HEADER_BYTES = 44;
  const BYTES_PER_SAMPLE = 2;
  const SAMPLE_RATE_HZ = 22050;

  // Resolve to absolute path for output; the script runs with a different cwd
  const absoluteOutputPath = resolve(process.cwd(), outputPath);

  // Ensure output directory exists (recursive mkdir is a no-op if it does)
  mkdirSync(dirname(absoluteOutputPath), { recursive: true });

  // Write text to temp file to handle special characters safely
  const tempTextFile = resolve(
    tmpdir(),
    `tts-input-${Date.now()}-${Math.random().toString(36).slice(2)}.txt`
  );

  try {
    writeFileSync(tempTextFile, text, 'utf-8');

    // Call the tts.sh script without a shell so arguments are never re-parsed
    execFileSync(resolve(PIPER_DIR, 'tts.sh'), [tempTextFile, absoluteOutputPath], {
      cwd: PIPER_DIR,
      stdio: 'pipe',
    });

    // Get file stats
    const stats = statSync(absoluteOutputPath);

    // duration = samples / rate; clamp so a truncated file never reports
    // a negative duration
    const dataBytes = Math.max(0, stats.size - WAV_HEADER_BYTES);
    const samples = dataBytes / BYTES_PER_SAMPLE;
    const duration = samples / SAMPLE_RATE_HZ;

    return {
      duration: Math.round(duration * 100) / 100,
      size: stats.size,
    };
  } finally {
    // Cleanup temp file
    try {
      unlinkSync(tempTextFile);
    } catch {
      // Best-effort cleanup: the file may never have been created
    }
  }
}

/**
 * Verify that the files Piper needs are present under `PIPER_DIR`.
 *
 * Checks, in order, for the `venv` directory and the
 * `en_US-lessac-medium.onnx` model. The first missing item is reported on
 * stderr together with a hint, and no further checks are made.
 *
 * @returns `true` when both paths exist, otherwise `false`.
 */
export function checkPiperAvailable(): boolean {
  const requirements = [
    {
      location: resolve(PIPER_DIR, 'venv'),
      missing: 'Piper venv not found at:',
      hint: 'Run the piper-poc setup first.',
    },
    {
      location: resolve(PIPER_DIR, 'en_US-lessac-medium.onnx'),
      missing: 'Piper model not found at:',
      hint: 'Download the model first.',
    },
  ];

  for (const { location, missing, hint } of requirements) {
    if (existsSync(location)) {
      continue;
    }
    console.error(missing, location);
    console.error(hint);
    return false;
  }

  return true;
}
Loading