Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
node_modules/
dist/
.astro/
.DS_Store

# Generated audio files (large, regenerate locally)
public/audio/
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
"test": "vitest run",
"test:watch": "vitest",
"lint:assets": "npx tsx scripts/lint-assets.ts",
"audio:generate": "npx tsx scripts/generate-audio.ts",
"audio:clean": "rm -rf public/audio",
"prepare": "husky"
},
"lint-staged": {
Expand Down
37 changes: 37 additions & 0 deletions scripts/audio/content-hash.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/**
* Content hashing for smart audio regeneration.
* Hash is designed to ignore formatting changes but catch text changes.
*/

import { createHash } from 'crypto';

/**
 * Derive a short, stable fingerprint for a piece of speakable text.
 *
 * The input is canonicalized before hashing, so the following do NOT change
 * the result:
 * - letter case
 * - the amount or kind of whitespace between words (spaces, tabs, newlines)
 * - leading/trailing whitespace
 *
 * Any other change to the characters of the text DOES change the result.
 * This function performs no markdown processing itself; insensitivity to
 * markup (heading levels, bold/italic, ...) relies on the caller passing
 * text that has already had its markup stripped.
 *
 * @param speakableText - Plain text that will be read aloud.
 * @returns The first 16 hex characters of the SHA-256 digest of the
 *   canonicalized text.
 */
export function computeContentHash(speakableText: string): string {
  // Canonical form: lowercase, single spaces between tokens, no outer padding.
  const canonical = speakableText.toLowerCase().replace(/\s+/g, ' ').trim();

  const hasher = createHash('sha256');
  hasher.update(canonical);

  // 16 hex chars (64 bits) keeps manifest entries readable.
  return hasher.digest('hex').slice(0, 16);
}
82 changes: 82 additions & 0 deletions scripts/audio/extract-text.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/**
* Extract speakable text from markdown content.
* Strips formatting while preserving text that should be read aloud.
*/

/** Result of running extractSpeakableText over one markdown/MDX document. */
export interface ExtractionResult {
// Title taken from the `title:` field of the document's frontmatter;
// empty string when there is no frontmatter or no title field.
title: string;
// Document body with markup removed and whitespace normalized.
speakableText: string;
}

/**
 * Extract speakable text from markdown/MDX content.
 *
 * Includes: paragraphs, headings, lists, bold/italic text, image alt text, link text
 * Excludes: frontmatter, fenced and inline code, HTML/JSX tags, HTML comments,
 * MDX import statements, link and image URLs
 *
 * Literal `<` / `>` characters in prose (e.g. "a < b") are preserved; only
 * sequences that look like a tag (`<` immediately followed by a letter, `/`,
 * `!` or `?`) are stripped.
 *
 * @param markdown - Raw markdown/MDX source, optionally starting with a
 *   `---` delimited frontmatter block. LF, CRLF and CR line endings are accepted.
 * @returns The frontmatter title (surrounding quotes removed; '' when absent)
 *   and the plain text to be read aloud, using LF line endings.
 */
export function extractSpeakableText(markdown: string): ExtractionResult {
  // 0. Normalize line endings so every line-anchored pattern below also works
  //    for files saved with CRLF (otherwise the frontmatter is never detected).
  let text = markdown.replace(/\r\n?/g, '\n');

  // 1. Extract and remove frontmatter, capturing title
  let title = '';
  const frontmatterMatch = /^---\n([\s\S]*?)\n---\n?/.exec(text);
  if (frontmatterMatch) {
    const frontmatter = frontmatterMatch[1];
    // [ \t]* (not \s*) so an empty `title:` cannot swallow the next line.
    const titleMatch = /^title:[ \t]*(.+)$/m.exec(frontmatter);
    if (titleMatch) {
      const rawTitle = titleMatch[1].trim();
      // YAML allows the value to be wrapped in matching quotes; drop them.
      const quoted = /^(["'])(.*)\1$/.exec(rawTitle);
      title = quoted ? quoted[2].trim() : rawTitle;
    }
    text = text.slice(frontmatterMatch[0].length);
  }

  // 2. Remove MDX import statements (single- or multi-line). The statement
  //    must end in a quoted module specifier, so a prose line that merely
  //    starts with the word "import" is left alone.
  text = text.replace(
    /^import\s+(?:[\w$*{},\s]+?\s+from\s+)?['"][^'"\n]+['"];?[ \t]*$/gm,
    ''
  );

  // 3. Remove fenced code blocks (```...```)
  text = text.replace(/```[\s\S]*?```/g, '');

  // 4. Remove inline code (`...`) entirely - it is usually technical and
  //    does not read well aloud.
  text = text.replace(/`[^`]+`/g, '');

  // 5. Extract image alt text: ![alt](url) -> alt
  text = text.replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1');

  // 6. Extract link text: [text](url) -> text
  text = text.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');

  // 7. Remove MDX/JSX component tags but keep the text between them
  //    Self-closing tags: <Component ... />
  text = text.replace(/<[A-Z][a-zA-Z]*\s[^>]*\/>/g, '');
  //    Opening/closing tags: <Component> / </Component>
  text = text.replace(/<\/?[A-Z][a-zA-Z]*[^>]*>/g, '');

  // 8. Remove HTML comments
  text = text.replace(/<!--[\s\S]*?-->/g, '');

  // 9. Remove remaining HTML tags. Requiring a tag-like character right
  //    after `<` keeps comparisons such as "a < b and c > d" intact.
  text = text.replace(/<[a-zA-Z\/!?][^>]*>/g, '');

  // 10. Remove markdown heading markers (##, ###, etc.) but keep text
  text = text.replace(/^#{1,6}\s+/gm, '');

  // 11. Remove bold/italic markers but keep text
  text = text.replace(/\*\*([^*]+)\*\*/g, '$1'); // **bold**
  text = text.replace(/\*([^*]+)\*/g, '$1'); // *italic*
  text = text.replace(/__([^_]+)__/g, '$1'); // __bold__
  text = text.replace(/_([^_]+)_/g, '$1'); // _italic_

  // 12. Remove list markers. Only spaces/tabs are consumed around the marker
  //     so the blank line separating a list from the previous paragraph
  //     survives.
  text = text.replace(/^[ \t]*[-*+][ \t]+/gm, ''); // Unordered lists
  text = text.replace(/^[ \t]*\d+\.[ \t]+/gm, ''); // Ordered lists

  // 13. Remove blockquote markers, including nested ones ("> > text")
  text = text.replace(/^(?:>[ \t]*)+/gm, '');

  // 14. Normalize whitespace
  text = text.replace(/\n{3,}/g, '\n\n'); // Max 2 newlines
  text = text.replace(/[ \t]+/g, ' '); // Collapse spaces
  text = text.trim();

  return { title, speakableText: text };
}
116 changes: 116 additions & 0 deletions scripts/audio/manifest.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/**
* Manifest for tracking generated audio files and their content hashes.
* Enables incremental regeneration - only regenerate when content changes.
*/

import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
import { dirname } from 'path';
import { getVoiceName } from './openai-wrapper.js';

/** Metadata recorded in the manifest for one generated audio file. */
export interface ManifestEntry {
// Content hash; needsRegeneration compares it against the current hash.
hash: string;
duration: number; // seconds
size: number; // bytes
// ISO-8601 timestamp; updateEntry sets it with new Date().toISOString().
generatedAt: string;
filename: string; // e.g., "01-context.mp3"
}

/** On-disk manifest structure, serialized as JSON by saveManifest. */
export interface Manifest {
// Schema version; loadManifest discards manifests whose version differs
// from CURRENT_VERSION.
version: number;
// Voice identifier; loadManifest discards the manifest when it differs from
// the value returned by getVoiceName().
voice: string;
// Entries looked up by string key. NOTE(review): the key is presumably one
// identifier per source document - confirm against the generator script.
entries: Record<string, ManifestEntry>;
}

// Manifest location. The path is relative, so Node's fs functions resolve it
// against the current working directory of the process.
const MANIFEST_PATH = 'public/audio/manifest.json';
// Manifest schema version written to new manifests; a stored manifest with
// any other version is discarded by loadManifest.
const CURRENT_VERSION = 1;

/** Build an empty manifest for `voice` at the current schema version. */
function createEmptyManifest(voice: string): Manifest {
  return {
    version: CURRENT_VERSION,
    voice,
    entries: {},
  };
}

/**
 * Load manifest from disk, or create an empty one if it cannot be reused.
 *
 * An empty manifest (which makes every entry regenerate) is returned when:
 * - the manifest file does not exist,
 * - the file cannot be read or is not valid JSON,
 * - the stored `version` differs from CURRENT_VERSION,
 * - the stored `voice` differs from the current voice, or
 * - the stored `entries` field is missing or is not a plain object.
 *
 * This function never throws for a bad manifest file; problems are logged
 * and treated as "start fresh".
 *
 * @returns The stored manifest when it is reusable, otherwise an empty one.
 */
export function loadManifest(): Manifest {
  const currentVoice = getVoiceName();

  if (!existsSync(MANIFEST_PATH)) {
    return createEmptyManifest(currentVoice);
  }

  try {
    const content = readFileSync(MANIFEST_PATH, 'utf-8');
    const parsed: unknown = JSON.parse(content);

    // JSON.parse can legally return null, a number, a string, ...
    if (typeof parsed !== 'object' || parsed === null) {
      throw new Error('manifest root is not an object');
    }
    const manifest = parsed as Partial<Manifest>;

    // Handle version migrations if needed in the future
    if (manifest.version !== CURRENT_VERSION) {
      console.log(`Manifest version mismatch (${manifest.version} -> ${CURRENT_VERSION}), regenerating all`);
      return createEmptyManifest(currentVoice);
    }

    // If voice changed, regenerate all
    if (manifest.voice !== currentVoice) {
      console.log(`Voice changed (${manifest.voice} -> ${currentVoice}), regenerating all`);
      return createEmptyManifest(currentVoice);
    }

    // Callers index into `entries` directly, so a missing or malformed field
    // must be rejected here instead of crashing later.
    const { entries } = manifest;
    if (typeof entries !== 'object' || entries === null || Array.isArray(entries)) {
      throw new Error('manifest "entries" is missing or not an object');
    }

    return manifest as Manifest;
  } catch (error) {
    console.warn('Failed to parse manifest, starting fresh:', error);
    return createEmptyManifest(currentVoice);
  }
}

/**
 * Persist the manifest as pretty-printed JSON at MANIFEST_PATH.
 *
 * The parent directory is created (recursively) first when it is missing.
 *
 * @param manifest - Manifest to serialize.
 */
export function saveManifest(manifest: Manifest): void {
  const targetDir = dirname(MANIFEST_PATH);
  const dirMissing = !existsSync(targetDir);

  if (dirMissing) {
    mkdirSync(targetDir, { recursive: true });
  }

  // Two-space indentation keeps the file human-readable.
  const serialized = JSON.stringify(manifest, null, 2);
  writeFileSync(MANIFEST_PATH, serialized);
}

/**
 * Decide whether the audio for `key` has to be generated again.
 *
 * @param manifest - Manifest to look the key up in.
 * @param key - Entry key.
 * @param hash - Hash of the current content.
 * @returns `true` when the manifest has no entry for `key` or the stored
 *   hash differs from `hash`; `false` when they match.
 */
export function needsRegeneration(manifest: Manifest, key: string, hash: string): boolean {
  const existing = manifest.entries[key];
  return !existing || existing.hash !== hash;
}

/**
 * Record (or overwrite) the manifest entry for a freshly generated file.
 *
 * Mutates `manifest` in place; nothing is written to disk here.
 *
 * @param manifest - Manifest to update.
 * @param key - Entry key.
 * @param hash - Content hash the audio was generated from.
 * @param duration - Audio duration in seconds.
 * @param size - File size in bytes.
 * @param filename - Name of the generated audio file.
 */
export function updateEntry(
  manifest: Manifest,
  key: string,
  hash: string,
  duration: number,
  size: number,
  filename: string
): void {
  // Timestamp the entry with the moment it is recorded.
  const generatedAt = new Date().toISOString();
  const record = { hash, duration, size, generatedAt, filename };
  manifest.entries[key] = record;
}
124 changes: 124 additions & 0 deletions scripts/audio/openai-wrapper.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/**
* Wrapper for calling OpenAI TTS API from Node.js.
* Uses the tts-1 model with the 'alloy' voice.
*
* Requires OPENAI_API_KEY environment variable.
*/

import { mkdirSync, statSync, existsSync, writeFileSync } from 'fs';
import { dirname, resolve } from 'path';

// OpenAI TTS configuration
// Endpoint that generateAudio sends its POST request to.
const OPENAI_API_URL = 'https://api.openai.com/v1/audio/speech';
// Model and voice sent in the request body. Both are also embedded in the
// string returned by getVoiceName().
const MODEL = 'tts-1';
const VOICE = 'alloy';
// Audio encoding requested from the API (sent as `response_format`).
const RESPONSE_FORMAT = 'mp3';

/** Metadata returned by generateAudio for the file it wrote. */
export interface AudioResult {
// Estimated from the file size (size / 16000), not measured from the audio.
duration: number; // seconds (estimated)
// Size of the written file as reported by statSync.
size: number; // bytes
}

/**
 * Read the OpenAI API key from the OPENAI_API_KEY environment variable.
 *
 * @returns The configured key.
 * @throws Error when the variable is unset or empty.
 */
function getApiKey(): string {
  const { OPENAI_API_KEY: configuredKey } = process.env;

  if (configuredKey) {
    return configuredKey;
  }

  throw new Error(
    'OPENAI_API_KEY environment variable is not set.\n' +
      'Set it with: export OPENAI_API_KEY=your-api-key'
  );
}

/**
 * Shorten `text` to at most `maxChars` UTF-16 code units without cutting a
 * word in half or splitting a surrogate pair. Returns `text` unchanged when
 * it already fits.
 */
function truncateForSpeech(text: string, maxChars: number): string {
  if (text.length <= maxChars) {
    return text;
  }

  let cut = text.slice(0, maxChars);

  // If the first dropped character is whitespace, the cut already falls on a
  // word boundary. Otherwise back up to the last whitespace, but only when
  // that sacrifices less than half of the budget (guards against inputs with
  // almost no whitespace).
  if (!/\s/.test(text.charAt(maxChars))) {
    const lastBreak = cut.search(/\s\S*$/);
    if (lastBreak >= maxChars / 2) {
      cut = cut.slice(0, lastBreak);
    }
  }

  // A hard cut may leave a dangling high surrogate, which is not valid text.
  const lastUnit = cut.charCodeAt(cut.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    cut = cut.slice(0, -1);
  }

  return cut;
}

/**
 * Generate audio file from text using OpenAI TTS API.
 *
 * The API accepts at most 4096 characters per request. Longer text is NOT
 * chunked: it is truncated (at a word boundary where possible) and a warning
 * is logged, so the tail of the text is missing from the audio.
 *
 * @param text - The text to convert to speech
 * @param outputPath - Where to save the audio file; relative paths are
 *   resolved against the current working directory. Missing parent
 *   directories are created.
 * @returns Audio metadata: file size in bytes and a duration estimate in
 *   seconds derived from the size.
 * @throws Error when OPENAI_API_KEY is not set, or when the API responds with
 *   a non-2xx status (the message contains the status and response body).
 */
export async function generateAudio(text: string, outputPath: string): Promise<AudioResult> {
  const apiKey = getApiKey();

  // Ensure output directory exists
  const outDir = dirname(outputPath);
  if (!existsSync(outDir)) {
    mkdirSync(outDir, { recursive: true });
  }

  // Resolve to absolute path for output
  const absoluteOutputPath = resolve(process.cwd(), outputPath);

  // OpenAI TTS has a limit of 4096 characters per request
  // For longer texts, we'd need to chunk - but for now, truncate with warning
  const MAX_CHARS = 4096;
  const inputText = truncateForSpeech(text, MAX_CHARS);
  if (inputText.length < text.length) {
    console.warn(` Warning: Text truncated from ${text.length} to ${inputText.length} chars`);
  }

  // Call OpenAI TTS API
  const response = await fetch(OPENAI_API_URL, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: MODEL,
      voice: VOICE,
      input: inputText,
      response_format: RESPONSE_FORMAT,
    }),
  });

  if (!response.ok) {
    const errorBody = await response.text();
    throw new Error(`OpenAI API error (${response.status}): ${errorBody}`);
  }

  // Get the audio data as a buffer
  const arrayBuffer = await response.arrayBuffer();
  const buffer = Buffer.from(arrayBuffer);

  // Write to file
  writeFileSync(absoluteOutputPath, buffer);

  // Get file stats
  const stats = statSync(absoluteOutputPath);

  // Estimate duration from MP3 file size
  // Typical MP3 at 128kbps: duration = size / (128000 / 8) = size / 16000
  // The real bitrate may differ, so treat this as an approximation only.
  const estimatedDuration = stats.size / 16000;

  return {
    // Rounded to two decimal places
    duration: Math.round(estimatedDuration * 100) / 100,
    size: stats.size,
  };
}

/**
 * Report whether an OpenAI API key is configured.
 *
 * Unlike a throwing check, this logs setup instructions to stderr and
 * returns a boolean so callers can exit gracefully.
 *
 * @returns `true` when OPENAI_API_KEY is set to a non-empty value,
 *   otherwise `false`.
 */
export function checkOpenAIAvailable(): boolean {
  if (process.env.OPENAI_API_KEY) {
    return true;
  }

  console.error('OPENAI_API_KEY environment variable is not set.');
  console.error('Set it with: export OPENAI_API_KEY=your-api-key');
  return false;
}

/**
 * Identifier of the TTS configuration in use, stored in the manifest.
 *
 * @returns A string of the form `openai-<model>-<voice>` built from the
 *   MODEL and VOICE constants.
 */
export function getVoiceName(): string {
  const parts = ['openai', MODEL, VOICE];
  return parts.join('-');
}
Loading