diff --git a/.gitignore b/.gitignore
index 9bbb966..88cc081 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,5 @@ yarn-error.log*
 .turbo
 todo.md
 plan.md
+eval/cache
+eval/results
diff --git a/eval/chunkers/ast.ts b/eval/chunkers/ast.ts
new file mode 100644
index 0000000..3794ce7
--- /dev/null
+++ b/eval/chunkers/ast.ts
@@ -0,0 +1,40 @@
+/**
+ * AST-aware chunker wrapper for evaluation
+ *
+ * Wraps the astchunk library for use in the evaluation harness.
+ * Uses the built-in contextualizedText for better embedding quality.
+ */
+
+import { chunk } from '../../src'
+
+/**
+ * Chunk a file using AST-aware chunking and return results
+ * in a format compatible with the evaluation harness
+ *
+ * @param filepath - Path to the file
+ * @param code - Source code content
+ * @param maxNws - Maximum NWS characters per chunk (default: 1500)
+ */
+export async function chunkFile(
+  filepath: string,
+  code: string,
+  maxNws: number = 1500,
+): Promise<
+  Array<{
+    id: string
+    text: string
+    startLine: number
+    endLine: number
+  }>
+> {
+  const chunks = await chunk(filepath, code, {
+    maxChunkSize: maxNws,
+  })
+
+  return chunks.map((c) => ({
+    id: `${filepath}:${c.lineRange.start}-${c.lineRange.end}`,
+    text: c.contextualizedText,
+    startLine: c.lineRange.start,
+    endLine: c.lineRange.end,
+  }))
+}
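+
+// Usage sketch (illustrative only; the path and line ranges are hypothetical):
+//   const results = await chunkFile('pkg/module.py', source)
+//   // → [{ id: 'pkg/module.py:1-38', text: '<contextualized text>', startLine: 1, endLine: 38 }]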
diff --git a/eval/debug_chunks.ts b/eval/debug_chunks.ts
new file mode 100644
index 0000000..26ea5b5
--- /dev/null
+++ b/eval/debug_chunks.ts
@@ -0,0 +1,77 @@
+import { readFileSync } from 'node:fs'
+import { join } from 'node:path'
+import { chunk } from '../src'
+import { chunkFixed } from './chunkers/fixed'
+
+// Check deepmind_tracr/tracr/craft/transformers.py
+// Assume we're looking for lines 100-150
+const testFile = join(
+  import.meta.dir,
+  'data/repoeval/repositories/function_level/deepmind_tracr/tracr/craft/transformers.py',
+)
+const code = readFileSync(testFile, 'utf-8')
+const targetStart = 100
+const targetEnd = 150
+
+console.log('File:', testFile)
+console.log('Target lines:', targetStart, '-', targetEnd)
+console.log('')
+
+function countNws(text: string): number {
+  let count = 0
+  for (let i = 0; i < text.length; i++) {
+    if (text.charCodeAt(i) > 32) count++
+  }
+  return count
+}
+
+function overlaps(
+  chunkStart: number,
+  chunkEnd: number,
+  tStart: number,
+  tEnd: number,
+): boolean {
+  return !(chunkEnd < tStart || chunkStart > tEnd)
+}
+
+for (const maxSize of [1500, 1800]) {
+  console.log(`\n=== Max chunk size: ${maxSize} ===`)
+
+  const astChunks = await chunk(testFile, code, { maxChunkSize: maxSize })
+  const fixedChunks = chunkFixed(code, maxSize)
+
+  console.log('\nAST chunks:')
+  for (const c of astChunks) {
+    const overlap = overlaps(
+      c.lineRange.start,
+      c.lineRange.end,
+      targetStart,
+      targetEnd,
+    )
+    console.log(
+      `  Lines ${c.lineRange.start}-${c.lineRange.end} (${countNws(c.text)} NWS) ${overlap ? '*** RELEVANT ***' : ''}`,
+    )
+  }
+
+  console.log('\nFixed chunks:')
+  for (const c of fixedChunks) {
+    const overlap = overlaps(c.startLine, c.endLine, targetStart, targetEnd)
+    console.log(
+      `  Lines ${c.startLine}-${c.endLine} (${c.nwsCount} NWS) ${overlap ? '*** RELEVANT ***' : ''}`,
+    )
+  }
+
+  const astRelevant = astChunks.filter((c) =>
+    overlaps(c.lineRange.start, c.lineRange.end, targetStart, targetEnd),
+  )
+  const fixedRelevant = fixedChunks.filter((c) =>
+    overlaps(c.startLine, c.endLine, targetStart, targetEnd),
+  )
+
+  console.log(
+    `\nRelevant chunks: AST=${astRelevant.length}, Fixed=${fixedRelevant.length}`,
+  )
+  console.log(
+    `Total chunks: AST=${astChunks.length}, Fixed=${fixedChunks.length}`,
+  )
+}
diff --git a/eval/download.ts b/eval/download.ts
new file mode 100644
index 0000000..c2e05af
--- /dev/null
+++ b/eval/download.ts
@@ -0,0 +1,149 @@
+/**
+ * Download RepoEval benchmark data
+ *
+ * Downloads:
+ * 1. Task datasets (queries, ground truth) from the Microsoft CodeT repo
+ * 2. Function-level Python repositories for chunking
+ */
+
+import { existsSync } from 'node:fs'
+import { mkdir, writeFile } from 'node:fs/promises'
+import { join } from 'node:path'
+
+const DATA_DIR = join(import.meta.dir, 'data', 'repoeval')
+const DATASETS_DIR = join(DATA_DIR, 'datasets')
+const REPOS_DIR = join(DATA_DIR, 'repositories', 'function_level')
+
+// Function-level repositories from RepoEval
+const REPOS_FUNCTION = [
+  'amazon-science_patchcore-inspection',
+  'deepmind_tracr',
+  'facebookresearch_omnivore',
+  'google_lightweight_mmm',
+  'lucidrains_imagen-pytorch',
+  'maxhumber_redframes',
+]
+
+async function downloadAndExtractZip(
+  url: string,
+  destDir: string,
+): Promise<void> {
+  console.log(`Downloading from ${url}...`)
+
+  const response = await fetch(url)
+  if (!response.ok) {
+    throw new Error(`Failed to download: ${response.statusText}`)
+  }
+
+  const arrayBuffer = await response.arrayBuffer()
+  const tempZipPath = join(destDir, '_temp.zip')
+
+  await mkdir(destDir, { recursive: true })
+  await writeFile(tempZipPath, new Uint8Array(arrayBuffer))
+
+  // Use the system unzip command
+  const proc = Bun.spawn(['unzip', '-o', '-q', tempZipPath, '-d', destDir], {
+    cwd: destDir,
+  })
+  await proc.exited
+
+  // Clean up temp file
+  await Bun.spawn(['rm', tempZipPath]).exited
+
+  console.log(`Extracted to ${destDir}`)
+}
+
+async function downloadDatasets(): Promise<void> {
+  if (existsSync(DATASETS_DIR)) {
+    console.log('Datasets already downloaded, skipping...')
+    return
+  }
+
+  const datasetsUrl =
+    'https://github.com/microsoft/CodeT/raw/main/RepoCoder/datasets/datasets.zip'
+  await downloadAndExtractZip(datasetsUrl, DATASETS_DIR)
+}
+
+async function downloadRepositories(): Promise<void> {
+  if (existsSync(REPOS_DIR)) {
+    console.log('Repositories already downloaded, skipping...')
+    return
+  }
+
+  // Using the cleaned version from Veronicium's fork
+  const reposUrl =
+    'https://github.com/Veronicium/repoeval_debug/raw/main/function_level.zip'
+  await downloadAndExtractZip(reposUrl, REPOS_DIR)
+}
+
+export interface RepoEvalTask {
+  prompt: string
+  metadata: {
+    task_id: string
+    ground_truth: string
+    fpath_tuple: string[]
+    line_no: number
+    lineno: number
+    context_start_lineno: number
+  }
+}
+
+export async function loadTasks(
+  contextLength: '1k' | '2k' | '4k' = '2k',
+): Promise<RepoEvalTask[]> {
+  const fileName = `function_level_completion_${contextLength}_context_codex.test.jsonl`
+  const filePath = join(DATASETS_DIR, fileName)
+
+  const content = await Bun.file(filePath).text()
+  const lines = content.trim().split('\n')
+
+  const tasks: RepoEvalTask[] = []
+  const repo2idx: Record<string, number> = {}
+
+  for (const line of lines) {
+    const task = JSON.parse(line) as RepoEvalTask
+
+    // Clean up task_id format
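+    // e.g. a raw id of the form "deepmind--tracr/idx" (hypothetical example)
+    // becomes "deepmind_tracr/0", "deepmind_tracr/1", ... numbered per repo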
+    const repo = task.metadata.task_id.replace('--', '_').split('/')[0]
+    if (!REPOS_FUNCTION.includes(repo)) continue
+
+    if (!(repo in repo2idx)) {
+      repo2idx[repo] = 0
+    }
+
+    task.metadata.task_id = task.metadata.task_id
+      .replace('--', '_')
+      .replace('idx', String(repo2idx[repo]))
+    task.metadata.line_no = task.metadata.lineno
+    repo2idx[repo]++
+
+    tasks.push(task)
+  }
+
+  return tasks
+}
+
+export function getReposDir(): string {
+  return REPOS_DIR
+}
+
+export function getRepos(): string[] {
+  return REPOS_FUNCTION
+}
+
+export async function download(): Promise<void> {
+  console.log('Downloading RepoEval benchmark data...\n')
+
+  await mkdir(DATA_DIR, { recursive: true })
+
+  await downloadDatasets()
+  await downloadRepositories()
+
+  console.log('\nDownload complete!')
+  console.log(`Data stored in: ${DATA_DIR}`)
+}
+
+// Run if executed directly
+if (import.meta.main) {
+  await download()
+}
diff --git a/eval/embeddings.ts b/eval/embeddings.ts
new file mode 100644
index 0000000..8e242e1
--- /dev/null
+++ b/eval/embeddings.ts
@@ -0,0 +1,205 @@
+/**
+ * OpenAI embeddings wrapper with disk caching
+ */
+
+import { createHash } from 'node:crypto'
+import { existsSync } from 'node:fs'
+import { mkdir, readFile, writeFile } from 'node:fs/promises'
+import { join } from 'node:path'
+import OpenAI from 'openai'
+
+const CACHE_DIR = join(import.meta.dir, 'cache', 'embeddings')
+const MODEL = 'text-embedding-3-small'
+const BATCH_SIZE = 100
+
+let client: OpenAI | null = null
+
+function getClient(): OpenAI {
+  if (!client) {
+    client = new OpenAI()
+  }
+  return client
+}
+
+/**
+ * Create a cache key from text content
+ */
+function cacheKey(text: string): string {
+  return createHash('sha256').update(text).digest('hex').slice(0, 16)
+}
+
+/**
+ * Get the cache file path for a text
+ */
+function cachePath(text: string): string {
+  const key = cacheKey(text)
+  // Use first 2 chars as a subdirectory to avoid too many files in one dir
+  return join(CACHE_DIR, key.slice(0, 2), `${key}.json`)
+}
+
+/**
+ * Try to load an embedding from the cache
+ */
+async function loadFromCache(text: string): Promise<number[] | null> {
+  const path = cachePath(text)
+  if (!existsSync(path)) {
+    return null
+  }
+  try {
+    const data = await readFile(path, 'utf-8')
+    return JSON.parse(data) as number[]
+  } catch {
+    return null
+  }
+}
+
+/**
+ * Save an embedding to the cache
+ */
+async function saveToCache(text: string, embedding: number[]): Promise<void> {
+  const path = cachePath(text)
+  const dir = join(path, '..')
+  await mkdir(dir, { recursive: true })
+  await writeFile(path, JSON.stringify(embedding))
+}
+
+/**
+ * Embed a batch of texts using the OpenAI API
+ */
+async function embedBatch(texts: string[]): Promise<number[][]> {
+  const openai = getClient()
+
+  // Filter out empty texts and track their indices
+  const nonEmptyTexts: string[] = []
+  const indexMap: number[] = []
+
+  for (let i = 0; i < texts.length; i++) {
+    const text = texts[i].trim()
+    if (text.length > 0) {
+      nonEmptyTexts.push(text)
+      indexMap.push(i)
+    }
+  }
+
+  if (nonEmptyTexts.length === 0) {
+    // Return zero vectors for all empty inputs
+    return texts.map(() => new Array(1536).fill(0))
+  }
+
+  const response = await openai.embeddings.create({
+    model: MODEL,
+    input: nonEmptyTexts,
+  })
+
+  // Sort by index to maintain order
+  const sorted = response.data.sort(
+    (a: { index: number }, b: { index: number }) => a.index - b.index,
+  )
+  const embeddings = sorted.map((d: { embedding: number[] }) => d.embedding)
+
+  // Map back to original indices, filling zeros for empty texts
+  const result: number[][] = texts.map(() => new Array(1536).fill(0))
+  for (let i = 0; i < indexMap.length; i++) {
+    result[indexMap[i]] = embeddings[i]
+  }
+
+  return result
+}
+
+/**
+ * Embed texts with caching
+ *
+ * @param texts - Array of texts to embed
+ * @param onProgress - Optional callback for progress updates
+ * @returns Array of embeddings (same order as input texts)
+ */
+export async function embedTexts(
+  texts: string[],
+  onProgress?: (done: number, total: number) => void,
+): Promise<number[][]> {
+  await mkdir(CACHE_DIR, { recursive: true })
+
+  const results: (number[] | null)[] = new Array(texts.length).fill(null)
+  const uncachedIndices: number[] = []
+  const uncachedTexts: string[] = []
+
+  // Check the cache for each text
+  for (let i = 0; i < texts.length; i++) {
+    const cached = await loadFromCache(texts[i])
+    if (cached) {
+      results[i] = cached
+    } else {
+      uncachedIndices.push(i)
+      uncachedTexts.push(texts[i])
+    }
+  }
+
+  const cachedCount = texts.length - uncachedTexts.length
+  if (cachedCount > 0) {
+    console.log(`  Found ${cachedCount}/${texts.length} embeddings in cache`)
+  }
+
+  // Embed uncached texts in batches
+  for (let i = 0; i < uncachedTexts.length; i += BATCH_SIZE) {
+    const batch = uncachedTexts.slice(i, i + BATCH_SIZE)
+    const batchIndices = uncachedIndices.slice(i, i + BATCH_SIZE)
+
+    const embeddings = await embedBatch(batch)
+
+    // Save to cache and store results
+    for (let j = 0; j < embeddings.length; j++) {
+      const originalIdx = batchIndices[j]
+      results[originalIdx] = embeddings[j]
+      await saveToCache(batch[j], embeddings[j])
+    }
+
+    if (onProgress) {
+      onProgress(
+        Math.min(i + BATCH_SIZE, uncachedTexts.length),
+        uncachedTexts.length,
+      )
+    }
+  }
+
+  return results as number[][]
+}
+
+/**
+ * Compute cosine similarity between two vectors
+ */
+export function cosineSimilarity(a: number[], b: number[]): number {
+  let dotProduct = 0
+  let normA = 0
+  let normB = 0
+
+  for (let i = 0; i < a.length; i++) {
+    dotProduct += a[i] * b[i]
+    normA += a[i] * a[i]
+    normB += b[i] * b[i]
+  }
+
+  return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB))
+}
+
+/**
+ * Find the top-k most similar items
+ *
+ * @param queryEmbedding - The query embedding
+ * @param corpusEmbeddings - Array of corpus embeddings
+ * @param k - Number of top results to return
+ * @returns Array of { index, score } sorted by score descending
+ */
+export function topK(
+  queryEmbedding: number[],
+  corpusEmbeddings: number[][],
+  k: number,
+): Array<{ index: number; score: number }> {
+  const scores = corpusEmbeddings.map((emb, idx) => ({
+    index: idx,
+    score: cosineSimilarity(queryEmbedding, emb),
+  }))
+
+  scores.sort((a, b) => b.score - a.score)
+
+  return scores.slice(0, k)
+}
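+
+// Usage sketch (hypothetical call site): embed a corpus plus a query, then rank.
+//   const [queryEmb, ...docEmbs] = await embedTexts([query, ...docTexts])
+//   const hits = topK(queryEmb, docEmbs, 10) // → [{ index, score }, ...] by score desc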
diff --git a/eval/run.ts b/eval/run.ts
new file mode 100644
index 0000000..8c05d74
--- /dev/null
+++ b/eval/run.ts
@@ -0,0 +1,414 @@
+/**
+ * RepoEval Retrieval Evaluation Runner
+ *
+ * Compares AST-aware chunking vs fixed-size chunking on code retrieval.
+ *
+ * Usage:
+ *   bun eval/run.ts
+ */
+
+import { readdirSync, statSync } from 'node:fs'
+import { mkdir, readFile, writeFile } from 'node:fs/promises'
+import { join } from 'node:path'
+import { chunkFile as chunkWithAST } from './chunkers/ast'
+import { chunkFile as chunkWithFixed } from './chunkers/fixed'
+import {
+  download,
+  getRepos,
+  getReposDir,
+  loadTasks,
+  type RepoEvalTask,
+} from './download'
+import { embedTexts, topK } from './embeddings'
+import { aggregateMetrics, computeMetrics } from './metrics'
+
+const RESULTS_DIR = join(import.meta.dir, 'results')
+const K_VALUES = [5, 10] // Top-k values for retrieval
+const MAX_CHUNK_SIZE = 1500 // NWS characters per chunk
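+
+// Assumed contract of './metrics' (the module itself is not part of this diff):
+//   computeMetrics(retrievedIds: string[], relevantSet: Set<string>, k: number)
+//     → { precision, recall, ndcg } with precision = hits@k / k,
+//       recall = hits@k / |relevant|, ndcg = DCG@k / IDCG@k (binary gains)
+//   aggregateMetrics(perQuery: MetricsAtK[]) → mean of each field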
+
+interface ChunkInfo {
+  id: string
+  text: string
+  startLine: number
+  endLine: number
+  filepath: string
+}
+
+interface MetricsAtK {
+  precision: number
+  recall: number
+  ndcg: number
+}
+
+interface QueryResult {
+  taskId: string
+  prompt: string
+  groundTruthLines: { start: number; end: number }
+  groundTruthFile: string
+  retrievedChunks: Array<{ id: string; score: number; rank: number }>
+  relevantChunkIds: string[]
+  metrics: Record<number, MetricsAtK> // metrics per k value
+}
+
+interface EvalResult {
+  chunker: 'ast' | 'fixed'
+  repo: string
+  summary: Record<number, MetricsAtK> // summary per k value
+  queryResults: QueryResult[]
+  config: { kValues: number[]; maxChunkSize: number }
+  timestamp: string
+}
+
+/**
+ * Recursively find all Python files in a directory
+ */
+function findPythonFiles(dir: string): string[] {
+  const files: string[] = []
+
+  function walk(currentDir: string) {
+    const entries = readdirSync(currentDir)
+    for (const entry of entries) {
+      const fullPath = join(currentDir, entry)
+      const stat = statSync(fullPath)
+      if (stat.isDirectory()) {
+        walk(fullPath)
+      } else if (entry.endsWith('.py')) {
+        files.push(fullPath)
+      }
+    }
+  }
+
+  walk(dir)
+  return files
+}
+
+/**
+ * Check whether a chunk overlaps a line range
+ */
+function chunksOverlap(
+  chunk: { startLine: number; endLine: number },
+  target: { start: number; end: number },
+): boolean {
+  return !(chunk.endLine < target.start || chunk.startLine > target.end)
+}
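+
+// e.g. a chunk at lines 90-120 overlaps a target of 100-150 (shares 100-120),
+// while a chunk at 151-180 does not; both bounds are inclusive.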
+
+/**
+ * Run evaluation for a single repository
+ */
+async function evaluateRepo(
+  repo: string,
+  tasks: RepoEvalTask[],
+  chunkerType: 'ast' | 'fixed',
+): Promise<EvalResult> {
+  console.log(`\n  Evaluating ${repo} with ${chunkerType} chunker...`)
+
+  const repoDir = join(getReposDir(), repo)
+  const pyFiles = findPythonFiles(repoDir)
+  console.log(`  Found ${pyFiles.length} Python files`)
+
+  // Step 1: Chunk all files
+  console.log('  Chunking files...')
+  const allChunks: ChunkInfo[] = []
+
+  for (const filepath of pyFiles) {
+    const code = await readFile(filepath, 'utf-8')
+    const relPath = filepath.replace(`${repoDir}/`, '')
+
+    try {
+      const chunks =
+        chunkerType === 'ast'
+          ? await chunkWithAST(filepath, code, MAX_CHUNK_SIZE)
+          : await chunkWithFixed(filepath, code, MAX_CHUNK_SIZE)
+
+      for (const chunk of chunks) {
+        allChunks.push({
+          ...chunk,
+          filepath: relPath,
+        })
+      }
+    } catch (err) {
+      // Skip files that can't be parsed
+      console.log(`  Warning: Failed to chunk ${relPath}: ${err}`)
+    }
+  }
+
+  console.log(`  Created ${allChunks.length} chunks`)
+
+  // Step 2: Embed all chunks
+  console.log('  Embedding chunks...')
+  const chunkTexts = allChunks.map((c) => c.text)
+  const chunkEmbeddings = await embedTexts(chunkTexts, (done, total) => {
+    process.stdout.write(`\r  Embedding chunks: ${done}/${total}`)
+  })
+  console.log('')
+
+  // Step 3: Embed queries and retrieve
+  console.log('  Embedding queries and retrieving...')
+  const queryTexts = tasks.map((t) => t.prompt)
+  const queryEmbeddings = await embedTexts(queryTexts)
+
+  // Step 4: For each query, retrieve top-k and compute metrics
+  const queryResults: QueryResult[] = []
+
+  // Debug: show sample filepaths from chunks
+  const sampleFilepaths = [...new Set(allChunks.map((c) => c.filepath))].slice(
+    0,
+    5,
+  )
+  if (tasks.length > 0) {
+    console.log(
+      `  Debug: Sample chunk filepaths: ${sampleFilepaths.join(', ')}`,
+    )
+    console.log(
+      `  Debug: Sample task fpath_tuple: ${tasks[0].metadata.fpath_tuple.join('/')}`,
+    )
+    console.log(
+      `  Debug: Target file (after slice): ${tasks[0].metadata.fpath_tuple.slice(1).join('/')}`,
+    )
+  }
+
+  const maxK = Math.max(...K_VALUES)
+
+  for (let i = 0; i < tasks.length; i++) {
+    const task = tasks[i]
+    const queryEmb = queryEmbeddings[i]
+
+    // Get top-k chunks (use max k to get all we need)
+    const topKResults = topK(queryEmb, chunkEmbeddings, maxK)
+
+    // Determine ground truth: chunks that overlap with the target location
+    // fpath_tuple is ["repo_name", "path", "to", "file.py"]; skip the first element
+    const targetFile = task.metadata.fpath_tuple.slice(1).join('/')
+    const targetLines = {
+      start: task.metadata.context_start_lineno,
+      end: task.metadata.line_no,
+    }
+
+    // Find all chunks that are relevant (overlap with ground truth)
+    const relevantChunkIds = allChunks
+      .filter((c) => c.filepath === targetFile && chunksOverlap(c, targetLines))
+      .map((c) => c.id)
+
+    // Debug first query
+    if (i === 0) {
+      console.log(`  Debug first query:`)
+      console.log(`    Target file: "${targetFile}"`)
+      console.log(`    Target lines: ${targetLines.start}-${targetLines.end}`)
+      console.log(`    Relevant chunks found: ${relevantChunkIds.length}`)
+      console.log(
+        `    Top retrieved chunk: ${allChunks[topKResults[0]?.index]?.filepath}`,
+      )
+    }
+
+    const relevantSet = new Set(relevantChunkIds)
+
+    // Get retrieved chunk IDs
+    const retrievedIds = topKResults.map((r) => allChunks[r.index].id)
+
+    // Compute metrics for each k value
+    const metrics: Record<number, MetricsAtK> = {}
+    for (const k of K_VALUES) {
+      metrics[k] = computeMetrics(retrievedIds, relevantSet, k)
+    }
+
+    queryResults.push({
+      taskId: task.metadata.task_id,
+      prompt: `${task.prompt.slice(0, 200)}...`, // Truncate for readability
+      groundTruthLines: targetLines,
+      groundTruthFile: targetFile,
+      retrievedChunks: topKResults.map((r, rank) => ({
+        id: allChunks[r.index].id,
+        score: r.score,
+        rank: rank + 1,
+      })),
+      relevantChunkIds,
+      metrics,
+    })
+  }
+
+  // Aggregate metrics for each k value
+  const summary: Record<number, MetricsAtK> = {}
+  for (const k of K_VALUES) {
+    summary[k] = aggregateMetrics(queryResults.map((q) => q.metrics[k]))
+  }
+
+  return {
+    chunker: chunkerType,
+    repo,
+    summary,
+    queryResults,
+    config: { kValues: K_VALUES, maxChunkSize: MAX_CHUNK_SIZE },
+    timestamp: new Date().toISOString(),
+  }
+}
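+
+// Note: the relevant set is chunker-relative — any chunk overlapping the
+// ground-truth span counts, so the two chunkers can have different relevant
+// counts (and thus recall denominators) for the same query.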
+
+/**
+ * Format metrics as a table row for a specific k
+ */
+function formatMetricsRow(label: string, metrics: MetricsAtK): string {
+  return `${label.padEnd(20)} | ${(metrics.ndcg * 100).toFixed(1).padStart(6)} | ${(metrics.precision * 100).toFixed(1).padStart(6)} | ${(metrics.recall * 100).toFixed(1).padStart(6)}`
+}
+
+/**
+ * Print the metrics table for all k values
+ */
+function printMetricsTable(
+  astSummary: Record<number, MetricsAtK>,
+  fixedSummary: Record<number, MetricsAtK>,
+  indent = '',
+): void {
+  for (const k of K_VALUES) {
+    console.log(`${indent}k=${k}:`)
+    console.log(indent + '-'.repeat(50))
+    console.log(
+      `${indent}${'Chunker'.padEnd(20)} | ${'nDCG'.padStart(6)} | ${'P@k'.padStart(6)} | ${'R@k'.padStart(6)}`,
+    )
+    console.log(indent + '-'.repeat(50))
+    console.log(indent + formatMetricsRow('AST', astSummary[k]))
+    console.log(indent + formatMetricsRow('Fixed', fixedSummary[k]))
+    console.log(indent + '-'.repeat(50))
+    console.log('')
+  }
+}
+
+async function main() {
+  console.log('RepoEval Retrieval Evaluation')
+  console.log('=============================\n')
+
+  // Step 1: Download data if needed
+  await download()
+
+  // Step 2: Load tasks
+  console.log('\nLoading tasks...')
+  const allTasks = await loadTasks('2k')
+  console.log(`Loaded ${allTasks.length} tasks`)
+
+  // Group tasks by repo
+  const tasksByRepo = new Map<string, RepoEvalTask[]>()
+  for (const task of allTasks) {
+    const repo = task.metadata.task_id.split('/')[0]
+    if (!tasksByRepo.has(repo)) {
+      tasksByRepo.set(repo, [])
+    }
+    const repoTasks = tasksByRepo.get(repo)
+    if (repoTasks) {
+      repoTasks.push(task)
+    }
+  }
+
+  // Step 3: Run evaluation for each repo and chunker
+  await mkdir(RESULTS_DIR, { recursive: true })
+
+  const allResults: EvalResult[] = []
+  const repos = getRepos()
+
+  for (const repo of repos) {
+    const tasks = tasksByRepo.get(repo)
+    if (!tasks || tasks.length === 0) {
+      console.log(`\nSkipping ${repo}: no tasks found`)
+      continue
+    }
+
+    console.log(`\n${'='.repeat(60)}`)
+    console.log(`Repository: ${repo} (${tasks.length} tasks)`)
+    console.log('='.repeat(60))
+
+    // Evaluate with the AST chunker
+    const astResult = await evaluateRepo(repo, tasks, 'ast')
+    allResults.push(astResult)
+
+    // Evaluate with the fixed chunker
+    const fixedResult = await evaluateRepo(repo, tasks, 'fixed')
+    allResults.push(fixedResult)
+
+    // Print comparison
+    console.log(`\n  Results for ${repo}:`)
+    printMetricsTable(astResult.summary, fixedResult.summary, '  ')
+  }
+
+  // Step 4: Compute overall summary
+  console.log(`\n${'='.repeat(60)}`)
+  console.log('OVERALL SUMMARY')
+  console.log('='.repeat(60))
+
+  const astResults = allResults.filter((r) => r.chunker === 'ast')
+  const fixedResults = allResults.filter((r) => r.chunker === 'fixed')
+
+  // Aggregate metrics for each k value
+  const astOverall: Record<number, MetricsAtK> = {}
+  const fixedOverall: Record<number, MetricsAtK> = {}
+  for (const k of K_VALUES) {
+    astOverall[k] = aggregateMetrics(astResults.map((r) => r.summary[k]))
+    fixedOverall[k] = aggregateMetrics(fixedResults.map((r) => r.summary[k]))
+  }
+
+  console.log('')
+  printMetricsTable(astOverall, fixedOverall)
+
+  // Compute improvements for each k
+  console.log('Improvement (AST vs Fixed):')
+  for (const k of K_VALUES) {
+    const ndcgImprovement =
+      ((astOverall[k].ndcg - fixedOverall[k].ndcg) / fixedOverall[k].ndcg) * 100
+    const precImprovement =
+      ((astOverall[k].precision - fixedOverall[k].precision) /
+        fixedOverall[k].precision) *
+      100
+    const recallImprovement =
+      ((astOverall[k].recall - fixedOverall[k].recall) /
+        fixedOverall[k].recall) *
+      100
+
+    console.log(`  k=${k}:`)
+    console.log(
+      `    nDCG:      ${ndcgImprovement >= 0 ? '+' : ''}${ndcgImprovement.toFixed(1)}%`,
+    )
+    console.log(
+      `    Precision: ${precImprovement >= 0 ? '+' : ''}${precImprovement.toFixed(1)}%`,
+    )
+    console.log(
+      `    Recall:    ${recallImprovement >= 0 ? '+' : ''}${recallImprovement.toFixed(1)}%`,
+    )
+  }
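+
+  // Note: relative improvement is (ast - fixed) / fixed, so any k where the
+  // fixed baseline metric is exactly 0 will print Infinity or NaN here.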
+
+  // Step 5: Save results
+  const timestamp = new Date().toISOString().replace(/[:.]/g, '-')
+
+  // Save summary
+  const summaryPath = join(RESULTS_DIR, `summary_${timestamp}.json`)
+  await writeFile(
+    summaryPath,
+    JSON.stringify(
+      {
+        overall: {
+          ast: astOverall,
+          fixed: fixedOverall,
+        },
+        perRepo: Object.fromEntries(
+          repos.map((repo) => [
+            repo,
+            {
+              ast: astResults.find((r) => r.repo === repo)?.summary,
+              fixed: fixedResults.find((r) => r.repo === repo)?.summary,
+            },
+          ]),
+        ),
+        config: { kValues: K_VALUES, maxChunkSize: MAX_CHUNK_SIZE },
+        timestamp: new Date().toISOString(),
+      },
+      null,
+      2,
+    ),
+  )
+  console.log(`\nSaved summary to: ${summaryPath}`)
+
+  // Save detailed results
+  const detailedPath = join(RESULTS_DIR, `detailed_${timestamp}.json`)
+  await writeFile(detailedPath, JSON.stringify(allResults, null, 2))
+  console.log(`Saved detailed results to: ${detailedPath}`)
+}
+
+// Run if executed directly
+if (import.meta.main) {
+  main().catch(console.error)
+}
diff --git a/package.json b/package.json
index 4aa3f6c..008a5c2 100644
--- a/package.json
+++ b/package.json
@@ -19,6 +19,7 @@
     "dev": "bunup --watch",
     "lint": "biome check .",
     "lint:fix": "biome check --write .",
+    "format": "biome format --write .",
     "release": "bumpp --commit --push --tag",
     "test": "bun test",
     "test:coverage": "bun test --coverage",
diff --git a/src/chunking/index.ts b/src/chunking/index.ts
index 9bfd95d..96f262f 100644
--- a/src/chunking/index.ts
+++ b/src/chunking/index.ts
@@ -4,6 +4,7 @@ import {
   getRelevantImports,
   getScopeForRange,
 } from '../context'
+import { formatChunkWithContext } from '../context/format'
 import { getSiblings } from '../context/siblings'
 import type {
   ASTWindow,
@@ -43,6 +44,7 @@ export const DEFAULT_CHUNK_OPTIONS: Required<ChunkOptions> = {
   siblingDetail: 'signatures',
   filterImports: false,
   language: 'typescript',
+  overlapLines: 10,
 }
 
 /**
@@ -291,18 +293,43 @@ export const chunk = (
   const windowArray = Array.from(mergedWindows)
   const totalChunks = windowArray.length
 
-  const chunks: Chunk[] = windowArray.map((window, index) => {
-    // Rebuild text from window
-    const text = rebuildText(window, code)
+  // First pass: rebuild text for all windows (needed for overlap)
+  const rebuiltTexts = windowArray.map((window) =>
+    rebuildText(window, code),
+  )
 
+  // Second pass: build chunks with overlap
+  const chunks: Chunk[] = rebuiltTexts.map((text, index) => {
     // Build context
     const context =
       opts.contextMode === 'none'
        ? { scope: [], entities: [], siblings: [], imports: [] }
        : buildContext(text, scopeTree, opts, filepath, language)
+    // Compute overlap text from the previous chunk if applicable
+    let overlapText: string | undefined
+    if (opts.overlapLines > 0 && index > 0) {
+      const prevText = rebuiltTexts[index - 1]?.text
+      if (prevText) {
+        const prevLines = prevText.split('\n')
+        const overlapLineCount = Math.min(
+          opts.overlapLines,
+          prevLines.length,
+        )
+        overlapText = prevLines.slice(-overlapLineCount).join('\n')
+      }
+    }
+
+    // Build contextualized text for embeddings (includes overlap)
+    const contextualizedText = formatChunkWithContext(
+      text.text,
+      context,
+      overlapText,
+    )
+
     return {
       text: text.text,
+      contextualizedText,
       byteRange: text.byteRange,
       lineRange: text.lineRange,
       context,
@@ -366,6 +393,7 @@ export async function* streamChunks(
   // Stream chunks as they are generated
   // totalChunks is -1 since we don't know the total count while streaming
   let index = 0
+  let prevText: string | undefined
   for (const window of mergedWindows) {
     // Rebuild text from window
     const text = rebuildText(window, code)
@@ -376,14 +404,32 @@ export async function* streamChunks(
        ? { scope: [], entities: [], siblings: [], imports: [] }
        : buildContext(text, scopeTree, opts, filepath, language)
 
+    // Compute overlap text from the previous chunk if applicable
+    let overlapText: string | undefined
+    if (opts.overlapLines > 0 && prevText) {
+      const prevLines = prevText.split('\n')
+      const overlapLineCount = Math.min(opts.overlapLines, prevLines.length)
+      overlapText = prevLines.slice(-overlapLineCount).join('\n')
+    }
+
+    // Build contextualized text for embeddings (includes overlap)
+    const contextualizedText = formatChunkWithContext(
+      text.text,
+      context,
+      overlapText,
+    )
+
     yield {
       text: text.text,
+      contextualizedText,
       byteRange: text.byteRange,
       lineRange: text.lineRange,
       context,
       index,
       totalChunks: -1, // Unknown during streaming
     }
+
+    prevText = text.text
     index++
   }
 }
diff --git a/src/context/format.ts b/src/context/format.ts
new file mode 100644
index 0000000..ea14ddf
--- /dev/null
+++ b/src/context/format.ts
@@ -0,0 +1,99 @@
+/**
+ * Format chunks with semantic context for embedding
+ *
+ * Prepends the scope chain, entity signatures, and import context
+ * to improve embedding similarity for semantic search.
+ */
+
+import type { ChunkContext } from '../types'
+
+/**
+ * Format chunk text with semantic context prepended
+ *
+ * Creates a contextualized version of the chunk text that includes:
+ * - File path (last 3 segments)
+ * - Scope chain (e.g., "MyClass > process")
+ * - Entity signatures defined in this chunk
+ * - Import dependencies
+ * - Sibling context for continuity
+ * - Optional overlap from the previous chunk
+ *
+ * This format is optimized for embedding models to capture
+ * semantic relationships between code chunks.
+ *
+ * @param text - The raw chunk text
+ * @param context - The chunk's semantic context
+ * @param overlapText - Optional text from the previous chunk to include for continuity
+ * @returns Formatted text with context prepended
+ */
+export function formatChunkWithContext(
+  text: string,
+  context: ChunkContext,
+  overlapText?: string,
+): string {
+  const parts: string[] = []
+
+  // Add file path for context (last 3 segments)
+  if (context.filepath) {
+    const relPath = context.filepath.split('/').slice(-3).join('/')
+    parts.push(`# ${relPath}`)
+  }
+
+  // Add scope chain (e.g., "Scope: MyClass > process")
+  if (context.scope.length > 0) {
+    const scopePath = context.scope
+      .map((s) => s.name)
+      .reverse()
+      .join(' > ')
+    parts.push(`# Scope: ${scopePath}`)
+  }
+
+  // Add entity signatures in this chunk
+  const signatures = context.entities
+    .filter((e) => e.signature && e.type !== 'import')
+    .map((e) => e.signature)
+  if (signatures.length > 0) {
+    parts.push(`# Defines: ${signatures.join(', ')}`)
+  }
+
+  // Add imports context (what this code depends on)
+  if (context.imports.length > 0) {
+    const importNames = context.imports
+      .slice(0, 10) // Limit to avoid noise
+      .map((i) => i.name)
+      .join(', ')
+    parts.push(`# Uses: ${importNames}`)
+  }
+
+  // Add sibling context for continuity (the chunk comes after its 'before' siblings)
+  const beforeSiblings = context.siblings
+    .filter((s) => s.position === 'before')
+    .map((s) => s.name)
+  const afterSiblings = context.siblings
+    .filter((s) => s.position === 'after')
+    .map((s) => s.name)
+
+  if (beforeSiblings.length > 0) {
+    parts.push(`# After: ${beforeSiblings.join(', ')}`)
+  }
+  if (afterSiblings.length > 0) {
+    parts.push(`# Before: ${afterSiblings.join(', ')}`)
+  }
+
+  // Add separator before code
+  if (parts.length > 0) {
+    parts.push('')
+  }
+
+  // Add overlap from the previous chunk if provided
+  if (overlapText) {
+    parts.push('# ...')
+    parts.push(overlapText)
+    parts.push('# ---')
+  }
+
+  // Add the actual chunk code
+  parts.push(text)
+
+  return parts.join('\n')
+}
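+
+// Illustrative output for a hypothetical chunk inside a class `Retriever`:
+//
+//   # src/search/retriever.ts
+//   # Scope: Retriever > rank
+//   # Defines: rank(query: string): Result[]
+//   # Uses: cosineSim, topK
+//   # After: constructor, buildIndex
+//   # Before: serialize
+//
+//   <chunk code>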
diff --git a/src/index.ts b/src/index.ts
index 4eeca9f..0b50057 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -19,8 +19,8 @@ export {
 
 // Chunker factory
 export { createChunker } from './chunker'
-
-// Re-export language utilities for advanced usage
+// Context formatting utility for custom embedding text generation
+export { formatChunkWithContext } from './context/format'
 export { detectLanguage, LANGUAGE_EXTENSIONS } from './parser/languages'
 
 // All public types
diff --git a/src/types.ts b/src/types.ts
index 2bafcde..e32d144 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -225,6 +225,14 @@ export interface ChunkContext {
 export interface Chunk {
   /** The actual text content */
   text: string
+  /**
+   * Text with semantic context prepended for embedding
+   *
+   * Includes file path, scope chain, entity signatures, imports,
+   * and sibling context to improve embedding quality for semantic search.
+   * Use this field when creating embeddings for RAG systems.
+   */
+  contextualizedText: string
   /** Byte range in original source */
   byteRange: ByteRange
   /** Line range in original source */
@@ -251,6 +259,14 @@ export interface ChunkOptions {
   filterImports?: boolean
   /** Override language detection */
   language?: Language
+  /**
+   * Number of lines to overlap from the previous chunk (default: 10,
+   * per DEFAULT_CHUNK_OPTIONS)
+   *
+   * When set, each chunk's contextualizedText will include the last N lines
+   * from the previous chunk, improving recall for queries that target
+   * code at chunk boundaries. The raw `text` field is not affected.
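+   *
+   * @example
+   * // hypothetical usage: carry the last 5 lines of each previous chunk
+   * // into the next chunk's contextualizedText
+   * const chunks = await chunk('src/app.ts', code, { overlapLines: 5 })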
+   */
+  overlapLines?: number
 }
 
 /**