Merged
2 changes: 2 additions & 0 deletions .gitignore
@@ -20,3 +20,5 @@ yarn-error.log*
.turbo
todo.md
plan.md
eval/cache
eval/results
40 changes: 40 additions & 0 deletions eval/chunkers/ast.ts
@@ -0,0 +1,40 @@
/**
* AST-aware chunker wrapper for evaluation
*
* Wraps the astchunk library for use in the evaluation harness.
* Uses the built-in contextualizedText for better embedding quality.
*/

import { chunk } from '../../src'

/**
* Chunk a file using AST-aware chunking and return results
* in a format compatible with the evaluation harness
*
* @param filepath - Path to the file
* @param code - Source code content
* @param maxNws - Maximum non-whitespace (NWS) characters per chunk (default: 1500)
*/
export async function chunkFile(
filepath: string,
code: string,
maxNws: number = 1500,
): Promise<
Array<{
id: string
text: string
startLine: number
endLine: number
}>
> {
const chunks = await chunk(filepath, code, {
maxChunkSize: maxNws,
})

return chunks.map((c) => ({
id: `${filepath}:${c.lineRange.start}-${c.lineRange.end}`,
text: c.contextualizedText,
startLine: c.lineRange.start,
endLine: c.lineRange.end,
}))
}
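
A minimal usage sketch of this wrapper (not part of the diff; the file path is illustrative and the relative import assumes a script living under eval/):

import { readFileSync } from 'node:fs'
import { chunkFile } from './chunkers/ast'

// Illustrative input: any Python source file from the benchmark repositories.
const filepath =
  'data/repoeval/repositories/function_level/deepmind_tracr/tracr/craft/transformers.py'
const code = readFileSync(filepath, 'utf-8')

// 1500 is the wrapper's default non-whitespace budget per chunk.
const chunks = await chunkFile(filepath, code, 1500)
for (const c of chunks) {
  console.log(`${c.id}: lines ${c.startLine}-${c.endLine}`)
}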
77 changes: 77 additions & 0 deletions eval/debug_chunks.ts
@@ -0,0 +1,77 @@
import { readFileSync } from 'node:fs'
import { join } from 'node:path'
import { chunk } from '../src'
import { chunkFixed } from './chunkers/fixed'

// Inspect how deepmind_tracr/tracr/craft/transformers.py is chunked
// when the target span is lines 100-150
const testFile = join(
import.meta.dir,
'data/repoeval/repositories/function_level/deepmind_tracr/tracr/craft/transformers.py',
)
const code = readFileSync(testFile, 'utf-8')
const targetStart = 100
const targetEnd = 150

console.log('File:', testFile)
console.log('Target lines:', targetStart, '-', targetEnd)
console.log('')

// Count non-whitespace (NWS) characters, the size metric used by both chunkers
function countNws(text: string): number {
let count = 0
for (let i = 0; i < text.length; i++) {
if (text.charCodeAt(i) > 32) count++
}
return count
}

// Two inclusive line ranges overlap unless one ends entirely before the other starts
function overlaps(
chunkStart: number,
chunkEnd: number,
tStart: number,
tEnd: number,
): boolean {
return !(chunkEnd < tStart || chunkStart > tEnd)
}

for (const maxSize of [1500, 1800]) {
console.log(`\n=== Max chunk size: ${maxSize} ===`)

const astChunks = await chunk(testFile, code, { maxChunkSize: maxSize })
const fixedChunks = chunkFixed(code, maxSize)

console.log('\nAST chunks:')
for (const c of astChunks) {
const overlap = overlaps(
c.lineRange.start,
c.lineRange.end,
targetStart,
targetEnd,
)
console.log(
` Lines ${c.lineRange.start}-${c.lineRange.end} (${countNws(c.text)} NWS) ${overlap ? '*** RELEVANT ***' : ''}`,
)
}

console.log('\nFixed chunks:')
for (const c of fixedChunks) {
const overlap = overlaps(c.startLine, c.endLine, targetStart, targetEnd)
console.log(
` Lines ${c.startLine}-${c.endLine} (${c.nwsCount} NWS) ${overlap ? '*** RELEVANT ***' : ''}`,
)
}

const astRelevant = astChunks.filter((c) =>
overlaps(c.lineRange.start, c.lineRange.end, targetStart, targetEnd),
)
const fixedRelevant = fixedChunks.filter((c) =>
overlaps(c.startLine, c.endLine, targetStart, targetEnd),
)

console.log(
`\nRelevant chunks: AST=${astRelevant.length}, Fixed=${fixedRelevant.length}`,
)
console.log(
`Total chunks: AST=${astChunks.length}, Fixed=${fixedChunks.length}`,
)
}
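
If line-level coverage of the target span matters (rather than just the count of overlapping chunks), a small helper along these lines could be added to the script; this is a sketch, not part of the PR:

// Hypothetical helper: fraction of target lines covered by at least one chunk range.
function lineCoverage(
  ranges: Array<{ start: number; end: number }>,
  tStart: number,
  tEnd: number,
): number {
  let covered = 0
  for (let line = tStart; line <= tEnd; line++) {
    if (ranges.some((r) => r.start <= line && line <= r.end)) covered++
  }
  return covered / (tEnd - tStart + 1)
}

Inside the loop above it could be fed astChunks.map((c) => ({ start: c.lineRange.start, end: c.lineRange.end })) and the fixed chunks' startLine/endLine pairs.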
149 changes: 149 additions & 0 deletions eval/download.ts
@@ -0,0 +1,149 @@
/**
* Download RepoEval benchmark data
*
* Downloads:
* 1. Task datasets (queries, ground truth) from Microsoft CodeT repo
* 2. Function-level Python repositories for chunking
*/

import { existsSync } from 'node:fs'
import { mkdir, writeFile } from 'node:fs/promises'
import { join } from 'node:path'

const DATA_DIR = join(import.meta.dir, 'data', 'repoeval')
const DATASETS_DIR = join(DATA_DIR, 'datasets')
const REPOS_DIR = join(DATA_DIR, 'repositories', 'function_level')

// Function-level repositories from RepoEval
const REPOS_FUNCTION = [
'amazon-science_patchcore-inspection',
'deepmind_tracr',
'facebookresearch_omnivore',
'google_lightweight_mmm',
'lucidrains_imagen-pytorch',
'maxhumber_redframes',
]

async function downloadAndExtractZip(
url: string,
destDir: string,
): Promise<void> {
console.log(`Downloading from ${url}...`)

const response = await fetch(url)
if (!response.ok) {
throw new Error(`Failed to download: ${response.statusText}`)
}

const arrayBuffer = await response.arrayBuffer()
const tempZipPath = join(destDir, '_temp.zip')

await mkdir(destDir, { recursive: true })
await writeFile(tempZipPath, new Uint8Array(arrayBuffer))

// Extract with the system unzip command (assumes unzip is available on PATH)
const proc = Bun.spawn(['unzip', '-o', '-q', tempZipPath, '-d', destDir], {
cwd: destDir,
})
await proc.exited

// Clean up temp file
await Bun.spawn(['rm', tempZipPath]).exited

console.log(`Extracted to ${destDir}`)
}

async function downloadDatasets(): Promise<void> {
if (existsSync(DATASETS_DIR)) {
console.log('Datasets already downloaded, skipping...')
return
}

const datasetsUrl =
'https://github.com/microsoft/CodeT/raw/main/RepoCoder/datasets/datasets.zip'
await downloadAndExtractZip(datasetsUrl, DATASETS_DIR)
}

async function downloadRepositories(): Promise<void> {
if (existsSync(REPOS_DIR)) {
console.log('Repositories already downloaded, skipping...')
return
}

// Using the cleaned version from Veronicium's fork
const reposUrl =
'https://github.com/Veronicium/repoeval_debug/raw/main/function_level.zip'
await downloadAndExtractZip(reposUrl, REPOS_DIR)
}

export interface RepoEvalTask {
prompt: string
metadata: {
task_id: string
ground_truth: string
fpath_tuple: string[]
line_no: number
lineno: number
context_start_lineno: number
}
}

export async function loadTasks(
contextLength: '1k' | '2k' | '4k' = '2k',
): Promise<RepoEvalTask[]> {
const fileName = `function_level_completion_${contextLength}_context_codex.test.jsonl`
const filePath = join(DATASETS_DIR, fileName)

const content = await Bun.file(filePath).text()
const lines = content.trim().split('\n')

const tasks: RepoEvalTask[] = []
const repo2idx: Record<string, number> = {}

for (const line of lines) {
const task = JSON.parse(line) as RepoEvalTask

// Derive the repo name: '--' in the raw task_id becomes '_', then take the segment before '/'
const repo = task.metadata.task_id.replace('--', '_').split('/')[0]
if (!REPOS_FUNCTION.includes(repo)) continue

if (!(repo in repo2idx)) {
repo2idx[repo] = 0
}

// Rewrite the task_id with the normalized repo name and a per-repo running index,
// and mirror lineno into line_no
task.metadata.task_id = task.metadata.task_id
.replace('--', '_')
.replace('idx', String(repo2idx[repo]))
task.metadata.line_no = task.metadata.lineno
repo2idx[repo]++

tasks.push(task)
}

return tasks
}

export function getReposDir(): string {
return REPOS_DIR
}

export function getRepos(): string[] {
return REPOS_FUNCTION
}

export async function download(): Promise<void> {
console.log('Downloading RepoEval benchmark data...\n')

await mkdir(DATA_DIR, { recursive: true })

await downloadDatasets()
await downloadRepositories()

console.log('\nDownload complete!')
console.log(`Data stored in: ${DATA_DIR}`)
}

// Run if executed directly
if (import.meta.main) {
await download()
}
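
A hedged sketch of how an evaluation script might consume these helpers; the grouping and logging below are illustrative and not part of this PR, and the relative import assumes the script sits in eval/ next to download.ts:

import { join } from 'node:path'
import {
  download,
  loadTasks,
  getRepos,
  getReposDir,
  type RepoEvalTask,
} from './download'

await download()
const tasks = await loadTasks('2k')

// Group tasks by repository so each repo only needs to be chunked and indexed once.
const byRepo = new Map<string, RepoEvalTask[]>()
for (const task of tasks) {
  const repo = task.metadata.task_id.split('/')[0]
  byRepo.set(repo, [...(byRepo.get(repo) ?? []), task])
}

for (const repo of getRepos()) {
  const count = byRepo.get(repo)?.length ?? 0
  console.log(`${repo}: ${count} tasks, sources under ${join(getReposDir(), repo)}`)
}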