Merged
2 changes: 2 additions & 0 deletions .gitignore
@@ -20,3 +20,5 @@ yarn-error.log*
.turbo
todo.md
plan.md
eval/cache
eval/results
40 changes: 40 additions & 0 deletions eval/chunkers/ast.ts
@@ -0,0 +1,40 @@
/**
* AST-aware chunker wrapper for evaluation
*
* Wraps the astchunk library for use in the evaluation harness.
* Uses the built-in contextualizedText for better embedding quality.
*/

import { chunk } from '../../src'

/**
* Chunk a file using AST-aware chunking and return results
* in a format compatible with the evaluation harness
*
* @param filepath - Path to the file
* @param code - Source code content
* @param maxNws - Maximum non-whitespace (NWS) characters per chunk (default: 1500)
*/
export async function chunkFile(
filepath: string,
code: string,
maxNws: number = 1500,
): Promise<
Array<{
id: string
text: string
startLine: number
endLine: number
}>
> {
const chunks = await chunk(filepath, code, {
maxChunkSize: maxNws,
})

return chunks.map((c) => ({
id: `${filepath}:${c.lineRange.start}-${c.lineRange.end}`,
text: c.contextualizedText,
startLine: c.lineRange.start,
endLine: c.lineRange.end,
}))
}
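
A minimal usage sketch of this wrapper (not part of the diff; the file path is illustrative and the relative import assumes a script living under eval/):

import { readFileSync } from 'node:fs'
import { chunkFile } from './chunkers/ast'

// Illustrative input: any Python source file from the benchmark repositories.
const filepath =
  'data/repoeval/repositories/function_level/deepmind_tracr/tracr/craft/transformers.py'
const code = readFileSync(filepath, 'utf-8')

// 1500 is the wrapper's default non-whitespace budget per chunk.
const chunks = await chunkFile(filepath, code, 1500)
for (const c of chunks) {
  console.log(`${c.id}: lines ${c.startLine}-${c.endLine}`)
}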
77 changes: 77 additions & 0 deletions eval/debug_chunks.ts
@@ -0,0 +1,77 @@
import { readFileSync } from 'node:fs'
import { join } from 'node:path'
import { chunk } from '../src'
import { chunkFixed } from './chunkers/fixed'

// Inspect how deepmind_tracr/tracr/craft/transformers.py is chunked
// when the target span is lines 100-150
const testFile = join(
import.meta.dir,
'data/repoeval/repositories/function_level/deepmind_tracr/tracr/craft/transformers.py',
)
const code = readFileSync(testFile, 'utf-8')
const targetStart = 100
const targetEnd = 150

console.log('File:', testFile)
console.log('Target lines:', targetStart, '-', targetEnd)
console.log('')

// Count non-whitespace (NWS) characters, the size metric used by both chunkers
function countNws(text: string): number {
let count = 0
for (let i = 0; i < text.length; i++) {
if (text.charCodeAt(i) > 32) count++
}
return count
}

// Two inclusive line ranges overlap unless one ends entirely before the other starts
function overlaps(
chunkStart: number,
chunkEnd: number,
tStart: number,
tEnd: number,
): boolean {
return !(chunkEnd < tStart || chunkStart > tEnd)
}

for (const maxSize of [1500, 1800]) {
console.log(`\n=== Max chunk size: ${maxSize} ===`)

const astChunks = await chunk(testFile, code, { maxChunkSize: maxSize })
const fixedChunks = chunkFixed(code, maxSize)

console.log('\nAST chunks:')
for (const c of astChunks) {
const overlap = overlaps(
c.lineRange.start,
c.lineRange.end,
targetStart,
targetEnd,
)
console.log(
` Lines ${c.lineRange.start}-${c.lineRange.end} (${countNws(c.text)} NWS) ${overlap ? '*** RELEVANT ***' : ''}`,
)
}

console.log('\nFixed chunks:')
for (const c of fixedChunks) {
const overlap = overlaps(c.startLine, c.endLine, targetStart, targetEnd)
console.log(
` Lines ${c.startLine}-${c.endLine} (${c.nwsCount} NWS) ${overlap ? '*** RELEVANT ***' : ''}`,
)
}

const astRelevant = astChunks.filter((c) =>
overlaps(c.lineRange.start, c.lineRange.end, targetStart, targetEnd),
)
const fixedRelevant = fixedChunks.filter((c) =>
overlaps(c.startLine, c.endLine, targetStart, targetEnd),
)

console.log(
`\nRelevant chunks: AST=${astRelevant.length}, Fixed=${fixedRelevant.length}`,
)
console.log(
`Total chunks: AST=${astChunks.length}, Fixed=${fixedChunks.length}`,
)
}
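
If line-level coverage of the target span matters (rather than just the count of overlapping chunks), a small helper along these lines could be added to the script; this is a sketch, not part of the PR:

// Hypothetical helper: fraction of target lines covered by at least one chunk range.
function lineCoverage(
  ranges: Array<{ start: number; end: number }>,
  tStart: number,
  tEnd: number,
): number {
  let covered = 0
  for (let line = tStart; line <= tEnd; line++) {
    if (ranges.some((r) => r.start <= line && line <= r.end)) covered++
  }
  return covered / (tEnd - tStart + 1)
}

Inside the loop above it could be fed astChunks.map((c) => ({ start: c.lineRange.start, end: c.lineRange.end })) and the fixed chunks' startLine/endLine pairs.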
149 changes: 149 additions & 0 deletions eval/download.ts
@@ -0,0 +1,149 @@
/**
* Download RepoEval benchmark data
*
* Downloads:
* 1. Task datasets (queries, ground truth) from Microsoft CodeT repo
* 2. Function-level Python repositories for chunking
*/

import { existsSync } from 'node:fs'
import { mkdir, writeFile } from 'node:fs/promises'
import { join } from 'node:path'

const DATA_DIR = join(import.meta.dir, 'data', 'repoeval')
const DATASETS_DIR = join(DATA_DIR, 'datasets')
const REPOS_DIR = join(DATA_DIR, 'repositories', 'function_level')

// Function-level repositories from RepoEval
const REPOS_FUNCTION = [
'amazon-science_patchcore-inspection',
'deepmind_tracr',
'facebookresearch_omnivore',
'google_lightweight_mmm',
'lucidrains_imagen-pytorch',
'maxhumber_redframes',
]

async function downloadAndExtractZip(
url: string,
destDir: string,
): Promise<void> {
console.log(`Downloading from ${url}...`)

const response = await fetch(url)
if (!response.ok) {
throw new Error(`Failed to download: ${response.statusText}`)
}

const arrayBuffer = await response.arrayBuffer()
const tempZipPath = join(destDir, '_temp.zip')

await mkdir(destDir, { recursive: true })
await writeFile(tempZipPath, new Uint8Array(arrayBuffer))

// Extract with the system unzip command (assumes unzip is available on PATH)
const proc = Bun.spawn(['unzip', '-o', '-q', tempZipPath, '-d', destDir], {
cwd: destDir,
})
await proc.exited

// Clean up temp file
await Bun.spawn(['rm', tempZipPath]).exited

console.log(`Extracted to ${destDir}`)
}

async function downloadDatasets(): Promise<void> {
if (existsSync(DATASETS_DIR)) {
console.log('Datasets already downloaded, skipping...')
return
}

const datasetsUrl =
'https://github.com/microsoft/CodeT/raw/main/RepoCoder/datasets/datasets.zip'
await downloadAndExtractZip(datasetsUrl, DATASETS_DIR)
}

async function downloadRepositories(): Promise<void> {
if (existsSync(REPOS_DIR)) {
console.log('Repositories already downloaded, skipping...')
return
}

// Using the cleaned version from Veronicium's fork
const reposUrl =
'https://github.com/Veronicium/repoeval_debug/raw/main/function_level.zip'
await downloadAndExtractZip(reposUrl, REPOS_DIR)
}

export interface RepoEvalTask {
prompt: string
metadata: {
task_id: string
ground_truth: string
fpath_tuple: string[]
line_no: number
lineno: number
context_start_lineno: number
}
}

export async function loadTasks(
contextLength: '1k' | '2k' | '4k' = '2k',
): Promise<RepoEvalTask[]> {
const fileName = `function_level_completion_${contextLength}_context_codex.test.jsonl`
const filePath = join(DATASETS_DIR, fileName)

const content = await Bun.file(filePath).text()
const lines = content.trim().split('\n')

const tasks: RepoEvalTask[] = []
const repo2idx: Record<string, number> = {}

for (const line of lines) {
const task = JSON.parse(line) as RepoEvalTask

// Derive the repo name: '--' in the raw task_id becomes '_', then take the segment before '/'
const repo = task.metadata.task_id.replace('--', '_').split('/')[0]
if (!REPOS_FUNCTION.includes(repo)) continue

if (!(repo in repo2idx)) {
repo2idx[repo] = 0
}

// Rewrite the task_id with the normalized repo name and a per-repo running index,
// and mirror lineno into line_no
task.metadata.task_id = task.metadata.task_id
.replace('--', '_')
.replace('idx', String(repo2idx[repo]))
task.metadata.line_no = task.metadata.lineno
repo2idx[repo]++

tasks.push(task)
}

return tasks
}

export function getReposDir(): string {
return REPOS_DIR
}

export function getRepos(): string[] {
return REPOS_FUNCTION
}

export async function download(): Promise<void> {
console.log('Downloading RepoEval benchmark data...\n')

await mkdir(DATA_DIR, { recursive: true })

await downloadDatasets()
await downloadRepositories()

console.log('\nDownload complete!')
console.log(`Data stored in: ${DATA_DIR}`)
}

// Run if executed directly
if (import.meta.main) {
await download()
}
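
A hedged sketch of how an evaluation script might consume these helpers; the grouping and logging below are illustrative and not part of this PR, and the relative import assumes the script sits in eval/ next to download.ts:

import { join } from 'node:path'
import {
  download,
  loadTasks,
  getRepos,
  getReposDir,
  type RepoEvalTask,
} from './download'

await download()
const tasks = await loadTasks('2k')

// Group tasks by repository so each repo only needs to be chunked and indexed once.
const byRepo = new Map<string, RepoEvalTask[]>()
for (const task of tasks) {
  const repo = task.metadata.task_id.split('/')[0]
  byRepo.set(repo, [...(byRepo.get(repo) ?? []), task])
}

for (const repo of getRepos()) {
  const count = byRepo.get(repo)?.length ?? 0
  console.log(`${repo}: ${count} tasks, sources under ${join(getReposDir(), repo)}`)
}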