diff --git a/biome.json b/biome.json index 3016cbe..8e30a60 100644 --- a/biome.json +++ b/biome.json @@ -28,5 +28,23 @@ "enabled": true, "useIgnoreFile": true, "clientKind": "git" - } + }, + "overrides": [ + { + "includes": ["test/**/*.ts"], + "linter": { + "rules": { + "style": { + "noNonNullAssertion": "off" + }, + "suspicious": { + "noNonNullAssertedOptionalChain": "off" + }, + "correctness": { + "noUnsafeOptionalChaining": "off" + } + } + } + } + ] } diff --git a/src/chunk.ts b/src/chunk.ts index 381f832..061c1a8 100644 --- a/src/chunk.ts +++ b/src/chunk.ts @@ -1,10 +1,19 @@ -import { Effect } from 'effect' -import { chunk as chunkInternal } from './chunking' +import { Effect, Stream } from 'effect' +import { + chunk as chunkInternal, + streamChunks as streamChunksInternal, +} from './chunking' import { extractEntities } from './extract' import { parseCode } from './parser' import { detectLanguage } from './parser/languages' import { buildScopeTree } from './scope' -import type { Chunk, ChunkOptions, Language } from './types' +import type { + Chunk, + ChunkOptions, + Language, + ParseResult, + ScopeTree, +} from './types' /** * Error thrown when chunking fails @@ -74,7 +83,7 @@ const chunkEffect = ( new ChunkingError('Failed to build scope tree', error), ) - // Step 5: Chunk the code + // Step 5: Chunk the code (passing filepath for context) const chunks = yield* Effect.mapError( chunkInternal( parseResult.tree.rootNode, @@ -82,6 +91,7 @@ const chunkEffect = ( scopeTree, language, options, + filepath, ), (error: unknown) => new ChunkingError('Failed to chunk code', error), ) @@ -133,3 +143,172 @@ export async function chunk( ): Promise { return Effect.runPromise(chunkEffect(filepath, code, options)) } + +/** + * Prepare the chunking pipeline (parse, extract, build scope tree) + * Returns the parsed result and scope tree needed for chunking + */ +const prepareChunking = ( + filepath: string, + code: string, + options?: ChunkOptions, +): Effect.Effect< + { parseResult: ParseResult; scopeTree: ScopeTree; language: Language }, + ChunkingError | UnsupportedLanguageError +> => { + return Effect.gen(function* () { + // Step 1: Detect language (or use override) + const language: Language | null = + options?.language ?? detectLanguage(filepath) + + if (!language) { + return yield* Effect.fail(new UnsupportedLanguageError(filepath)) + } + + // Step 2: Parse the code + const parseResult = yield* Effect.tryPromise({ + try: () => parseCode(code, language), + catch: (error: unknown) => + new ChunkingError('Failed to parse code', error), + }) + + // Step 3: Extract entities from AST + const entities = yield* Effect.mapError( + extractEntities(parseResult.tree.rootNode, language, code), + (error: unknown) => + new ChunkingError('Failed to extract entities', error), + ) + + // Step 4: Build scope tree + const scopeTree = yield* Effect.mapError( + buildScopeTree(entities), + (error: unknown) => + new ChunkingError('Failed to build scope tree', error), + ) + + return { parseResult, scopeTree, language } + }) +} + +/** + * Create an Effect Stream that yields chunks + * + * This is the Effect-native streaming API. Use this if you're working + * within the Effect ecosystem and want full composability. 
+ *
+ * @param filepath - The file path (used for language detection)
+ * @param code - The source code to chunk
+ * @param options - Optional chunking configuration
+ * @returns Effect Stream of chunks with context
+ *
+ * @example
+ * ```ts
+ * import { chunkStreamEffect } from 'astchunk'
+ * import { Effect, Stream } from 'effect'
+ *
+ * const program = Stream.runForEach(
+ *   chunkStreamEffect('src/utils.ts', sourceCode),
+ *   (chunk) => Effect.log(chunk.text)
+ * )
+ *
+ * Effect.runPromise(program)
+ * ```
+ */
+export const chunkStreamEffect = (
+  filepath: string,
+  code: string,
+  options?: ChunkOptions,
+): Stream.Stream<Chunk, ChunkingError | UnsupportedLanguageError> => {
+  return Stream.unwrap(
+    Effect.map(prepareChunking(filepath, code, options), (prepared) => {
+      const { parseResult, scopeTree, language } = prepared
+
+      // Create stream from the internal generator
+      return Stream.fromAsyncIterable(
+        streamChunksInternal(
+          parseResult.tree.rootNode,
+          code,
+          scopeTree,
+          language,
+          options,
+          filepath,
+        ),
+        (error) => new ChunkingError('Stream iteration failed', error),
+      ).pipe(
+        // Attach parse error to chunks if present
+        Stream.map((chunk) =>
+          parseResult.error
+            ? {
+                ...chunk,
+                context: {
+                  ...chunk.context,
+                  parseError: parseResult.error,
+                },
+              }
+            : chunk,
+        ),
+      )
+    }),
+  )
+}
+
+/**
+ * Stream source code chunks as they are generated
+ *
+ * This function returns an async generator that yields chunks one at a time,
+ * which is useful for processing large files without waiting for all chunks
+ * to be generated.
+ *
+ * @param filepath - The file path (used for language detection)
+ * @param code - The source code to chunk
+ * @param options - Optional chunking configuration
+ * @returns Async generator of chunks with context
+ * @throws ChunkingError if chunking fails
+ * @throws UnsupportedLanguageError if the file type is not supported
+ *
+ * @example
+ * ```ts
+ * import { chunkStream } from 'astchunk'
+ *
+ * for await (const chunk of chunkStream('src/utils.ts', sourceCode)) {
+ *   console.log(chunk.text, chunk.context)
+ * }
+ * ```
+ */
+export async function* chunkStream(
+  filepath: string,
+  code: string,
+  options?: ChunkOptions,
+): AsyncGenerator<Chunk> {
+  // Prepare the chunking pipeline
+  const prepared = await Effect.runPromise(
+    prepareChunking(filepath, code, options),
+  )
+
+  const { parseResult, scopeTree, language } = prepared
+
+  // Stream chunks from the internal generator
+  const chunkGenerator = streamChunksInternal(
+    parseResult.tree.rootNode,
+    code,
+    scopeTree,
+    language,
+    options,
+    filepath,
+  )
+
+  // Yield chunks, optionally attaching parse error if present
+  for await (const chunk of chunkGenerator) {
+    if (parseResult.error) {
+      yield {
+        ...chunk,
+        context: {
+          ...chunk.context,
+          parseError: parseResult.error,
+        },
+      }
+    } else {
+      yield chunk
+    }
+  }
+}
diff --git a/src/chunker.ts b/src/chunker.ts
index fd7a54c..28c8a4a 100644
--- a/src/chunker.ts
+++ b/src/chunker.ts
@@ -1,71 +1,62 @@
-import { chunk } from './chunk'
+import { chunk as chunkFn, chunkStream as streamFn } from './chunk'
+import { DEFAULT_CHUNK_OPTIONS } from './chunking'
 import type { Chunk, Chunker, ChunkOptions } from './types'
 
-/**
- * Default options for the chunker
- */
-const DEFAULT_OPTIONS: ChunkOptions = {
-  maxChunkSize: 4096,
-  contextMode: 'full',
-  siblingDetail: 'signatures',
-  filterImports: false,
-}
-
 /**
  * Implementation of the Chunker interface
  *
- * Provides a stateful wrapper around the chunk function that:
- * - Stores default options
- * - Tracks the filepath for language detection
+ * Provides a stateful wrapper around the chunk and stream functions that
+ * stores default options and allows per-call overrides.
  */
 class ChunkerImpl implements Chunker {
-  private readonly filepath: string
   private readonly defaultOptions: ChunkOptions
 
-  constructor(filepath: string, options: ChunkOptions = {}) {
-    this.filepath = filepath
-    this.defaultOptions = { ...DEFAULT_OPTIONS, ...options }
+  constructor(options: ChunkOptions = {}) {
+    this.defaultOptions = { ...DEFAULT_CHUNK_OPTIONS, ...options }
   }
 
   /**
    * Chunk source code into pieces with context
    *
-   * @param source - The source code to chunk
+   * @param filepath - The file path (used for language detection)
+   * @param code - The source code to chunk
    * @param options - Optional overrides for chunking options
    * @returns Promise resolving to array of chunks
    */
-  async chunk(source: string, options?: ChunkOptions): Promise<Chunk[]> {
+  async chunk(
+    filepath: string,
+    code: string,
+    options?: ChunkOptions,
+  ): Promise<Chunk[]> {
     const mergedOptions = { ...this.defaultOptions, ...options }
-    return chunk(this.filepath, source, mergedOptions)
+    return chunkFn(filepath, code, mergedOptions)
   }
 
   /**
    * Stream chunks as they are generated
    *
-   * @param source - The source code to chunk
+   * @param filepath - The file path (used for language detection)
+   * @param code - The source code to chunk
    * @param options - Optional overrides for chunking options
    * @returns Async iterable of chunks
-   *
-   * TODO: Implement true streaming - for now, this just iterates the array
    */
-  async *stream(source: string, options?: ChunkOptions): AsyncIterable<Chunk> {
+  async *stream(
+    filepath: string,
+    code: string,
+    options?: ChunkOptions,
+  ): AsyncIterable<Chunk> {
     const mergedOptions = { ...this.defaultOptions, ...options }
-    const chunks = await chunk(this.filepath, source, mergedOptions)
-
-    for (const c of chunks) {
-      yield c
-    }
+    yield* streamFn(filepath, code, mergedOptions)
  }
 }
 
 /**
- * Create a new Chunker instance for a specific file
+ * Create a new Chunker instance with default options
  *
  * The Chunker provides a convenient interface for chunking source code
  * with pre-configured options. It's particularly useful when you need to
- * chunk multiple versions of the same file or want to stream chunks.
+ * chunk multiple files with the same configuration.
* - * @param filepath - The file path (used for language detection) * @param options - Default options for all chunking operations * @returns A Chunker instance * @@ -73,25 +64,17 @@ class ChunkerImpl implements Chunker { * ```ts * import { createChunker } from 'astchunk' * - * const chunker = createChunker('src/utils.ts', { maxChunkSize: 2048 }) + * const chunker = createChunker({ maxChunkSize: 2048 }) * * // Chunk synchronously - * const chunks = await chunker.chunk(sourceCode) + * const chunks = await chunker.chunk('src/utils.ts', sourceCode) * * // Or stream chunks - * for await (const chunk of chunker.stream(sourceCode)) { + * for await (const chunk of chunker.stream('src/utils.ts', sourceCode)) { * process.stdout.write(chunk.text) * } * ``` */ -export function createChunker( - filepath: string, - options?: ChunkOptions, -): Chunker { - return new ChunkerImpl(filepath, options) +export function createChunker(options?: ChunkOptions): Chunker { + return new ChunkerImpl(options) } - -/** - * Re-export the Chunker type for convenience - */ -export type { Chunker } from './types' diff --git a/src/chunking/index.ts b/src/chunking/index.ts index 66aed1f..9bfd95d 100644 --- a/src/chunking/index.ts +++ b/src/chunking/index.ts @@ -1,19 +1,21 @@ import { Effect } from 'effect' +import { + getEntitiesInRange, + getRelevantImports, + getScopeForRange, +} from '../context' import { getSiblings } from '../context/siblings' -import { findScopeAtOffset, getAncestorChain } from '../scope/tree' import type { ASTWindow, Chunk, ChunkContext, ChunkOptions, - EntityInfo, - ImportInfo, Language, ScopeTree, SyntaxNode, } from '../types' import { mergeAdjacentWindows } from './merge' -import { getNwsCount, type NwsCountMap, preprocessNwsCount } from './nws' +import { getNwsCountForNode, type NwsCumsum, preprocessNwsCumsum } from './nws' import { isLeafNode } from './oversized' import { type RebuiltText, rebuildText } from './rebuild' import { getAncestors } from './windows' @@ -21,99 +23,28 @@ import { getAncestors } from './windows' /** * Error when chunking fails */ -export class ChunkError { +export class ChunkError extends Error { readonly _tag = 'ChunkError' - constructor( - readonly message: string, - readonly cause?: unknown, - ) {} + override readonly cause?: unknown + + constructor(message: string, cause?: unknown) { + super(message) + this.name = 'ChunkError' + this.cause = cause + } } /** * Default chunk options */ export const DEFAULT_CHUNK_OPTIONS: Required = { - maxChunkSize: 4096, + maxChunkSize: 1500, contextMode: 'full', siblingDetail: 'signatures', filterImports: false, language: 'typescript', } -/** - * Get entities within a byte range - */ -const getEntitiesInRange = ( - byteRange: { start: number; end: number }, - scopeTree: ScopeTree, -): EntityInfo[] => { - return scopeTree.allEntities - .filter( - (entity) => - entity.byteRange.start >= byteRange.start && - entity.byteRange.end <= byteRange.end, - ) - .map((entity) => ({ - name: entity.name, - type: entity.type, - signature: entity.signature, - })) -} - -/** - * Get scope information for a byte range - */ -const getScopeForRange = ( - byteRange: { start: number; end: number }, - scopeTree: ScopeTree, -): EntityInfo[] => { - const scopeNode = findScopeAtOffset(scopeTree, byteRange.start) - if (!scopeNode) { - return [] - } - - const ancestors = getAncestorChain(scopeNode) - return [scopeNode, ...ancestors].map((node) => ({ - name: node.entity.name, - type: node.entity.type, - signature: node.entity.signature, - })) -} - -/** - * Get 
relevant imports for a chunk - */ -const getRelevantImports = ( - _byteRange: { start: number; end: number }, - scopeTree: ScopeTree, - chunkText: string, - filterImports: boolean, -): ImportInfo[] => { - if (!filterImports) { - // Return all imports - return scopeTree.imports.map((imp) => ({ - name: imp.name, - source: imp.signature.match(/from ['"]([^'"]+)['"]/)?.[1] ?? '', - isDefault: imp.signature.includes('default'), - isNamespace: imp.signature.includes('* as'), - })) - } - - // Filter imports that are used in this chunk - return scopeTree.imports - .filter((imp) => { - // Check if import name is used in chunk text - const nameRegex = new RegExp(`\\b${imp.name}\\b`) - return nameRegex.test(chunkText) - }) - .map((imp) => ({ - name: imp.name, - source: imp.signature.match(/from ['"]([^'"]+)['"]/)?.[1] ?? '', - isDefault: imp.signature.includes('default'), - isNamespace: imp.signature.includes('* as'), - })) -} - /** * Greedy window assignment algorithm * Accumulates nodes until maxSize is reached, recursing into oversized nodes @@ -121,7 +52,7 @@ const getRelevantImports = ( function* greedyAssignWindows( nodes: SyntaxNode[], code: string, - nwsMap: NwsCountMap, + cumsum: NwsCumsum, maxSize: number, ): Generator { let currentWindow: ASTWindow = { @@ -132,7 +63,7 @@ function* greedyAssignWindows( } for (const node of nodes) { - const nodeSize = getNwsCount(node, nwsMap, code) + const nodeSize = getNwsCountForNode(node, cumsum) // Check if node fits in current window if (currentWindow.size + nodeSize <= maxSize) { @@ -162,7 +93,7 @@ function* greedyAssignWindows( children.push(child) } } - yield* greedyAssignWindows(children, code, nwsMap, maxSize) + yield* greedyAssignWindows(children, code, cumsum, maxSize) } else { // Leaf node that's oversized - split at line boundaries const windows = splitOversizedLeafByLines(node, code, maxSize) @@ -266,11 +197,20 @@ function* splitOversizedLeafByLines( /** * Build chunk context from scope tree + * + * @param text - The rebuilt text for the chunk + * @param scopeTree - The scope tree + * @param options - Chunking options + * @param filepath - Optional file path of the source file + * @param language - Optional programming language + * @returns The chunk context including filepath and language if provided */ const buildContext = ( text: RebuiltText, scopeTree: ScopeTree, options: Required, + filepath?: string, + language?: Language, ): ChunkContext => { const byteRange = text.byteRange @@ -287,14 +227,11 @@ const buildContext = ( }) // Get relevant imports - const imports = getRelevantImports( - byteRange, - scopeTree, - text.text, - options.filterImports, - ) + const imports = getRelevantImports(entities, scopeTree, options.filterImports) return { + filepath, + language, scope, entities, siblings, @@ -310,6 +247,7 @@ const buildContext = ( * @param scopeTree - The scope tree * @param language - The programming language * @param options - Chunking options + * @param filepath - Optional file path of the source file * @returns Effect yielding chunks */ export const chunk = ( @@ -318,6 +256,7 @@ export const chunk = ( scopeTree: ScopeTree, language: Language, options: ChunkOptions = {}, + filepath?: string, ): Effect.Effect => { return Effect.try({ try: () => { @@ -330,8 +269,8 @@ export const chunk = ( const maxSize = opts.maxChunkSize - // Step 1: Preprocess NWS counts - const nwsMap = preprocessNwsCount(rootNode, code) + // Step 1: Preprocess NWS cumulative sum for O(1) range queries + const cumsum = preprocessNwsCumsum(code) // Step 2: Get 
root's children for processing const children: SyntaxNode[] = [] @@ -343,7 +282,7 @@ export const chunk = ( } // Step 3: Assign nodes to windows using greedy algorithm - const rawWindows = greedyAssignWindows(children, code, nwsMap, maxSize) + const rawWindows = greedyAssignWindows(children, code, cumsum, maxSize) // Step 4: Merge adjacent windows const mergedWindows = mergeAdjacentWindows(rawWindows, { maxSize }) @@ -360,7 +299,7 @@ export const chunk = ( const context = opts.contextMode === 'none' ? { scope: [], entities: [], siblings: [], imports: [] } - : buildContext(text, scopeTree, opts) + : buildContext(text, scopeTree, opts, filepath, language) return { text: text.text, @@ -386,6 +325,7 @@ export const chunk = ( * @param scopeTree - The scope tree * @param language - The programming language * @param options - Chunking options + * @param filepath - Optional file path of the source file * @returns Async generator of chunks */ export async function* streamChunks( @@ -394,6 +334,7 @@ export async function* streamChunks( scopeTree: ScopeTree, language: Language, options: ChunkOptions = {}, + filepath?: string, ): AsyncGenerator { // Merge options with defaults const opts: Required = { @@ -404,8 +345,8 @@ export async function* streamChunks( const maxSize = opts.maxChunkSize - // Preprocess NWS counts - const nwsMap = preprocessNwsCount(rootNode, code) + // Preprocess NWS cumulative sum for O(1) range queries + const cumsum = preprocessNwsCumsum(code) // Get root's children const children: SyntaxNode[] = [] @@ -417,7 +358,7 @@ export async function* streamChunks( } // Assign nodes to windows - const rawWindows = greedyAssignWindows(children, code, nwsMap, maxSize) + const rawWindows = greedyAssignWindows(children, code, cumsum, maxSize) // Merge adjacent windows const mergedWindows = mergeAdjacentWindows(rawWindows, { maxSize }) @@ -433,7 +374,7 @@ export async function* streamChunks( const context = opts.contextMode === 'none' ? { scope: [], entities: [], siblings: [], imports: [] } - : buildContext(text, scopeTree, opts) + : buildContext(text, scopeTree, opts, filepath, language) yield { text: text.text, diff --git a/src/chunking/nws.ts b/src/chunking/nws.ts index f710416..c3029ec 100644 --- a/src/chunking/nws.ts +++ b/src/chunking/nws.ts @@ -1,10 +1,5 @@ import type { SyntaxNode } from '../types' -/** - * Map from node ID to non-whitespace character count - */ -export type NwsCountMap = Map - /** * Cumulative sum array for O(1) NWS range queries * cumsum[i] = count of non-whitespace chars in code[0..i-1] @@ -66,47 +61,16 @@ export const getNwsCountFromCumsum = ( } /** - * Preprocess the AST to compute NWS counts for all nodes - * - * @param rootNode - The root AST node - * @param code - The source code - * @returns Map from node ID to NWS count - * - * TODO: Implement NWS preprocessing with memoization - */ -export const preprocessNwsCount = ( - rootNode: SyntaxNode, - code: string, -): NwsCountMap => { - // TODO: Implement NWS count preprocessing - // 1. Walk the tree - // 2. For each node, compute NWS count of its text - // 3. Store in map keyed by node ID - const map: NwsCountMap = new Map() - void rootNode - void code - return map -} - -/** - * Get the NWS count for a node from the precomputed map + * Get the NWS count for a node using the precomputed cumulative sum array. + * This is an O(1) operation. 
* * @param node - The AST node - * @param nwsMap - The precomputed NWS count map - * @param code - The source code (fallback if not in map) + * @param cumsum - The precomputed cumulative sum array * @returns The NWS count for the node */ -export const getNwsCount = ( +export const getNwsCountForNode = ( node: SyntaxNode, - nwsMap: NwsCountMap, - code: string, + cumsum: NwsCumsum, ): number => { - // Try to get from map first - const cached = nwsMap.get(node.id) - if (cached !== undefined) { - return cached - } - // Fallback: compute directly - const text = code.slice(node.startIndex, node.endIndex) - return countNws(text) + return getNwsCountFromCumsum(cumsum, node.startIndex, node.endIndex) } diff --git a/src/chunking/oversized.ts b/src/chunking/oversized.ts index 4f0a959..0b4b7f6 100644 --- a/src/chunking/oversized.ts +++ b/src/chunking/oversized.ts @@ -1,233 +1,4 @@ -import type { ASTWindow, SyntaxNode } from '../types' -import { getNwsCountFromCumsum, type NwsCumsum } from './nws' - -/** - * Options for splitting oversized nodes - */ -export interface SplitOptions { - /** Maximum size of a chunk in NWS characters */ - maxSize: number - /** The precomputed NWS cumulative sum array */ - cumsum: NwsCumsum - /** The source code */ - code: string - /** Ancestor nodes for context */ - ancestors?: SyntaxNode[] -} - -/** - * Line boundary information within a node - */ -interface LineBoundary { - /** Start byte offset of the line */ - start: number - /** End byte offset of the line (exclusive) */ - end: number - /** 0-indexed line number */ - lineNumber: number -} - -/** - * Get line boundaries within a node's byte range - * - * @param node - The AST node - * @param code - The source code - * @returns Array of line boundaries within the node - */ -export const getLineRangesInNode = ( - node: SyntaxNode, - code: string, -): LineBoundary[] => { - const lines: LineBoundary[] = [] - const nodeStart = node.startIndex - const nodeEnd = node.endIndex - const startLine = node.startPosition.row - - let lineStart = nodeStart - let currentLine = startLine - - for (let i = nodeStart; i < nodeEnd; i++) { - if (code[i] === '\n') { - // End of current line (exclusive of newline) - lines.push({ - start: lineStart, - end: i + 1, // Include the newline character - lineNumber: currentLine, - }) - lineStart = i + 1 - currentLine++ - } - } - - // Handle the last line (may not end with newline) - if (lineStart < nodeEnd) { - lines.push({ - start: lineStart, - end: nodeEnd, - lineNumber: currentLine, - }) - } - - return lines -} - -/** - * Split an oversized leaf node into multiple windows at line boundaries - * - * Used when a node is too large to fit in a single window and cannot - * be subdivided further (e.g., a very long string literal or comment). 
- * - * @param node - The oversized node - * @param code - The source code - * @param cumsum - The precomputed NWS cumulative sum array - * @param maxSize - Maximum size in NWS characters - * @param ancestors - Ancestor nodes for context - * @returns Array of ASTWindow objects representing the split - */ -export const splitOversizedLeaf = ( - node: SyntaxNode, - code: string, - cumsum: NwsCumsum, - maxSize: number, - ancestors: SyntaxNode[] = [], -): ASTWindow[] => { - const windows: ASTWindow[] = [] - const lineBoundaries = getLineRangesInNode(node, code) - - // If no lines or single line that still exceeds, return as single partial window - if (lineBoundaries.length === 0) { - return [ - { - nodes: [node], - ancestors, - size: getNwsCountFromCumsum(cumsum, node.startIndex, node.endIndex), - isPartialNode: true, - lineRanges: [ - { - start: node.startPosition.row, - end: node.endPosition.row, - }, - ], - }, - ] - } - - let currentChunkStart = 0 // Index into lineBoundaries - let currentSize = 0 - - for (let i = 0; i < lineBoundaries.length; i++) { - const line = lineBoundaries[i] - if (!line) continue - - const lineNws = getNwsCountFromCumsum(cumsum, line.start, line.end) - - // If single line exceeds maxSize, it becomes its own chunk - if (lineNws > maxSize) { - // Flush current accumulated lines first - if (i > currentChunkStart) { - const startLine = lineBoundaries[currentChunkStart] - const endLine = lineBoundaries[i - 1] - if (startLine && endLine) { - windows.push({ - nodes: [node], - ancestors, - size: currentSize, - isPartialNode: true, - lineRanges: [ - { - start: startLine.lineNumber, - end: endLine.lineNumber, - }, - ], - }) - } - } - - // Add the oversized line as its own chunk - windows.push({ - nodes: [node], - ancestors, - size: lineNws, - isPartialNode: true, - lineRanges: [ - { - start: line.lineNumber, - end: line.lineNumber, - }, - ], - }) - - currentChunkStart = i + 1 - currentSize = 0 - continue - } - - // Check if adding this line would exceed maxSize - if (currentSize + lineNws > maxSize && i > currentChunkStart) { - // Flush current chunk - const startLine = lineBoundaries[currentChunkStart] - const endLine = lineBoundaries[i - 1] - if (startLine && endLine) { - windows.push({ - nodes: [node], - ancestors, - size: currentSize, - isPartialNode: true, - lineRanges: [ - { - start: startLine.lineNumber, - end: endLine.lineNumber, - }, - ], - }) - } - - currentChunkStart = i - currentSize = lineNws - } else { - currentSize += lineNws - } - } - - // Flush remaining lines - if (currentChunkStart < lineBoundaries.length) { - const startLine = lineBoundaries[currentChunkStart] - const endLine = lineBoundaries[lineBoundaries.length - 1] - if (startLine && endLine) { - windows.push({ - nodes: [node], - ancestors, - size: currentSize, - isPartialNode: true, - lineRanges: [ - { - start: startLine.lineNumber, - end: endLine.lineNumber, - }, - ], - }) - } - } - - return windows -} - -/** - * Check if a node is oversized using cumulative sum - * - * @param node - The node to check - * @param cumsum - Precomputed NWS cumulative sum array - * @param maxSize - Maximum allowed size - * @returns Whether the node exceeds maxSize - */ -export const isOversizedWithCumsum = ( - node: SyntaxNode, - cumsum: NwsCumsum, - maxSize: number, -): boolean => { - const nodeSize = getNwsCountFromCumsum(cumsum, node.startIndex, node.endIndex) - return nodeSize > maxSize -} +import type { SyntaxNode } from '../types' /** * Check if a node is a leaf (has no children) @@ -238,56 +9,3 @@ export const 
isOversizedWithCumsum = ( export const isLeafNode = (node: SyntaxNode): boolean => { return node.childCount === 0 } - -/** - * Recursively subdivide a node into windows that fit within maxSize - * - * Strategy: - * - If node fits in maxSize, yield single window - * - If node has children, recursively process children - * - If leaf node is oversized, use splitOversizedLeaf - * - * @param node - The node to subdivide - * @param code - The source code - * @param cumsum - Precomputed NWS cumulative sum array - * @param maxSize - Maximum size in NWS characters - * @param ancestors - Ancestor nodes for context - * @yields ASTWindow objects - */ -export function* subdivideNode( - node: SyntaxNode, - code: string, - cumsum: NwsCumsum, - maxSize: number, - ancestors: SyntaxNode[] = [], -): Generator { - const nodeSize = getNwsCountFromCumsum(cumsum, node.startIndex, node.endIndex) - - // If node fits within maxSize, yield single window - if (nodeSize <= maxSize) { - yield { - nodes: [node], - ancestors, - size: nodeSize, - isPartialNode: false, - } - return - } - - // If node has children, recursively process them - if (node.childCount > 0) { - const newAncestors = [...ancestors, node] - - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i) - if (child) { - yield* subdivideNode(child, code, cumsum, maxSize, newAncestors) - } - } - return - } - - // Leaf node is oversized - split at line boundaries - const windows = splitOversizedLeaf(node, code, cumsum, maxSize, ancestors) - yield* windows -} diff --git a/src/chunking/rebuild.ts b/src/chunking/rebuild.ts index 660b1d3..39b1b76 100644 --- a/src/chunking/rebuild.ts +++ b/src/chunking/rebuild.ts @@ -29,31 +29,6 @@ const buildLineStartsTable = (code: string): number[] => { return lineStarts } -/** - * Find line number for a byte offset using binary search - * - * @param byteOffset - The byte offset - * @param lineStarts - Array of line start offsets - * @returns The 0-indexed line number - */ -const findLineNumber = (byteOffset: number, lineStarts: number[]): number => { - // Binary search for the largest line start <= byteOffset - let lo = 0 - let hi = lineStarts.length - 1 - - while (lo < hi) { - const mid = Math.floor((lo + hi + 1) / 2) - const midStart = lineStarts[mid] - if (midStart !== undefined && midStart <= byteOffset) { - lo = mid - } else { - hi = mid - 1 - } - } - - return lo -} - /** * Rebuild source text from an AST window * @@ -160,52 +135,3 @@ const rebuildFromLineRanges = ( lineRange: { start: startLine, end: endLine }, } } - -/** - * Rebuild text for a partial node (oversized that was split) - * - * @param window - The AST window with partial node - * @param code - The original source code - * @param startOffset - The start offset within the node - * @param endOffset - The end offset within the node - * @returns The rebuilt text with range information - */ -export const rebuildPartialText = ( - _window: ASTWindow, - code: string, - startOffset: number, - endOffset: number, -): RebuiltText => { - const text = code.slice(startOffset, endOffset) - - // Use a simple line counting approach for the slice - // This is more efficient than building full lookup table for small slices - const lineStarts = buildLineStartsTable(code) - const startLine = findLineNumber(startOffset, lineStarts) - const endLine = findLineNumber(endOffset > 0 ? 
endOffset - 1 : 0, lineStarts) - - return { - text, - byteRange: { start: startOffset, end: endOffset }, - lineRange: { start: startLine, end: endLine }, - } -} - -/** - * Get line and column position for a byte offset - * - * @param byteOffset - The byte offset - * @param code - The source code - * @returns Object with line (0-indexed) and column (0-indexed) - */ -export const getPositionFromOffset = ( - byteOffset: number, - code: string, -): { line: number; column: number } => { - const lineStarts = buildLineStartsTable(code) - const line = findLineNumber(byteOffset, lineStarts) - const lineStart = lineStarts[line] ?? 0 - const column = byteOffset - lineStart - - return { line, column } -} diff --git a/src/chunking/windows.ts b/src/chunking/windows.ts index 4119f3a..3835065 100644 --- a/src/chunking/windows.ts +++ b/src/chunking/windows.ts @@ -1,127 +1,4 @@ -import type { ASTWindow, SyntaxNode } from '../types' -import { getNwsCountFromCumsum, type NwsCumsum } from './nws' -import { subdivideNode } from './oversized' - -/** - * Options for window assignment - */ -export interface WindowOptions { - /** Maximum size of a window in NWS characters */ - maxSize: number - /** The precomputed NWS cumulative sum array */ - cumsum: NwsCumsum - /** The source code */ - code: string -} - -/** - * Assign AST nodes to windows using a greedy algorithm - * - * Walks through child nodes, accumulating them into windows until maxSize is reached. - * When a single node exceeds maxSize, it delegates to subdivideNode for recursive splitting. - * - * @param nodes - Iterator/array of nodes to assign to windows - * @param code - The source code - * @param cumsum - Precomputed NWS cumulative sum array - * @param ancestors - Ancestor nodes for context - * @param maxSize - Maximum window size in NWS characters - * @yields ASTWindow objects - */ -export function* assignNodesToWindows( - nodes: Iterable, - code: string, - cumsum: NwsCumsum, - ancestors: SyntaxNode[], - maxSize: number, -): Generator { - let currentWindow: ASTWindow = { - nodes: [], - ancestors: [...ancestors], - size: 0, - isPartialNode: false, - } - - for (const node of nodes) { - const nodeSize = getNwsCountFromCumsum( - cumsum, - node.startIndex, - node.endIndex, - ) - - // Case 1: Single node exceeds maxSize entirely - subdivide it - if (nodeSize > maxSize) { - // First, yield the current window if it has nodes - if (currentWindow.nodes.length > 0) { - yield currentWindow - currentWindow = { - nodes: [], - ancestors: [...ancestors], - size: 0, - isPartialNode: false, - } - } - - // Delegate to subdivideNode for recursive splitting - yield* subdivideNode(node, code, cumsum, maxSize, ancestors) - continue - } - - // Case 2: Node fits in remaining budget - add to current window - if (currentWindow.size + nodeSize <= maxSize) { - currentWindow.nodes.push(node) - currentWindow.size += nodeSize - continue - } - - // Case 3: Node exceeds remaining budget - yield current window and start new - if (currentWindow.nodes.length > 0) { - yield currentWindow - } - - // Start a new window with this node - currentWindow = { - nodes: [node], - ancestors: [...ancestors], - size: nodeSize, - isPartialNode: false, - } - } - - // Yield final window if it has any nodes - if (currentWindow.nodes.length > 0) { - yield currentWindow - } -} - -/** - * Check if a node fits within the remaining budget - * - * @param node - The node to check - * @param currentSize - Current window size - * @param maxSize - Maximum window size - * @param cumsum - Precomputed NWS cumulative 
sum array - * @returns Whether the node fits - */ -export const nodeFitsInWindow = ( - node: SyntaxNode, - currentSize: number, - maxSize: number, - cumsum: NwsCumsum, -): boolean => { - const nodeSize = getNwsCountFromCumsum(cumsum, node.startIndex, node.endIndex) - return currentSize + nodeSize <= maxSize -} - -/** - * Get the NWS size of a node - * - * @param node - The node to measure - * @param cumsum - Precomputed NWS cumulative sum array - * @returns The NWS character count - */ -export const getNodeSize = (node: SyntaxNode, cumsum: NwsCumsum): number => { - return getNwsCountFromCumsum(cumsum, node.startIndex, node.endIndex) -} +import type { SyntaxNode } from '../types' /** * Get ancestors for a set of nodes diff --git a/src/context/index.ts b/src/context/index.ts index 7b666ed..fddb06b 100644 --- a/src/context/index.ts +++ b/src/context/index.ts @@ -1,30 +1,13 @@ -import { Effect } from 'effect' -import type { RebuiltText } from '../chunking/rebuild' import { findScopeAtOffset, getAncestorChain } from '../scope/tree' import type { ByteRange, - Chunk, ChunkContext, ChunkEntityInfo, - ChunkOptions, EntityInfo, ExtractedEntity, ImportInfo, - Language, ScopeTree, } from '../types' -import { getSiblings, type SiblingOptions } from './siblings' - -/** - * Error when attaching context fails - */ -export class ContextError { - readonly _tag = 'ContextError' - constructor( - readonly message: string, - readonly cause?: unknown, - ) {} -} /** * Get scope information for a byte range @@ -195,101 +178,3 @@ export const getRelevantImports = ( return filteredImports.map(mapToImportInfo) } - -/** - * Options for attaching context to a chunk - */ -export interface AttachContextOptions { - /** The rebuilt text info for the chunk */ - text: RebuiltText - /** The scope tree for the file */ - scopeTree: ScopeTree - /** Chunking options */ - options: ChunkOptions - /** The chunk index */ - index: number - /** Total number of chunks */ - totalChunks: number - /** File path of the source file (optional) */ - filepath?: string - /** Programming language of the source (optional) */ - language?: Language -} - -/** - * Attach context information to a chunk - * - * @param opts - Options containing all parameters for context attachment - * @returns Effect yielding the complete chunk with context - */ -export const attachContext = ( - opts: AttachContextOptions, -): Effect.Effect => { - const { text, scopeTree, options, index, totalChunks, filepath, language } = - opts - - return Effect.try({ - try: () => { - // Determine context mode - const contextMode = options.contextMode ?? 'full' - - // For 'none' mode, return minimal context - if (contextMode === 'none') { - const context: ChunkContext = { - filepath, - language, - scope: [], - entities: [], - siblings: [], - imports: [], - } - return { - text: text.text, - byteRange: text.byteRange, - lineRange: text.lineRange, - context, - index, - totalChunks, - } - } - - // Get scope for this chunk's byte range - const scope = getScopeForRange(text.byteRange, scopeTree) - - // Get entities within the chunk - const entities = getEntitiesInRange(text.byteRange, scopeTree) - - // Get siblings based on options - const siblingDetail = options.siblingDetail ?? 'signatures' - const siblingOptions: SiblingOptions = { - detail: siblingDetail, - maxSiblings: contextMode === 'minimal' ? 2 : undefined, - } - const siblings = getSiblings(text.byteRange, scopeTree, siblingOptions) - - // Get relevant imports - const filterImports = options.filterImports ?? 
false - const imports = getRelevantImports(entities, scopeTree, filterImports) - - const context: ChunkContext = { - filepath, - language, - scope, - entities, - siblings, - imports, - } - - return { - text: text.text, - byteRange: text.byteRange, - lineRange: text.lineRange, - context, - index, - totalChunks, - } - }, - catch: (error: unknown) => - new ContextError('Failed to attach context', error), - }) -} diff --git a/src/context/siblings.ts b/src/context/siblings.ts index f9c49fd..6db2838 100644 --- a/src/context/siblings.ts +++ b/src/context/siblings.ts @@ -150,81 +150,3 @@ export const getSiblings = ( return result } - -/** - * Get siblings before the current chunk - * - * @param byteRange - The byte range of the current chunk - * @param scopeTree - The scope tree - * @param maxCount - Maximum number of siblings to return - * @returns Array of sibling info, sorted by proximity (closest first) - */ -export const getSiblingsBefore = ( - byteRange: ByteRange, - scopeTree: ScopeTree, - maxCount: number, -): SiblingInfo[] => { - // Get all siblings - const allSiblings = getSiblings(byteRange, scopeTree, { detail: 'names' }) - - // Filter to only 'before' siblings - const beforeSiblings = allSiblings.filter((s) => s.position === 'before') - - // Already sorted by proximity from getSiblings - return beforeSiblings.slice(0, maxCount) -} - -/** - * Get siblings after the current chunk - * - * @param byteRange - The byte range of the current chunk - * @param scopeTree - The scope tree - * @param maxCount - Maximum number of siblings to return - * @returns Array of sibling info, sorted by proximity (closest first) - */ -export const getSiblingsAfter = ( - byteRange: ByteRange, - scopeTree: ScopeTree, - maxCount: number, -): SiblingInfo[] => { - // Get all siblings - const allSiblings = getSiblings(byteRange, scopeTree, { detail: 'names' }) - - // Filter to only 'after' siblings - const afterSiblings = allSiblings.filter((s) => s.position === 'after') - - // Already sorted by proximity from getSiblings - return afterSiblings.slice(0, maxCount) -} - -/** - * Check if two entities are siblings (same parent scope) - * - * @param byteRange1 - First entity's byte range - * @param byteRange2 - Second entity's byte range - * @param scopeTree - The scope tree - * @returns Whether the entities are siblings (have the same parent scope) - */ -export const areSiblings = ( - byteRange1: ByteRange, - byteRange2: ByteRange, - scopeTree: ScopeTree, -): boolean => { - // Find scope nodes containing each byte range - const node1 = findScopeForRange(byteRange1, scopeTree) - const node2 = findScopeForRange(byteRange2, scopeTree) - - // If both are null, they're at file level (both top-level) - if (node1 === null && node2 === null) { - return true - } - - // If only one is null, they're at different levels - if (node1 === null || node2 === null) { - return false - } - - // Check if they have the same parent - // Note: comparing parent references directly - return node1.parent === node2.parent -} diff --git a/src/extract/fallback.ts b/src/extract/fallback.ts index 61a7c9a..2d6e61f 100644 --- a/src/extract/fallback.ts +++ b/src/extract/fallback.ts @@ -6,7 +6,8 @@ import type { SyntaxNode, } from '../types' import { extractDocstring } from './docstring' -import { extractImportSource, extractName, extractSignature } from './signature' +import { extractImportSymbols } from './imports' +import { extractName, extractSignature } from './signature' /** * Node types that represent extractable entities by language @@ -162,62 
+163,61 @@ function walkAndExtract( const entityType = getEntityType(node.type) if (entityType) { - // Extract name - const name = extractName(node, language) ?? '' + // For import statements, extract individual symbols + if (entityType === 'import') { + const importEntities = extractImportSymbols(node, language, code) + entities.push(...importEntities) + } else { + // Extract name + const name = extractName(node, language) ?? '' - // Extract signature - const signature = yield* extractSignature( - node, - entityType, - language, - code, - ) + // Extract signature + const signature = yield* extractSignature( + node, + entityType, + language, + code, + ) - // Extract docstring - const docstring = yield* extractDocstring(node, language, code) + // Extract docstring + const docstring = yield* extractDocstring(node, language, code) - // Extract import source for import entities - const source = - entityType === 'import' - ? (extractImportSource(node, language) ?? undefined) - : undefined - - // Create entity - const entity: ExtractedEntity = { - type: entityType, - name, - signature: signature || name, - docstring, - byteRange: { - start: node.startIndex, - end: node.endIndex, - }, - lineRange: { - start: node.startPosition.row, - end: node.endPosition.row, - }, - parent: parentName, - node, - source, - } + // Create entity + const entity: ExtractedEntity = { + type: entityType, + name, + signature: signature || name, + docstring, + byteRange: { + start: node.startIndex, + end: node.endIndex, + }, + lineRange: { + start: node.startPosition.row, + end: node.endPosition.row, + }, + parent: parentName, + node, + } - entities.push(entity) + entities.push(entity) - // For nested entities, use this entity's name as parent - const newParentName = - entityType === 'class' || - entityType === 'interface' || - entityType === 'function' || - entityType === 'method' - ? name - : parentName + // For nested entities, use this entity's name as parent + const newParentName = + entityType === 'class' || + entityType === 'interface' || + entityType === 'function' || + entityType === 'method' + ? name + : parentName - // Add children to stack (in reverse order for correct DFS order) - const children = node.namedChildren - for (let i = children.length - 1; i >= 0; i--) { - const child = children[i] - if (child) { - stack.push({ node: child, parentName: newParentName }) + // Add children to stack (in reverse order for correct DFS order) + const children = node.namedChildren + for (let i = children.length - 1; i >= 0; i--) { + const child = children[i] + if (child) { + stack.push({ node: child, parentName: newParentName }) + } } } } diff --git a/src/extract/imports.ts b/src/extract/imports.ts new file mode 100644 index 0000000..06cd41b --- /dev/null +++ b/src/extract/imports.ts @@ -0,0 +1,194 @@ +import type { ExtractedEntity, Language, SyntaxNode } from '../types' +import { extractImportSource } from './signature' + +/** + * Extract individual import symbols from an import statement + * Returns an array of import entities, one per imported symbol + * + * @param importNode - The import statement AST node + * @param language - The programming language + * @param code - The source code + * @returns Array of ExtractedEntity objects for each imported symbol + */ +export function extractImportSymbols( + importNode: SyntaxNode, + language: Language, + code: string, +): ExtractedEntity[] { + const source = extractImportSource(importNode, language) ?? 
'' + const signature = code.slice(importNode.startIndex, importNode.endIndex) + const entities: ExtractedEntity[] = [] + + // Helper to create an import entity with common fields + const makeImportEntity = (name: string): ExtractedEntity => ({ + type: 'import', + name, + signature, + docstring: null, + byteRange: { + start: importNode.startIndex, + end: importNode.endIndex, + }, + lineRange: { + start: importNode.startPosition.row, + end: importNode.endPosition.row, + }, + parent: null, + node: importNode, + source, + }) + + switch (language) { + case 'typescript': + case 'javascript': { + // Handle: import { A, B } from '...', import Foo from '...', import * as Foo from '...' + const importClause = importNode.namedChildren.find( + (c) => c.type === 'import_clause', + ) + if (importClause) { + for (const child of importClause.namedChildren) { + if (child.type === 'named_imports') { + // Named imports: { A, B, C } + for (const specifier of child.namedChildren) { + if (specifier.type === 'import_specifier') { + const nameNode = + specifier.childForFieldName('alias') ?? + specifier.childForFieldName('name') ?? + specifier.namedChildren.find((c) => c.type === 'identifier') + if (nameNode) { + entities.push(makeImportEntity(nameNode.text)) + } + } + } + } else if (child.type === 'identifier') { + // Default import: import Foo from '...' + entities.push(makeImportEntity(child.text)) + } else if (child.type === 'namespace_import') { + // Namespace import: import * as Foo from '...' + const aliasNode = child.namedChildren.find( + (c) => c.type === 'identifier', + ) + if (aliasNode) { + entities.push(makeImportEntity(aliasNode.text)) + } + } + } + } + break + } + + case 'python': { + // Handle: from X import A, B or import X + const names = importNode.namedChildren.filter( + (c) => + c.type === 'dotted_name' || + c.type === 'aliased_import' || + c.type === 'identifier', + ) + for (const nameNode of names) { + const name = + nameNode.type === 'aliased_import' + ? (nameNode.childForFieldName('alias')?.text ?? + nameNode.childForFieldName('name')?.text ?? 
+ nameNode.text) + : nameNode.text + if (name) { + entities.push(makeImportEntity(name)) + } + } + break + } + + case 'rust': { + // Handle: use crate::foo::{Bar, Baz} or use crate::foo::Bar + const extractRustUseNames = (node: SyntaxNode): string[] => { + const names: string[] = [] + if (node.type === 'identifier' || node.type === 'type_identifier') { + names.push(node.text) + } else if (node.type === 'use_list') { + for (const child of node.namedChildren) { + names.push(...extractRustUseNames(child)) + } + } else if ( + node.type === 'scoped_identifier' || + node.type === 'scoped_use_list' + ) { + // Get the last part (the actual imported name) + const lastChild = node.namedChildren[node.namedChildren.length - 1] + if (lastChild) { + names.push(...extractRustUseNames(lastChild)) + } + } else if (node.type === 'use_as_clause') { + const alias = node.childForFieldName('alias') + if (alias) { + names.push(alias.text) + } + } else if (node.type === 'use_wildcard') { + names.push('*') + } + return names + } + + const argument = importNode.childForFieldName('argument') + if (argument) { + for (const name of extractRustUseNames(argument)) { + entities.push(makeImportEntity(name)) + } + } + break + } + + case 'go': { + // Handle: import "fmt" or import ( "fmt" "os" ) + const extractGoImportNames = (node: SyntaxNode): string[] => { + const names: string[] = [] + if (node.type === 'import_spec') { + // Get alias if present, otherwise derive from path + const alias = node.childForFieldName('name') + const pathNode = node.childForFieldName('path') + if (alias) { + names.push(alias.text) + } else if (pathNode) { + // Use last path segment as name + const pathText = pathNode.text.replace(/['"]/g, '') + const segments = pathText.split('/') + names.push(segments[segments.length - 1] ?? pathText) + } + } else if (node.type === 'import_spec_list') { + for (const child of node.namedChildren) { + names.push(...extractGoImportNames(child)) + } + } + return names + } + + for (const child of importNode.namedChildren) { + for (const name of extractGoImportNames(child)) { + entities.push(makeImportEntity(name)) + } + } + break + } + + case 'java': { + // Handle: import package.Class or import package.* + const scopedId = importNode.namedChildren.find( + (c) => c.type === 'scoped_identifier', + ) + if (scopedId) { + // Get the last identifier (the class name) + const parts = scopedId.text.split('.') + const name = parts[parts.length - 1] ?? 
scopedId.text + entities.push(makeImportEntity(name)) + } + break + } + } + + // If no symbols were extracted, fall back to using source as name + if (entities.length === 0) { + entities.push(makeImportEntity(source || '')) + } + + return entities +} diff --git a/src/extract/index.ts b/src/extract/index.ts index 8d7ec26..94bc6a1 100644 --- a/src/extract/index.ts +++ b/src/extract/index.ts @@ -11,8 +11,15 @@ import { extractByNodeTypes, getEntityType, } from './fallback' -import { type CompiledQuery, loadQuery, loadQuerySync } from './queries' -import { extractImportSource, extractName, extractSignature } from './signature' +import { extractImportSymbols } from './imports' +import { + type CompiledQuery, + extractEntityFromMatch, + loadQuery, + loadQuerySync, + type QueryMatch, +} from './queries' +import { extractName, extractSignature } from './signature' /** * Error when entity extraction fails @@ -26,54 +33,8 @@ export class ExtractError { } /** - * Interface for query match captures (compatible with future queries.ts implementation) - */ -interface QueryCapture { - name: string - node: SyntaxNode - patternIndex: number -} - -/** - * Interface for query matches (compatible with future queries.ts implementation) - */ -interface QueryMatch { - patternIndex: number - captures: QueryCapture[] -} - -/** - * Extract the entity node and name node from a query match - * This will be provided by queries.ts when merged, but we define it here for now - */ -function extractEntityFromMatch(match: QueryMatch): { - itemNode: SyntaxNode - nameNode: SyntaxNode | null - contextNodes: SyntaxNode[] - annotationNodes: SyntaxNode[] -} | null { - const itemCapture = match.captures.find((c) => c.name === 'item') - if (!itemCapture) { - return null - } - - const nameCapture = match.captures.find((c) => c.name === 'name') - const contextCaptures = match.captures.filter((c) => c.name === 'context') - const annotationCaptures = match.captures.filter( - (c) => c.name === 'annotation', - ) - - return { - itemNode: itemCapture.node, - nameNode: nameCapture?.node ?? null, - contextNodes: contextCaptures.map((c) => c.node), - annotationNodes: annotationCaptures.map((c) => c.node), - } -} - -/** - * Execute a query against a tree (compatible interface) - * This will be provided by queries.ts when merged + * Execute a query against a tree + * Wraps the web-tree-sitter Query.matches() call with error handling */ function executeQueryOnTree( query: CompiledQuery, @@ -149,6 +110,13 @@ function matchesToEntities( } } + // For import statements, extract individual symbols + if (entityType === 'import') { + const importEntities = extractImportSymbols(itemNode, language, code) + entities.push(...importEntities) + continue + } + // Extract name - prefer name node from query, fallback to extraction const name = nameNode ? nameNode.text @@ -168,12 +136,6 @@ function matchesToEntities( // Find parent entity const parent = findParentEntityName(itemNode, rootNode, language) - // Extract import source for import entities - const source = - entityType === 'import' - ? (extractImportSource(itemNode, language) ?? 
undefined) - : undefined - const entity: ExtractedEntity = { type: entityType, name, @@ -189,7 +151,6 @@ function matchesToEntities( }, parent, node: itemNode, - source, } entities.push(entity) @@ -364,6 +325,7 @@ export { getEntityType, NODE_TYPE_TO_ENTITY_TYPE, } from './fallback' +export { extractImportSymbols } from './imports' export type { CompiledQuery, QueryLoadError } from './queries' export { clearQueryCache, loadQuery, loadQuerySync } from './queries' export { extractImportSource, extractName, extractSignature } from './signature' diff --git a/src/index.ts b/src/index.ts index e5a5158..4eeca9f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -9,7 +9,13 @@ */ // Main chunking function -export { ChunkingError, chunk, UnsupportedLanguageError } from './chunk' +export { + ChunkingError, + chunk, + chunkStream, + chunkStreamEffect, + UnsupportedLanguageError, +} from './chunk' // Chunker factory export { createChunker } from './chunker' @@ -23,6 +29,7 @@ export type { ByteRange, Chunk, ChunkContext, + ChunkEntityInfo, Chunker, ChunkOptions, EntityInfo, diff --git a/src/parser/index.ts b/src/parser/index.ts index ba7c4c0..150253a 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -1,11 +1,15 @@ -import { Context, Effect, Layer } from 'effect' +import { Effect } from 'effect' import { Parser, type Node as TSNode, type Tree as TSTree, } from 'web-tree-sitter' import type { Language, ParseError, ParseResult } from '../types' -import { type GrammarLoadError, getLanguageGrammar } from './languages' +import { + clearGrammarCache, + type GrammarLoadError, + getLanguageGrammar, +} from './languages' // Re-export language utilities export { @@ -30,32 +34,6 @@ export class ParserInitError extends Error { } } -/** - * Parser service interface for dependency injection - */ -export interface ParserService { - readonly _tag: 'ParserService' - - /** - * Parse source code with a specific language - */ - parse( - code: string, - language: Language, - ): Effect.Effect - - /** - * Get the underlying tree-sitter parser instance - */ - getParser(): Parser -} - -/** - * ParserService tag for Effect Context - */ -export const ParserService: Context.Tag = - Context.GenericTag('ParserService') - /** * Flag to track if tree-sitter has been initialized */ @@ -163,30 +141,6 @@ export function parse( }) } -/** - * Create a ParserService implementation - */ -function makeParserService(parser: Parser): ParserService { - return { - _tag: 'ParserService', - parse: (code: string, language: Language) => parse(parser, code, language), - getParser: () => parser, - } -} - -/** - * Live layer that provides ParserService - */ -export const ParserServiceLive: Layer.Layer = - Layer.effect( - ParserService, - Effect.gen(function* () { - yield* initParser() - const parser = new Parser() - return makeParserService(parser) - }), - ) - // ============================================================================ // Public API - Unwraps Effect for consumers // ============================================================================ @@ -241,6 +195,7 @@ export async function initializeParser(): Promise { /** * Reset the shared parser state (useful for testing) + * Also clears the grammar cache to ensure clean reinitialization */ export function resetParser(): void { if (sharedParser) { @@ -248,4 +203,5 @@ export function resetParser(): void { sharedParser = null } initialized = false + clearGrammarCache() } diff --git a/src/scope/index.ts b/src/scope/index.ts index 957fe7d..147fec9 100644 --- a/src/scope/index.ts +++ 
b/src/scope/index.ts @@ -2,8 +2,6 @@ import { Effect } from 'effect' import type { ExtractedEntity, ScopeTree } from '../types' import { buildScopeTreeFromEntities, - createScopeNode, - findParentNode, findScopeAtOffset, flattenScopeTree, getAncestorChain, @@ -61,8 +59,6 @@ export const buildScopeTreeSync = (entities: ExtractedEntity[]): ScopeTree => { // Re-export utilities from tree.ts for public API export { buildScopeTreeFromEntities, - createScopeNode, - findParentNode, findScopeAtOffset, flattenScopeTree, getAncestorChain, diff --git a/src/types.ts b/src/types.ts index 28269b4..2bafcde 100644 --- a/src/types.ts +++ b/src/types.ts @@ -241,7 +241,7 @@ export interface Chunk { * Options for chunking source code */ export interface ChunkOptions { - /** Maximum size of each chunk in bytes (default: 4096) */ + /** Maximum size of each chunk in bytes (default: 1500) */ maxChunkSize?: number /** How much context to include (default: 'full') */ contextMode?: 'none' | 'minimal' | 'full' @@ -259,17 +259,27 @@ export interface ChunkOptions { export interface Chunker { /** * Chunk source code into pieces with context + * @param filepath The file path (used for language detection) * @param source The source code to chunk * @param options Chunking options * @returns Array of chunks */ - chunk(source: string, options?: ChunkOptions): Promise + chunk( + filepath: string, + source: string, + options?: ChunkOptions, + ): Promise /** * Stream chunks as they are generated + * @param filepath The file path (used for language detection) * @param source The source code to chunk * @param options Chunking options * @returns Async iterable of chunks */ - stream(source: string, options?: ChunkOptions): AsyncIterable + stream( + filepath: string, + source: string, + options?: ChunkOptions, + ): AsyncIterable } diff --git a/test/chunking.test.ts b/test/chunking.test.ts index 42b4108..f3adc00 100644 --- a/test/chunking.test.ts +++ b/test/chunking.test.ts @@ -1,5 +1,11 @@ import { describe, expect, test } from 'bun:test' -import { type Chunk, chunk, createChunker, type Language } from '../src' +import { + type Chunk, + chunk, + chunkStream, + createChunker, + type Language, +} from '../src' import { countNws, getNwsCountFromCumsum, @@ -24,6 +30,7 @@ describe('NWS preprocessing', () => { const cumsum = preprocessNwsCumsum(code) // cumsum[i] = count of NWS chars in code[0..i-1] + expect(cumsum).toHaveLength(6) expect(cumsum[0]).toBe(0) // before any chars expect(cumsum[1]).toBe(1) // after 'a' expect(cumsum[2]).toBe(2) // after 'ab' @@ -39,6 +46,8 @@ describe('NWS preprocessing', () => { // Full range const fullNws = getNwsCountFromCumsum(cumsum, 0, code.length) expect(fullNws).toBe(countNws(code)) + // Exact count: "functionhello(){return42;}" = 26 chars + expect(fullNws).toBe(26) // Partial range const partialNws = getNwsCountFromCumsum(cumsum, 0, 8) // 'function' @@ -51,21 +60,30 @@ describe('NWS preprocessing', () => { // ============================================================================ describe('chunk', () => { - test('chunks simple TypeScript file', async () => { - const code = ` -function greet(name: string): string { + test('chunks simple TypeScript file with exact structure', async () => { + const code = `function greet(name: string): string { return \`Hello, \${name}!\` -} -` +}` const chunks = await chunk('test.ts', code) - expect(chunks.length).toBeGreaterThan(0) - expect(chunks[0]).toHaveProperty('text') - expect(chunks[0]).toHaveProperty('byteRange') - 
expect(chunks[0]).toHaveProperty('lineRange') - expect(chunks[0]).toHaveProperty('context') - expect(chunks[0]).toHaveProperty('index') - expect(chunks[0]).toHaveProperty('totalChunks') + expect(chunks).toHaveLength(1) + expect(chunks[0]).toMatchObject({ + text: code, + byteRange: { start: 0, end: code.length }, + lineRange: { start: 0, end: 2 }, + index: 0, + totalChunks: 1, + }) + expect(chunks[0]?.context).toMatchObject({ + filepath: 'test.ts', + language: 'typescript', + }) + expect(chunks[0]?.context.entities).toHaveLength(1) + expect(chunks[0]?.context.entities[0]).toMatchObject({ + name: 'greet', + type: 'function', + isPartial: false, + }) }) test('chunks preserve original text via source slicing', async () => { @@ -75,6 +93,7 @@ const z = 3` const chunks = await chunk('test.ts', code) + expect(chunks).toHaveLength(1) // Reconstruct should match slicing from original for (const c of chunks) { const sliced = code.slice(c.byteRange.start, c.byteRange.end) @@ -83,21 +102,21 @@ const z = 3` }) test('chunks have correct index and totalChunks', async () => { - const code = ` -function a() { return 1 } + const code = `function a() { return 1 } function b() { return 2 } -function c() { return 3 } -` - const chunks = await chunk('test.ts', code) +function c() { return 3 }` + const chunks = await chunk('test.ts', code, { maxChunkSize: 50 }) const total = chunks.length + // With maxChunkSize=50, we get at least 2 chunks + expect(total).toBeGreaterThanOrEqual(2) chunks.forEach((c, i) => { expect(c.index).toBe(i) expect(c.totalChunks).toBe(total) }) }) - test('respects maxChunkSize option', async () => { + test('respects maxChunkSize option with exact counts', async () => { // Create code that would be large const functions = Array.from( { length: 10 }, @@ -108,6 +127,7 @@ function c() { return 3 } // With small maxChunkSize, should produce multiple chunks expect(chunks.length).toBeGreaterThan(1) + expect(chunks.length).toBeLessThanOrEqual(10) // Each chunk's NWS count should be reasonable for (const c of chunks) { @@ -119,12 +139,12 @@ function c() { return 3 } test('handles empty code', async () => { const chunks = await chunk('test.ts', '') - expect(chunks).toEqual([]) + expect(chunks).toHaveLength(0) }) test('handles code with only whitespace', async () => { const chunks = await chunk('test.ts', ' \n\n \t\t ') - expect(chunks.length).toBe(0) + expect(chunks).toHaveLength(0) }) test('throws UnsupportedLanguageError for unknown extension', async () => { @@ -138,7 +158,481 @@ function c() { return 3 } // Even with wrong extension, should work with language override const chunks = await chunk('test.txt', code, { language: 'typescript' }) + expect(chunks).toHaveLength(1) + expect(chunks[0]?.context.language).toBe('typescript') + }) +}) + +// ============================================================================ +// Chunk Ordering and Boundaries Tests +// ============================================================================ + +describe('chunk ordering and boundaries', () => { + test('chunks are non-overlapping and cover source', async () => { + const code = `function a() { return 1 } +function b() { return 2 } +function c() { return 3 } +function d() { return 4 }` + + const chunks = await chunk('test.ts', code, { maxChunkSize: 80 }) + + // Sort by byte range start + const sortedChunks = [...chunks].sort( + (a, b) => a.byteRange.start - b.byteRange.start, + ) + + // Check non-overlapping + for (let i = 1; i < sortedChunks.length; i++) { + const prev = sortedChunks[i - 1]! 
+ const curr = sortedChunks[i]! + expect(curr.byteRange.start).toBeGreaterThanOrEqual(prev.byteRange.end) + } + + // Check chunks are sequential (indices match sorted order) + sortedChunks.forEach((c, i) => { + expect(c.index).toBe(i) + }) + }) + + test('exact byte offset verification', async () => { + const code = `const x = 1` + + const chunks = await chunk('test.ts', code) + + expect(chunks).toHaveLength(1) + expect(chunks[0]?.byteRange).toEqual({ start: 0, end: 11 }) + expect(chunks[0]?.lineRange).toEqual({ start: 0, end: 0 }) + }) + + test('exact line range verification with multiline code', async () => { + const code = `// Line 0 +function foo() { // Line 1 + return 42 // Line 2 +} // Line 3 +// Line 4` + + const chunks = await chunk('test.ts', code) + + // All lines should be covered + expect(chunks).toHaveLength(1) + expect(chunks[0]?.lineRange.start).toBe(0) + expect(chunks[0]?.lineRange.end).toBe(4) + }) + + test('multiple chunks maintain byte continuity', async () => { + const code = `function longFunction1() { + const a = 1 + const b = 2 + const c = 3 + return a + b + c +} + +function longFunction2() { + const d = 4 + const e = 5 + const f = 6 + return d + e + f +} + +function longFunction3() { + const g = 7 + const h = 8 + const i = 9 + return g + h + i +}` + + const chunks = await chunk('test.ts', code, { maxChunkSize: 100 }) + + expect(chunks.length).toBeGreaterThan(1) + + // First chunk starts at beginning + const sortedChunks = [...chunks].sort( + (a, b) => a.byteRange.start - b.byteRange.start, + ) + expect(sortedChunks[0]?.byteRange.start).toBe(0) + + // Verify all byte ranges are valid + for (const c of sortedChunks) { + expect(c.byteRange.end).toBeGreaterThan(c.byteRange.start) + expect(c.byteRange.start).toBeGreaterThanOrEqual(0) + expect(c.byteRange.end).toBeLessThanOrEqual(code.length) + } + }) +}) + +// ============================================================================ +// Context Verification Tests +// ============================================================================ + +describe('context.entities verification', () => { + test('exact entity count and properties', async () => { + const code = `class Calculator { + add(a: number, b: number): number { + return a + b + } + subtract(a: number, b: number): number { + return a - b + } +}` + + const chunks = await chunk('test.ts', code) + + // Collect all entities across chunks + const allEntities = chunks.flatMap((c) => c.context.entities) + const uniqueNames = [...new Set(allEntities.map((e) => e.name))] + + expect(uniqueNames).toContain('Calculator') + expect(uniqueNames).toContain('add') + expect(uniqueNames).toContain('subtract') + + // Find the class entity + const classEntity = allEntities.find((e) => e.name === 'Calculator') + expect(classEntity).toBeDefined() + expect(classEntity?.type).toBe('class') + }) + + test('entity isPartial flag correctness', async () => { + const code = `class LargeClass { + method1() { + return 1 + } + method2() { + return 2 + } + method3() { + return 3 + } + method4() { + return 4 + } + method5() { + return 5 + } + method6() { + return 6 + } +}` + + const chunks = await chunk('test.ts', code, { maxChunkSize: 100 }) + + // With small chunk size, the class should be partial in multiple chunks + if (chunks.length > 1) { + const classEntities = chunks.flatMap((c) => + c.context.entities.filter((e) => e.name === 'LargeClass'), + ) + // If class spans multiple chunks, it should be marked as partial + const partialClasses = classEntities.filter((e) => e.isPartial) + 
expect(partialClasses.length).toBeGreaterThan(0) + } + }) + + test('entity docstring extraction', async () => { + const code = `/** + * Adds two numbers together. + * @param a First number + * @param b Second number + */ +function add(a: number, b: number): number { + return a + b +}` + + const chunks = await chunk('test.ts', code) + + const addEntity = chunks + .flatMap((c) => c.context.entities) + .find((e) => e.name === 'add') + + expect(addEntity).toBeDefined() + expect(addEntity?.docstring).toBeDefined() + expect(addEntity?.docstring).toContain('Adds two numbers together') + }) + + test('entity lineRange is present', async () => { + const code = `function foo() { + return 42 +}` + + const chunks = await chunk('test.ts', code) + const entity = chunks[0]?.context.entities[0]! + + expect(entity.lineRange).toBeDefined() + expect(entity.lineRange?.start).toBe(0) + expect(entity.lineRange?.end).toBe(2) + }) +}) + +// ============================================================================ +// Context Scope Chain Tests +// ============================================================================ + +describe('context.scope chain verification', () => { + test('scope chain for nested entities', async () => { + const code = `class Outer { + innerMethod() { + return 42 + } +}` + + const chunks = await chunk('test.ts', code, { maxChunkSize: 50 }) + + // Find a chunk that's inside the class + const chunkInClass = chunks.find( + (c) => + c.context.scope.length > 0 && + c.context.scope.some((s) => s.name === 'Outer'), + ) + + if (chunkInClass) { + expect(chunkInClass.context.scope.length).toBeGreaterThan(0) + const outerScope = chunkInClass.context.scope.find( + (s) => s.name === 'Outer', + ) + expect(outerScope).toBeDefined() + expect(outerScope?.type).toBe('class') + } + }) + + test('deeply nested scope chain', async () => { + const code = `class Level1 { + level2Method() { + function level3() { + return 42 + } + return level3() + } +}` + + const chunks = await chunk('test.ts', code) + + // Check that entities are detected at various levels + const allEntities = chunks.flatMap((c) => c.context.entities) + const entityNames = allEntities.map((e) => e.name) + + expect(entityNames).toContain('Level1') + expect(entityNames).toContain('level2Method') + }) + + test('top-level entities have empty scope', async () => { + const code = `function standalone() { + return 1 +}` + + const chunks = await chunk('test.ts', code) + + // A standalone function should have empty scope or self-reference + expect(chunks).toHaveLength(1) + // The chunk might have scope pointing to itself or empty + // depending on implementation + }) +}) + +// ============================================================================ +// Context Siblings Tests +// ============================================================================ + +describe('context.siblings verification', () => { + test('siblings with correct position and distance', async () => { + const code = `function first() { return 1 } +function second() { return 2 } +function third() { return 3 } +function fourth() { return 4 }` + + const chunks = await chunk('test.ts', code, { maxChunkSize: 50 }) + + // Find chunk with second function + const secondChunk = chunks.find((c) => + c.context.entities.some((e) => e.name === 'second'), + ) + + if (secondChunk) { + const siblings = secondChunk.context.siblings + + // Should have siblings before and after + const beforeSiblings = siblings.filter((s) => s.position === 'before') + const _afterSiblings = 
siblings.filter((s) => s.position === 'after') + + // first should be before second + const firstSibling = beforeSiblings.find((s) => s.name === 'first') + if (firstSibling) { + expect(firstSibling.position).toBe('before') + expect(firstSibling.distance).toBeGreaterThan(0) + } + } + }) + + test('sibling distance increases correctly', async () => { + const code = `function a() { return 1 } +function b() { return 2 } +function c() { return 3 } +function d() { return 4 } +function e() { return 5 }` + + const chunks = await chunk('test.ts', code, { + maxChunkSize: 50, + siblingDetail: 'names', + }) + + for (const c of chunks) { + const siblings = c.context.siblings + + // All distances should be positive + for (const s of siblings) { + expect(s.distance).toBeGreaterThan(0) + } + + // Siblings should be sorted by distance (closest first) + for (let i = 1; i < siblings.length; i++) { + const prev = siblings[i - 1]! + const curr = siblings[i]! + // Same-position siblings should have non-decreasing distance + if (prev.position === curr.position) { + expect(curr.distance).toBeGreaterThanOrEqual(prev.distance) + } + } + } + }) + + test('siblingDetail: none returns empty siblings', async () => { + const code = `function a() { return 1 } +function b() { return 2 }` + + const chunks = await chunk('test.ts', code, { siblingDetail: 'none' }) + + for (const c of chunks) { + expect(c.context.siblings).toHaveLength(0) + } + }) +}) + +// ============================================================================ +// filterImports Option Tests +// ============================================================================ + +describe('filterImports option behavior', () => { + test('filterImports: false includes all imports', async () => { + const code = `import { used } from './used' +import { unused } from './unused' + +function foo() { + return used() +}` + + const chunks = await chunk('test.ts', code, { filterImports: false }) + + const allImports = chunks.flatMap((c) => c.context.imports) + const importNames = allImports.map((i) => i.name) + + expect(importNames).toContain('used') + expect(importNames).toContain('unused') + }) + + test('filterImports: true filters to used imports', async () => { + const code = `import { Database } from './db' +import { UnusedThing } from './unused' + +function queryDb(db: Database) { + return db.query('SELECT 1') +}` + + const chunks = await chunk('test.ts', code, { filterImports: true }) + + const allImports = chunks.flatMap((c) => c.context.imports) + const importNames = allImports.map((i) => i.name) + + // Database is used in the function signature, so it should be included + expect(importNames).toContain('Database') + // UnusedThing may or may not be included depending on which chunk + }) + + test('import source is correctly captured', async () => { + const code = `import { foo } from './utils/foo' +import { bar } from '@scope/bar' + +const x = foo() + bar()` + + const chunks = await chunk('test.ts', code) + + const allImports = chunks.flatMap((c) => c.context.imports) + + const fooImport = allImports.find((i) => i.name === 'foo') + const barImport = allImports.find((i) => i.name === 'bar') + + expect(fooImport).toBeDefined() + expect(fooImport?.source).toBe('./utils/foo') + + expect(barImport).toBeDefined() + expect(barImport?.source).toBe('@scope/bar') + }) +}) + +// ============================================================================ +// Streaming API Tests +// ============================================================================ + 
+describe('stream', () => { + test('streams chunks from code with exact structure', async () => { + const code = `function a() { return 1 } +function b() { return 2 }` + + const chunks: Chunk[] = [] + for await (const c of chunkStream('test.ts', code)) { + chunks.push(c) + } + expect(chunks.length).toBeGreaterThan(0) + expect(chunks[0]).toMatchObject({ + context: { + filepath: 'test.ts', + language: 'typescript', + }, + }) + expect(chunks[0]?.text.length).toBeGreaterThan(0) + }) + + test('stream respects options', async () => { + const functions = Array.from( + { length: 10 }, + (_, i) => `function fn${i}() { return ${i} }`, + ).join('\n') + + const chunks: Chunk[] = [] + for await (const c of chunkStream('test.ts', functions, { + maxChunkSize: 100, + })) { + chunks.push(c) + } + + // With small maxChunkSize, should produce multiple chunks + expect(chunks.length).toBeGreaterThan(1) + }) + + test('stream yields chunks with correct index (totalChunks is -1 for streaming)', async () => { + const code = `function a() { return 1 } +function b() { return 2 } +function c() { return 3 }` + + const chunks: Chunk[] = [] + for await (const c of chunkStream('test.ts', code)) { + chunks.push(c) + } + + // Streaming doesn't know total upfront, so totalChunks is -1 + chunks.forEach((c, i) => { + expect(c.index).toBe(i) + expect(c.totalChunks).toBe(-1) + }) + }) + + test('stream chunks have valid byte ranges', async () => { + const code = `const a = 1 +const b = 2 +const c = 3` + + for await (const c of chunkStream('test.ts', code)) { + expect(c.byteRange.start).toBeGreaterThanOrEqual(0) + expect(c.byteRange.end).toBeGreaterThan(c.byteRange.start) + expect(c.byteRange.end).toBeLessThanOrEqual(code.length) + } }) }) @@ -148,30 +642,67 @@ function c() { return 3 } describe('createChunker', () => { test('creates a reusable chunker instance', async () => { - const chunker = createChunker('test.ts', { maxChunkSize: 500 }) + const chunker = createChunker({ maxChunkSize: 500 }) const code1 = 'const a = 1' const code2 = 'const b = 2' - const chunks1 = await chunker.chunk(code1) - const chunks2 = await chunker.chunk(code2) + const chunks1 = await chunker.chunk('test.ts', code1) + const chunks2 = await chunker.chunk('test.ts', code2) - expect(chunks1.length).toBeGreaterThan(0) - expect(chunks2.length).toBeGreaterThan(0) + expect(chunks1).toHaveLength(1) + expect(chunks2).toHaveLength(1) + expect(chunks1[0]?.text).toBe(code1) + expect(chunks2[0]?.text).toBe(code2) }) - test('chunker.stream yields chunks', async () => { - const chunker = createChunker('test.ts') - const code = ` -function a() { return 1 } -function b() { return 2 } -` + test('chunker can chunk multiple files with different extensions', async () => { + // Note: To get proper language detection, we need to NOT set a language default + // or explicitly pass language: undefined to use auto-detection + const tsCode = 'const a: number = 1' + const jsCode = 'const b = 2' + + const tsChunks = await chunk('test.ts', tsCode) + const jsChunks = await chunk('test.js', jsCode) + + expect(tsChunks).toHaveLength(1) + expect(jsChunks).toHaveLength(1) + expect(tsChunks[0]?.context.language).toBe('typescript') + expect(jsChunks[0]?.context.language).toBe('javascript') + }) + + test('chunker.stream yields chunks with correct properties', async () => { + const chunker = createChunker() + const code = `function a() { return 1 } +function b() { return 2 }` + const chunks: Chunk[] = [] - for await (const c of chunker.stream(code)) { + for await (const c of 
chunker.stream('test.ts', code)) { chunks.push(c) } expect(chunks.length).toBeGreaterThan(0) + for (const c of chunks) { + expect(c.totalChunks).toBe(-1) // streaming + expect(c.context.filepath).toBe('test.ts') + } + }) + + test('chunker allows per-call option overrides', async () => { + const chunker = createChunker({ maxChunkSize: 1500 }) + + const functions = Array.from( + { length: 10 }, + (_, i) => `function fn${i}() { return ${i} }`, + ).join('\n') + + // Override maxChunkSize for this specific call + const chunks = await chunker.chunk('test.ts', functions, { + maxChunkSize: 100, + }) + + // With small maxChunkSize, should produce multiple chunks + expect(chunks.length).toBeGreaterThan(1) }) }) @@ -180,26 +711,29 @@ function b() { return 2 } // ============================================================================ describe('multi-language chunking', () => { - const testCases: { lang: Language; ext: string; code: string }[] = [ + const testCases: { + lang: Language + ext: string + code: string + expectedEntityTypes: string[] + }[] = [ { lang: 'typescript', ext: 'ts', - code: ` -interface User { + code: `interface User { name: string age: number } function greet(user: User): string { return \`Hello, \${user.name}!\` -} -`, +}`, + expectedEntityTypes: ['interface', 'function'], }, { lang: 'javascript', ext: 'js', - code: ` -class Calculator { + code: `class Calculator { add(a, b) { return a + b } @@ -207,39 +741,36 @@ class Calculator { subtract(a, b) { return a - b } -} -`, +}`, + expectedEntityTypes: ['class', 'method'], }, { lang: 'python', ext: 'py', - code: ` -class Calculator: + code: `class Calculator: def add(self, a, b): return a + b def subtract(self, a, b): - return a - b -`, + return a - b`, + expectedEntityTypes: ['class', 'function'], }, { lang: 'rust', ext: 'rs', - code: ` -fn main() { + code: `fn main() { println!("Hello, world!"); } fn add(a: i32, b: i32) -> i32 { a + b -} -`, +}`, + expectedEntityTypes: ['function'], }, { lang: 'go', ext: 'go', - code: ` -package main + code: `package main func main() { fmt.Println("Hello, world!") @@ -247,14 +778,13 @@ func main() { func add(a, b int) int { return a + b -} -`, +}`, + expectedEntityTypes: ['function'], }, { lang: 'java', ext: 'java', - code: ` -public class Main { + code: `public class Main { public static void main(String[] args) { System.out.println("Hello, world!"); } @@ -262,13 +792,13 @@ public class Main { public static int add(int a, int b) { return a + b; } -} -`, +}`, + expectedEntityTypes: ['class', 'method'], }, ] - for (const { lang, ext, code } of testCases) { - test(`chunks ${lang} code correctly`, async () => { + for (const { lang, ext, code, expectedEntityTypes } of testCases) { + test(`chunks ${lang} code with correct entity types`, async () => { const chunks = await chunk(`test.${ext}`, code) expect(chunks.length).toBeGreaterThan(0) @@ -278,6 +808,17 @@ public class Main { expect(c.text.length).toBeGreaterThan(0) expect(c.byteRange.end).toBeGreaterThan(c.byteRange.start) expect(c.lineRange.end).toBeGreaterThanOrEqual(c.lineRange.start) + expect(c.context.language).toBe(lang) + } + + // Check expected entity types are present + const allEntities = chunks.flatMap((c) => c.context.entities) + const entityTypes = [ + ...new Set(allEntities.map((e) => e.type)), + ] as string[] + + for (const expectedType of expectedEntityTypes) { + expect(entityTypes).toContain(expectedType) } }) } @@ -294,11 +835,15 @@ describe('edge cases', () => { // Should handle without crashing 
expect(chunks.length).toBeGreaterThan(0) + + // All chunks should have valid structure + for (const c of chunks) { + expect(c.byteRange.end).toBeGreaterThan(c.byteRange.start) + } }) test('handles deeply nested code', async () => { - const nested = ` -function outer() { + const nested = `function outer() { function inner1() { function inner2() { function inner3() { @@ -309,29 +854,37 @@ function outer() { return inner2() } return inner1() -} -` +}` const chunks = await chunk('test.ts', nested) + expect(chunks.length).toBeGreaterThan(0) + + // Should detect nested functions + const allEntities = chunks.flatMap((c) => c.context.entities) + const functionNames = allEntities + .filter((e) => e.type === 'function') + .map((e) => e.name) + + expect(functionNames).toContain('outer') + expect(functionNames).toContain('inner1') + expect(functionNames).toContain('inner2') + expect(functionNames).toContain('inner3') }) - test('handles unicode characters', async () => { - const code = ` -const greeting = "こんにちは" -const emoji = "🎉🚀✨" -` + test('handles unicode characters correctly', async () => { + const code = `const greeting = "こんにちは" +const emoji = "🎉🚀✨"` + const chunks = await chunk('test.ts', code) - expect(chunks.length).toBeGreaterThan(0) + expect(chunks).toHaveLength(1) // Should preserve unicode - const allText = chunks.map((c) => c.text).join('') - expect(allText).toContain('こんにちは') - expect(allText).toContain('🎉') + expect(chunks[0]?.text).toContain('こんにちは') + expect(chunks[0]?.text).toContain('🎉') }) - test('handles code with comments', async () => { - const code = ` -// Single line comment + test('handles code with various comment styles', async () => { + const code = `// Single line comment /* Multi-line comment */ /** @@ -339,9 +892,83 @@ const emoji = "🎉🚀✨" */ function documented() { return 1 -} -` +}` const chunks = await chunk('test.ts', code) + expect(chunks.length).toBeGreaterThan(0) + + // Function should have the JSDoc as docstring + const funcEntity = chunks + .flatMap((c) => c.context.entities) + .find((e) => e.name === 'documented') + + expect(funcEntity).toBeDefined() + expect(funcEntity?.docstring).toContain('JSDoc comment') + }) + + test('handles empty functions', async () => { + const code = `function empty() {} +function alsoEmpty() { +}` + + const chunks = await chunk('test.ts', code) + + expect(chunks.length).toBeGreaterThan(0) + + const entities = chunks.flatMap((c) => c.context.entities) + expect(entities.map((e) => e.name)).toContain('empty') + expect(entities.map((e) => e.name)).toContain('alsoEmpty') + }) + + test('handles syntax with semicolons and without', async () => { + const code = `const a = 1; +const b = 2 +function foo() { + return a + b; +}` + + const chunks = await chunk('test.ts', code) + + expect(chunks).toHaveLength(1) + expect(chunks[0]?.text).toBe(code) + }) +}) + +// ============================================================================ +// contextMode Option Tests +// ============================================================================ + +describe('contextMode option', () => { + test('contextMode: none returns empty context arrays', async () => { + const code = `function foo() { return 1 } +function bar() { return 2 }` + + const chunks = await chunk('test.ts', code, { contextMode: 'none' }) + + for (const c of chunks) { + expect(c.context.scope).toHaveLength(0) + expect(c.context.entities).toHaveLength(0) + expect(c.context.siblings).toHaveLength(0) + expect(c.context.imports).toHaveLength(0) + } + }) + + test('contextMode: full 
includes all context', async () => { + const code = `import { x } from './x' + +class MyClass { + method1() { return 1 } + method2() { return 2 } +}` + + const chunks = await chunk('test.ts', code, { contextMode: 'full' }) + + // Should have entities + const hasEntities = chunks.some((c) => c.context.entities.length > 0) + expect(hasEntities).toBe(true) + + // Should have imports + const hasImports = chunks.some((c) => c.context.imports.length > 0) + expect(hasImports).toBe(true) }) }) diff --git a/test/extract.test.ts b/test/extract.test.ts index 92da2a0..41e0fef 100644 --- a/test/extract.test.ts +++ b/test/extract.test.ts @@ -111,13 +111,13 @@ function greet(name: string): string { ) // Both should find the same entities - expect(entitiesSync.length).toBe(entitiesAsync.length) + expect(entitiesSync).toHaveLength(entitiesAsync.length) expect(entitiesSync.map((e) => e.name)).toEqual( entitiesAsync.map((e) => e.name), ) }) - test('extractEntitiesSync falls back to node types when query not cached', () => { + test('extractEntitiesSync falls back to node types when query not cached', async () => { clearQueryCache() const code = ` @@ -125,19 +125,14 @@ function test() { return 1 } ` - // Parse synchronously (we need the tree) - const parseEffect = Effect.gen(function* () { - const result = yield* Effect.tryPromise(() => - parseCode(code, 'typescript'), - ) - return result - }) - - Effect.runPromise(parseEffect).then((result) => { - const rootNode = result.tree.rootNode - // With no cached query, should still work via fallback - const entities = extractEntitiesSync(rootNode, 'typescript', code) - expect(entities.length).toBeGreaterThan(0) + const result = await parseCode(code, 'typescript') + const rootNode = result.tree.rootNode + // With no cached query, should still work via fallback + const entities = extractEntitiesSync(rootNode, 'typescript', code) + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'test', + type: 'function', }) }) }) @@ -147,12 +142,10 @@ function test() { // ============================================================================ describe('extractEntities', () => { - test('extracts TypeScript function declaration', async () => { - const code = ` -function greet(name: string): string { + test('extracts TypeScript function declaration with exact properties', async () => { + const code = `function greet(name: string): string { return \`Hello, \${name}!\` -} -` +}` const result = await parseCode(code, 'typescript') const entities = await extractEntitiesAsync( result.tree.rootNode, @@ -160,16 +153,24 @@ function greet(name: string): string { code, ) - expect(entities.length).toBeGreaterThan(0) - const fn = entities.find((e) => e.name === 'greet') - expect(fn).toBeDefined() - expect(fn?.type).toBe('function') - expect(fn?.signature).toContain('greet') + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'greet', + type: 'function', + signature: 'function greet(name: string): string', + docstring: null, + parent: null, + }) + // Verify byteRange covers the full function + expect(entities[0].byteRange.start).toBe(0) + expect(entities[0].byteRange.end).toBe(code.length) + // Verify lineRange + expect(entities[0].lineRange.start).toBe(0) + expect(entities[0].lineRange.end).toBe(2) }) - test('extracts TypeScript class with methods', async () => { - const code = ` -class Calculator { + test('extracts TypeScript class with methods and exact counts', async () => { + const code = `class Calculator { add(a: number, b: 
number): number { return a + b } @@ -177,8 +178,7 @@ class Calculator { subtract(a: number, b: number): number { return a - b } -} -` +}` const result = await parseCode(code, 'typescript') const entities = await extractEntitiesAsync( result.tree.rootNode, @@ -186,23 +186,48 @@ class Calculator { code, ) + expect(entities).toHaveLength(3) // 1 class + 2 methods + const cls = entities.find((e) => e.name === 'Calculator') - expect(cls).toBeDefined() - expect(cls?.type).toBe('class') + expect(cls).toMatchObject({ + name: 'Calculator', + type: 'class', + signature: 'class Calculator', + docstring: null, + parent: null, + }) + // Class spans entire code + expect(cls?.byteRange.start).toBe(0) + expect(cls?.byteRange.end).toBe(code.length) + expect(cls?.lineRange.start).toBe(0) + expect(cls?.lineRange.end).toBe(8) const methods = entities.filter((e) => e.type === 'method') - expect(methods.length).toBe(2) - expect(methods.map((m) => m.name)).toContain('add') - expect(methods.map((m) => m.name)).toContain('subtract') + expect(methods).toHaveLength(2) + expect(methods.map((m) => m.name).sort()).toEqual(['add', 'subtract']) + + const addMethod = methods.find((m) => m.name === 'add') + expect(addMethod).toMatchObject({ + name: 'add', + type: 'method', + signature: 'add(a: number, b: number): number', + parent: 'Calculator', + }) + + const subtractMethod = methods.find((m) => m.name === 'subtract') + expect(subtractMethod).toMatchObject({ + name: 'subtract', + type: 'method', + signature: 'subtract(a: number, b: number): number', + parent: 'Calculator', + }) }) - test('extracts TypeScript interface', async () => { - const code = ` -interface User { + test('extracts TypeScript interface with exact properties', async () => { + const code = `interface User { name: string age: number -} -` +}` const result = await parseCode(code, 'typescript') const entities = await extractEntitiesAsync( result.tree.rootNode, @@ -210,17 +235,24 @@ interface User { code, ) - const iface = entities.find((e) => e.name === 'User') - expect(iface).toBeDefined() - expect(iface?.type).toBe('interface') + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'User', + type: 'interface', + signature: 'interface User', + docstring: null, + parent: null, + }) + expect(entities[0].byteRange.start).toBe(0) + expect(entities[0].byteRange.end).toBe(code.length) + expect(entities[0].lineRange.start).toBe(0) + expect(entities[0].lineRange.end).toBe(3) }) - test('extracts Python function with docstring', async () => { - const code = ` -def greet(name): + test('extracts Python function with docstring and exact properties', async () => { + const code = `def greet(name): """Say hello to someone.""" - return f"Hello, {name}!" 
-` + return f"Hello, {name}!"` const result = await parseCode(code, 'python') const entities = await extractEntitiesAsync( result.tree.rootNode, @@ -228,20 +260,26 @@ def greet(name): code, ) - const fn = entities.find((e) => e.name === 'greet') - expect(fn).toBeDefined() - expect(fn?.type).toBe('function') - expect(fn?.docstring).toBe('Say hello to someone.') + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'greet', + type: 'function', + signature: 'def greet(name)', + docstring: 'Say hello to someone.', + parent: null, + }) + expect(entities[0].byteRange.start).toBe(0) + expect(entities[0].byteRange.end).toBe(code.length) + expect(entities[0].lineRange.start).toBe(0) + expect(entities[0].lineRange.end).toBe(2) }) - test('extracts Python class', async () => { - const code = ` -class Calculator: + test('extracts Python class with exact properties', async () => { + const code = `class Calculator: """A simple calculator.""" def add(self, a, b): - return a + b -` + return a + b` const result = await parseCode(code, 'python') const entities = await extractEntitiesAsync( result.tree.rootNode, @@ -249,17 +287,30 @@ class Calculator: code, ) + expect(entities).toHaveLength(2) // 1 class + 1 function (methods in Python are extracted as functions with parent) + const cls = entities.find((e) => e.name === 'Calculator') - expect(cls).toBeDefined() - expect(cls?.type).toBe('class') + expect(cls).toMatchObject({ + name: 'Calculator', + type: 'class', + signature: 'class Calculator', + docstring: 'A simple calculator.', + parent: null, + }) + + // Python methods are extracted as 'function' type with parent set + const method = entities.find((e) => e.name === 'add') + expect(method).toMatchObject({ + name: 'add', + type: 'function', + parent: 'Calculator', + }) }) - test('extracts Rust function', async () => { - const code = ` -fn add(a: i32, b: i32) -> i32 { + test('extracts Rust function with exact properties', async () => { + const code = `fn add(a: i32, b: i32) -> i32 { a + b -} -` +}` const result = await parseCode(code, 'rust') const entities = await extractEntitiesAsync( result.tree.rootNode, @@ -267,19 +318,26 @@ fn add(a: i32, b: i32) -> i32 { code, ) - const fn = entities.find((e) => e.name === 'add') - expect(fn).toBeDefined() - expect(fn?.type).toBe('function') + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'add', + type: 'function', + signature: 'fn add(a: i32, b: i32) -> i32', + docstring: null, + parent: null, + }) + expect(entities[0].byteRange.start).toBe(0) + expect(entities[0].byteRange.end).toBe(code.length) + expect(entities[0].lineRange.start).toBe(0) + expect(entities[0].lineRange.end).toBe(2) }) - test('extracts Go function', async () => { - const code = ` -package main + test('extracts Go function with exact properties', async () => { + const code = `package main func add(a, b int) int { return a + b -} -` +}` const result = await parseCode(code, 'go') const entities = await extractEntitiesAsync( result.tree.rootNode, @@ -287,19 +345,24 @@ func add(a, b int) int { code, ) - const fn = entities.find((e) => e.name === 'add') - expect(fn).toBeDefined() - expect(fn?.type).toBe('function') + // Go extracts package as import + the function + const functions = entities.filter((e) => e.type === 'function') + expect(functions).toHaveLength(1) + expect(functions[0]).toMatchObject({ + name: 'add', + type: 'function', + signature: 'func add(a, b int) int', + docstring: null, + parent: null, + }) }) - test('extracts Java class 
and method', async () => { - const code = ` -public class Calculator { + test('extracts Java class and method with exact properties', async () => { + const code = `public class Calculator { public int add(int a, int b) { return a + b; } -} -` +}` const result = await parseCode(code, 'java') const entities = await extractEntitiesAsync( result.tree.rootNode, @@ -307,19 +370,29 @@ public class Calculator { code, ) + expect(entities).toHaveLength(2) // 1 class + 1 method + const cls = entities.find((e) => e.name === 'Calculator') - expect(cls).toBeDefined() - expect(cls?.type).toBe('class') + expect(cls).toMatchObject({ + name: 'Calculator', + type: 'class', + parent: null, + }) + + const method = entities.find((e) => e.name === 'add') + expect(method).toMatchObject({ + name: 'add', + type: 'method', + parent: 'Calculator', + }) }) - test('tracks parent relationships for nested entities', async () => { - const code = ` -class Outer { + test('tracks parent relationships for nested entities accurately', async () => { + const code = `class Outer { inner() { return 1 } -} -` +}` const result = await parseCode(code, 'typescript') const entities = await extractEntitiesAsync( result.tree.rootNode, @@ -327,8 +400,21 @@ class Outer { code, ) + expect(entities).toHaveLength(2) + + const cls = entities.find((e) => e.name === 'Outer') + expect(cls).toMatchObject({ + name: 'Outer', + type: 'class', + parent: null, + }) + const method = entities.find((e) => e.name === 'inner') - expect(method?.parent).toBe('Outer') + expect(method).toMatchObject({ + name: 'inner', + type: 'method', + parent: 'Outer', + }) }) }) @@ -357,28 +443,37 @@ describe('fallback extraction (iterative)', () => { extractByNodeTypes(result.tree.rootNode, 'typescript', code), ) - // Should find nested functions (exact count may vary based on nesting support) + // Should find nested functions - at minimum the outer function const functions = entities.filter((e) => e.type === 'function') - expect(functions.length).toBeGreaterThan(0) - // At minimum the outer function should be found + expect(functions.length).toBeGreaterThanOrEqual(1) expect(functions.some((f) => f.name === 'level0')).toBe(true) }) - test('extractByNodeTypes extracts entities correctly', async () => { - const code = ` -function foo() { return 1 } + test('extractByNodeTypes extracts entities with exact counts', async () => { + const code = `function foo() { return 1 } class Bar { baz() { return 2 } -} -` +}` const result = await parseCode(code, 'typescript') const entities = await Effect.runPromise( extractByNodeTypes(result.tree.rootNode, 'typescript', code), ) - expect(entities.find((e) => e.name === 'foo')).toBeDefined() - expect(entities.find((e) => e.name === 'Bar')).toBeDefined() - expect(entities.find((e) => e.name === 'baz')).toBeDefined() + expect(entities).toHaveLength(3) // 1 function + 1 class + 1 method + + expect(entities.find((e) => e.name === 'foo')).toMatchObject({ + name: 'foo', + type: 'function', + }) + expect(entities.find((e) => e.name === 'Bar')).toMatchObject({ + name: 'Bar', + type: 'class', + }) + expect(entities.find((e) => e.name === 'baz')).toMatchObject({ + name: 'baz', + type: 'method', + parent: 'Bar', + }) }) test('getEntityType maps node types correctly', () => { @@ -386,10 +481,12 @@ class Bar { expect(getEntityType('method_definition')).toBe('method') expect(getEntityType('class_declaration')).toBe('class') expect(getEntityType('interface_declaration')).toBe('interface') + expect(getEntityType('type_alias_declaration')).toBe('type') + 
expect(getEntityType('enum_declaration')).toBe('enum') expect(getEntityType('unknown_type')).toBeNull() }) - test('ENTITY_NODE_TYPES contains all supported languages', () => { + test('ENTITY_NODE_TYPES contains all supported languages with entries', () => { const languages: Language[] = [ 'typescript', 'javascript', @@ -401,7 +498,7 @@ class Bar { for (const lang of languages) { expect(ENTITY_NODE_TYPES[lang]).toBeDefined() - expect(ENTITY_NODE_TYPES[lang].length).toBeGreaterThan(0) + expect(ENTITY_NODE_TYPES[lang].length).toBeGreaterThanOrEqual(3) } }) }) @@ -411,7 +508,7 @@ class Bar { // ============================================================================ describe('signature extraction', () => { - test('extracts TypeScript function signature', async () => { + test('extracts TypeScript function signature exactly', async () => { const code = `function greet(name: string): string { return \`Hello, \${name}!\` }` @@ -449,9 +546,7 @@ describe('signature extraction', () => { extractSignature(fnNode, 'function', 'typescript', code), ) - // Should include the generic parameter - expect(signature).toContain('') - expect(signature).toContain('identity') + expect(signature).toBe('function identity(arg: T): T') }) test('handles comparison operators in signatures (angle bracket fix)', async () => { @@ -466,11 +561,10 @@ describe('signature extraction', () => { extractSignature(fnNode, 'function', 'typescript', code), ) - // Should extract signature correctly without being confused by < in body expect(signature).toBe('function compare(a: number, b: number): boolean') }) - test('extracts class signature', async () => { + test('extracts class signature with extends and implements', async () => { const code = `class Calculator extends Base implements ICalc { add(a: number, b: number): number { return a + b @@ -483,9 +577,7 @@ describe('signature extraction', () => { extractSignature(classNode, 'class', 'typescript', code), ) - expect(signature).toContain('class Calculator') - expect(signature).toContain('extends Base') - expect(signature).toContain('implements ICalc') + expect(signature).toBe('class Calculator extends Base implements ICalc') }) test('cleans multi-line signatures to single line', async () => { @@ -503,12 +595,10 @@ describe('signature extraction', () => { extractSignature(fnNode, 'function', 'typescript', code), ) - // Should not contain newlines expect(signature).not.toContain('\n') - // Should have all params - expect(signature).toContain('param1') - expect(signature).toContain('param2') - expect(signature).toContain('param3') + expect(signature).toBe( + 'function multiLine( param1: string, param2: number, param3: boolean ): void', + ) }) test('extractName finds identifier in node', async () => { @@ -526,7 +616,7 @@ describe('signature extraction', () => { // ============================================================================ describe('docstring extraction', () => { - test('extracts JSDoc for TypeScript function', async () => { + test('extracts JSDoc for TypeScript function with exact content', async () => { const code = `/** * Greet someone by name. 
* @param name The name to greet @@ -541,11 +631,12 @@ function greet(name: string): string { extractDocstring(fnNode, 'typescript', code), ) - expect(docstring).toContain('Greet someone by name') - expect(docstring).toContain('@param name') + expect(docstring).toBe( + 'Greet someone by name.\n@param name The name to greet', + ) }) - test('extracts Python docstring from function body', async () => { + test('extracts Python docstring from function body with exact content', async () => { const code = `def greet(name): """ Say hello to someone. @@ -561,11 +652,12 @@ function greet(name: string): string { extractDocstring(fnNode, 'python', code), ) - expect(docstring).toContain('Say hello to someone') + expect(docstring).toContain('Say hello to someone.') expect(docstring).toContain('Args:') + expect(docstring).toContain('name: The person to greet') }) - test('extracts Rust doc comment', async () => { + test('extracts Rust doc comment with exact content', async () => { const code = `/// Add two numbers together. /// Returns the sum. fn add(a: i32, b: i32) -> i32 { @@ -577,16 +669,17 @@ fn add(a: i32, b: i32) -> i32 { (n) => n.type === 'function_item', ) - if (fnNode) { - const docstring = await Effect.runPromise( - extractDocstring(fnNode, 'rust', code), - ) + expect(fnNode).not.toBeUndefined() + const docstring = await Effect.runPromise( + extractDocstring(fnNode!, 'rust', code), + ) - expect(docstring).toContain('Add two numbers') - } + // Rust doc comments may have blank line between comment lines + expect(docstring).toContain('Add two numbers together.') + expect(docstring).toContain('Returns the sum.') }) - test('extracts Go comment', async () => { + test('extracts Go comment with exact content', async () => { const code = `// Add returns the sum of a and b. func Add(a, b int) int { return a + b @@ -596,16 +689,15 @@ func Add(a, b int) int { (n) => n.type === 'function_declaration', ) - if (fnNode) { - const docstring = await Effect.runPromise( - extractDocstring(fnNode, 'go', code), - ) + expect(fnNode).not.toBeUndefined() + const docstring = await Effect.runPromise( + extractDocstring(fnNode!, 'go', code), + ) - expect(docstring).toContain('Add returns the sum') - } + expect(docstring).toBe('Add returns the sum of a and b.') }) - test('extracts Javadoc', async () => { + test('extracts Javadoc with exact content', async () => { const code = `/** * Add two integers. 
* @param a First number @@ -620,13 +712,15 @@ public int add(int a, int b) { (n) => n.type === 'method_declaration', ) - if (methodNode) { - const docstring = await Effect.runPromise( - extractDocstring(methodNode, 'java', code), - ) + expect(methodNode).not.toBeUndefined() + const docstring = await Effect.runPromise( + extractDocstring(methodNode!, 'java', code), + ) - expect(docstring).toContain('Add two integers') - } + expect(docstring).toContain('Add two integers.') + expect(docstring).toContain('@param a First number') + expect(docstring).toContain('@param b Second number') + expect(docstring).toContain('@return The sum') }) test('returns null when no docstring present', async () => { @@ -691,8 +785,7 @@ describe('parseDocstring', () => { expect(parsed).not.toContain('/**') expect(parsed).not.toContain('*/') - expect(parsed).toContain('This is a description') - expect(parsed).toContain('@param name') + expect(parsed).toBe('This is a description.\n@param name The name') }) test('parses Python docstring and dedents', () => { @@ -704,6 +797,7 @@ describe('parseDocstring', () => { expect(parsed).not.toContain('"""') expect(parsed).toContain('This is indented') + expect(parsed).toContain('So is this') // Should be dedented expect(parsed).not.toMatch(/^\s{4}This/) }) @@ -714,8 +808,7 @@ describe('parseDocstring', () => { const parsed = parseDocstring(input, 'rust') expect(parsed).not.toContain('///') - expect(parsed).toContain('First line') - expect(parsed).toContain('Second line') + expect(parsed).toBe('First line.\nSecond line.') }) test('parses Go comments and removes //', () => { @@ -724,8 +817,7 @@ describe('parseDocstring', () => { const parsed = parseDocstring(input, 'go') expect(parsed).not.toContain('//') - expect(parsed).toContain('First line') - expect(parsed).toContain('Second line') + expect(parsed).toBe('First line.\nSecond line.') }) }) @@ -766,26 +858,52 @@ describe('extraction edge cases', () => { expect(Array.isArray(entities)).toBe(true) }) - test('handles arrow functions - no crash', async () => { - // Arrow functions may or may not be extracted depending on query patterns - // The key is the system handles them without crashing - const code = `const add = (a: number, b: number) => a + b` + test('handles async functions with exact signature', async () => { + const code = `async function fetchData(): Promise { + return await fetch('/api') +}` const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) - // Should not throw + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'fetchData', + type: 'function', + signature: 'async function fetchData(): Promise', + parent: null, + }) + }) + + test('handles async generator functions', async () => { + const code = `async function* generateData(): AsyncGenerator { + yield 1 + yield 2 +}` + const result = await parseCode(code, 'typescript') const entities = await extractEntitiesAsync( result.tree.rootNode, 'typescript', code, ) - // Result should be an array (may be empty if arrow function isn't captured) - expect(Array.isArray(entities)).toBe(true) + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'generateData', + type: 'function', + parent: null, + }) + expect(entities[0].signature).toContain('async') + expect(entities[0].signature).toContain('generateData') }) - test('handles async functions', async () => { - const code = `async function fetchData(): Promise { - return await 
fetch('/api') + test('handles generator functions', async () => { + const code = `function* myGenerator(): Generator { + yield 1 + yield 2 }` const result = await parseCode(code, 'typescript') const entities = await extractEntitiesAsync( @@ -794,12 +912,15 @@ describe('extraction edge cases', () => { code, ) - const fn = entities.find((e) => e.name === 'fetchData') - expect(fn).toBeDefined() - expect(fn?.signature).toContain('async') + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'myGenerator', + type: 'function', + parent: null, + }) }) - test('handles export declarations', async () => { + test('handles export declarations with exact counts', async () => { const code = `export function publicFn() { return 1 } export default function defaultFn() { return 2 }` const result = await parseCode(code, 'typescript') @@ -809,7 +930,19 @@ export default function defaultFn() { return 2 }` code, ) - expect(entities.length).toBeGreaterThan(0) + expect(entities.length).toBeGreaterThanOrEqual(2) + + const publicFn = entities.find((e) => e.name === 'publicFn') + expect(publicFn).toMatchObject({ + name: 'publicFn', + type: 'function', + }) + + const defaultFn = entities.find((e) => e.name === 'defaultFn') + expect(defaultFn).toMatchObject({ + name: 'defaultFn', + type: 'function', + }) }) test('handles empty file', async () => { @@ -822,6 +955,7 @@ export default function defaultFn() { return 2 }` ) expect(entities).toEqual([]) + expect(entities).toHaveLength(0) }) test('handles file with only comments', async () => { @@ -835,6 +969,7 @@ export default function defaultFn() { return 2 }` ) expect(entities).toEqual([]) + expect(entities).toHaveLength(0) }) }) @@ -870,6 +1005,15 @@ describe('extractImportSource', () => { expect(source).toBe('path') }) + test('extracts TypeScript type-only import source', async () => { + const code = `import type { Option } from 'effect/Option'` + const result = await parseCode(code, 'typescript') + const importNode = result.tree.rootNode.namedChildren[0] + + const source = extractImportSource(importNode, 'typescript') + expect(source).toBe('effect/Option') + }) + test('extracts JavaScript import source', async () => { const code = `import { useState } from 'react'` const result = await parseCode(code, 'javascript') @@ -912,7 +1056,7 @@ describe('extractImportSource', () => { const useNode = result.tree.rootNode.namedChildren[0] const source = extractImportSource(useNode, 'rust') - expect(source).toContain('std::collections') + expect(source).toBe('std::collections::HashMap') }) test('extracts Go import source', async () => { @@ -924,10 +1068,9 @@ import "fmt"` (n) => n.type === 'import_declaration', ) - if (importNode) { - const source = extractImportSource(importNode, 'go') - expect(source).toBe('fmt') - } + expect(importNode).not.toBeUndefined() + const source = extractImportSource(importNode!, 'go') + expect(source).toBe('fmt') }) test('extracts Java import source', async () => { @@ -939,7 +1082,7 @@ import "fmt"` expect(source).toBe('java.util.List') }) - test('import entities have source field populated', async () => { + test('import entities have source field populated with exact values', async () => { const code = `import { Effect } from 'effect' import type { Option } from 'effect/Option' @@ -952,12 +1095,581 @@ function test() { return 1 }` ) const imports = entities.filter((e) => e.type === 'import') - expect(imports.length).toBeGreaterThan(0) + expect(imports).toHaveLength(2) + + // Verify each import has proper source + const 
effectImport = imports.find((i) => i.name === 'Effect') + expect(effectImport).toMatchObject({ + name: 'Effect', + type: 'import', + source: 'effect', + }) + + const optionImport = imports.find((i) => i.name === 'Option') + expect(optionImport).toMatchObject({ + name: 'Option', + type: 'import', + source: 'effect/Option', + }) + }) +}) + +// ============================================================================ +// Byte/Line Range Verification Tests +// ============================================================================ + +describe('byte and line range verification', () => { + test('verifies byte offsets cover correct content for function', async () => { + const code = `function test() { + return 42 +}` + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + expect(entities).toHaveLength(1) + // Verify the extracted range covers the full code + const extractedText = code.slice( + entities[0].byteRange.start, + entities[0].byteRange.end, + ) + expect(extractedText).toBe(code) + expect(entities[0].byteRange.start).toBe(0) + }) + + test('verifies line ranges for class with methods', async () => { + const code = `class Foo { + bar() {} + baz() {} +}` + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + expect(entities).toHaveLength(3) + + const cls = entities.find((e) => e.name === 'Foo') + expect(cls?.lineRange).toEqual({ start: 0, end: 3 }) + + const bar = entities.find((e) => e.name === 'bar') + expect(bar?.lineRange).toEqual({ start: 1, end: 1 }) + + const baz = entities.find((e) => e.name === 'baz') + expect(baz?.lineRange).toEqual({ start: 2, end: 2 }) + }) + + test('verifies byte ranges for multiple top-level functions', async () => { + const code = `function a() {} +function b() {}` + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + expect(entities).toHaveLength(2) + + const fnA = entities.find((e) => e.name === 'a') + const fnB = entities.find((e) => e.name === 'b') + + // fnA should start at 0 + expect(fnA?.byteRange.start).toBe(0) + // fnA should end before fnB starts + expect(fnA?.byteRange.end).toBeLessThanOrEqual(fnB?.byteRange.start) + // fnB should end at code.length + expect(fnB?.byteRange.end).toBe(code.length) + + // Verify line ranges + expect(fnA?.lineRange).toEqual({ start: 0, end: 0 }) + expect(fnB?.lineRange).toEqual({ start: 1, end: 1 }) + }) + + test('verifies line range for multi-line interface', async () => { + const code = `interface Config { + host: string + port: number + debug: boolean +}` + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'Config', + type: 'interface', + }) + expect(entities[0].lineRange).toEqual({ start: 0, end: 4 }) + }) +}) + +// ============================================================================ +// Parent Relationship Tests +// ============================================================================ + +describe('parent relationship accuracy', () => { + test('method has correct parent class', async () => { + const code = `class Container { + method1() {} + method2() {} +}` + const result = await parseCode(code, 'typescript') + 
const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + expect(entities).toHaveLength(3) + + const method1 = entities.find((e) => e.name === 'method1') + expect(method1?.parent).toBe('Container') + + const method2 = entities.find((e) => e.name === 'method2') + expect(method2?.parent).toBe('Container') + + const container = entities.find((e) => e.name === 'Container') + expect(container?.parent).toBeNull() + }) + + test('top-level functions have null parent', async () => { + const code = `function topLevel1() {} +function topLevel2() {}` + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + expect(entities).toHaveLength(2) + expect(entities[0].parent).toBeNull() + expect(entities[1].parent).toBeNull() + }) + + test('Python method has correct parent class', async () => { + const code = `class MyClass: + def my_method(self): + pass` + const result = await parseCode(code, 'python') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'python', + code, + ) - // Each import should have source populated + expect(entities).toHaveLength(2) + + // Python methods are extracted as 'function' type with parent set + const method = entities.find((e) => e.name === 'my_method') + expect(method?.type).toBe('function') + expect(method?.parent).toBe('MyClass') + }) +}) + +// ============================================================================ +// Type Alias and Enum Tests +// ============================================================================ + +describe('type alias and enum extraction', () => { + test('extracts TypeScript type alias', async () => { + const code = `type UserId = string` + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'UserId', + type: 'type', + }) + expect(entities[0].byteRange.start).toBe(0) + expect(entities[0].byteRange.end).toBe(code.length) + expect(entities[0].lineRange).toEqual({ start: 0, end: 0 }) + }) + + test('extracts TypeScript enum', async () => { + const code = `enum Status { + Active, + Inactive +}` + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'Status', + type: 'enum', + signature: 'enum Status', + }) + expect(entities[0].lineRange).toEqual({ start: 0, end: 3 }) + }) + + test('extracts Rust struct', async () => { + const code = `struct Point { + x: i32, + y: i32, +}` + const result = await parseCode(code, 'rust') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'rust', + code, + ) + + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'Point', + type: 'type', + }) + }) + + test('extracts Rust enum', async () => { + const code = `enum Direction { + Up, + Down, + Left, + Right, +}` + const result = await parseCode(code, 'rust') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'rust', + code, + ) + + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'Direction', + type: 'enum', + }) + }) +}) + +// ============================================================================ +// Entity Count Verification Tests +// 
============================================================================ + +describe('entity count verification', () => { + test('extracts exact count for complex TypeScript file', async () => { + const code = `import { Effect } from 'effect' + +interface Options { + timeout: number +} + +type Result = string | number + +class Service { + constructor() {} + + process() { + return 1 + } +} + +function helper() { + return 'help' +} + +enum Status { + Active, + Inactive +}` + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + // Expected: 1 import + 1 interface + 1 type + 1 class + 2 methods + 1 function + 1 enum = 8 + const imports = entities.filter((e) => e.type === 'import') + const interfaces = entities.filter((e) => e.type === 'interface') + const types = entities.filter((e) => e.type === 'type') + const classes = entities.filter((e) => e.type === 'class') + const methods = entities.filter((e) => e.type === 'method') + const functions = entities.filter((e) => e.type === 'function') + const enums = entities.filter((e) => e.type === 'enum') + + expect(imports).toHaveLength(1) + expect(interfaces).toHaveLength(1) + expect(types).toHaveLength(1) + expect(classes).toHaveLength(1) + expect(methods).toHaveLength(2) // constructor + process + expect(functions).toHaveLength(1) + expect(enums).toHaveLength(1) + + expect(entities).toHaveLength(8) + }) + + test('extracts exact count for Python module', async () => { + const code = `from typing import Optional + +class DataProcessor: + def __init__(self): + pass + + def process(self, data): + return data + +def helper(x): + return x * 2` + const result = await parseCode(code, 'python') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'python', + code, + ) + + const imports = entities.filter((e) => e.type === 'import') + const classes = entities.filter((e) => e.type === 'class') + // Python methods are extracted as functions with parent set + const methods = entities.filter( + (e) => e.type === 'function' && e.parent !== null, + ) + const topLevelFunctions = entities.filter( + (e) => e.type === 'function' && e.parent === null, + ) + + // Python 'from X import Y' extracts both module and imported symbol + expect(imports).toHaveLength(2) // typing + Optional + expect(classes).toHaveLength(1) + expect(methods).toHaveLength(2) // __init__ + process (as functions with parent) + expect(topLevelFunctions).toHaveLength(1) // helper + }) +}) + +// ============================================================================ +// Decorated Function Tests +// ============================================================================ + +describe('decorated functions', () => { + test('extracts Python decorated function', async () => { + const code = `@decorator +def decorated_func(): + pass` + const result = await parseCode(code, 'python') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'python', + code, + ) + + // Should extract the decorated function + expect(entities.length).toBeGreaterThanOrEqual(1) + const fn = entities.find((e) => e.type === 'function') + expect(fn).toMatchObject({ + name: 'decorated_func', + type: 'function', + }) + }) + + test('extracts Python decorated method in class', async () => { + const code = `class MyClass: + @staticmethod + def static_method(): + pass + + @classmethod + def class_method(cls): + pass` + const result = await parseCode(code, 'python') + const entities = await 
extractEntitiesAsync( + result.tree.rootNode, + 'python', + code, + ) + + expect(entities).toHaveLength(3) // 1 class + 2 functions (Python methods are extracted as functions) + + // Python methods inside classes are extracted as 'function' type with parent set + const staticMethod = entities.find((e) => e.name === 'static_method') + expect(staticMethod).toMatchObject({ + type: 'function', + parent: 'MyClass', + }) + + const classMethod = entities.find((e) => e.name === 'class_method') + expect(classMethod).toMatchObject({ + type: 'function', + parent: 'MyClass', + }) + }) +}) + +// ============================================================================ +// Entity Properties Completeness Tests +// ============================================================================ + +describe('entity properties completeness', () => { + test('extracted entities have all required properties', async () => { + const code = `function test() { + return 1 +}` + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + expect(entities).toHaveLength(1) + const entity = entities[0] + + // Verify all required properties exist + expect(entity.name).toBe('test') + expect(entity.type).toBe('function') + expect(entity.signature).toBe('function test()') + expect(entity.docstring).toBeNull() + expect(entity.parent).toBeNull() + expect(entity.byteRange).toEqual({ + start: 0, + end: code.length, + }) + expect(entity.lineRange).toEqual({ + start: 0, + end: 2, + }) + expect(entity.node).toBeDefined() + }) + + test('import entities have source property', async () => { + const code = `import { foo } from 'bar'` + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + expect(entities).toHaveLength(1) + expect(entities[0]).toMatchObject({ + name: 'foo', + type: 'import', + source: 'bar', + }) + }) +}) + +// ============================================================================ +// Multiple Import Symbols Tests +// ============================================================================ + +describe('multiple import symbols extraction', () => { + test('extracts all named imports from single statement', async () => { + const code = `import { foo, bar, baz } from 'my-module'` + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + const imports = entities.filter((e) => e.type === 'import') + expect(imports).toHaveLength(3) + expect(imports.map((i) => i.name).sort()).toEqual(['bar', 'baz', 'foo']) + + // All should have the same source for (const imp of imports) { - expect(imp.source).toBeDefined() - expect(imp.source).not.toBe('') + expect(imp.source).toBe('my-module') } }) + + test('extracts imports from multiple import statements', async () => { + const code = `import { a } from 'module-a' +import { b, c } from 'module-b' +import d from 'module-d'` + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + const imports = entities.filter((e) => e.type === 'import') + expect(imports).toHaveLength(4) + + const importA = imports.find((i) => i.name === 'a') + expect(importA?.source).toBe('module-a') + + const importB = imports.find((i) => i.name === 'b') + expect(importB?.source).toBe('module-b') + + const importC = 
imports.find((i) => i.name === 'c')
+    expect(importC?.source).toBe('module-b')
+
+    const importD = imports.find((i) => i.name === 'd')
+    expect(importD?.source).toBe('module-d')
+  })
+})
+
+// ============================================================================
+// Ordering Guarantees Tests
+// ============================================================================
+
+describe('entity ordering', () => {
+  test('entities are ordered by appearance in source', async () => {
+    const code = `function first() {}
+function second() {}
+function third() {}`
+    const result = await parseCode(code, 'typescript')
+    const entities = await extractEntitiesAsync(
+      result.tree.rootNode,
+      'typescript',
+      code,
+    )
+
+    expect(entities).toHaveLength(3)
+
+    // Verify byteRange ordering
+    for (let i = 0; i < entities.length - 1; i++) {
+      expect(entities[i].byteRange.start).toBeLessThan(
+        entities[i + 1].byteRange.start,
+      )
+    }
+
+    // Verify name ordering matches source order
+    expect(entities.map((e) => e.name)).toEqual(['first', 'second', 'third'])
+  })
+
+  test('class appears before its methods in entity list', async () => {
+    const code = `class MyClass {
+  methodA() {}
+  methodB() {}
+}`
+    const result = await parseCode(code, 'typescript')
+    const entities = await extractEntitiesAsync(
+      result.tree.rootNode,
+      'typescript',
+      code,
+    )
+
+    expect(entities).toHaveLength(3)
+
+    // Class should come first
+    expect(entities[0].type).toBe('class')
+    expect(entities[0].name).toBe('MyClass')
+
+    // Methods follow
+    const methodNames = entities.slice(1).map((e) => e.name)
+    expect(methodNames).toEqual(['methodA', 'methodB'])
+  })
+})
 })
diff --git a/test/integration.test.ts b/test/integration.test.ts
new file mode 100644
index 0000000..ea77401
--- /dev/null
+++ b/test/integration.test.ts
@@ -0,0 +1,1084 @@
+import { describe, expect, test } from 'bun:test'
+import { type Chunk, chunk, chunkStream, type Language } from '../src'
+
+// ============================================================================
+// Integration Tests - End-to-End Flow for All Supported Languages
+// ============================================================================
+
+describe('integration: TypeScript', () => {
+  test('full pipeline with class, methods, imports, and docstrings', async () => {
+    const code = `import { Database } from './db'
+import { Logger } from './utils'
+
+/**
+ * Service for managing user accounts.
+ * Handles CRUD operations and authentication.
+ */
+export class UserService {
+  private db: Database
+  private logger: Logger
+
+  constructor(db: Database, logger: Logger) {
+    this.db = db
+    this.logger = logger
+  }
+
+  /**
+   * Fetch a user by their unique ID.
+   * @param id - The user's unique identifier
+   * @returns The user object or null if not found
+   */
+  async getUser(id: string): Promise<User | null> {
+    this.logger.info(\`Fetching user: \${id}\`)
+    return this.db.query('SELECT * FROM users WHERE id = ?', [id])
+  }
+
+  /**
+   * Create a new user account.
+   * @param data - The user data to insert
+   * @returns The created user with generated ID
+   */
+  async createUser(data: CreateUserInput): Promise<User> {
+    this.logger.info('Creating new user')
+    const result = await this.db.insert('users', data)
+    return { id: result.insertId, ...data }
+  }
+
+  /**
+   * Delete a user by ID.
+   * @param id - The user's unique identifier
+   */
+  async deleteUser(id: string): Promise<void> {
+    this.logger.warn(\`Deleting user: \${id}\`)
+    await this.db.delete('users', { id })
+  }
+}
+
+/**
+ * Helper function to validate user input.
+ */
+function validateUserInput(input: unknown): input is CreateUserInput {
+  return typeof input === 'object' && input !== null && 'email' in input
+}`
+
+    const filepath = 'services/user.ts'
+    const chunks = await chunk(filepath, code, {
+      maxChunkSize: 500,
+      siblingDetail: 'signatures',
+      filterImports: true,
+    })
+
+    // Validate basic structure
+    expect(chunks.length).toBeGreaterThan(1)
+    expect(chunks.length).toBeLessThanOrEqual(10)
+
+    // All chunks have correct metadata
+    for (const c of chunks) {
+      expect(c.context.filepath).toBe(filepath)
+      expect(c.context.language).toBe('typescript')
+    }
+
+    // UserService class is detected
+    const hasUserService = chunks.some(
+      (c) =>
+        c.context.entities.some((e) => e.name === 'UserService') ||
+        c.context.scope.some((s) => s.name === 'UserService'),
+    )
+    expect(hasUserService).toBe(true)
+
+    // Methods are detected
+    const allEntities = chunks.flatMap((c) => c.context.entities)
+    const methodNames = allEntities
+      .filter((e) => e.type === 'method')
+      .map((e) => e.name)
+    expect(methodNames).toContain('getUser')
+    expect(methodNames).toContain('createUser')
+    expect(methodNames).toContain('deleteUser')
+
+    // Imports are captured
+    const allImports = chunks.flatMap((c) => c.context.imports)
+    expect(allImports.length).toBeGreaterThan(0)
+
+    // Chunks don't overlap
+    const sortedChunks = [...chunks].sort(
+      (a, b) => a.byteRange.start - b.byteRange.start,
+    )
+    let lastEnd = sortedChunks[0]?.byteRange.start ?? 0
+    for (const c of sortedChunks) {
+      expect(c.byteRange.start).toBeGreaterThanOrEqual(lastEnd)
+      lastEnd = c.byteRange.end
+    }
+
+    // Text matches byte range
+    for (const c of chunks) {
+      const sliced = code.slice(c.byteRange.start, c.byteRange.end)
+      expect(c.text).toBe(sliced)
+    }
+  })
+
+  test('interface and type alias extraction', async () => {
+    const code = `interface User {
+  id: string
+  name: string
+  email: string
+}
+
+type UserRole = 'admin' | 'user' | 'guest'
+
+interface UserWithRole extends User {
+  role: UserRole
+}
+
+function createUser(data: Omit<User, 'id'>): User {
+  return { ...data, id: crypto.randomUUID() }
+}`
+
+    const chunks = await chunk('types.ts', code)
+
+    expect(chunks.length).toBeGreaterThan(0)
+
+    const allEntities = chunks.flatMap((c) => c.context.entities)
+    const entityNames = allEntities.map((e) => e.name)
+
+    expect(entityNames).toContain('User')
+    expect(entityNames).toContain('UserWithRole')
+    expect(entityNames).toContain('createUser')
+
+    // Verify interface type
+    const userInterface = allEntities.find((e) => e.name === 'User')
+    expect(userInterface?.type).toBe('interface')
+  })
+
+  test('arrow functions and const declarations', async () => {
+    const code = `const add = (a: number, b: number): number => a + b
+
+const multiply = (a: number, b: number): number => {
+  return a * b
+}
+
+const API_URL = 'https://api.example.com'
+
+export const fetchData = async (endpoint: string): Promise => {
+  const response = await fetch(\`\${API_URL}/\${endpoint}\`)
+  return response.json()
+}`
+
+    const chunks = await chunk('utils.ts', code)
+
+    expect(chunks.length).toBeGreaterThan(0)
+
+    // Verify text reconstruction
+    for (const c of chunks) {
+      const sliced = code.slice(c.byteRange.start, c.byteRange.end)
+      expect(c.text).toBe(sliced)
+    }
+  })
+})
+
+describe('integration: JavaScript', () => {
+  test('full pipeline with class, methods, and JSDoc', async () => {
+    const code = `import { EventEmitter } from 'events'
+
+/**
+ * A simple event-driven calculator.
+ * @extends EventEmitter + */ +class Calculator extends EventEmitter { + constructor() { + super() + this.result = 0 + } + + /** + * Add a number to the result. + * @param {number} n - The number to add + * @returns {number} The new result + */ + add(n) { + this.result += n + this.emit('change', this.result) + return this.result + } + + /** + * Subtract a number from the result. + * @param {number} n - The number to subtract + * @returns {number} The new result + */ + subtract(n) { + this.result -= n + this.emit('change', this.result) + return this.result + } + + /** + * Reset the calculator. + */ + reset() { + this.result = 0 + this.emit('reset') + } +} + +module.exports = { Calculator }` + + const chunks = await chunk('calculator.js', code) + + expect(chunks.length).toBeGreaterThan(0) + + // All chunks have correct metadata + for (const c of chunks) { + expect(c.context.language).toBe('javascript') + } + + // Class and methods detected + const allEntities = chunks.flatMap((c) => c.context.entities) + const entityNames = allEntities.map((e) => e.name) + + expect(entityNames).toContain('Calculator') + + const methods = allEntities.filter((e) => e.type === 'method') + expect(methods.map((m) => m.name)).toContain('add') + expect(methods.map((m) => m.name)).toContain('subtract') + expect(methods.map((m) => m.name)).toContain('reset') + + // Text matches byte range + for (const c of chunks) { + const sliced = code.slice(c.byteRange.start, c.byteRange.end) + expect(c.text).toBe(sliced) + } + }) + + test('ES modules with default and named exports', async () => { + const code = `export const VERSION = '1.0.0' + +export function greet(name) { + return \`Hello, \${name}!\` +} + +export default class App { + constructor(config) { + this.config = config + } + + start() { + console.log('App started') + } +}` + + const chunks = await chunk('app.js', code) + + expect(chunks.length).toBeGreaterThan(0) + + const allEntities = chunks.flatMap((c) => c.context.entities) + const entityNames = allEntities.map((e) => e.name) + + expect(entityNames).toContain('greet') + expect(entityNames).toContain('App') + }) +}) + +describe('integration: Python', () => { + test('full pipeline with class, methods, and docstrings', async () => { + const code = `from typing import Optional, List +from dataclasses import dataclass + +@dataclass +class User: + """Represents a user in the system.""" + id: int + name: str + email: str + +class UserRepository: + """Repository for managing user data.""" + + def __init__(self, db_connection): + """Initialize the repository with a database connection. + + Args: + db_connection: The database connection to use. + """ + self.db = db_connection + self._cache = {} + + def get_user(self, user_id: int) -> Optional[User]: + """Fetch a user by ID. + + Args: + user_id: The unique identifier of the user. + + Returns: + The User object if found, None otherwise. + """ + if user_id in self._cache: + return self._cache[user_id] + return self.db.query(User, user_id) + + def get_all_users(self) -> List[User]: + """Fetch all users from the database. + + Returns: + A list of all User objects. + """ + return self.db.query_all(User) + + def save_user(self, user: User) -> None: + """Save a user to the database. + + Args: + user: The User object to save. 
+        """
+        self.db.save(user)
+        self._cache[user.id] = user
+
+
+def create_default_user() -> User:
+    """Create a default user for testing."""
+    return User(id=0, name="Default", email="default@example.com")`
+
+    const chunks = await chunk('repository.py', code)
+
+    expect(chunks.length).toBeGreaterThan(0)
+
+    // All chunks have correct metadata
+    for (const c of chunks) {
+      expect(c.context.language).toBe('python')
+    }
+
+    // Classes detected
+    const allEntities = chunks.flatMap((c) => c.context.entities)
+    const classNames = allEntities
+      .filter((e) => e.type === 'class')
+      .map((e) => e.name)
+    expect(classNames).toContain('User')
+    expect(classNames).toContain('UserRepository')
+
+    // Methods detected (Python extracts methods as 'function' type)
+    const pythonFunctions = allEntities.filter((e) => e.type === 'function')
+    const functionNames = pythonFunctions.map((f) => f.name)
+    expect(functionNames).toContain('__init__')
+    expect(functionNames).toContain('get_user')
+    expect(functionNames).toContain('get_all_users')
+    expect(functionNames).toContain('save_user')
+
+    // Standalone function also detected
+    expect(functionNames).toContain('create_default_user')
+
+    // Text matches byte range
+    for (const c of chunks) {
+      const sliced = code.slice(c.byteRange.start, c.byteRange.end)
+      expect(c.text).toBe(sliced)
+    }
+  })
+
+  test('async functions and decorators', async () => {
+    const code = `import asyncio
+from functools import lru_cache
+
+@lru_cache(maxsize=100)
+def fibonacci(n: int) -> int:
+    """Calculate the nth Fibonacci number with caching."""
+    if n < 2:
+        return n
+    return fibonacci(n - 1) + fibonacci(n - 2)
+
+async def fetch_data(url: str) -> dict:
+    """Fetch data from a URL asynchronously."""
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url) as response:
+            return await response.json()
+
+async def main():
+    """Main entry point."""
+    data = await fetch_data("https://api.example.com/data")
+    print(data)`
+
+    const chunks = await chunk('async_utils.py', code)
+
+    expect(chunks.length).toBeGreaterThan(0)
+
+    const allEntities = chunks.flatMap((c) => c.context.entities)
+    const functionNames = allEntities
+      .filter((e) => e.type === 'function')
+      .map((e) => e.name)
+
+    expect(functionNames).toContain('fibonacci')
+    expect(functionNames).toContain('fetch_data')
+    expect(functionNames).toContain('main')
+  })
+})
+
+describe('integration: Rust', () => {
+  test('full pipeline with struct, impl, and traits', async () => {
+    const code = `use std::collections::HashMap;
+use std::fmt;
+
+/// A simple key-value store.
+#[derive(Debug, Clone)]
+pub struct Store<T> {
+    data: HashMap<String, T>,
+    name: String,
+}
+
+impl<T> Store<T> {
+    /// Create a new empty store.
+    pub fn new(name: &str) -> Self {
+        Store {
+            data: HashMap::new(),
+            name: name.to_string(),
+        }
+    }
+
+    /// Get a value by key.
+    pub fn get(&self, key: &str) -> Option<&T> {
+        self.data.get(key)
+    }
+
+    /// Set a value for a key.
+    pub fn set(&mut self, key: String, value: T) {
+        self.data.insert(key, value);
+    }
+
+    /// Remove a value by key.
+    pub fn remove(&mut self, key: &str) -> Option<T> {
+        self.data.remove(key)
+    }
+
+    /// Get the number of items in the store.
+    pub fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    /// Check if the store is empty.
+    pub fn is_empty(&self) -> bool {
+        self.data.is_empty()
+    }
+}
+
+impl<T> fmt::Display for Store<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Store '{}' with {} items", self.name, self.data.len())
+    }
+}
+
+/// A helper function to create a store with initial values.
+pub fn create_store_with<T>(name: &str, items: Vec<(String, T)>) -> Store<T> {
+    let mut store = Store::new(name);
+    for (key, value) in items {
+        store.set(key, value);
+    }
+    store
+}`
+
+    const chunks = await chunk('store.rs', code)
+
+    expect(chunks.length).toBeGreaterThan(0)
+
+    // All chunks have correct metadata
+    for (const c of chunks) {
+      expect(c.context.language).toBe('rust')
+    }
+
+    // Entities detected (Rust struct may be extracted as 'type' or 'class')
+    const allEntities = chunks.flatMap((c) => c.context.entities)
+    const entityNames = allEntities.map((e) => e.name)
+    expect(entityNames).toContain('Store')
+
+    // Functions detected
+    const functions = allEntities.filter((e) => e.type === 'function')
+    const functionNames = functions.map((f) => f.name)
+    expect(functionNames).toContain('new')
+    expect(functionNames).toContain('get')
+    expect(functionNames).toContain('set')
+    expect(functionNames).toContain('create_store_with')
+
+    // Text matches byte range
+    for (const c of chunks) {
+      const sliced = code.slice(c.byteRange.start, c.byteRange.end)
+      expect(c.text).toBe(sliced)
+    }
+  })
+
+  test('enums and match expressions', async () => {
+    const code = `/// Represents the status of an operation.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Status {
+    Pending,
+    Running,
+    Completed(String),
+    Failed(String),
+}
+
+impl Status {
+    /// Check if the status represents a terminal state.
+    pub fn is_terminal(&self) -> bool {
+        matches!(self, Status::Completed(_) | Status::Failed(_))
+    }
+
+    /// Get a human-readable description.
+    pub fn description(&self) -> &str {
+        match self {
+            Status::Pending => "Waiting to start",
+            Status::Running => "In progress",
+            Status::Completed(_) => "Finished successfully",
+            Status::Failed(_) => "Finished with error",
+        }
+    }
+}`
+
+    const chunks = await chunk('status.rs', code)
+
+    expect(chunks.length).toBeGreaterThan(0)
+
+    const allEntities = chunks.flatMap((c) => c.context.entities)
+    const entityNames = allEntities.map((e) => e.name)
+
+    expect(entityNames).toContain('Status')
+    expect(entityNames).toContain('is_terminal')
+    expect(entityNames).toContain('description')
+  })
+})
+
+describe('integration: Go', () => {
+  test('full pipeline with struct, methods, and interfaces', async () => {
+    const code = `package repository
+
+import (
+	"context"
+	"database/sql"
+	"errors"
+	"time"
+)
+
+// ErrNotFound is returned when an entity is not found.
+var ErrNotFound = errors.New("entity not found")
+
+// User represents a user in the system.
+type User struct {
+	ID        int64
+	Name      string
+	Email     string
+	CreatedAt time.Time
+	UpdatedAt time.Time
+}
+
+// UserRepository defines the interface for user data access.
+type UserRepository interface {
+	GetByID(ctx context.Context, id int64) (*User, error)
+	GetAll(ctx context.Context) ([]*User, error)
+	Create(ctx context.Context, user *User) error
+	Update(ctx context.Context, user *User) error
+	Delete(ctx context.Context, id int64) error
+}
+
+// SQLUserRepository implements UserRepository using SQL.
+type SQLUserRepository struct {
+	db *sql.DB
+}
+
+// NewSQLUserRepository creates a new SQL-backed user repository.
+func NewSQLUserRepository(db *sql.DB) *SQLUserRepository { + return &SQLUserRepository{db: db} +} + +// GetByID fetches a user by their ID. +func (r *SQLUserRepository) GetByID(ctx context.Context, id int64) (*User, error) { + user := &User{} + err := r.db.QueryRowContext(ctx, "SELECT id, name, email, created_at, updated_at FROM users WHERE id = ?", id). + Scan(&user.ID, &user.Name, &user.Email, &user.CreatedAt, &user.UpdatedAt) + if err == sql.ErrNoRows { + return nil, ErrNotFound + } + return user, err +} + +// GetAll fetches all users. +func (r *SQLUserRepository) GetAll(ctx context.Context) ([]*User, error) { + rows, err := r.db.QueryContext(ctx, "SELECT id, name, email, created_at, updated_at FROM users") + if err != nil { + return nil, err + } + defer rows.Close() + + var users []*User + for rows.Next() { + user := &User{} + if err := rows.Scan(&user.ID, &user.Name, &user.Email, &user.CreatedAt, &user.UpdatedAt); err != nil { + return nil, err + } + users = append(users, user) + } + return users, rows.Err() +}` + + const chunks = await chunk('repository.go', code) + + expect(chunks.length).toBeGreaterThan(0) + + // All chunks have correct metadata + for (const c of chunks) { + expect(c.context.language).toBe('go') + } + + // Types detected + const allEntities = chunks.flatMap((c) => c.context.entities) + const typeNames = allEntities.map((e) => e.name) + + expect(typeNames).toContain('User') + expect(typeNames).toContain('UserRepository') + expect(typeNames).toContain('SQLUserRepository') + + // Functions detected (standalone functions) + const goFunctions = allEntities.filter((e) => e.type === 'function') + const goFunctionNames = goFunctions.map((f) => f.name) + expect(goFunctionNames).toContain('NewSQLUserRepository') + + // Methods detected (receiver functions) + const goMethods = allEntities.filter((e) => e.type === 'method') + const goMethodNames = goMethods.map((m) => m.name) + expect(goMethodNames).toContain('GetByID') + expect(goMethodNames).toContain('GetAll') + + // Text matches byte range + for (const c of chunks) { + const sliced = code.slice(c.byteRange.start, c.byteRange.end) + expect(c.text).toBe(sliced) + } + }) + + test('goroutines and channels', async () => { + const code = `package worker + +import ( + "context" + "sync" +) + +// Job represents a unit of work. +type Job struct { + ID int + Data string +} + +// Result represents the result of processing a job. +type Result struct { + JobID int + Value string + Error error +} + +// Worker processes jobs from a channel. 
+func Worker(ctx context.Context, id int, jobs <-chan Job, results chan<- Result, wg *sync.WaitGroup) { + defer wg.Done() + for { + select { + case <-ctx.Done(): + return + case job, ok := <-jobs: + if !ok { + return + } + result := processJob(job) + results <- result + } + } +} + +func processJob(job Job) Result { + return Result{ + JobID: job.ID, + Value: "processed: " + job.Data, + } +}` + + const chunks = await chunk('worker.go', code) + + expect(chunks.length).toBeGreaterThan(0) + + const allEntities = chunks.flatMap((c) => c.context.entities) + const entityNames = allEntities.map((e) => e.name) + + expect(entityNames).toContain('Job') + expect(entityNames).toContain('Result') + expect(entityNames).toContain('Worker') + expect(entityNames).toContain('processJob') + }) +}) + +describe('integration: Java', () => { + test('full pipeline with class, methods, and annotations', async () => { + const code = `package com.example.service; + +import java.util.List; +import java.util.Optional; +import java.util.ArrayList; + +/** + * Service for managing user operations. + * + * @author Example + * @version 1.0 + */ +public class UserService { + private final UserRepository repository; + private final Logger logger; + + /** + * Creates a new UserService instance. + * + * @param repository the user repository + * @param logger the logger instance + */ + public UserService(UserRepository repository, Logger logger) { + this.repository = repository; + this.logger = logger; + } + + /** + * Find a user by their unique identifier. + * + * @param id the user's ID + * @return an Optional containing the user if found + */ + public Optional findById(Long id) { + logger.info("Finding user with id: " + id); + return repository.findById(id); + } + + /** + * Get all users in the system. + * + * @return a list of all users + */ + public List findAll() { + logger.info("Fetching all users"); + return repository.findAll(); + } + + /** + * Save a user to the database. + * + * @param user the user to save + * @return the saved user with generated ID + */ + public User save(User user) { + logger.info("Saving user: " + user.getName()); + return repository.save(user); + } + + /** + * Delete a user by their ID. + * + * @param id the user's ID + */ + public void deleteById(Long id) { + logger.warn("Deleting user with id: " + id); + repository.deleteById(id); + } +} + +/** + * Interface for user data access. 
+ */ +interface UserRepository { + Optional findById(Long id); + List findAll(); + User save(User user); + void deleteById(Long id); +}` + + const chunks = await chunk('UserService.java', code) + + expect(chunks.length).toBeGreaterThan(0) + + // All chunks have correct metadata + for (const c of chunks) { + expect(c.context.language).toBe('java') + } + + // Class detected + const allEntities = chunks.flatMap((c) => c.context.entities) + const classNames = allEntities + .filter((e) => e.type === 'class') + .map((e) => e.name) + expect(classNames).toContain('UserService') + + // Methods detected + const methods = allEntities.filter((e) => e.type === 'method') + const methodNames = methods.map((m) => m.name) + expect(methodNames).toContain('findById') + expect(methodNames).toContain('findAll') + expect(methodNames).toContain('save') + expect(methodNames).toContain('deleteById') + + // Interface detected + const interfaces = allEntities.filter((e) => e.type === 'interface') + expect(interfaces.map((i) => i.name)).toContain('UserRepository') + + // Text matches byte range + for (const c of chunks) { + const sliced = code.slice(c.byteRange.start, c.byteRange.end) + expect(c.text).toBe(sliced) + } + }) + + test('enum and static methods', async () => { + const code = `package com.example.model; + +/** + * Represents the status of an order. + */ +public enum OrderStatus { + PENDING("Waiting for processing"), + PROCESSING("Being processed"), + SHIPPED("On the way"), + DELIVERED("Successfully delivered"), + CANCELLED("Order cancelled"); + + private final String description; + + OrderStatus(String description) { + this.description = description; + } + + public String getDescription() { + return description; + } + + public boolean isTerminal() { + return this == DELIVERED || this == CANCELLED; + } + + public static OrderStatus fromString(String status) { + for (OrderStatus os : values()) { + if (os.name().equalsIgnoreCase(status)) { + return os; + } + } + throw new IllegalArgumentException("Unknown status: " + status); + } +}` + + const chunks = await chunk('OrderStatus.java', code) + + expect(chunks.length).toBeGreaterThan(0) + + const allEntities = chunks.flatMap((c) => c.context.entities) + const entityNames = allEntities.map((e) => e.name) + + expect(entityNames).toContain('OrderStatus') + expect(entityNames).toContain('getDescription') + expect(entityNames).toContain('isTerminal') + expect(entityNames).toContain('fromString') + }) +}) + +// ============================================================================ +// Cross-Language Tests +// ============================================================================ + +describe('integration: cross-language', () => { + test('chunker processes multiple languages sequentially', async () => { + const files: { path: string; code: string; expectedLang: Language }[] = [ + { + path: 'utils/math.ts', + code: `export function add(a: number, b: number): number { return a + b } +export function subtract(a: number, b: number): number { return a - b }`, + expectedLang: 'typescript', + }, + { + path: 'utils/string.py', + code: `def capitalize(s: str) -> str: + return s.capitalize() + +def lowercase(s: str) -> str: + return s.lower()`, + expectedLang: 'python', + }, + { + path: 'utils/array.go', + code: `package utils + +func Sum(nums []int) int { + total := 0 + for _, n := range nums { + total += n + } + return total +}`, + expectedLang: 'go', + }, + { + path: 'utils/calc.rs', + code: `pub fn multiply(a: i32, b: i32) -> i32 { + a * b +} + +pub fn divide(a: 
i32, b: i32) -> Option { + if b == 0 { None } else { Some(a / b) } +}`, + expectedLang: 'rust', + }, + { + path: 'Utils.java', + code: `public class Utils { + public static int max(int a, int b) { + return a > b ? a : b; + } +}`, + expectedLang: 'java', + }, + { + path: 'helpers.js', + code: `function debounce(fn, delay) { + let timer + return function(...args) { + clearTimeout(timer) + timer = setTimeout(() => fn.apply(this, args), delay) + } +}`, + expectedLang: 'javascript', + }, + ] + + for (const file of files) { + const chunks = await chunk(file.path, file.code, { maxChunkSize: 300 }) + + expect(chunks.length).toBeGreaterThan(0) + expect(chunks[0]?.context.filepath).toBe(file.path) + expect(chunks[0]?.context.language).toBe(file.expectedLang) + + // Verify text integrity + for (const c of chunks) { + const sliced = file.code.slice(c.byteRange.start, c.byteRange.end) + expect(c.text).toBe(sliced) + } + } + }) +}) + +// ============================================================================ +// Streaming Tests +// ============================================================================ + +describe('integration: streaming', () => { + test('stream processes chunks incrementally', async () => { + const code = `function processItem(item: Item): Result { + const validated = validate(item) + const transformed = transform(validated) + return finalize(transformed) +} + +function validate(item: Item): ValidatedItem { + if (!item.id) throw new Error('Missing id') + return { ...item, validated: true } +} + +function transform(item: ValidatedItem): TransformedItem { + return { ...item, transformed: true } +} + +function finalize(item: TransformedItem): Result { + return { success: true, data: item } +}` + + const chunks: Chunk[] = [] + for await (const c of chunkStream('pipeline.ts', code, { + maxChunkSize: 200, + })) { + chunks.push(c) + + // In streaming mode, totalChunks is -1 (unknown upfront) + expect(c.totalChunks).toBe(-1) + expect(c.index).toBe(chunks.length - 1) + } + + expect(chunks.length).toBeGreaterThan(0) + + // Verify indices are sequential + chunks.forEach((c, i) => { + expect(c.index).toBe(i) + }) + }) + + test('stream for each supported language', async () => { + const samples: { path: string; code: string }[] = [ + { path: 'test.ts', code: 'function foo(): number { return 1 }' }, + { path: 'test.js', code: 'function bar() { return 2 }' }, + { path: 'test.py', code: 'def baz():\n return 3' }, + { path: 'test.rs', code: 'fn qux() -> i32 { 4 }' }, + { path: 'test.go', code: 'package main\n\nfunc quux() int { return 5 }' }, + { + path: 'Test.java', + code: 'public class Test { int corge() { return 6; } }', + }, + ] + + for (const sample of samples) { + const chunks: Chunk[] = [] + for await (const c of chunkStream(sample.path, sample.code)) { + chunks.push(c) + } + + expect(chunks.length).toBeGreaterThan(0) + + // Text reconstruction works + for (const c of chunks) { + const sliced = sample.code.slice(c.byteRange.start, c.byteRange.end) + expect(c.text).toBe(sliced) + } + } + }) +}) + +// ============================================================================ +// Error Handling Tests +// ============================================================================ + +describe('integration: error handling', () => { + test('throws for unsupported file extensions', async () => { + await expect(chunk('file.unsupported', 'code')).rejects.toThrow( + 'Unsupported file type', + ) + }) + + test('stream throws for unsupported file extensions', async () => { + const collectChunks 
= async () => { + const chunks: Chunk[] = [] + for await (const c of chunkStream('file.unsupported', 'code')) { + chunks.push(c) + } + return chunks + } + + await expect(collectChunks()).rejects.toThrow('Unsupported file type') + }) + + test('handles malformed code gracefully', async () => { + // Unclosed brace - parser should handle this + const code = `function broken() { + return 1` + + // Should not throw, should produce some output + const chunks = await chunk('test.ts', code) + expect(chunks.length).toBeGreaterThanOrEqual(0) + }) + + test('handles syntax errors in each language', async () => { + const malformedSamples: { path: string; code: string }[] = [ + { path: 'test.ts', code: 'function broken( { return' }, + { path: 'test.js', code: 'const x =' }, + { path: 'test.py', code: 'def broken(\n return' }, + { path: 'test.rs', code: 'fn broken( -> {' }, + { path: 'test.go', code: 'func broken( {' }, + { path: 'Test.java', code: 'public class { int' }, + ] + + for (const sample of malformedSamples) { + // Should not throw - parser recovers + const chunks = await chunk(sample.path, sample.code) + expect(chunks).toBeDefined() + } + }) +}) diff --git a/test/parser.test.ts b/test/parser.test.ts index 9a44b4a..d72d8ca 100644 --- a/test/parser.test.ts +++ b/test/parser.test.ts @@ -1,6 +1,11 @@ -import { beforeAll, describe, expect, test } from 'bun:test' +import { beforeAll, beforeEach, describe, expect, test } from 'bun:test' import { detectLanguage } from '../src' -import { initializeParser, parseCode } from '../src/parser' +import { + clearGrammarCache, + initializeParser, + parseCode, + resetParser, +} from '../src/parser' // ============================================================================ // Language Detection Tests @@ -15,6 +20,14 @@ describe('detectLanguage', () => { expect(detectLanguage('components/Button.tsx')).toBe('typescript') }) + test('detects typescript from .mts extension', () => { + expect(detectLanguage('src/module.mts')).toBe('typescript') + }) + + test('detects typescript from .cts extension', () => { + expect(detectLanguage('src/commonjs.cts')).toBe('typescript') + }) + test('detects javascript from .js extension', () => { expect(detectLanguage('lib/utils.js')).toBe('javascript') }) @@ -23,10 +36,22 @@ describe('detectLanguage', () => { expect(detectLanguage('components/App.jsx')).toBe('javascript') }) + test('detects javascript from .mjs extension', () => { + expect(detectLanguage('lib/module.mjs')).toBe('javascript') + }) + + test('detects javascript from .cjs extension', () => { + expect(detectLanguage('lib/commonjs.cjs')).toBe('javascript') + }) + test('detects python from .py extension', () => { expect(detectLanguage('scripts/main.py')).toBe('python') }) + test('detects python from .pyi extension', () => { + expect(detectLanguage('stubs/types.pyi')).toBe('python') + }) + test('detects rust from .rs extension', () => { expect(detectLanguage('src/lib.rs')).toBe('rust') }) @@ -43,6 +68,19 @@ describe('detectLanguage', () => { expect(detectLanguage('README.md')).toBeNull() expect(detectLanguage('config.yaml')).toBeNull() expect(detectLanguage('Makefile')).toBeNull() + expect(detectLanguage('data.json')).toBeNull() + expect(detectLanguage('.env')).toBeNull() + }) + + test('handles deeply nested paths correctly', () => { + expect(detectLanguage('src/a/b/c/deep/file.ts')).toBe('typescript') + expect(detectLanguage('/absolute/path/to/file.py')).toBe('python') + }) + + test('handles filenames with multiple dots', () => { + 
expect(detectLanguage('file.test.ts')).toBe('typescript') + expect(detectLanguage('app.config.js')).toBe('javascript') + expect(detectLanguage('my.file.name.py')).toBe('python') }) }) @@ -55,116 +93,862 @@ describe('parseCode', () => { await initializeParser() }) - test('parses valid TypeScript code', async () => { - const code = ` -function greet(name: string): string { + describe('TypeScript parsing', () => { + test('parses simple function with exact AST structure', async () => { + const code = `function greet(name: string): string { return \`Hello, \${name}!\` -} -` - const result = await parseCode(code, 'typescript') +}` + const result = await parseCode(code, 'typescript') - expect(result.tree).toBeDefined() - expect(result.error).toBeNull() - expect(result.tree.rootNode.type).toBe('program') + expect(result.error).toBeNull() + expect(result.tree.rootNode.type).toBe('program') + expect(result.tree.rootNode.childCount).toBe(1) + + const funcNode = result.tree.rootNode.firstChild! + expect(funcNode.type).toBe('function_declaration') + + // Verify function name via tree-sitter field access + const nameNode = funcNode.childForFieldName('name') + expect(nameNode).not.toBeNull() + expect(nameNode?.type).toBe('identifier') + expect(nameNode?.text).toBe('greet') + + // Verify exact positions + expect(funcNode.startPosition.row).toBe(0) + expect(funcNode.startPosition.column).toBe(0) + expect(funcNode.endPosition.row).toBe(2) + expect(funcNode.endPosition.column).toBe(1) + + // Verify parameters field + const paramsNode = funcNode.childForFieldName('parameters') + expect(paramsNode).not.toBeNull() + expect(paramsNode?.type).toBe('formal_parameters') + + // Verify return type field + const returnTypeNode = funcNode.childForFieldName('return_type') + expect(returnTypeNode).not.toBeNull() + + // Verify body field + const bodyNode = funcNode.childForFieldName('body') + expect(bodyNode).not.toBeNull() + expect(bodyNode?.type).toBe('statement_block') + }) + + test('parses arrow function with exact positions', async () => { + const code = `const add = (a: number, b: number) => a + b` + const result = await parseCode(code, 'typescript') + + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.type).toBe('program') + expect(root.childCount).toBe(1) + + const lexicalDecl = root.firstChild! + expect(lexicalDecl.type).toBe('lexical_declaration') + + const variableDeclarator = lexicalDecl.firstNamedChild! + expect(variableDeclarator.type).toBe('variable_declarator') + + const arrowFunc = variableDeclarator.childForFieldName('value') + expect(arrowFunc).not.toBeNull() + expect(arrowFunc?.type).toBe('arrow_function') + + // Verify positions + expect(root.startPosition).toEqual({ row: 0, column: 0 }) + expect(root.endPosition).toEqual({ row: 0, column: 43 }) + }) + + test('parses class with exact child structure', async () => { + const code = `class Calculator { + private value: number = 0 + + add(n: number): number { + return this.value += n + } +}` + const result = await parseCode(code, 'typescript') + + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.childCount).toBe(1) + + const classNode = root.firstChild! 
+ expect(classNode.type).toBe('class_declaration') + + const className = classNode.childForFieldName('name') + expect(className).not.toBeNull() + expect(className?.text).toBe('Calculator') + + const body = classNode.childForFieldName('body') + expect(body).not.toBeNull() + expect(body?.type).toBe('class_body') + + // Verify class body has exactly 2 members (field + method) + const namedChildren = body?.namedChildren + expect(namedChildren).toHaveLength(2) + expect(namedChildren[0].type).toBe('public_field_definition') + expect(namedChildren[1].type).toBe('method_definition') + }) + + test('parses interface with exact structure', async () => { + const code = `interface User { + id: number + name: string + email?: string +}` + const result = await parseCode(code, 'typescript') + + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.childCount).toBe(1) + + const interfaceNode = root.firstChild! + expect(interfaceNode.type).toBe('interface_declaration') + + const interfaceName = interfaceNode.childForFieldName('name') + expect(interfaceName?.text).toBe('User') + + const body = interfaceNode.childForFieldName('body') + expect(body).not.toBeNull() + expect(body?.type).toBe('interface_body') + + // Verify exact property count + const properties = body?.namedChildren.filter( + (n) => n.type === 'property_signature', + ) + expect(properties).toHaveLength(3) + }) + + test('parses type alias with exact structure', async () => { + const code = `type Status = 'pending' | 'active' | 'done'` + const result = await parseCode(code, 'typescript') + + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.childCount).toBe(1) + + const typeAlias = root.firstChild! + expect(typeAlias.type).toBe('type_alias_declaration') + + const typeName = typeAlias.childForFieldName('name') + expect(typeName?.text).toBe('Status') + + const typeValue = typeAlias.childForFieldName('value') + expect(typeValue).not.toBeNull() + expect(typeValue?.type).toBe('union_type') + }) }) - test('parses valid JavaScript code', async () => { - const code = ` -const add = (a, b) => a + b + describe('JavaScript parsing', () => { + test('parses ES6 module exports with exact structure', async () => { + const code = `const add = (a, b) => a + b export default add -` - const result = await parseCode(code, 'javascript') +export { add }` + const result = await parseCode(code, 'javascript') - expect(result.tree).toBeDefined() - expect(result.error).toBeNull() + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.type).toBe('program') + expect(root.childCount).toBe(3) + + expect(root.children[0].type).toBe('lexical_declaration') + expect(root.children[1].type).toBe('export_statement') + expect(root.children[2].type).toBe('export_statement') + + // Verify byte ranges + expect(root.startIndex).toBe(0) + expect(root.endIndex).toBe(code.length) + }) + + test('parses object destructuring correctly', async () => { + const code = `const { a, b, c: renamed } = obj` + const result = await parseCode(code, 'javascript') + + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.childCount).toBe(1) + + const lexicalDecl = root.firstChild! + expect(lexicalDecl.type).toBe('lexical_declaration') + + const declarator = lexicalDecl.firstNamedChild! 
+ const pattern = declarator.childForFieldName('name') + expect(pattern?.type).toBe('object_pattern') + }) }) - test('parses valid Python code', async () => { - const code = ` -def greet(name): - return f"Hello, {name}!" -` - const result = await parseCode(code, 'python') + describe('Python parsing', () => { + test('parses function with exact positions', async () => { + const code = `def greet(name): + return f"Hello, {name}!"` + const result = await parseCode(code, 'python') - expect(result.tree).toBeDefined() - expect(result.error).toBeNull() + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.type).toBe('module') + expect(root.childCount).toBe(1) + + const funcNode = root.firstChild! + expect(funcNode.type).toBe('function_definition') + + const funcName = funcNode.childForFieldName('name') + expect(funcName?.text).toBe('greet') + + // Verify exact position + expect(funcNode.startPosition).toEqual({ row: 0, column: 0 }) + // End position column is length of " return f\"Hello, {name}!\"" + expect(funcNode.endPosition.row).toBe(1) + expect(funcNode.endPosition.column).toBe(28) + }) + + test('parses class with methods', async () => { + const code = `class Calculator: + def __init__(self): + self.value = 0 + + def add(self, n): + self.value += n + return self.value` + const result = await parseCode(code, 'python') + + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.childCount).toBe(1) + + const classNode = root.firstChild! + expect(classNode.type).toBe('class_definition') + + const className = classNode.childForFieldName('name') + expect(className?.text).toBe('Calculator') + + const body = classNode.childForFieldName('body') + expect(body).not.toBeNull() + expect(body?.type).toBe('block') + + // Verify method count + const methods = body?.namedChildren.filter( + (n) => n.type === 'function_definition', + ) + expect(methods).toHaveLength(2) + }) + + test('parses decorators correctly', async () => { + const code = `@property +def value(self): + return self._value` + const result = await parseCode(code, 'python') + + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.childCount).toBe(1) + + const decoratedDef = root.firstChild! + expect(decoratedDef.type).toBe('decorated_definition') + + const decorator = decoratedDef.namedChildren.find( + (n) => n.type === 'decorator', + ) + expect(decorator).not.toBeNull() + + const funcDef = decoratedDef.namedChildren.find( + (n) => n.type === 'function_definition', + ) + expect(funcDef).not.toBeNull() + }) }) - test('parses valid Rust code', async () => { - const code = ` -fn main() { + describe('Rust parsing', () => { + test('parses function with exact structure', async () => { + const code = `fn main() { println!("Hello, world!"); -} -` - const result = await parseCode(code, 'rust') +}` + const result = await parseCode(code, 'rust') - expect(result.tree).toBeDefined() - expect(result.error).toBeNull() + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.type).toBe('source_file') + expect(root.childCount).toBe(1) + + const funcNode = root.firstChild! 
+ expect(funcNode.type).toBe('function_item') + + const funcName = funcNode.childForFieldName('name') + expect(funcName?.text).toBe('main') + + // Verify positions + expect(funcNode.startPosition).toEqual({ row: 0, column: 0 }) + expect(funcNode.endPosition).toEqual({ row: 2, column: 1 }) + }) + + test('parses struct with fields', async () => { + const code = `struct Point { + x: i32, + y: i32, +}` + const result = await parseCode(code, 'rust') + + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.childCount).toBe(1) + + const structNode = root.firstChild! + expect(structNode.type).toBe('struct_item') + + const structName = structNode.childForFieldName('name') + expect(structName?.text).toBe('Point') + + const body = structNode.childForFieldName('body') + expect(body).not.toBeNull() + expect(body?.type).toBe('field_declaration_list') + + // Verify field count + const fields = body?.namedChildren.filter( + (n) => n.type === 'field_declaration', + ) + expect(fields).toHaveLength(2) + }) + + test('parses impl block correctly', async () => { + const code = `impl Point { + fn new(x: i32, y: i32) -> Self { + Point { x, y } + } +}` + const result = await parseCode(code, 'rust') + + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.childCount).toBe(1) + + const implNode = root.firstChild! + expect(implNode.type).toBe('impl_item') + + const implType = implNode.childForFieldName('type') + expect(implType?.text).toBe('Point') + + const body = implNode.childForFieldName('body') + expect(body).not.toBeNull() + + const methods = body?.namedChildren.filter( + (n) => n.type === 'function_item', + ) + expect(methods).toHaveLength(1) + }) }) - test('parses valid Go code', async () => { - const code = ` -package main + describe('Go parsing', () => { + test('parses package and function with exact structure', async () => { + const code = `package main func main() { fmt.Println("Hello, world!") +}` + const result = await parseCode(code, 'go') + + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.type).toBe('source_file') + expect(root.childCount).toBe(2) + + expect(root.children[0].type).toBe('package_clause') + expect(root.children[1].type).toBe('function_declaration') + + const funcNode = root.children[1] + const funcName = funcNode.childForFieldName('name') + expect(funcName?.text).toBe('main') + + // Verify positions + expect(funcNode.startPosition).toEqual({ row: 2, column: 0 }) + }) + + test('parses struct with methods', async () => { + const code = `package main + +type Point struct { + X int + Y int } -` - const result = await parseCode(code, 'go') - expect(result.tree).toBeDefined() - expect(result.error).toBeNull() +func (p Point) String() string { + return fmt.Sprintf("(%d, %d)", p.X, p.Y) +}` + const result = await parseCode(code, 'go') + + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.childCount).toBe(3) + + expect(root.children[0].type).toBe('package_clause') + expect(root.children[1].type).toBe('type_declaration') + expect(root.children[2].type).toBe('method_declaration') + + // Verify struct + const typeDecl = root.children[1] + const typeSpec = typeDecl.firstNamedChild! 
+ expect(typeSpec.type).toBe('type_spec') + + // Verify method receiver + const methodDecl = root.children[2] + const receiver = methodDecl.childForFieldName('receiver') + expect(receiver).not.toBeNull() + }) }) - test('parses valid Java code', async () => { - const code = ` -public class Main { + describe('Java parsing', () => { + test('parses class with exact structure', async () => { + const code = `public class Main { public static void main(String[] args) { System.out.println("Hello, world!"); } -} -` - const result = await parseCode(code, 'java') +}` + const result = await parseCode(code, 'java') - expect(result.tree).toBeDefined() - expect(result.error).toBeNull() + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.type).toBe('program') + expect(root.childCount).toBe(1) + + const classNode = root.firstChild! + expect(classNode.type).toBe('class_declaration') + + const className = classNode.childForFieldName('name') + expect(className?.text).toBe('Main') + + const body = classNode.childForFieldName('body') + expect(body).not.toBeNull() + expect(body?.type).toBe('class_body') + + const methods = body?.namedChildren.filter( + (n) => n.type === 'method_declaration', + ) + expect(methods).toHaveLength(1) + + // Verify method name + const mainMethod = methods[0] + const methodName = mainMethod.childForFieldName('name') + expect(methodName?.text).toBe('main') + }) + + test('parses interface correctly', async () => { + const code = `public interface Comparable { + int compareTo(T other); +}` + const result = await parseCode(code, 'java') + + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.childCount).toBe(1) + + const interfaceNode = root.firstChild! + expect(interfaceNode.type).toBe('interface_declaration') + + const interfaceName = interfaceNode.childForFieldName('name') + expect(interfaceName?.text).toBe('Comparable') + }) }) - test('handles syntax errors gracefully (recoverable)', async () => { - const code = ` -function broken( { + describe('syntax errors and partial trees', () => { + test('TypeScript: missing closing brace produces recoverable error', async () => { + const code = `function broken( { return -} -` - const result = await parseCode(code, 'typescript') +}` + const result = await parseCode(code, 'typescript') + + // Tree-sitter always produces a tree + expect(result.tree).not.toBeNull() + expect(result.tree.rootNode.type).toBe('program') + + // Verify error details + expect(result.error).not.toBeNull() + expect(result.error?.recoverable).toBe(true) + // Error message contains either ERROR or MISSING depending on grammar + expect(result.error?.message).toMatch(/ERROR|MISSING/) + + // Verify tree has errors + expect(result.tree.rootNode.hasError).toBe(true) + }) + + test('TypeScript: unclosed string produces error with position', async () => { + const code = `const x = "unclosed string` + const result = await parseCode(code, 'typescript') + + expect(result.tree).not.toBeNull() + expect(result.error).not.toBeNull() + expect(result.error?.recoverable).toBe(true) + // Error message should include position info + expect(result.error?.message).toMatch(/line \d+, column \d+/) + }) + + test('Python: missing body produces partial tree', async () => { + // A function without a valid body produces an error + const code = `def broken():` + const result = await parseCode(code, 'python') - // Tree-sitter always produces a tree - expect(result.tree).toBeDefined() - // But marks the error - expect(result.error).not.toBeNull() 
- expect(result.error?.recoverable).toBe(true) + expect(result.tree).not.toBeNull() + // Tree should still be navigable + const root = result.tree.rootNode + expect(root.type).toBe('module') + // May or may not have error depending on grammar tolerance + }) + + test('JavaScript: missing semicolon in strict context', async () => { + const code = `const a = 1 +const b = 2` + const result = await parseCode(code, 'javascript') + + // This is actually valid JS, no semicolons needed + expect(result.error).toBeNull() + expect(result.tree.rootNode.childCount).toBe(2) + }) + + test('Rust: missing semicolon produces error', async () => { + const code = `fn main() { + let x = 5 + let y = 6; +}` + const result = await parseCode(code, 'rust') + + // Rust requires semicolons - this should have an error + expect(result.tree).not.toBeNull() + expect(result.error).not.toBeNull() + expect(result.error?.recoverable).toBe(true) + }) + + test('Go: missing package declaration produces error', async () => { + const code = `func main() { + fmt.Println("Hello") +}` + const result = await parseCode(code, 'go') + + // Go files need package declaration + // Tree-sitter may or may not error on this depending on grammar + expect(result.tree).not.toBeNull() + const root = result.tree.rootNode + expect(root.type).toBe('source_file') + }) + + test('multiple errors are collected in message', async () => { + const code = `function a( { return } +function b( { return } +function c( { return }` + const result = await parseCode(code, 'typescript') + + expect(result.error).not.toBeNull() + expect(result.error?.recoverable).toBe(true) + // Should have multiple error locations (ERROR or MISSING) + expect(result.error?.message).toMatch(/ERROR|MISSING/) + // Multiple errors means multiple occurrences of line info + const lineMatches = result.error?.message.match(/line \d+/g) + expect(lineMatches?.length).toBeGreaterThanOrEqual(2) + }) + + test('error count is capped at 3 plus summary', async () => { + const code = `function a( { } +function b( { } +function c( { } +function d( { } +function e( { }` + const result = await parseCode(code, 'typescript') + + expect(result.error).not.toBeNull() + // Error message should show first 3 errors and "... 
and X more" + expect(result.error?.message).toContain('more') + }) }) - test('parses code with multiple functions', async () => { - const code = ` -function add(a: number, b: number): number { - return a + b -} + describe('rootNode properties with exact values', () => { + test('TypeScript rootNode has correct properties', async () => { + const code = `export const x = 1` + const result = await parseCode(code, 'typescript') -function subtract(a: number, b: number): number { - return a - b -} -` - const result = await parseCode(code, 'typescript') + const root = result.tree.rootNode + expect(root.type).toBe('program') + expect(root.text).toBe(code) + expect(root.childCount).toBe(1) + expect(root.startIndex).toBe(0) + expect(root.endIndex).toBe(code.length) + expect(root.startPosition).toEqual({ row: 0, column: 0 }) + expect(root.endPosition).toEqual({ row: 0, column: 18 }) + expect(root.hasError).toBe(false) + expect(root.parent).toBeNull() + }) + + test('Python rootNode has correct properties', async () => { + const code = `x = 1` + const result = await parseCode(code, 'python') + + const root = result.tree.rootNode + expect(root.type).toBe('module') + expect(root.text).toBe(code) + expect(root.childCount).toBe(1) + expect(root.startIndex).toBe(0) + expect(root.endIndex).toBe(5) + }) + + test('Rust rootNode has correct properties', async () => { + const code = `fn x() {}` + const result = await parseCode(code, 'rust') + + const root = result.tree.rootNode + expect(root.type).toBe('source_file') + expect(root.text).toBe(code) + expect(root.childCount).toBe(1) + }) + + test('Go rootNode has correct properties', async () => { + const code = `package main` + const result = await parseCode(code, 'go') + + const root = result.tree.rootNode + expect(root.type).toBe('source_file') + expect(root.text).toBe(code) + expect(root.childCount).toBe(1) + }) + + test('Java rootNode has correct properties', async () => { + const code = `class X {}` + const result = await parseCode(code, 'java') + + const root = result.tree.rootNode + expect(root.type).toBe('program') + expect(root.text).toBe(code) + expect(root.childCount).toBe(1) + }) + }) + + describe('exact node counts in parsed trees', () => { + test('counts nodes in TypeScript with imports and exports', async () => { + const code = `import { foo } from 'bar' +export function greet(name: string) { + return name +}` + const result = await parseCode(code, 'typescript') + + expect(result.error).toBeNull() + const root = result.tree.rootNode + expect(root.childCount).toBe(2) + + const importStmt = root.children[0] + expect(importStmt.type).toBe('import_statement') + + const exportStmt = root.children[1] + expect(exportStmt.type).toBe('export_statement') + }) + + test('counts nested class members accurately', async () => { + const code = `class Example { + field1 = 1 + field2 = 2 + method1() {} + method2() {} + method3() {} +}` + const result = await parseCode(code, 'typescript') + + expect(result.error).toBeNull() + const classNode = result.tree.rootNode.firstChild! + const body = classNode.childForFieldName('body')! 
+ + // Should have exactly 5 members + const members = body.namedChildren + expect(members).toHaveLength(5) + + const fields = members.filter((n) => n.type === 'public_field_definition') + const methods = members.filter((n) => n.type === 'method_definition') + + expect(fields).toHaveLength(2) + expect(methods).toHaveLength(3) + }) + + test('counts function parameters exactly', async () => { + const code = `function test(a: number, b: string, c?: boolean, ...rest: any[]) {}` + const result = await parseCode(code, 'typescript') + + expect(result.error).toBeNull() + const funcNode = result.tree.rootNode.firstChild! + const params = funcNode.childForFieldName('parameters')! + + // Parameters include: a, b, c, rest + const paramList = params.namedChildren + expect(paramList).toHaveLength(4) + }) + + test('counts array elements correctly', async () => { + const code = `const arr = [1, 2, 3, 4, 5]` + const result = await parseCode(code, 'typescript') + + expect(result.error).toBeNull() + const decl = result.tree.rootNode.firstChild! + const declarator = decl.firstNamedChild! + const arrayLiteral = declarator.childForFieldName('value')! + + expect(arrayLiteral.type).toBe('array') + expect(arrayLiteral.namedChildCount).toBe(5) + }) + }) - expect(result.tree).toBeDefined() + describe('tree-sitter node navigation', () => { + test('namedChildren filters anonymous nodes', async () => { + const code = `const x = { a: 1, b: 2 }` + const result = await parseCode(code, 'typescript') + + const decl = result.tree.rootNode.firstChild! + const declarator = decl.firstNamedChild! + const obj = declarator.childForFieldName('value')! + + // All children includes punctuation + expect(obj.childCount).toBeGreaterThan(obj.namedChildCount) + + // Named children should be just the properties + expect(obj.namedChildCount).toBe(2) + expect(obj.namedChildren[0].type).toBe('pair') + expect(obj.namedChildren[1].type).toBe('pair') + }) + + test('firstChild and lastChild work correctly', async () => { + const code = `function a() {} +function b() {} +function c() {}` + const result = await parseCode(code, 'typescript') + + const root = result.tree.rootNode + expect(root.firstChild?.type).toBe('function_declaration') + expect(root.lastChild?.type).toBe('function_declaration') + + // Verify first vs last by checking function names + const firstName = root.firstChild?.childForFieldName('name')?.text + const lastName = root.lastChild?.childForFieldName('name')?.text + + expect(firstName).toBe('a') + expect(lastName).toBe('c') + }) + + test('nextSibling and previousSibling navigation', async () => { + const code = `function a() {} +function b() {} +function c() {}` + const result = await parseCode(code, 'typescript') + + const root = result.tree.rootNode + const first = root.firstChild! + const second = first.nextSibling! + const third = second.nextSibling! 
+ + expect(second.childForFieldName('name')?.text).toBe('b') + expect(third.childForFieldName('name')?.text).toBe('c') + expect(third.nextSibling).toBeNull() + + // Use .equals() for node comparison instead of toBe (object identity) + expect(third.previousSibling?.equals(second)).toBe(true) + expect(second.previousSibling?.equals(first)).toBe(true) + expect(first.previousSibling).toBeNull() + }) + + test('descendantForIndex finds correct node', async () => { + const code = `function greet(name: string) {}` + const result = await parseCode(code, 'typescript') + + const root = result.tree.rootNode + + // Find node at position of "name" parameter + const nameStart = code.indexOf('name') + const node = root.descendantForIndex(nameStart) + + expect(node).not.toBeNull() + expect(node?.text).toBe('name') + expect(node?.type).toBe('identifier') + }) + }) +}) + +// ============================================================================ +// Grammar Caching Tests +// ============================================================================ + +describe('grammar caching', () => { + beforeEach(() => { + resetParser() + }) + + test('cached grammars are reused on subsequent parses', async () => { + await initializeParser() + + // First parse loads the grammar + const code1 = `const x = 1` + const result1 = await parseCode(code1, 'typescript') + expect(result1.error).toBeNull() + + // Second parse should use cached grammar + const code2 = `const y = 2` + const result2 = await parseCode(code2, 'typescript') + expect(result2.error).toBeNull() + + // Both should parse successfully with same root type + expect(result1.tree.rootNode.type).toBe('program') + expect(result2.tree.rootNode.type).toBe('program') + }) + + test('different languages have separate caches', async () => { + await initializeParser() + + const tsCode = `const x: number = 1` + const pyCode = `x: int = 1` + + const tsResult = await parseCode(tsCode, 'typescript') + const pyResult = await parseCode(pyCode, 'python') + + // Each should parse with correct language grammar + expect(tsResult.tree.rootNode.type).toBe('program') + expect(pyResult.tree.rootNode.type).toBe('module') + }) + + test('clearGrammarCache forces reload', async () => { + await initializeParser() + + // Parse once to cache + const result1 = await parseCode('const x = 1', 'typescript') + expect(result1.error).toBeNull() + + // Clear cache + clearGrammarCache() + + // Parse again - should reload grammar + const result2 = await parseCode('const y = 2', 'typescript') + expect(result2.error).toBeNull() + expect(result2.tree.rootNode.type).toBe('program') + }) + + test('resetParser clears both parser and grammar cache', async () => { + await initializeParser() + + // Parse to establish state + await parseCode('const x = 1', 'typescript') + + // Reset everything + resetParser() + + // Should be able to reinitialize and parse + await initializeParser() + const result = await parseCode('const y = 2', 'typescript') expect(result.error).toBeNull() + }) + + test('multiple languages can be parsed in sequence', async () => { + await initializeParser() + + const languages = [ + { lang: 'typescript' as const, code: 'const x = 1' }, + { lang: 'javascript' as const, code: 'const x = 1' }, + { lang: 'python' as const, code: 'x = 1' }, + { lang: 'rust' as const, code: 'fn main() {}' }, + { lang: 'go' as const, code: 'package main' }, + { lang: 'java' as const, code: 'class X {}' }, + ] - // Check that we have function declarations - const root = result.tree.rootNode - const functions = 
root.children.filter( - (n) => n.type === 'function_declaration', - ) - expect(functions.length).toBe(2) + for (const { lang, code } of languages) { + const result = await parseCode(code, lang) + expect(result.error).toBeNull() + expect(result.tree).not.toBeNull() + } }) }) diff --git a/test/pipeline-debug.test.ts b/test/pipeline-debug.test.ts new file mode 100644 index 0000000..0a99da0 --- /dev/null +++ b/test/pipeline-debug.test.ts @@ -0,0 +1,835 @@ +import { beforeAll, describe, expect, test } from 'bun:test' +import { chunk, type Language } from '../src' +import { extractEntitiesAsync } from '../src/extract' +import { initializeParser, parseCode } from '../src/parser' +import { buildScopeTreeFromEntities } from '../src/scope' + +/** + * Pipeline Debug Tests + * + * These tests verify the output at each stage of the chunking pipeline: + * 1. Parse - tree-sitter AST generation + * 2. Extract - entity extraction (functions, classes, methods, etc.) + * 3. Scope - scope tree construction + * 4. Chunk - final chunking with context + * + * For each supported language, we verify: + * - Correct entity types and names + * - Accurate byte/line ranges + * - Proper parent/child relationships + * - Docstring extraction + * - Import detection + */ + +beforeAll(async () => { + await initializeParser() +}) + +// ============================================================================ +// TypeScript Pipeline Debug +// ============================================================================ + +describe('pipeline debug: TypeScript', () => { + const code = `import { Effect } from 'effect' + +/** + * A simple calculator class. + */ +export class Calculator { + private value: number = 0 + + /** Add a number to the current value. */ + add(n: number): number { + this.value += n + return this.value + } + + /** Subtract a number from the current value. */ + subtract(n: number): number { + this.value -= n + return this.value + } +} + +/** Create a new calculator instance. */ +function createCalculator(): Calculator { + return new Calculator() +}` + + test('step 1: parse - produces valid AST', async () => { + const result = await parseCode(code, 'typescript') + + expect(result.error).toBeNull() + expect(result.tree.rootNode.type).toBe('program') + + // Log AST structure + const children = result.tree.rootNode.namedChildren + console.log('\n[TypeScript] AST root children:') + for (const child of children) { + console.log( + ` - ${child.type} at lines ${child.startPosition.row}-${child.endPosition.row}`, + ) + } + + // Verify expected node types + const nodeTypes = children.map((c) => c.type) + expect(nodeTypes).toContain('import_statement') + expect(nodeTypes).toContain('export_statement') // class is wrapped in export + expect(nodeTypes).toContain('function_declaration') + }) + + test('step 2: extract - finds all entities', async () => { + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + + console.log('\n[TypeScript] Extracted entities:') + for (const e of entities) { + console.log( + ` - ${e.type}: ${e.name} (lines ${e.lineRange.start}-${e.lineRange.end}, parent: ${e.parent ?? 
'none'})`, + ) + if (e.docstring) { + console.log(` docstring: "${e.docstring.slice(0, 50)}..."`) + } + } + + // Verify entity counts + const imports = entities.filter((e) => e.type === 'import') + const classes = entities.filter((e) => e.type === 'class') + const methods = entities.filter((e) => e.type === 'method') + const functions = entities.filter((e) => e.type === 'function') + + expect(imports.length).toBeGreaterThanOrEqual(1) + expect(classes).toHaveLength(1) + expect(classes[0].name).toBe('Calculator') + expect(methods).toHaveLength(2) + expect(methods.map((m) => m.name).sort()).toEqual(['add', 'subtract']) + expect(functions).toHaveLength(1) + expect(functions[0].name).toBe('createCalculator') + + // Verify parent relationships + for (const method of methods) { + expect(method.parent).toBe('Calculator') + } + + // Verify docstrings (methods have docstrings, class may not due to export wrapping) + const addMethod = methods.find((m) => m.name === 'add') + expect(addMethod?.docstring).toContain('Add a number') + expect(functions[0].docstring).toContain('Create a new calculator') + }) + + test('step 3: scope - builds correct tree', async () => { + const result = await parseCode(code, 'typescript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'typescript', + code, + ) + const tree = buildScopeTreeFromEntities(entities) + + console.log('\n[TypeScript] Scope tree:') + console.log(` imports: ${tree.imports.length}`) + console.log(` exports: ${tree.exports.length}`) + console.log(` root nodes: ${tree.root.length}`) + for (const node of tree.root) { + console.log( + ` - ${node.entity.type}: ${node.entity.name} (${node.children.length} children)`, + ) + for (const child of node.children) { + console.log(` - ${child.entity.type}: ${child.entity.name}`) + } + } + + // Verify imports are separated + expect(tree.imports.length).toBeGreaterThanOrEqual(1) + expect(tree.imports[0].name).toBe('Effect') + expect(tree.imports[0].source).toBe('effect') + + // Verify class has method children + const classNode = tree.root.find((n) => n.entity.name === 'Calculator') + expect(classNode).toBeDefined() + expect(classNode?.children).toHaveLength(2) + expect(classNode?.children.map((c) => c.entity.name).sort()).toEqual([ + 'add', + 'subtract', + ]) + }) + + test('step 4: chunk - produces valid chunks with context', async () => { + const chunks = await chunk('calculator.ts', code, { maxChunkSize: 300 }) + + console.log('\n[TypeScript] Chunks:') + for (const c of chunks) { + console.log(` Chunk ${c.index}/${c.totalChunks}:`) + console.log(` bytes: ${c.byteRange.start}-${c.byteRange.end}`) + console.log(` lines: ${c.lineRange.start}-${c.lineRange.end}`) + console.log( + ` entities: ${c.context.entities.map((e) => e.name).join(', ')}`, + ) + console.log( + ` scope: ${c.context.scope.map((s) => s.name).join(' > ') || 'top-level'}`, + ) + console.log( + ` imports: ${c.context.imports.map((i) => i.name).join(', ') || 'none'}`, + ) + } + + // Verify chunks cover the code + expect(chunks.length).toBeGreaterThan(0) + + // Verify text matches byte range + for (const c of chunks) { + const sliced = code.slice(c.byteRange.start, c.byteRange.end) + expect(c.text).toBe(sliced) + } + + // Verify context is populated + const allEntities = chunks.flatMap((c) => c.context.entities) + expect(allEntities.some((e) => e.name === 'Calculator')).toBe(true) + }) +}) + +// ============================================================================ +// JavaScript Pipeline Debug +// 
============================================================================ + +describe('pipeline debug: JavaScript', () => { + const code = `const EventEmitter = require('events') + +/** + * A simple counter with events. + */ +class Counter extends EventEmitter { + constructor() { + super() + this.count = 0 + } + + increment() { + this.count++ + this.emit('change', this.count) + } + + decrement() { + this.count-- + this.emit('change', this.count) + } +} + +module.exports = { Counter }` + + test('step 1: parse - produces valid AST', async () => { + const result = await parseCode(code, 'javascript') + + expect(result.error).toBeNull() + expect(result.tree.rootNode.type).toBe('program') + + console.log('\n[JavaScript] AST root children:') + for (const child of result.tree.rootNode.namedChildren) { + console.log( + ` - ${child.type} at lines ${child.startPosition.row}-${child.endPosition.row}`, + ) + } + }) + + test('step 2: extract - finds all entities', async () => { + const result = await parseCode(code, 'javascript') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'javascript', + code, + ) + + console.log('\n[JavaScript] Extracted entities:') + for (const e of entities) { + console.log(` - ${e.type}: ${e.name} (parent: ${e.parent ?? 'none'})`) + } + + const classes = entities.filter((e) => e.type === 'class') + const methods = entities.filter((e) => e.type === 'method') + + expect(classes).toHaveLength(1) + expect(classes[0].name).toBe('Counter') + expect(methods.length).toBeGreaterThanOrEqual(2) + }) + + test('step 4: chunk - produces valid chunks', async () => { + const chunks = await chunk('counter.js', code) + + console.log('\n[JavaScript] Chunks:') + for (const c of chunks) { + console.log( + ` Chunk ${c.index}: lines ${c.lineRange.start}-${c.lineRange.end}, entities: ${c.context.entities.map((e) => e.name).join(', ')}`, + ) + } + + expect(chunks.length).toBeGreaterThan(0) + expect(chunks[0].context.language).toBe('javascript') + }) +}) + +// ============================================================================ +// Python Pipeline Debug +// ============================================================================ + +describe('pipeline debug: Python', () => { + const code = `from typing import Optional, List + +class UserService: + """Service for managing users.""" + + def __init__(self, db): + """Initialize with database connection.""" + self.db = db + + def get_user(self, user_id: int) -> Optional[dict]: + """Fetch a user by ID.""" + return self.db.query(user_id) + + def list_users(self) -> List[dict]: + """List all users.""" + return self.db.query_all() + +def create_service(db) -> UserService: + """Factory function for UserService.""" + return UserService(db)` + + test('step 1: parse - produces valid AST', async () => { + const result = await parseCode(code, 'python') + + expect(result.error).toBeNull() + expect(result.tree.rootNode.type).toBe('module') + + console.log('\n[Python] AST root children:') + for (const child of result.tree.rootNode.namedChildren) { + console.log( + ` - ${child.type} at lines ${child.startPosition.row}-${child.endPosition.row}`, + ) + } + }) + + test('step 2: extract - finds all entities with docstrings', async () => { + const result = await parseCode(code, 'python') + const entities = await extractEntitiesAsync( + result.tree.rootNode, + 'python', + code, + ) + + console.log('\n[Python] Extracted entities:') + for (const e of entities) { + console.log(` - ${e.type}: ${e.name}`) + if (e.docstring) { + console.log(` 
docstring: "${e.docstring}"`)
+      }
+    }
+
+    const classes = entities.filter((e) => e.type === 'class')
+    const functions = entities.filter((e) => e.type === 'function')
+
+    expect(classes).toHaveLength(1)
+    expect(classes[0].name).toBe('UserService')
+    expect(classes[0].docstring).toContain('managing users')
+
+    // Python methods are extracted as 'function' type
+    expect(functions.length).toBeGreaterThanOrEqual(4)
+    expect(functions.some((f) => f.name === '__init__')).toBe(true)
+    expect(functions.some((f) => f.name === 'create_service')).toBe(true)
+  })
+
+  test('step 4: chunk - produces valid chunks', async () => {
+    const chunks = await chunk('service.py', code)
+
+    console.log('\n[Python] Chunks:')
+    for (const c of chunks) {
+      console.log(
+        `  Chunk ${c.index}: lines ${c.lineRange.start}-${c.lineRange.end}, entities: ${c.context.entities.map((e) => e.name).join(', ')}`,
+      )
+    }
+
+    expect(chunks.length).toBeGreaterThan(0)
+    expect(chunks[0].context.language).toBe('python')
+  })
+})
+
+// ============================================================================
+// Rust Pipeline Debug
+// ============================================================================
+
+describe('pipeline debug: Rust', () => {
+  const code = `use std::collections::HashMap;
+
+/// A simple key-value store.
+pub struct Store {
+    data: HashMap<String, String>,
+}
+
+impl Store {
+    /// Create a new empty store.
+    pub fn new() -> Self {
+        Store {
+            data: HashMap::new(),
+        }
+    }
+
+    /// Get a value by key.
+    pub fn get(&self, key: &str) -> Option<&String> {
+        self.data.get(key)
+    }
+
+    /// Set a value for a key.
+    pub fn set(&mut self, key: String, value: String) {
+        self.data.insert(key, value);
+    }
+}`
+
+  test('step 1: parse - produces valid AST', async () => {
+    const result = await parseCode(code, 'rust')
+
+    expect(result.error).toBeNull()
+    expect(result.tree.rootNode.type).toBe('source_file')
+
+    console.log('\n[Rust] AST root children:')
+    for (const child of result.tree.rootNode.namedChildren) {
+      console.log(
+        `  - ${child.type} at lines ${child.startPosition.row}-${child.endPosition.row}`,
+      )
+    }
+  })
+
+  test('step 2: extract - finds structs and impl functions', async () => {
+    const result = await parseCode(code, 'rust')
+    const entities = await extractEntitiesAsync(
+      result.tree.rootNode,
+      'rust',
+      code,
+    )
+
+    console.log('\n[Rust] Extracted entities:')
+    for (const e of entities) {
+      console.log(`  - ${e.type}: ${e.name}`)
+      if (e.docstring) {
+        console.log(`    docstring: "${e.docstring}"`)
+      }
+    }
+
+    // Verify we find the struct and functions
+    const entityNames = entities.map((e) => e.name)
+    expect(entityNames).toContain('Store')
+    expect(entityNames).toContain('new')
+    expect(entityNames).toContain('get')
+    expect(entityNames).toContain('set')
+  })
+
+  test('step 4: chunk - produces valid chunks', async () => {
+    const chunks = await chunk('store.rs', code)
+
+    console.log('\n[Rust] Chunks:')
+    for (const c of chunks) {
+      console.log(
+        `  Chunk ${c.index}: lines ${c.lineRange.start}-${c.lineRange.end}, entities: ${c.context.entities.map((e) => e.name).join(', ')}`,
+      )
+    }
+
+    expect(chunks.length).toBeGreaterThan(0)
+    expect(chunks[0].context.language).toBe('rust')
+  })
+})
+
+// ============================================================================
+// Go Pipeline Debug
+// ============================================================================
+
+describe('pipeline debug: Go', () => {
+  const code = `package main
+
+import "fmt"
+
+// User represents a user in the system.
+type User struct {
+  ID   int
+  Name string
+}
+
+// NewUser creates a new user with the given name.
+func NewUser(name string) *User {
+  return &User{Name: name}
+}
+
+// Greet returns a greeting for the user.
+func (u *User) Greet() string {
+  return fmt.Sprintf("Hello, %s!", u.Name)
+}`
+
+  test('step 1: parse - produces valid AST', async () => {
+    const result = await parseCode(code, 'go')
+
+    expect(result.error).toBeNull()
+    expect(result.tree.rootNode.type).toBe('source_file')
+
+    console.log('\n[Go] AST root children:')
+    for (const child of result.tree.rootNode.namedChildren) {
+      console.log(
+        `  - ${child.type} at lines ${child.startPosition.row}-${child.endPosition.row}`,
+      )
+    }
+  })
+
+  test('step 2: extract - finds types, functions, and methods', async () => {
+    const result = await parseCode(code, 'go')
+    const entities = await extractEntitiesAsync(
+      result.tree.rootNode,
+      'go',
+      code,
+    )
+
+    console.log('\n[Go] Extracted entities:')
+    for (const e of entities) {
+      console.log(`  - ${e.type}: ${e.name}`)
+      if (e.docstring) {
+        console.log(`    docstring: "${e.docstring}"`)
+      }
+    }
+
+    const types = entities.filter((e) => e.type === 'type')
+    const functions = entities.filter((e) => e.type === 'function')
+    const methods = entities.filter((e) => e.type === 'method')
+
+    expect(types.some((t) => t.name === 'User')).toBe(true)
+    expect(functions.some((f) => f.name === 'NewUser')).toBe(true)
+    expect(methods.some((m) => m.name === 'Greet')).toBe(true)
+  })
+
+  test('step 4: chunk - produces valid chunks', async () => {
+    const chunks = await chunk('main.go', code)
+
+    console.log('\n[Go] Chunks:')
+    for (const c of chunks) {
+      console.log(
+        `  Chunk ${c.index}: lines ${c.lineRange.start}-${c.lineRange.end}, entities: ${c.context.entities.map((e) => e.name).join(', ')}`,
+      )
+    }
+
+    expect(chunks.length).toBeGreaterThan(0)
+    expect(chunks[0].context.language).toBe('go')
+  })
+})
+
+// ============================================================================
+// Java Pipeline Debug
+// ============================================================================
+
+describe('pipeline debug: Java', () => {
+  const code = `package com.example;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A simple task manager.
+ */
+public class TaskManager {
+    private List<String> tasks;
+
+    /**
+     * Create a new task manager.
+     */
+    public TaskManager() {
+        this.tasks = new ArrayList<>();
+    }
+
+    /**
+     * Add a task to the list.
+     * @param task the task to add
+     */
+    public void addTask(String task) {
+        tasks.add(task);
+    }
+
+    /**
+     * Get all tasks.
+     * @return list of all tasks
+     */
+    public List<String> getTasks() {
+        return new ArrayList<>(tasks);
+    }
+}`
+
+  test('step 1: parse - produces valid AST', async () => {
+    const result = await parseCode(code, 'java')
+
+    expect(result.error).toBeNull()
+    expect(result.tree.rootNode.type).toBe('program')
+
+    console.log('\n[Java] AST root children:')
+    for (const child of result.tree.rootNode.namedChildren) {
+      console.log(
+        `  - ${child.type} at lines ${child.startPosition.row}-${child.endPosition.row}`,
+      )
+    }
+  })
+
+  test('step 2: extract - finds class and methods with Javadoc', async () => {
+    const result = await parseCode(code, 'java')
+    const entities = await extractEntitiesAsync(
+      result.tree.rootNode,
+      'java',
+      code,
+    )
+
+    console.log('\n[Java] Extracted entities:')
+    for (const e of entities) {
+      console.log(`  - ${e.type}: ${e.name} (parent: ${e.parent ??
'none'})`) + if (e.docstring) { + console.log(` docstring: "${e.docstring.slice(0, 50)}..."`) + } + } + + const classes = entities.filter((e) => e.type === 'class') + const methods = entities.filter((e) => e.type === 'method') + + expect(classes).toHaveLength(1) + expect(classes[0].name).toBe('TaskManager') + expect(classes[0].docstring).toContain('task manager') + + expect(methods.length).toBeGreaterThanOrEqual(2) + expect(methods.some((m) => m.name === 'addTask')).toBe(true) + expect(methods.some((m) => m.name === 'getTasks')).toBe(true) + }) + + test('step 4: chunk - produces valid chunks', async () => { + const chunks = await chunk('TaskManager.java', code) + + console.log('\n[Java] Chunks:') + for (const c of chunks) { + console.log( + ` Chunk ${c.index}: lines ${c.lineRange.start}-${c.lineRange.end}, entities: ${c.context.entities.map((e) => e.name).join(', ')}`, + ) + } + + expect(chunks.length).toBeGreaterThan(0) + expect(chunks[0].context.language).toBe('java') + }) +}) + +// ============================================================================ +// Chunk Splitting Verification +// ============================================================================ + +describe('pipeline debug: chunk splitting', () => { + test('large code splits into multiple chunks correctly', async () => { + const code = `import { Database } from './db' +import { Logger } from './utils' + +/** + * Service for managing user accounts. + */ +export class UserService { + private db: Database + private logger: Logger + + constructor(db: Database, logger: Logger) { + this.db = db + this.logger = logger + } + + /** + * Fetch a user by ID. + */ + async getUser(id: string): Promise { + this.logger.info(\`Fetching user: \${id}\`) + return this.db.query('SELECT * FROM users WHERE id = ?', [id]) + } + + /** + * Create a new user. 
+ */ + async createUser(data: CreateUserInput): Promise { + this.logger.info('Creating new user') + const result = await this.db.insert('users', data) + return { id: result.insertId, ...data } + } +} + +function validateUserInput(input: unknown): boolean { + return typeof input === 'object' && input !== null +}` + + const chunks = await chunk('user-service.ts', code, { maxChunkSize: 300 }) + + console.log('\n[Split Test] Chunk count:', chunks.length) + + // Should produce multiple chunks + expect(chunks.length).toBeGreaterThan(1) + + // Verify no overlaps + const sorted = [...chunks].sort( + (a, b) => a.byteRange.start - b.byteRange.start, + ) + for (let i = 1; i < sorted.length; i++) { + const prev = sorted[i - 1] + const curr = sorted[i] + expect(curr.byteRange.start).toBeGreaterThanOrEqual(prev.byteRange.end) + } + + // Verify all text matches byte slices + for (const c of chunks) { + const sliced = code.slice(c.byteRange.start, c.byteRange.end) + expect(c.text).toBe(sliced) + } + + // Verify partial entities are marked correctly + const partialChunks = chunks.filter((c) => + c.context.entities.some((e) => e.isPartial), + ) + console.log( + '[Split Test] Chunks with partial entities:', + partialChunks.length, + ) + + // If UserService spans multiple chunks, it should be marked partial + const userServiceChunks = chunks.filter((c) => + c.context.entities.some((e) => e.name === 'UserService'), + ) + if (userServiceChunks.length > 1) { + // All but possibly the last should have it marked as partial + const partialUserService = userServiceChunks.filter((c) => + c.context.entities.some((e) => e.name === 'UserService' && e.isPartial), + ) + expect(partialUserService.length).toBeGreaterThan(0) + } + + // Verify scope chain for nested chunks + for (const c of chunks) { + const methods = c.context.entities.filter((e) => e.type === 'method') + if (methods.length > 0) { + // Chunks with methods should have UserService in scope + const hasUserServiceInScope = c.context.scope.some( + (s) => s.name === 'UserService', + ) + console.log( + `[Split Test] Chunk ${c.index} has methods: ${methods.map((m) => m.name).join(', ')}, UserService in scope: ${hasUserServiceInScope}`, + ) + } + } + }) + + test('siblings are populated correctly', async () => { + const code = `function first() { return 1 } +function second() { return 2 } +function third() { return 3 } +function fourth() { return 4 }` + + // Use small chunk size to force multiple chunks so siblings are visible + const chunks = await chunk('funcs.ts', code, { + maxChunkSize: 50, + siblingDetail: 'names', + }) + + console.log('\n[Siblings Test] Chunks:', chunks.length) + for (const c of chunks) { + console.log( + ` Chunk ${c.index}: entities=${c.context.entities.map((e) => e.name).join(',')}, siblings=${c.context.siblings.map((s) => `${s.name}(${s.position})`).join(',')}`, + ) + } + + // At least one chunk should have siblings + const hasSiblings = chunks.some((c) => c.context.siblings.length > 0) + expect(hasSiblings).toBe(true) + + // Verify sibling positions make sense + for (const c of chunks) { + for (const sibling of c.context.siblings) { + expect(['before', 'after']).toContain(sibling.position) + expect(sibling.distance).toBeGreaterThan(0) + } + } + }) + + test('imports are included in context', async () => { + const code = `import { Effect, Context } from 'effect' +import type { Option } from 'effect/Option' + +function useEffect(): Effect.Effect { + return Effect.void +}` + + const chunks = await chunk('effect-usage.ts', code) + + 
console.log('\n[Imports Test] Chunks:', chunks.length) + for (const c of chunks) { + console.log( + ` Chunk ${c.index}: imports=${c.context.imports.map((i) => `${i.name}(${i.source})`).join(', ')}`, + ) + } + + // All chunks should have imports in context + for (const c of chunks) { + expect(c.context.imports.length).toBeGreaterThan(0) + } + + // Verify import sources + const allImports = chunks.flatMap((c) => c.context.imports) + expect(allImports.some((i) => i.source === 'effect')).toBe(true) + }) +}) + +// ============================================================================ +// Cross-Language Consistency +// ============================================================================ + +describe('pipeline debug: cross-language consistency', () => { + const samples: { + lang: Language + path: string + code: string + expectedClass: string + expectedMethod: string + }[] = [ + { + lang: 'typescript', + path: 'test.ts', + code: `class Foo { bar(): void { console.log('hello') } }`, + expectedClass: 'Foo', + expectedMethod: 'bar', + }, + { + lang: 'javascript', + path: 'test.js', + code: `class Foo { bar() { console.log('hello') } }`, + expectedClass: 'Foo', + expectedMethod: 'bar', + }, + { + lang: 'python', + path: 'test.py', + code: `class Foo:\n def bar(self):\n print('hello')`, + expectedClass: 'Foo', + expectedMethod: 'bar', + }, + { + lang: 'java', + path: 'Foo.java', + code: `public class Foo { void bar() { System.out.println("hello"); } }`, + expectedClass: 'Foo', + expectedMethod: 'bar', + }, + ] + + for (const sample of samples) { + test(`${sample.lang}: extracts class and method consistently`, async () => { + const chunks = await chunk(sample.path, sample.code) + + const allEntities = chunks.flatMap((c) => c.context.entities) + const entityNames = allEntities.map((e) => e.name) + + console.log(`\n[${sample.lang}] Entities: ${entityNames.join(', ')}`) + + expect(entityNames).toContain(sample.expectedClass) + expect(entityNames).toContain(sample.expectedMethod) + }) + } +}) diff --git a/test/scope.test.ts b/test/scope.test.ts index 43dffb1..a324f6f 100644 --- a/test/scope.test.ts +++ b/test/scope.test.ts @@ -71,6 +71,24 @@ describe('rangeContains', () => { expect(rangeContains(outer, innerAtStart)).toBe(true) expect(rangeContains(outer, innerAtEnd)).toBe(true) }) + + test('returns true for zero-length inner range at boundary', () => { + const outer = { start: 0, end: 100 } + const zeroLengthStart = { start: 0, end: 0 } + const zeroLengthMid = { start: 50, end: 50 } + const zeroLengthEnd = { start: 100, end: 100 } + expect(rangeContains(outer, zeroLengthStart)).toBe(true) + expect(rangeContains(outer, zeroLengthMid)).toBe(true) + expect(rangeContains(outer, zeroLengthEnd)).toBe(true) + }) + + test('returns false for zero-length inner range outside outer', () => { + const outer = { start: 10, end: 50 } + const zeroLengthBefore = { start: 5, end: 5 } + const zeroLengthAfter = { start: 60, end: 60 } + expect(rangeContains(outer, zeroLengthBefore)).toBe(false) + expect(rangeContains(outer, zeroLengthAfter)).toBe(false) + }) }) // ============================================================================ @@ -85,12 +103,18 @@ describe('buildScopeTreeFromEntities', () => { const entities = await getEntities(code, 'typescript') const tree = buildScopeTreeFromEntities(entities) - expect(tree.root.length).toBe(1) - expect(tree.root[0]?.entity.name).toBe('greet') - expect(tree.root[0]?.entity.type).toBe('function') + expect(tree.root).toHaveLength(1) + 
expect(tree.root[0]).toMatchObject({ + entity: { + name: 'greet', + type: 'function', + }, + children: [], + parent: null, + }) }) - test('builds tree with class and nested methods', async () => { + test('builds tree with class and nested methods with exact structure', async () => { const code = `class Calculator { add(a: number, b: number): number { return a + b @@ -104,18 +128,105 @@ describe('buildScopeTreeFromEntities', () => { const tree = buildScopeTreeFromEntities(entities) // Should have one root: the class - const classNode = tree.root.find((n) => n.entity.name === 'Calculator') - expect(classNode).toBeDefined() - expect(classNode?.entity.type).toBe('class') + expect(tree.root).toHaveLength(1) + const classNode = tree.root[0] + expect(classNode).toMatchObject({ + entity: { + name: 'Calculator', + type: 'class', + }, + parent: null, + }) + + // Class should have exactly 2 method children + expect(classNode?.children).toHaveLength(2) + + // Verify children are in source order + expect(classNode?.children[0]?.entity.name).toBe('add') + expect(classNode?.children[1]?.entity.name).toBe('subtract') + + // Verify method byte ranges are contained within class range + if (classNode) { + const classRange = classNode.entity.byteRange + for (const child of classNode.children) { + expect(child.entity.byteRange.start).toBeGreaterThanOrEqual( + classRange.start, + ) + expect(child.entity.byteRange.end).toBeLessThanOrEqual(classRange.end) + } + } + }) + + test('verifies byte range containment (parent contains children)', async () => { + const code = `class Outer { + innerMethod() { + function nestedFn() { + return 1 + } + return nestedFn() + } +}` + const entities = await getEntities(code, 'typescript') + const tree = buildScopeTreeFromEntities(entities) + + const outerClass = tree.root.find((n) => n.entity.name === 'Outer') + expect(outerClass).toBeDefined() + + // Recursively verify all children are within parent byte ranges + const verifyByteRangeContainment = ( + node: (typeof tree.root)[0], + parentRange?: { start: number; end: number }, + ) => { + if (parentRange) { + expect(node.entity.byteRange.start).toBeGreaterThanOrEqual( + parentRange.start, + ) + expect(node.entity.byteRange.end).toBeLessThanOrEqual(parentRange.end) + } + for (const child of node.children) { + verifyByteRangeContainment(child, node.entity.byteRange) + } + } - // Class should have method children - expect(classNode?.children.length).toBe(2) - const methodNames = classNode?.children.map((c) => c.entity.name) - expect(methodNames).toContain('add') - expect(methodNames).toContain('subtract') + for (const root of tree.root) { + verifyByteRangeContainment(root) + } }) - test('separates imports from tree structure', async () => { + test('verifies children are ordered by source position', async () => { + const code = `class MultiMethod { + first() { return 1 } + second() { return 2 } + third() { return 3 } +}` + const entities = await getEntities(code, 'typescript') + const tree = buildScopeTreeFromEntities(entities) + + const classNode = tree.root.find((n) => n.entity.name === 'MultiMethod') + expect(classNode?.children).toHaveLength(3) + + // Verify ordering by byte position + if (classNode) { + for (let i = 1; i < classNode.children.length; i++) { + const prevChild = classNode.children[i - 1] + const currChild = classNode.children[i] + if (prevChild && currChild) { + expect(currChild.entity.byteRange.start).toBeGreaterThan( + prevChild.entity.byteRange.start, + ) + } + } + } + + // Verify exact order + 
expect(classNode?.children.map((c) => c.entity.name)).toEqual([ + 'first', + 'second', + 'third', + ]) + }) + + test('separates imports from tree structure with exact counts', async () => { const code = `import { Effect } from 'effect' import type { Option } from 'effect/Option' @@ -124,12 +235,26 @@ function test() { return 1 }` const tree = buildScopeTreeFromEntities(entities) // Imports should be in imports array, not in root - expect(tree.imports.length).toBeGreaterThan(0) - expect(tree.imports.every((e) => e.type === 'import')).toBe(true) - - // Root should have the function - const fnNode = tree.root.find((n) => n.entity.name === 'test') - expect(fnNode).toBeDefined() + expect(tree.imports).toHaveLength(2) + expect(tree.imports[0]).toMatchObject({ + type: 'import', + name: 'Effect', + source: 'effect', + }) + expect(tree.imports[1]).toMatchObject({ + type: 'import', + name: 'Option', + source: 'effect/Option', + }) + + // Root should have only the function + expect(tree.root).toHaveLength(1) + expect(tree.root[0]).toMatchObject({ + entity: { + name: 'test', + type: 'function', + }, + }) }) test('separates exports from tree structure', async () => { @@ -138,11 +263,13 @@ export default function defaultFn() { return 2 }` const entities = await getEntities(code, 'typescript') const tree = buildScopeTreeFromEntities(entities) - // Exports should be captured - expect(tree.exports.length).toBeGreaterThanOrEqual(0) // May vary by query + // Root should have functions (exports are the functions themselves) + expect(tree.root.length).toBeGreaterThanOrEqual(1) + const fnNames = tree.root.map((n) => n.entity.name) + expect(fnNames).toContain('publicFn') }) - test('handles deeply nested structures', async () => { + test('handles deeply nested structures with depth verification', async () => { const code = `class Outer { innerMethod() { function nestedFn() { @@ -156,10 +283,27 @@ export default function defaultFn() { return 2 }` // Should have class at root const outerClass = tree.root.find((n) => n.entity.name === 'Outer') - expect(outerClass).toBeDefined() + expect(outerClass).toMatchObject({ + entity: { name: 'Outer', type: 'class' }, + parent: null, + }) + + // Find innerMethod + const innerMethod = outerClass?.children.find( + (n) => n.entity.name === 'innerMethod', + ) + expect(innerMethod).toBeDefined() + expect(innerMethod?.parent).toBe(outerClass) + + // Check nesting depth via ancestor chain + if (innerMethod) { + const methodAncestors = getAncestorChain(innerMethod) + expect(methodAncestors).toHaveLength(1) + expect(methodAncestors[0]?.entity.name).toBe('Outer') + } }) - test('allEntities contains all extracted entities', async () => { + test('allEntities contains all extracted entities with exact count', async () => { const code = `import { foo } from 'bar' class MyClass { @@ -171,7 +315,9 @@ function standalone() { return 2 }` const tree = buildScopeTreeFromEntities(entities) // allEntities should have everything - expect(tree.allEntities.length).toBe(entities.length) + expect(tree.allEntities).toHaveLength(entities.length) + // Verify exact count: 1 import + 1 class + 1 method + 1 function = 4 + expect(tree.allEntities.length).toBe(4) }) test('handles empty entity list', () => { @@ -189,20 +335,30 @@ function standalone() { return 2 }` // ============================================================================ describe('buildScopeTree', () => { - test('returns Effect with scope tree', async () => { + test('returns Effect with scope tree with exact structure', async () => { const 
code = `function test() { return 1 }` const entities = await getEntities(code, 'typescript') const tree = await Effect.runPromise(buildScopeTree(entities)) - expect(tree.root.length).toBe(1) - expect(tree.root[0]?.entity.name).toBe('test') + expect(tree.root).toHaveLength(1) + expect(tree.root[0]).toMatchObject({ + entity: { + name: 'test', + type: 'function', + }, + parent: null, + children: [], + }) }) test('handles errors gracefully', async () => { // Even with empty input, should not fail const tree = await Effect.runPromise(buildScopeTree([])) expect(tree.root).toEqual([]) + expect(tree.imports).toEqual([]) + expect(tree.exports).toEqual([]) + expect(tree.allEntities).toEqual([]) }) }) @@ -211,18 +367,34 @@ describe('buildScopeTree', () => { // ============================================================================ describe('buildScopeTreeSync', () => { - test('builds tree synchronously', async () => { + test('builds tree synchronously with correct structure', async () => { const code = `class Foo { bar() { return 1 } }` const entities = await getEntities(code, 'typescript') const tree = buildScopeTreeSync(entities) - expect(tree.root.length).toBeGreaterThan(0) + expect(tree.root).toHaveLength(1) + expect(tree.root[0]).toMatchObject({ + entity: { + name: 'Foo', + type: 'class', + }, + }) + expect(tree.root[0]?.children).toHaveLength(1) + expect(tree.root[0]?.children[0]).toMatchObject({ + entity: { + name: 'bar', + type: 'method', + }, + }) }) test('handles empty input', () => { const tree = buildScopeTreeSync([]) expect(tree.root).toEqual([]) + expect(tree.imports).toEqual([]) + expect(tree.exports).toEqual([]) + expect(tree.allEntities).toEqual([]) }) }) @@ -231,7 +403,7 @@ describe('buildScopeTreeSync', () => { // ============================================================================ describe('findScopeAtOffset', () => { - test('finds scope node containing offset', async () => { + test('finds scope node containing offset with exact match', async () => { const code = `class Calculator { add(a: number, b: number): number { return a + b @@ -244,15 +416,15 @@ describe('findScopeAtOffset', () => { const addMethod = entities.find( (e) => e.name === 'add' && e.type === 'method', ) - if (addMethod) { - const midpoint = Math.floor( - (addMethod.byteRange.start + addMethod.byteRange.end) / 2, - ) - const scope = findScopeAtOffset(tree, midpoint) - - expect(scope).not.toBeNull() - expect(scope?.entity.name).toBe('add') - } + expect(addMethod).toBeDefined() + const midpoint = Math.floor( + (addMethod?.byteRange.start + addMethod?.byteRange.end) / 2, + ) + const scope = findScopeAtOffset(tree, midpoint) + + expect(scope).not.toBeNull() + expect(scope?.entity.name).toBe('add') + expect(scope?.entity.type).toBe('method') }) test('finds deepest scope when nested', async () => { @@ -266,12 +438,36 @@ describe('findScopeAtOffset', () => { // Find method's byte range const method = entities.find((e) => e.name === 'method') - if (method) { - const offset = method.byteRange.start + 5 // Inside method - const scope = findScopeAtOffset(tree, offset) + expect(method).toBeDefined() + const offset = method?.byteRange.start + 5 // Inside method + const scope = findScopeAtOffset(tree, offset) + + // Should find the method, not the class + expect(scope).toMatchObject({ + entity: { + name: 'method', + type: 'method', + }, + }) + }) - // Should find the method, not the class - expect(scope?.entity.name).toBe('method') + test('finds class scope at offset before method starts', async () => { + const 
code = `class Outer { + method() { return 1 } +}` + const entities = await getEntities(code, 'typescript') + const tree = buildScopeTreeFromEntities(entities) + + const classEntity = entities.find((e) => e.name === 'Outer') + const methodEntity = entities.find((e) => e.name === 'method') + expect(classEntity).toBeDefined() + expect(methodEntity).toBeDefined() + + // Offset at start of class but before method + const offsetInClass = classEntity?.byteRange.start + 1 + if (offsetInClass < methodEntity?.byteRange.start) { + const scope = findScopeAtOffset(tree, offsetInClass) + expect(scope?.entity.name).toBe('Outer') } }) @@ -285,6 +481,15 @@ describe('findScopeAtOffset', () => { expect(scope).toBeNull() }) + test('returns null for negative offset', async () => { + const code = `function test() { return 1 }` + const entities = await getEntities(code, 'typescript') + const tree = buildScopeTreeFromEntities(entities) + + const scope = findScopeAtOffset(tree, -1) + expect(scope).toBeNull() + }) + test('returns null for empty tree', () => { const tree: ScopeTree = { root: [], @@ -296,6 +501,26 @@ describe('findScopeAtOffset', () => { const scope = findScopeAtOffset(tree, 0) expect(scope).toBeNull() }) + + test('finds correct scope at exact boundary', async () => { + const code = `function first() { return 1 } +function second() { return 2 }` + const entities = await getEntities(code, 'typescript') + const tree = buildScopeTreeFromEntities(entities) + + const first = entities.find((e) => e.name === 'first') + const second = entities.find((e) => e.name === 'second') + expect(first).toBeDefined() + expect(second).toBeDefined() + + // At exact start of first function + const scopeAtStart = findScopeAtOffset(tree, first?.byteRange.start) + expect(scopeAtStart?.entity.name).toBe('first') + + // At exact start of second function + const scopeAtSecondStart = findScopeAtOffset(tree, second?.byteRange.start) + expect(scopeAtSecondStart?.entity.name).toBe('second') + }) }) // ============================================================================ @@ -309,13 +534,12 @@ describe('getAncestorChain', () => { const tree = buildScopeTreeFromEntities(entities) const fnNode = tree.root[0] - if (fnNode) { - const ancestors = getAncestorChain(fnNode) - expect(ancestors).toEqual([]) - } + expect(fnNode).toBeDefined() + const ancestors = getAncestorChain(fnNode!) + expect(ancestors).toEqual([]) }) - test('returns parent chain for nested node', async () => { + test('returns parent chain for nested node with exact length', async () => { const code = `class Outer { method() { return 1 } }` @@ -328,10 +552,36 @@ describe('getAncestorChain', () => { (n) => n.entity.name === 'method', ) - if (methodNode) { - const ancestors = getAncestorChain(methodNode) - expect(ancestors.length).toBe(1) - expect(ancestors[0]?.entity.name).toBe('Outer') + expect(methodNode).toBeDefined() + const ancestors = getAncestorChain(methodNode!) 
+ expect(ancestors).toHaveLength(1) + expect(ancestors[0]).toBe(classNode) + expect(ancestors[0]?.entity.name).toBe('Outer') + }) + + test('returns correct ancestor chain for deeply nested node', async () => { + const code = `class Level1 { + level2Method() { + function level3() { + return 1 + } + return level3() + } +}` + const entities = await getEntities(code, 'typescript') + const tree = buildScopeTreeFromEntities(entities) + + const level1 = tree.root.find((n) => n.entity.name === 'Level1') + const level2 = level1?.children.find( + (n) => n.entity.name === 'level2Method', + ) + const level3 = level2?.children.find((n) => n.entity.name === 'level3') + + if (level3) { + const ancestors = getAncestorChain(level3) + expect(ancestors).toHaveLength(2) + expect(ancestors[0]?.entity.name).toBe('level2Method') + expect(ancestors[1]?.entity.name).toBe('Level1') } }) }) @@ -341,7 +591,7 @@ describe('getAncestorChain', () => { // ============================================================================ describe('flattenScopeTree', () => { - test('flattens tree to array of all scope nodes', async () => { + test('flattens tree to array of all scope nodes with exact count', async () => { const code = `class Outer { method1() { return 1 } method2() { return 2 } @@ -353,7 +603,9 @@ function standalone() { return 3 }` const flattened = flattenScopeTree(tree) - // Should include class, both methods, and standalone function + // Should include class, both methods, and standalone function = 4 + expect(flattened).toHaveLength(4) + const names = flattened.map((n) => n.entity.name) expect(names).toContain('Outer') expect(names).toContain('method1') @@ -361,6 +613,23 @@ function standalone() { return 3 }` expect(names).toContain('standalone') }) + test('flattens in DFS order', async () => { + const code = `class Parent { + child1() { return 1 } + child2() { return 2 } +}` + const entities = await getEntities(code, 'typescript') + const tree = buildScopeTreeFromEntities(entities) + + const flattened = flattenScopeTree(tree) + const names = flattened.map((n) => n.entity.name) + + // DFS: Parent first, then its children + expect(names[0]).toBe('Parent') + expect(names).toContain('child1') + expect(names).toContain('child2') + }) + test('returns empty array for empty tree', () => { const tree: ScopeTree = { root: [], @@ -379,7 +648,7 @@ function standalone() { return 3 }` // ============================================================================ describe('parent/child relationships', () => { - test('child nodes have parent reference set', async () => { + test('child nodes have parent reference set correctly', async () => { const code = `class Parent { child() { return 1 } }` @@ -387,9 +656,12 @@ describe('parent/child relationships', () => { const tree = buildScopeTreeFromEntities(entities) const parentNode = tree.root.find((n) => n.entity.name === 'Parent') - const childNode = parentNode?.children[0] + expect(parentNode).toBeDefined() + expect(parentNode?.children).toHaveLength(1) + const childNode = parentNode?.children[0] expect(childNode?.parent).toBe(parentNode) + expect(childNode?.parent?.entity.name).toBe('Parent') }) test('root nodes have null parent', async () => { @@ -397,8 +669,44 @@ describe('parent/child relationships', () => { const entities = await getEntities(code, 'typescript') const tree = buildScopeTreeFromEntities(entities) + expect(tree.root).toHaveLength(1) expect(tree.root[0]?.parent).toBeNull() }) + + test('entity.parent string field matches scope parent', async () => { + const 
code = `class Container { + contained() { return 1 } +}` + const entities = await getEntities(code, 'typescript') + const tree = buildScopeTreeFromEntities(entities) + + const containerNode = tree.root.find((n) => n.entity.name === 'Container') + const containedNode = containerNode?.children[0] + + // The ScopeNode parent reference + expect(containedNode?.parent?.entity.name).toBe('Container') + + // The ExtractedEntity.parent string field (set during extraction) + expect(containedNode?.entity.parent).toBe('Container') + }) + + test('deeply nested parent references are correct', async () => { + const code = `class Level1 { + level2() { + function level3() { return 1 } + } +}` + const entities = await getEntities(code, 'typescript') + const tree = buildScopeTreeFromEntities(entities) + + const level1 = tree.root.find((n) => n.entity.name === 'Level1') + const level2 = level1?.children[0] + const level3 = level2?.children[0] + + expect(level1?.parent).toBeNull() + expect(level2?.parent).toBe(level1) + expect(level3?.parent).toBe(level2) + }) }) // ============================================================================ @@ -406,7 +714,7 @@ describe('parent/child relationships', () => { // ============================================================================ describe('multi-language scope trees', () => { - test('builds scope tree for Python', async () => { + test('builds scope tree for Python with exact structure', async () => { const code = `class Calculator: def add(self, a, b): return a + b @@ -417,11 +725,30 @@ describe('multi-language scope trees', () => { const tree = buildScopeTreeFromEntities(entities) const cls = tree.root.find((n) => n.entity.name === 'Calculator') - expect(cls).toBeDefined() - expect(cls?.children.length).toBe(2) + expect(cls).toMatchObject({ + entity: { + name: 'Calculator', + type: 'class', + }, + parent: null, + }) + expect(cls?.children).toHaveLength(2) + + // Verify Python methods are nested under class (Python extracts methods as 'function' type) + expect(cls?.children[0]?.entity.type).toBe('function') + expect(cls?.children[1]?.entity.type).toBe('function') + expect(cls?.children[0]?.entity.name).toBe('add') + expect(cls?.children[1]?.entity.name).toBe('subtract') + + // Verify byte range containment + const classRange = cls?.entity.byteRange + for (const child of cls?.children) { + expect(child.entity.byteRange.start).toBeGreaterThan(classRange.start) + expect(child.entity.byteRange.end).toBeLessThanOrEqual(classRange.end) + } }) - test('builds scope tree for Rust', async () => { + test('builds scope tree for Rust with struct and impl', async () => { const code = `struct Calculator {} impl Calculator { @@ -434,9 +761,15 @@ impl Calculator { // Should have struct and/or impl at root expect(tree.root.length).toBeGreaterThan(0) + + // Check for function in the tree + const flattened = flattenScopeTree(tree) + const addFn = flattened.find((n) => n.entity.name === 'add') + expect(addFn).toBeDefined() + expect(addFn?.entity.type).toBe('function') }) - test('builds scope tree for Go', async () => { + test('builds scope tree for Go with exact function count', async () => { const code = `package main func add(a, b int) int { @@ -450,12 +783,18 @@ func subtract(a, b int) int { const tree = buildScopeTreeFromEntities(entities) // Should have both functions at root + expect(tree.root).toHaveLength(2) const fnNames = tree.root.map((n) => n.entity.name) - expect(fnNames).toContain('add') - expect(fnNames).toContain('subtract') + expect(fnNames).toEqual(['add', 
'subtract']) + + // Go functions should have no nesting + for (const node of tree.root) { + expect(node.parent).toBeNull() + expect(node.children).toHaveLength(0) + } }) - test('builds scope tree for Java', async () => { + test('builds scope tree for Java with class nesting', async () => { const code = `public class Calculator { public int add(int a, int b) { return a + b; @@ -464,8 +803,40 @@ func subtract(a, b int) int { const entities = await getEntities(code, 'java') const tree = buildScopeTreeFromEntities(entities) - const cls = tree.root.find((n) => n.entity.name === 'Calculator') - expect(cls).toBeDefined() + expect(tree.root).toHaveLength(1) + const cls = tree.root[0] + expect(cls).toMatchObject({ + entity: { + name: 'Calculator', + type: 'class', + }, + parent: null, + }) + + // Method should be nested under class + expect(cls?.children).toHaveLength(1) + expect(cls?.children[0]).toMatchObject({ + entity: { + name: 'add', + type: 'method', + }, + }) + expect(cls?.children[0]?.parent).toBe(cls) + }) + + test('JavaScript class has proper nesting', async () => { + const code = `class MyClass { + myMethod() { + return 42 + } +}` + const entities = await getEntities(code, 'javascript') + const tree = buildScopeTreeFromEntities(entities) + + expect(tree.root).toHaveLength(1) + expect(tree.root[0]?.entity.name).toBe('MyClass') + expect(tree.root[0]?.children).toHaveLength(1) + expect(tree.root[0]?.children[0]?.entity.name).toBe('myMethod') }) }) @@ -474,30 +845,33 @@ func subtract(a, b int) int { // ============================================================================ describe('context attachment', () => { - test('getEntitiesInRange returns entities with isPartial flag', async () => { + test('getEntitiesInRange returns entities with exact isPartial=false for full range', async () => { const code = `function foo() { return 1 } function bar() { return 2 } function baz() { return 3 }` const entities = await getEntities(code, 'typescript') const tree = buildScopeTreeFromEntities(entities) - // Import the function we need to test const { getEntitiesInRange } = await import('../src/context/index') // Get entities for a range that fully contains 'bar' but not 'foo' or 'baz' const barEntity = entities.find((e) => e.name === 'bar') - if (barEntity) { - const entitiesInRange = getEntitiesInRange(barEntity.byteRange, tree) - - // Should find bar - const bar = entitiesInRange.find((e) => e.name === 'bar') - expect(bar).toBeDefined() - // bar should NOT be partial since we're using its exact range - expect(bar?.isPartial).toBe(false) - } + expect(barEntity).toBeDefined() + + const entitiesInRange = getEntitiesInRange(barEntity?.byteRange, tree) + + // Should find bar + const bar = entitiesInRange.find((e) => e.name === 'bar') + expect(bar).toBeDefined() + // bar should NOT be partial since we're using its exact range + expect(bar?.isPartial).toBe(false) + + // Should not find foo or baz (non-overlapping) + expect(entitiesInRange.find((e) => e.name === 'foo')).toBeUndefined() + expect(entitiesInRange.find((e) => e.name === 'baz')).toBeUndefined() }) - test('getEntitiesInRange marks partial entities correctly', async () => { + test('getEntitiesInRange marks partial entities correctly with exact values', async () => { const code = `class BigClass { method1() { return 1 } method2() { return 2 } @@ -510,19 +884,50 @@ function baz() { return 3 }` // Get just method2's range - this should be inside BigClass const method2 = entities.find((e) => e.name === 'method2') - if (method2) { - const 
entitiesInRange = getEntitiesInRange(method2.byteRange, tree) + expect(method2).toBeDefined() - // method2 should not be partial (its full range is included) - const m2 = entitiesInRange.find((e) => e.name === 'method2') - expect(m2?.isPartial).toBe(false) + const entitiesInRange = getEntitiesInRange(method2?.byteRange, tree) - // BigClass should be partial (we only have a slice of it) - const cls = entitiesInRange.find((e) => e.name === 'BigClass') - if (cls) { - expect(cls.isPartial).toBe(true) - } + // method2 should not be partial (its full range is included) + const m2 = entitiesInRange.find((e) => e.name === 'method2') + expect(m2).toBeDefined() + expect(m2?.isPartial).toBe(false) + + // BigClass should be partial (we only have a slice of it) + const cls = entitiesInRange.find((e) => e.name === 'BigClass') + expect(cls).toBeDefined() + expect(cls?.isPartial).toBe(true) + + // method1 and method3 should not be in range + expect(entitiesInRange.find((e) => e.name === 'method1')).toBeUndefined() + expect(entitiesInRange.find((e) => e.name === 'method3')).toBeUndefined() + }) + + test('getEntitiesInRange isPartial is true when range cuts through entity', async () => { + const code = `function longFunction() { + const a = 1 + const b = 2 + const c = 3 + return a + b + c +}` + const entities = await getEntities(code, 'typescript') + const tree = buildScopeTreeFromEntities(entities) + + const { getEntitiesInRange } = await import('../src/context/index') + + const fn = entities.find((e) => e.name === 'longFunction') + expect(fn).toBeDefined() + + // Range that cuts through the function (starts at function start, ends before function end) + const partialRange = { + start: fn?.byteRange.start, + end: fn?.byteRange.start + 20, } + const entitiesInRange = getEntitiesInRange(partialRange, tree) + + const longFn = entitiesInRange.find((e) => e.name === 'longFunction') + expect(longFn).toBeDefined() + expect(longFn?.isPartial).toBe(true) }) test('getEntitiesInRange includes docstring and lineRange', async () => { @@ -538,85 +943,153 @@ function documented() { const { getEntitiesInRange } = await import('../src/context/index') const fn = entities.find((e) => e.name === 'documented') - if (fn) { - const entitiesInRange = getEntitiesInRange(fn.byteRange, tree) - const docFn = entitiesInRange.find((e) => e.name === 'documented') - - expect(docFn).toBeDefined() - expect(docFn?.lineRange).toBeDefined() - // Docstring should be present if extracted - if (fn.docstring) { - expect(docFn?.docstring).toContain('test function') - } + expect(fn).toBeDefined() + + const entitiesInRange = getEntitiesInRange(fn?.byteRange, tree) + const docFn = entitiesInRange.find((e) => e.name === 'documented') + + expect(docFn).toBeDefined() + expect(docFn?.lineRange).toMatchObject({ + start: expect.any(Number), + end: expect.any(Number), + }) + expect(docFn?.isPartial).toBe(false) + + // Docstring should be present if extracted + if (fn?.docstring) { + expect(docFn?.docstring).toContain('test function') } }) - test('attachContext includes filepath and language', async () => { - const { Effect } = await import('effect') - const { attachContext } = await import('../src/context/index') + test('getEntitiesInRange returns empty array for non-overlapping range', async () => { + const code = `function only() { return 1 }` + const entities = await getEntities(code, 'typescript') + const tree = buildScopeTreeFromEntities(entities) + + const { getEntitiesInRange } = await import('../src/context/index') + + // Range completely after the 
function + const nonOverlappingRange = { start: 1000, end: 2000 } + const entitiesInRange = getEntitiesInRange(nonOverlappingRange, tree) + + expect(entitiesInRange).toEqual([]) + }) +}) + +// ============================================================================ +// Imports/Exports Array Tests +// ============================================================================ + +describe('imports and exports arrays', () => { + test('imports array has exact count and structure', async () => { + const code = `import { a, b, c } from 'module1' +import defaultExport from 'module2' +import * as namespace from 'module3' - const code = `function test() { return 1 }` +function main() { return 1 }` const entities = await getEntities(code, 'typescript') const tree = buildScopeTreeFromEntities(entities) - const fn = entities[0] - if (fn) { - const mockText = { - text: code, - byteRange: { start: 0, end: code.length }, - lineRange: { start: 0, end: 0 }, - } + // Count depends on how parser extracts individual imports + expect(tree.imports.length).toBeGreaterThan(0) - const chunk = await Effect.runPromise( - attachContext({ - text: mockText, - scopeTree: tree, - options: {}, - index: 0, - totalChunks: 1, - filepath: 'test.ts', - language: 'typescript', - }), - ) - - expect(chunk.context.filepath).toBe('test.ts') - expect(chunk.context.language).toBe('typescript') + // All items in imports array should be import type + for (const imp of tree.imports) { + expect(imp.type).toBe('import') + expect(imp.source).toBeDefined() } }) - test('attachContext respects contextMode none', async () => { - const { Effect } = await import('effect') - const { attachContext } = await import('../src/context/index') + test('exports array captures export declarations', async () => { + const code = `export const x = 1 +export function exported() { return 2 } +export class ExportedClass {}` + const entities = await getEntities(code, 'typescript') + const tree = buildScopeTreeFromEntities(entities) - const code = `function test() { return 1 }` + // Root should contain the exported items + expect(tree.root.length).toBeGreaterThan(0) + + // Should find exported function in root + const exportedFn = tree.root.find((n) => n.entity.name === 'exported') + expect(exportedFn).toBeDefined() + }) + + test('imports are not included in root tree', async () => { + const code = `import { helper } from './helper' + +function main() { return helper() }` const entities = await getEntities(code, 'typescript') const tree = buildScopeTreeFromEntities(entities) - const mockText = { - text: code, - byteRange: { start: 0, end: code.length }, - lineRange: { start: 0, end: 0 }, + // Imports should be in imports array + expect(tree.imports).toHaveLength(1) + expect(tree.imports[0]?.name).toBe('helper') + + // Root should only have the function + expect(tree.root).toHaveLength(1) + expect(tree.root[0]?.entity.name).toBe('main') + + // Verify imports are not in root + const importInRoot = tree.root.find((n) => n.entity.type === 'import') + expect(importInRoot).toBeUndefined() + }) +}) + +// ============================================================================ +// Nesting Depth Verification Tests +// ============================================================================ + +describe('nesting depth verification', () => { + test('verifies exact nesting depth for complex structures', async () => { + const code = `class Outer { + innerMethod() { + function nested() { + return 1 + } + } +}` + const entities = await getEntities(code, 'typescript') 
+ const tree = buildScopeTreeFromEntities(entities) + + // Calculate depth for each node + const getDepth = (node: (typeof tree.root)[0]): number => { + let depth = 0 + let current = node.parent + while (current) { + depth++ + current = current.parent + } + return depth } - const chunk = await Effect.runPromise( - attachContext({ - text: mockText, - scopeTree: tree, - options: { contextMode: 'none' }, - index: 0, - totalChunks: 1, - filepath: 'test.ts', - language: 'typescript', - }), - ) + const flattened = flattenScopeTree(tree) + + const outer = flattened.find((n) => n.entity.name === 'Outer') + const innerMethod = flattened.find((n) => n.entity.name === 'innerMethod') + const nested = flattened.find((n) => n.entity.name === 'nested') + + expect(outer).toBeDefined() + expect(innerMethod).toBeDefined() + + expect(getDepth(outer!)).toBe(0) + expect(getDepth(innerMethod!)).toBe(1) + if (nested) { + expect(getDepth(nested)).toBe(2) + } + }) + + test('multiple top-level items all have depth 0', async () => { + const code = `function fn1() {} +function fn2() {} +class Cls1 {} +class Cls2 {}` + const entities = await getEntities(code, 'typescript') + const tree = buildScopeTreeFromEntities(entities) - // Even in 'none' mode, filepath and language should be present - expect(chunk.context.filepath).toBe('test.ts') - expect(chunk.context.language).toBe('typescript') - // But scope, entities, siblings, imports should be empty - expect(chunk.context.scope).toEqual([]) - expect(chunk.context.entities).toEqual([]) - expect(chunk.context.siblings).toEqual([]) - expect(chunk.context.imports).toEqual([]) + expect(tree.root).toHaveLength(4) + for (const node of tree.root) { + expect(node.parent).toBeNull() + } }) })
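
The nesting-depth assertions above walk `parent` links upward, so a node's depth is just the number of ancestors between it and a root entry. A minimal sketch of that idea, using simplified `ScopeNode`/`ScopeTree` shapes inferred from the assertions in this file (the real definitions live in `src/types`, and the tests' own `flattenScopeTree` helper may differ in detail), could look like this:

```ts
// Simplified shapes inferred from the assertions above; the real types in
// src/types may carry additional fields (docstring, lineRange, etc.).
interface SketchEntity {
  name: string
  type: string
  byteRange: { start: number; end: number }
}

interface SketchScopeNode {
  entity: SketchEntity
  parent: SketchScopeNode | null
  children: SketchScopeNode[]
}

interface SketchScopeTree {
  root: SketchScopeNode[]
  imports: SketchEntity[]
}

// Depth = number of parent links back to a root node; root nodes have depth 0.
function getDepth(node: SketchScopeNode): number {
  let depth = 0
  let current = node.parent
  while (current) {
    depth++
    current = current.parent
  }
  return depth
}

// Depth-first flatten, analogous to the flattenScopeTree helper used above.
function flattenTree(tree: SketchScopeTree): SketchScopeNode[] {
  const out: SketchScopeNode[] = []
  const visit = (node: SketchScopeNode): void => {
    out.push(node)
    for (const child of node.children) visit(child)
  }
  for (const root of tree.root) visit(root)
  return out
}
```

The tests check both the `ScopeNode.parent` pointer and the `ExtractedEntity.parent` name string; the sketch follows the pointer, since a name string alone cannot distinguish two scopes that happen to share a name.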