Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion biome.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,23 @@
"enabled": true,
"useIgnoreFile": true,
"clientKind": "git"
}
},
"overrides": [
{
"includes": ["test/**/*.ts"],
"linter": {
"rules": {
"style": {
"noNonNullAssertion": "off"
},
"suspicious": {
"noNonNullAssertedOptionalChain": "off"
},
"correctness": {
"noUnsafeOptionalChaining": "off"
}
}
}
}
]
}
187 changes: 183 additions & 4 deletions src/chunk.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
import { Effect } from 'effect'
import { chunk as chunkInternal } from './chunking'
import { Effect, Stream } from 'effect'
import {
chunk as chunkInternal,
streamChunks as streamChunksInternal,
} from './chunking'
import { extractEntities } from './extract'
import { parseCode } from './parser'
import { detectLanguage } from './parser/languages'
import { buildScopeTree } from './scope'
import type { Chunk, ChunkOptions, Language } from './types'
import type {
Chunk,
ChunkOptions,
Language,
ParseResult,
ScopeTree,
} from './types'

/**
* Error thrown when chunking fails
Expand Down Expand Up @@ -74,14 +83,15 @@ const chunkEffect = (
new ChunkingError('Failed to build scope tree', error),
)

// Step 5: Chunk the code
// Step 5: Chunk the code (passing filepath for context)
const chunks = yield* Effect.mapError(
chunkInternal(
parseResult.tree.rootNode,
code,
scopeTree,
language,
options,
filepath,
),
(error: unknown) => new ChunkingError('Failed to chunk code', error),
)
Expand Down Expand Up @@ -133,3 +143,172 @@ export async function chunk(
): Promise<Chunk[]> {
return Effect.runPromise(chunkEffect(filepath, code, options))
}

/**
 * Run the shared front half of the chunking pipeline: resolve the
 * language, parse the source, extract entities, and build the scope
 * tree. Both the batch and streaming chunkers consume this.
 *
 * Fails with UnsupportedLanguageError when no language can be resolved,
 * or ChunkingError when any pipeline stage throws.
 */
const prepareChunking = (
  filepath: string,
  code: string,
  options?: ChunkOptions,
): Effect.Effect<
  { parseResult: ParseResult; scopeTree: ScopeTree; language: Language },
  ChunkingError | UnsupportedLanguageError
> =>
  Effect.gen(function* () {
    // Explicit override wins; otherwise detect from the file extension.
    const lang = options?.language ?? detectLanguage(filepath)
    if (!lang) {
      return yield* Effect.fail(new UnsupportedLanguageError(filepath))
    }

    // Parse the source into an AST; any rejection becomes a ChunkingError.
    const parsed = yield* Effect.tryPromise({
      try: () => parseCode(code, lang),
      catch: (cause: unknown) =>
        new ChunkingError('Failed to parse code', cause),
    })

    // Walk the AST for entities, then assemble the scope tree from them.
    const entities = yield* extractEntities(
      parsed.tree.rootNode,
      lang,
      code,
    ).pipe(
      Effect.mapError(
        (cause: unknown) =>
          new ChunkingError('Failed to extract entities', cause),
      ),
    )

    const tree = yield* buildScopeTree(entities).pipe(
      Effect.mapError(
        (cause: unknown) =>
          new ChunkingError('Failed to build scope tree', cause),
      ),
    )

    return { parseResult: parsed, scopeTree: tree, language: lang }
  })

/**
 * Create an Effect Stream that yields chunks
 *
 * This is the Effect-native streaming API. Use this if you're working
 * within the Effect ecosystem and want full composability.
 *
 * @param filepath - The file path (used for language detection)
 * @param code - The source code to chunk
 * @param options - Optional chunking configuration
 * @returns Effect Stream of chunks with context
 *
 * @example
 * ```ts
 * import { chunkStreamEffect } from 'astchunk'
 * import { Effect, Stream } from 'effect'
 *
 * const program = Stream.runForEach(
 *   chunkStreamEffect('src/utils.ts', sourceCode),
 *   (chunk) => Effect.log(chunk.text)
 * )
 *
 * Effect.runPromise(program)
 * ```
 */
export const chunkStreamEffect = (
  filepath: string,
  code: string,
  options?: ChunkOptions,
): Stream.Stream<Chunk, ChunkingError | UnsupportedLanguageError> =>
  prepareChunking(filepath, code, options).pipe(
    Effect.map(({ parseResult, scopeTree, language }) => {
      // Adapt the internal async generator into an Effect Stream.
      const base = Stream.fromAsyncIterable(
        streamChunksInternal(
          parseResult.tree.rootNode,
          code,
          scopeTree,
          language,
          options,
          filepath,
        ),
        (cause) => new ChunkingError('Stream iteration failed', cause),
      )

      // If parsing reported a (recoverable) error, surface it on each
      // chunk's context so consumers know the source was degraded.
      return base.pipe(
        Stream.map((piece) =>
          parseResult.error
            ? {
                ...piece,
                context: {
                  ...piece.context,
                  parseError: parseResult.error,
                },
              }
            : piece,
        ),
      )
    }),
    Stream.unwrap,
  )

/**
 * Stream source code chunks as they are generated
 *
 * This function returns an async generator that yields chunks one at a time,
 * which is useful for processing large files without waiting for all chunks
 * to be generated.
 *
 * @param filepath - The file path (used for language detection)
 * @param code - The source code to chunk
 * @param options - Optional chunking configuration
 * @returns Async generator of chunks with context
 * @throws ChunkingError if chunking fails
 * @throws UnsupportedLanguageError if the file type is not supported
 *
 * @example
 * ```ts
 * import { chunkStream } from 'astchunk'
 *
 * for await (const chunk of chunkStream('src/utils.ts', sourceCode)) {
 *   console.log(chunk.text, chunk.context)
 * }
 * ```
 */
export async function* chunkStream(
  filepath: string,
  code: string,
  options?: ChunkOptions,
): AsyncGenerator<Chunk> {
  // Run the shared preparation pipeline once up front (language
  // detection, parse, entity extraction, scope-tree construction).
  const { parseResult, scopeTree, language } = await Effect.runPromise(
    prepareChunking(filepath, code, options),
  )

  // Stream chunks from the internal generator
  const chunkGenerator = streamChunksInternal(
    parseResult.tree.rootNode,
    code,
    scopeTree,
    language,
    options,
    filepath,
  )

  // parseResult.error is fixed for the lifetime of the stream, so read it
  // once here instead of re-testing it on every iteration of the loop.
  const parseError = parseResult.error
  for await (const chunk of chunkGenerator) {
    // Attach the parse error to each chunk's context when one was reported.
    yield parseError
      ? { ...chunk, context: { ...chunk.context, parseError } }
      : chunk
  }
}
75 changes: 29 additions & 46 deletions src/chunker.ts
Original file line number Diff line number Diff line change
@@ -1,97 +1,80 @@
import { chunk } from './chunk'
import { chunk as chunkFn, chunkStream as streamFn } from './chunk'
import { DEFAULT_CHUNK_OPTIONS } from './chunking'
import type { Chunk, Chunker, ChunkOptions } from './types'

/**
* Default options for the chunker
*/
const DEFAULT_OPTIONS: ChunkOptions = {
maxChunkSize: 4096,
contextMode: 'full',
siblingDetail: 'signatures',
filterImports: false,
}

/**
* Implementation of the Chunker interface
*
* Provides a stateful wrapper around the chunk function that:
* - Stores default options
* - Tracks the filepath for language detection
* Provides a stateful wrapper around the chunk and stream functions that
* stores default options and allows per-call overrides.
*/
class ChunkerImpl implements Chunker {
private readonly filepath: string
private readonly defaultOptions: ChunkOptions

constructor(filepath: string, options: ChunkOptions = {}) {
this.filepath = filepath
this.defaultOptions = { ...DEFAULT_OPTIONS, ...options }
constructor(options: ChunkOptions = {}) {
this.defaultOptions = { ...DEFAULT_CHUNK_OPTIONS, ...options }
}

/**
* Chunk source code into pieces with context
*
* @param source - The source code to chunk
* @param filepath - The file path (used for language detection)
* @param code - The source code to chunk
* @param options - Optional overrides for chunking options
* @returns Promise resolving to array of chunks
*/
async chunk(source: string, options?: ChunkOptions): Promise<Chunk[]> {
async chunk(
filepath: string,
code: string,
options?: ChunkOptions,
): Promise<Chunk[]> {
const mergedOptions = { ...this.defaultOptions, ...options }
return chunk(this.filepath, source, mergedOptions)
return chunkFn(filepath, code, mergedOptions)
}

/**
* Stream chunks as they are generated
*
* @param source - The source code to chunk
* @param filepath - The file path (used for language detection)
* @param code - The source code to chunk
* @param options - Optional overrides for chunking options
* @returns Async iterable of chunks
*
* TODO: Implement true streaming - for now, this just iterates the array
*/
async *stream(source: string, options?: ChunkOptions): AsyncIterable<Chunk> {
async *stream(
filepath: string,
code: string,
options?: ChunkOptions,
): AsyncIterable<Chunk> {
const mergedOptions = { ...this.defaultOptions, ...options }
const chunks = await chunk(this.filepath, source, mergedOptions)

for (const c of chunks) {
yield c
}
yield* streamFn(filepath, code, mergedOptions)
}
}

/**
* Create a new Chunker instance for a specific file
* Create a new Chunker instance with default options
*
* The Chunker provides a convenient interface for chunking source code
* with pre-configured options. It's particularly useful when you need to
* chunk multiple versions of the same file or want to stream chunks.
* chunk multiple files with the same configuration.
*
* @param filepath - The file path (used for language detection)
* @param options - Default options for all chunking operations
* @returns A Chunker instance
*
* @example
* ```ts
* import { createChunker } from 'astchunk'
*
* const chunker = createChunker('src/utils.ts', { maxChunkSize: 2048 })
* const chunker = createChunker({ maxChunkSize: 2048 })
*
* // Chunk synchronously
* const chunks = await chunker.chunk(sourceCode)
* const chunks = await chunker.chunk('src/utils.ts', sourceCode)
*
* // Or stream chunks
* for await (const chunk of chunker.stream(sourceCode)) {
* for await (const chunk of chunker.stream('src/utils.ts', sourceCode)) {
* process.stdout.write(chunk.text)
* }
* ```
*/
export function createChunker(
filepath: string,
options?: ChunkOptions,
): Chunker {
return new ChunkerImpl(filepath, options)
export function createChunker(options?: ChunkOptions): Chunker {
return new ChunkerImpl(options)
}

/**
* Re-export the Chunker type for convenience
*/
export type { Chunker } from './types'
Loading
Loading