Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion packages/code-chunk/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"url": "git+https://github.com/supermemoryai/code-chunk.git"
},
"scripts": {
"build": "bunup",
"build": "bunup src/index.ts src/wasm.ts",
"dev": "bunup --watch",
"release": "bumpp --commit --push --tag",
"test": "bun test",
Expand Down Expand Up @@ -46,6 +46,14 @@
"default": "./dist/index.js"
}
},
"./wasm": {
"types": "./src/wasm.ts",
"bun": "./src/wasm.ts",
"import": {
"types": "./dist/wasm.d.ts",
"default": "./dist/wasm.js"
}
},
"./package.json": "./package.json"
},
"module": "./dist/index.js",
Expand Down
125 changes: 11 additions & 114 deletions packages/code-chunk/src/parser/index.ts
Original file line number Diff line number Diff line change
@@ -1,28 +1,26 @@
import { Effect } from 'effect'
import {
Parser,
type Node as TSNode,
type Tree as TSTree,
} from 'web-tree-sitter'
import { Parser } from 'web-tree-sitter'
import type { Language, ParseError, ParseResult } from '../types'
import {
clearGrammarCache,
type GrammarLoadError,
getLanguageGrammar,
} from './languages'
import { buildParseResult } from './shared'

// Re-export language utilities
export {
clearGrammarCache,
detectLanguage,
GrammarLoadError,
LANGUAGE_EXTENSIONS,
loadGrammar,
} from './languages'
export {
buildParseResult,
getParseErrorMessage,
hasParseErrors,
} from './shared'

/**
* Error thrown when parser initialization fails
*/
export class ParserInitError extends Error {
readonly _tag = 'ParserInitError'
override readonly cause?: unknown
Expand All @@ -34,16 +32,8 @@ export class ParserInitError extends Error {
}
}

/**
* Flag to track if tree-sitter has been initialized
*/
let initialized: boolean = false

/**
* Initialize the tree-sitter WASM module
*
* @returns Effect that initializes tree-sitter
*/
export function initParser(): Effect.Effect<void, ParserInitError> {
return Effect.gen(function* () {
if (initialized) {
Expand All @@ -60,99 +50,28 @@ export function initParser(): Effect.Effect<void, ParserInitError> {
})
}

/**
* Check if a parse tree has errors
*/
function hasParseErrors(tree: TSTree): boolean {
return tree.rootNode.hasError
}

/**
* Get error message from a tree with errors
*/
function getParseErrorMessage(tree: TSTree): string {
const errorNodes: string[] = []

function findErrors(node: TSNode) {
if (node.isError || node.isMissing) {
const pos = node.startPosition
errorNodes.push(
`${node.isError ? 'ERROR' : 'MISSING'} at line ${pos.row + 1}, column ${pos.column + 1}`,
)
}
for (const child of node.children) {
findErrors(child)
}
}

findErrors(tree.rootNode)
return errorNodes.length > 0
? errorNodes.slice(0, 3).join('; ') +
(errorNodes.length > 3 ? `; ... and ${errorNodes.length - 3} more` : '')
: 'Unknown parse error'
}

/**
* Parse source code into an AST
*
* Uses Effect internally for error handling. Tree-sitter always produces a tree
* even with syntax errors (recoverable parsing).
*
* @param parser - The tree-sitter parser instance
* @param code - The source code to parse
* @param language - The programming language
* @returns Effect resolving to ParseResult
*/
export function parse(
parser: Parser,
code: string,
language: Language,
): Effect.Effect<ParseResult, ParseError | GrammarLoadError> {
return Effect.gen(function* () {
// Load and set the language grammar
const grammar = yield* getLanguageGrammar(language)
parser.setLanguage(grammar)

// Parse the code
const tree = parser.parse(code)
const result = buildParseResult(tree)

if (!tree) {
return yield* Effect.fail({
message: 'Parser returned null - no language set or parsing cancelled',
recoverable: false,
} satisfies ParseError)
if (result.error && !result.error.recoverable) {
return yield* Effect.fail(result.error)
}

// Check for parse errors
if (hasParseErrors(tree)) {
return {
tree,
error: {
message: getParseErrorMessage(tree),
recoverable: true, // Tree-sitter always produces a tree
},
} satisfies ParseResult
}

return {
tree,
error: null,
} satisfies ParseResult
return result
})
}

// ============================================================================
// Public API - Unwraps Effect for consumers
// ============================================================================

/**
* Shared parser instance for the public API
*/
let sharedParser: Parser | null = null

/**
* Get or create the shared parser instance
*/
async function getSharedParser(): Promise<Parser> {
if (sharedParser) {
return sharedParser
Expand All @@ -163,14 +82,6 @@ async function getSharedParser(): Promise<Parser> {
return sharedParser
}

/**
* Parse source code into an AST (public async API)
*
* @param code - The source code to parse
* @param language - The programming language
* @returns Promise resolving to ParseResult
* @throws ParseError or GrammarLoadError if parsing fails irrecoverably
*/
export async function parseCode(
code: string,
language: Language,
Expand All @@ -179,24 +90,10 @@ export async function parseCode(
return Effect.runPromise(parse(parser, code, language))
}

/**
* Initialize the parser module (public async API)
*
* Call this before using other parser functions to ensure tree-sitter is ready.
* This is called automatically by parseCode, but can be called explicitly for
* early initialization.
*
* @returns Promise that resolves when initialization is complete
* @throws ParserInitError if initialization fails
*/
export async function initializeParser(): Promise<void> {
await getSharedParser()
}

/**
* Reset the shared parser state (useful for testing)
* Also clears the grammar cache to ensure clean reinitialization
*/
export function resetParser(): void {
if (sharedParser) {
sharedParser.delete()
Expand Down
52 changes: 52 additions & 0 deletions packages/code-chunk/src/parser/shared.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import type { Node as TSNode, Tree as TSTree } from 'web-tree-sitter'
import type { ParseResult } from '../types'

/**
 * Report whether a parse tree contains syntax errors.
 *
 * @param tree - Parse tree to inspect
 * @returns The `hasError` flag of the tree's root node
 */
export function hasParseErrors(tree: TSTree): boolean {
  const { rootNode } = tree
  return rootNode.hasError
}

/**
 * Build a short, human-readable summary of the syntax errors in a parse tree.
 *
 * Visits the tree in pre-order (parent first, then children left to right)
 * and records every node flagged `isError` or `isMissing` together with its
 * 1-based line and column. At most the first three locations are listed;
 * any further ones are summarised as `... and N more`.
 *
 * The traversal uses an explicit stack rather than recursion so that a very
 * deep tree cannot overflow the JavaScript call stack.
 *
 * @param tree - Parse tree to inspect
 * @returns The error summary, or `'Unknown parse error'` when no error or
 *          missing node is found
 */
export function getParseErrorMessage(tree: TSTree): string {
  const errorNodes: string[] = []
  const pending: TSNode[] = [tree.rootNode]

  for (let node = pending.pop(); node !== undefined; node = pending.pop()) {
    if (node.isError || node.isMissing) {
      const pos = node.startPosition
      errorNodes.push(
        `${node.isError ? 'ERROR' : 'MISSING'} at line ${pos.row + 1}, column ${pos.column + 1}`,
      )
    }

    // Push children right-to-left so they are popped left-to-right, which
    // keeps the reported order identical to a recursive pre-order walk.
    const children = node.children
    for (let i = children.length - 1; i >= 0; i--) {
      const child = children[i]
      // NOTE(review): defensive skip of empty slots; some web-tree-sitter
      // typings may declare child entries as nullable - confirm against the
      // installed version.
      if (child) {
        pending.push(child)
      }
    }
  }

  if (errorNodes.length === 0) {
    return 'Unknown parse error'
  }

  const shown = errorNodes.slice(0, 3).join('; ')
  return errorNodes.length > 3
    ? `${shown}; ... and ${errorNodes.length - 3} more`
    : shown
}

/**
 * Convert the raw output of a parse call into a `ParseResult`.
 *
 * - No tree: the result carries a non-recoverable error. Its `tree` field is
 *   `undefined` at runtime despite the declared type, so callers must check
 *   `error` before touching `tree`.
 * - Tree with syntax errors: the tree is returned together with a
 *   recoverable error describing where the errors are.
 * - Clean tree: the tree is returned with `error: null`.
 *
 * @param tree - Tree produced by the parser, or `null` when none was produced
 * @returns The corresponding `ParseResult`
 */
export function buildParseResult(tree: TSTree | null): ParseResult {
  if (tree) {
    const error = hasParseErrors(tree)
      ? { message: getParseErrorMessage(tree), recoverable: true }
      : null
    return { tree, error }
  }

  return {
    tree: undefined as unknown as TSTree,
    error: {
      message: 'Parser returned null - no language set or parsing cancelled',
      recoverable: false,
    },
  }
}
Loading
Loading