From aed0e2161a17fa1920eaec6e25e2ba79c2ea122b Mon Sep 17 00:00:00 2001
From: Shoubhit Dash <shoubhit2005@gmail.com>
Date: Wed, 17 Dec 2025 01:32:23 +0530
Subject: [PATCH 1/2] feat: implement entity extraction system with queries,
 signatures, docstrings

---
 src/extract/docstring.ts | 406 +++++++++++++++++--
 src/extract/fallback.ts  | 206 +++++++++-
 src/extract/index.ts     | 348 +++++++++++++++-
 src/extract/queries.ts   | 724 ++++++++++++++++++++++++++++++++-
 src/extract/signature.ts | 362 +++++++++++++++--
 test/extract.test.ts     | 838 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 2778 insertions(+), 106 deletions(-)
 create mode 100644 test/extract.test.ts
diff --git a/src/extract/docstring.ts b/src/extract/docstring.ts
index e9b8dfb..1df1f59 100644
--- a/src/extract/docstring.ts
+++ b/src/extract/docstring.ts
@@ -14,26 +14,9 @@ export const COMMENT_NODE_TYPES: Record<Language, readonly string[]> = {
 }
 
 /**
- * Extract the docstring/documentation comment for an entity
- *
- * @param node - The AST node representing the entity
- * @param language - The programming language
- * @param code - The source code
- * @returns Effect yielding the docstring, or null if none found
- *
- * TODO: Implement docstring extraction
+ * Python docstring node types (triple-quoted strings)
  */
-export const extractDocstring = (
-	_node: SyntaxNode,
-	_language: Language,
-	_code: string,
-): Effect.Effect<string | null, never> => {
-	// TODO: Implement docstring extraction
-	// 1. Look for comment nodes immediately preceding the entity
-	// 2. For Python, also check for string literal as first child
-	// 3. Parse and clean up the comment format
-	return Effect.succeed(null)
-}
+const PYTHON_STRING_TYPES: readonly string[] = ['string', 'string_content']
 
 /**
  * Check if a comment is a documentation comment (JSDoc, docstring, etc.)
@@ -43,14 +26,381 @@ export const extractDocstring = (
  * @returns Whether the comment is a documentation comment
  */
 export const isDocComment = (
-	_commentText: string,
-	_language: Language,
+	commentText: string,
+	language: Language,
 ): boolean => {
-	// TODO: Implement doc comment detection
-	// - JSDoc: starts with /**
-	// - Python: triple quotes
-	// - Rust: starts with /// or //!
-	// - Go: starts with //
-	// - Java: starts with /**
-	return false
+	const trimmed = commentText.trim()
+
+	switch (language) {
+		case 'typescript':
+		case 'javascript':
+		case 'java':
+			// JSDoc/Javadoc: starts with /** (but not /***+)
+			return /^\/\*\*[^*]/.test(trimmed) || trimmed === '/**/'
+
+		case 'python':
+			// Python docstrings: triple quotes
+			return (
+				trimmed.startsWith('"""') ||
+				trimmed.startsWith("'''") ||
+				trimmed.startsWith('r"""') ||
+				trimmed.startsWith("r'''")
+			)
+
+		case 'rust':
+			// Rust doc comments: /// (outer) or //! (inner)
+			return trimmed.startsWith('///') || trimmed.startsWith('//!')
+
+		case 'go':
+			// Go: any // comment immediately before a declaration is considered doc
+			return trimmed.startsWith('//')
+
+		default:
+			return false
+	}
+}
+
+/**
+ * Parse and clean up a docstring, removing comment markers and normalizing whitespace
+ *
+ * @param text - The raw docstring text
+ * @param language - The programming language
+ * @returns The cleaned docstring text
+ */
+export const parseDocstring = (text: string, language: Language): string => {
+	switch (language) {
+		case 'typescript':
+		case 'javascript':
+		case 'java':
+			return parseJSDocStyle(text)
+
+		case 'python':
+			return parsePythonDocstring(text)
+
+		case 'rust':
+			return parseRustDocComment(text)
+
+		case 'go':
+			return parseGoComment(text)
+
+		default:
+			return text.trim()
+	}
+}
+
+/**
+ * Parse JSDoc/Javadoc style comments
+ * Handles: /** ... *\/
+ */
+function parseJSDocStyle(text: string): string {
+	let content = text.trim()
+
+	// Remove opening /** and closing */
+	if (content.startsWith('/**')) {
+		content = content.slice(3)
+	}
+	if (content.endsWith('*/')) {
+		content = content.slice(0, -2)
+	}
+
+	// Split into lines and process each
+	const lines = content.split('\n')
+	const processedLines = lines.map((line) => {
+		let processed = line.trim()
+		// Remove leading * from each line (common JSDoc style)
+		if (processed.startsWith('*')) {
+			processed = processed.slice(1)
+			// Remove one space after * if present
+			if (processed.startsWith(' ')) {
+				processed = processed.slice(1)
+			}
+		}
+		return processed
+	})
+
+	// Remove empty lines at start and end
+	while (processedLines.length > 0 && processedLines[0] === '') {
+		processedLines.shift()
+	}
+	while (
+		processedLines.length > 0 &&
+		processedLines[processedLines.length - 1] === ''
+	) {
+		processedLines.pop()
+	}
+
+	return processedLines.join('\n')
+}
+
+/**
+ * Parse Python docstrings (triple-quoted strings)
+ * Handles: ''' ... ''' and """ ... """
+ */
+function parsePythonDocstring(text: string): string {
+	let content = text.trim()
+
+	// Handle raw strings
+	if (content.startsWith('r"""') || content.startsWith("r'''")) {
+		content = content.slice(1)
+	}
+
+	// Remove opening and closing quotes
+	if (content.startsWith('"""')) {
+		content = content.slice(3)
+		if (content.endsWith('"""')) {
+			content = content.slice(0, -3)
+		}
+	} else if (content.startsWith("'''")) {
+		content = content.slice(3)
+		if (content.endsWith("'''")) {
+			content = content.slice(0, -3)
+		}
+	}
+
+	// Split into lines
+	const lines = content.split('\n')
+
+	// Find minimum indentation (excluding empty lines)
+	let minIndent = Number.POSITIVE_INFINITY
+	for (const line of lines) {
+		if (line.trim().length > 0) {
+			const leadingSpaces = line.match(/^(\s*)/)?.[1]?.length ?? 0
+			minIndent = Math.min(minIndent, leadingSpaces)
+		}
+	}
+
+	if (minIndent === Number.POSITIVE_INFINITY) {
+		minIndent = 0
+	}
+
+	// Remove common indentation
+	const dedentedLines = lines.map((line) => {
+		if (line.trim().length === 0) {
+			return ''
+		}
+		return line.slice(minIndent)
+	})
+
+	// Remove empty lines at start and end
+	while (dedentedLines.length > 0 && dedentedLines[0]?.trim() === '') {
+		dedentedLines.shift()
+	}
+	while (
+		dedentedLines.length > 0 &&
+		dedentedLines[dedentedLines.length - 1]?.trim() === ''
+	) {
+		dedentedLines.pop()
+	}
+
+	return dedentedLines.join('\n')
+}
+
+/**
+ * Parse Rust doc comments
+ * Handles: /// and //!
+ */
+function parseRustDocComment(text: string): string {
+	const lines = text.split('\n')
+	const processedLines: string[] = []
+
+	for (const line of lines) {
+		const trimmed = line.trim()
+		let content = trimmed
+
+		// Remove /// or //! prefix
+		if (trimmed.startsWith('///')) {
+			content = trimmed.slice(3)
+		} else if (trimmed.startsWith('//!')) {
+			content = trimmed.slice(3)
+		}
+
+		// Remove one leading space if present
+		if (content.startsWith(' ')) {
+			content = content.slice(1)
+		}
+
+		processedLines.push(content)
+	}
+
+	// Remove empty lines at start and end
+	while (processedLines.length > 0 && processedLines[0] === '') {
+		processedLines.shift()
+	}
+	while (
+		processedLines.length > 0 &&
+		processedLines[processedLines.length - 1] === ''
+	) {
+		processedLines.pop()
+	}
+
+	return processedLines.join('\n')
+}
+
+/**
+ * Parse Go comments
+ * Handles: // style comments
+ */
+function parseGoComment(text: string): string {
+	const lines = text.split('\n')
+	const processedLines: string[] = []
+
+	for (const line of lines) {
+		const trimmed = line.trim()
+		let content = trimmed
+
+		// Remove // prefix
+		if (trimmed.startsWith('//')) {
+			content = trimmed.slice(2)
+		}
+
+		// Remove one leading space if present
+		if (content.startsWith(' ')) {
+			content = content.slice(1)
+		}
+
+		processedLines.push(content)
+	}
+
+	// Remove empty lines at start and end
+	while (processedLines.length > 0 && processedLines[0] === '') {
+		processedLines.shift()
+	}
+	while (
+		processedLines.length > 0 &&
+		processedLines[processedLines.length - 1] === ''
+	) {
+		processedLines.pop()
+	}
+
+	return processedLines.join('\n')
+}
+
+/**
+ * Get the text content of a node
+ */
+function getNodeText(node: SyntaxNode, code: string): string {
+	return code.slice(node.startIndex, node.endIndex)
+}
+
+/**
+ * Find preceding comment nodes (handles consecutive comment lines)
+ */
+function findPrecedingComments(
+	node: SyntaxNode,
+	language: Language,
+	code: string,
+): string | null {
+	const commentTypes = COMMENT_NODE_TYPES[language]
+	const comments: string[] = []
+	let current = node.previousNamedSibling
+
+	// Walk backwards collecting consecutive comment nodes
+	while (current) {
+		const nodeType = current.type
+
+		if (commentTypes.includes(nodeType)) {
+			const text = getNodeText(current, code)
+
+			// For Python, only consider string literals that are docstrings (but they come after, not before)
+			// For Python comments that precede, they're not docstrings
+			if (language === 'python' && PYTHON_STRING_TYPES.includes(nodeType)) {
+				break
+			}
+
+			if (isDocComment(text, language)) {
+				comments.unshift(text) // Add to front since we're going backwards
+				current = current.previousNamedSibling
+			} else {
+				break
+			}
+		} else {
+			// Check if there's a comment between the current named sibling and our node
+			// by looking at the previous sibling (including non-named)
+			break
+		}
+	}
+
+	if (comments.length === 0) {
+		return null
+	}
+
+	// Combine consecutive comments (for Rust /// style)
+	const combinedText = comments.join('\n')
+	return parseDocstring(combinedText, language)
+}
+
+/**
+ * Find Python docstring (first string literal in function/class body)
+ */
+function findPythonDocstring(node: SyntaxNode, code: string): string | null {
+	// Look for a block/body child
+	const bodyNode =
+		node.childForFieldName('body') ??
+		node.namedChildren.find((c) => c.type === 'block')
+
+	if (!bodyNode) {
+		return null
+	}
+
+	// Get the first statement in the body
+	const firstChild = bodyNode.namedChildren[0]
+
+	if (!firstChild) {
+		return null
+	}
+
+	// Check if it's an expression statement containing a string
+	if (firstChild.type === 'expression_statement') {
+		const stringNode = firstChild.namedChildren[0]
+		if (stringNode && PYTHON_STRING_TYPES.includes(stringNode.type)) {
+			const text = getNodeText(stringNode, code)
+			if (isDocComment(text, 'python')) {
+				return parseDocstring(text, 'python')
+			}
+		}
+	}
+
+	// Direct string literal (shouldn't happen in valid Python, but handle it)
+	if (PYTHON_STRING_TYPES.includes(firstChild.type)) {
+		const text = getNodeText(firstChild, code)
+		if (isDocComment(text, 'python')) {
+			return parseDocstring(text, 'python')
+		}
+	}
+
+	return null
+}
+
+/**
+ * Extract the docstring/documentation comment for an entity
+ *
+ * @param node - The AST node representing the entity
+ * @param language - The programming language
+ * @param code - The source code
+ * @returns Effect yielding the docstring, or null if none found
+ *
+ * Handles:
+ * - JSDoc (/** ... *\/) for TypeScript/JavaScript
+ * - Python docstrings (triple-quoted string as first statement in body)
+ * - Rust doc comments (/// and //!)
+ * - Go comments (// before declaration)
+ * - Java Javadoc (/** ... *\/)
+ */
+export const extractDocstring = (
+	node: SyntaxNode,
+	language: Language,
+	code: string,
+): Effect.Effect<string | null, never> => {
+	return Effect.sync(() => {
+		// For Python, first check for docstring inside the body
+		if (language === 'python') {
+			const docstring = findPythonDocstring(node, code)
+			if (docstring) {
+				return docstring
+			}
+		}
+
+		// Look for preceding comments
+		return findPrecedingComments(node, language, code)
+	})
 }
diff --git a/src/extract/fallback.ts b/src/extract/fallback.ts
index a5bc614..7aa8263 100644
--- a/src/extract/fallback.ts
+++ b/src/extract/fallback.ts
@@ -1,5 +1,12 @@
 import { Effect } from 'effect'
-import type { ExtractedEntity, Language, SyntaxNode } from '../types'
+import type {
+	EntityType,
+	ExtractedEntity,
+	Language,
+	SyntaxNode,
+} from '../types'
+import { extractDocstring } from './docstring'
+import { extractName, extractSignature } from './signature'
 
 /**
  * Node types that represent extractable entities by language
@@ -53,25 +60,50 @@ export const ENTITY_NODE_TYPES: Record<Language, readonly string[]> = {
 }
 
 /**
- * Extract entities by matching node types (fallback when no query available)
- *
- * @param rootNode - The root node of the AST
- * @param language - The programming language
- * @param code - The source code
- * @returns Effect yielding extracted entities
- *
- * TODO: Implement node type based extraction
+ * Map node type to EntityType
  */
-export const extractByNodeTypes = (
-	_rootNode: SyntaxNode,
-	_language: Language,
-	_code: string,
-): Effect.Effect<ExtractedEntity[], never> => {
-	// TODO: Implement fallback extraction
-	// 1. Get node types for language
-	// 2. Walk the tree
-	// 3. Extract entities for matching nodes
-	return Effect.succeed([])
+export const NODE_TYPE_TO_ENTITY_TYPE: Record<string, EntityType> = {
+	// Functions
+	function_declaration: 'function',
+	function_definition: 'function',
+	function_item: 'function',
+	generator_function_declaration: 'function',
+	arrow_function: 'function',
+
+	// Methods
+	method_definition: 'method',
+	method_declaration: 'method',
+
+	// Classes
+	class_declaration: 'class',
+	class_definition: 'class',
+	abstract_class_declaration: 'class',
+
+	// Interfaces
+	interface_declaration: 'interface',
+	trait_item: 'interface',
+
+	// Types
+	type_alias_declaration: 'type',
+	type_item: 'type',
+	type_declaration: 'type',
+	struct_item: 'type',
+
+	// Enums
+	enum_declaration: 'enum',
+	enum_item: 'enum',
+
+	// Imports
+	import_statement: 'import',
+	import_declaration: 'import',
+	import_from_statement: 'import',
+	use_declaration: 'import',
+
+	// Exports
+	export_statement: 'export',
+
+	// Impl blocks (Rust - treat as class-like)
+	impl_item: 'class',
 }
 
 /**
@@ -84,3 +116,139 @@ export const isEntityNodeType = (
 	const types = ENTITY_NODE_TYPES[language]
 	return types.includes(nodeType)
 }
+
+/**
+ * Get EntityType from node type string
+ */
+export const getEntityType = (nodeType: string): EntityType | null => {
+	return NODE_TYPE_TO_ENTITY_TYPE[nodeType] ?? null
+}
+
+/**
+ * Item in the traversal stack for iterative tree walking
+ */
+interface StackItem {
+	node: SyntaxNode
+	parentName: string | null
+}
+
+/**
+ * Walk the AST iteratively and extract entities by matching node types
+ * Uses an explicit stack to avoid stack overflow on deeply nested ASTs
+ */
+function walkAndExtract(
+	rootNode: SyntaxNode,
+	language: Language,
+	code: string,
+	entities: ExtractedEntity[],
+	entityNodes: Set<number>,
+): Effect.Effect<void, never> {
+	return Effect.gen(function* () {
+		// Use explicit stack for depth-first traversal
+		const stack: StackItem[] = [{ node: rootNode, parentName: null }]
+
+		while (stack.length > 0) {
+			const current = stack.pop()
+			if (!current) continue
+			const { node, parentName } = current
+
+			// Check if this node is an entity type
+			if (isEntityNodeType(node.type, language)) {
+				// Skip if we've already processed this node
+				if (entityNodes.has(node.id)) {
+					continue
+				}
+				entityNodes.add(node.id)
+
+				const entityType = getEntityType(node.type)
+				if (entityType) {
+					// Extract name
+					const name = extractName(node, language) ?? '<anonymous>'
+
+					// Extract signature
+					const signature = yield* extractSignature(
+						node,
+						entityType,
+						language,
+						code,
+					)
+
+					// Extract docstring
+					const docstring = yield* extractDocstring(node, language, code)
+
+					// Create entity
+					const entity: ExtractedEntity = {
+						type: entityType,
+						name,
+						signature: signature || name,
+						docstring,
+						byteRange: {
+							start: node.startIndex,
+							end: node.endIndex,
+						},
+						lineRange: {
+							start: node.startPosition.row,
+							end: node.endPosition.row,
+						},
+						parent: parentName,
+						node,
+					}
+
+					entities.push(entity)
+
+					// For nested entities, use this entity's name as parent
+					const newParentName =
+						entityType === 'class' ||
+						entityType === 'interface' ||
+						entityType === 'function' ||
+						entityType === 'method'
+							? name
+							: parentName
+
+					// Add children to stack (in reverse order for correct DFS order)
+					const children = node.namedChildren
+					for (let i = children.length - 1; i >= 0; i--) {
+						const child = children[i]
+						if (child) {
+							stack.push({ node: child, parentName: newParentName })
+						}
+					}
+				}
+			} else {
+				// Not an entity node, but might contain entity nodes
+				// Add children to stack (in reverse order for correct DFS order)
+				const children = node.namedChildren
+				for (let i = children.length - 1; i >= 0; i--) {
+					const child = children[i]
+					if (child) {
+						stack.push({ node: child, parentName })
+					}
+				}
+			}
+		}
+	})
+}
+
+/**
+ * Extract entities by matching node types (fallback when no query available)
+ *
+ * @param rootNode - The root node of the AST
+ * @param language - The programming language
+ * @param code - The source code
+ * @returns Effect yielding extracted entities
+ */
+export const extractByNodeTypes = (
+	rootNode: SyntaxNode,
+	language: Language,
+	code: string,
+): Effect.Effect<ExtractedEntity[], never> => {
+	return Effect.gen(function* () {
+		const entities: ExtractedEntity[] = []
+		const entityNodes = new Set<number>()
+
+		// Walk the tree starting from root
+		yield* walkAndExtract(rootNode, language, code, entities, entityNodes)
+
+		return entities
+	})
+}
diff --git a/src/extract/index.ts b/src/extract/index.ts
index ee6ad85..31eea9a 100644
--- a/src/extract/index.ts
+++ b/src/extract/index.ts
@@ -1,5 +1,18 @@
 import { Effect } from 'effect'
-import type { ExtractedEntity, Language, SyntaxNode } from '../types'
+import type {
+	EntityType,
+	ExtractedEntity,
+	Language,
+	SyntaxNode,
+} from '../types'
+import { extractDocstring } from './docstring'
+import {
+	ENTITY_NODE_TYPES,
+	extractByNodeTypes,
+	getEntityType,
+} from './fallback'
+import { type CompiledQuery, loadQuery, loadQuerySync } from './queries'
+import { extractName, extractSignature } from './signature'
 
 /**
  * Error when entity extraction fails
@@ -12,37 +25,338 @@ export class ExtractError {
 	) {}
 }
 
+/**
+ * Interface for query match captures (compatible with future queries.ts implementation)
+ */
+interface QueryCapture {
+	name: string
+	node: SyntaxNode
+	patternIndex: number
+}
+
+/**
+ * Interface for query matches (compatible with future queries.ts implementation)
+ */
+interface QueryMatch {
+	patternIndex: number
+	captures: QueryCapture[]
+}
+
+/**
+ * Extract the entity node and name node from a query match
+ * This will be provided by queries.ts when merged, but we define it here for now
+ */
+function extractEntityFromMatch(match: QueryMatch): {
+	itemNode: SyntaxNode
+	nameNode: SyntaxNode | null
+	contextNodes: SyntaxNode[]
+	annotationNodes: SyntaxNode[]
+} | null {
+	const itemCapture = match.captures.find((c) => c.name === 'item')
+	if (!itemCapture) {
+		return null
+	}
+
+	const nameCapture = match.captures.find((c) => c.name === 'name')
+	const contextCaptures = match.captures.filter((c) => c.name === 'context')
+	const annotationCaptures = match.captures.filter(
+		(c) => c.name === 'annotation',
+	)
+
+	return {
+		itemNode: itemCapture.node,
+		nameNode: nameCapture?.node ?? null,
+		contextNodes: contextCaptures.map((c) => c.node),
+		annotationNodes: annotationCaptures.map((c) => c.node),
+	}
+}
+
+/**
+ * Execute a query against a tree (compatible interface)
+ * This will be provided by queries.ts when merged
+ */
+function executeQueryOnTree(
+	query: CompiledQuery,
+	rootNode: SyntaxNode,
+): { matches: QueryMatch[] } | null {
+	// Check if query has a matches method (compiled web-tree-sitter Query)
+	if (
+		query &&
+		typeof query === 'object' &&
+		'matches' in query &&
+		typeof (query as { matches: unknown }).matches === 'function'
+	) {
+		try {
+			const matches = (
+				query as { matches: (node: SyntaxNode) => unknown[] }
+			).matches(rootNode)
+			const queryMatches: QueryMatch[] = matches.map((match: unknown) => {
+				const m = match as {
+					patternIndex: number
+					captures: { name: string; node: SyntaxNode }[]
+				}
+				return {
+					patternIndex: m.patternIndex,
+					captures: m.captures.map((capture) => ({
+						name: capture.name,
+						node: capture.node,
+						patternIndex: m.patternIndex,
+					})),
+				}
+			})
+			return { matches: queryMatches }
+		} catch {
+			return null
+		}
+	}
+	return null
+}
+
+/**
+ * Convert query matches to extracted entities
+ */
+function matchesToEntities(
+	matches: QueryMatch[],
+	language: Language,
+	code: string,
+	rootNode: SyntaxNode,
+): Effect.Effect<ExtractedEntity[], never> {
+	return Effect.gen(function* () {
+		const entities: ExtractedEntity[] = []
+		const processedNodes = new Set<number>()
+
+		for (const match of matches) {
+			const extracted = extractEntityFromMatch(match)
+			if (!extracted) {
+				continue
+			}
+
+			const { itemNode, nameNode } = extracted
+
+			// Skip if already processed
+			if (processedNodes.has(itemNode.id)) {
+				continue
+			}
+			processedNodes.add(itemNode.id)
+
+			// Get entity type from node type
+			let entityType = getEntityType(itemNode.type)
+			if (!entityType) {
+				// Fallback: try to infer from node type pattern
+				entityType = inferEntityType(itemNode.type)
+				if (!entityType) {
+					continue
+				}
+			}
+
+			// Extract name - prefer name node from query, fallback to extraction
+			const name = nameNode
+				? nameNode.text
+				: (extractName(itemNode, language) ?? '<anonymous>')
+
+			// Extract signature
+			const signature = yield* extractSignature(
+				itemNode,
+				entityType,
+				language,
+				code,
+			)
+
+			// Extract docstring
+			const docstring = yield* extractDocstring(itemNode, language, code)
+
+			// Find parent entity
+			const parent = findParentEntityName(itemNode, rootNode, language)
+
+			const entity: ExtractedEntity = {
+				type: entityType,
+				name,
+				signature: signature || name,
+				docstring,
+				byteRange: {
+					start: itemNode.startIndex,
+					end: itemNode.endIndex,
+				},
+				lineRange: {
+					start: itemNode.startPosition.row,
+					end: itemNode.endPosition.row,
+				},
+				parent,
+				node: itemNode,
+			}
+
+			entities.push(entity)
+		}
+
+		return entities
+	})
+}
+
+/**
+ * Infer entity type from node type string for cases not covered by the map
+ */
+function inferEntityType(nodeType: string): EntityType | null {
+	const lowerType = nodeType.toLowerCase()
+
+	if (lowerType.includes('function') || lowerType.includes('arrow')) {
+		return 'function'
+	}
+	if (lowerType.includes('method')) {
+		return 'method'
+	}
+	if (lowerType.includes('class')) {
+		return 'class'
+	}
+	if (lowerType.includes('interface') || lowerType.includes('trait')) {
+		return 'interface'
+	}
+	if (lowerType.includes('type') || lowerType.includes('struct')) {
+		return 'type'
+	}
+	if (lowerType.includes('enum')) {
+		return 'enum'
+	}
+	if (lowerType.includes('import') || lowerType.includes('use')) {
+		return 'import'
+	}
+	if (lowerType.includes('export')) {
+		return 'export'
+	}
+
+	return null
+}
+
+/**
+ * Find the name of the parent entity (if any) by walking up the AST
+ */
+function findParentEntityName(
+	node: SyntaxNode,
+	rootNode: SyntaxNode,
+	language: Language,
+): string | null {
+	const entityTypes = ENTITY_NODE_TYPES[language]
+	let current = node.parent
+
+	while (current && current.id !== rootNode.id) {
+		if (entityTypes.includes(current.type)) {
+			// This is a parent entity
+			const name = extractName(current, language)
+			if (name) {
+				return name
+			}
+		}
+		current = current.parent
+	}
+
+	return null
+}
+
 /**
  * Extract entities from an AST tree
  *
+ * Uses tree-sitter queries when available, falling back to node type matching.
+ *
  * @param rootNode - The root node of the AST
  * @param language - The programming language
  * @param code - The source code (for extracting text)
  * @returns Effect yielding extracted entities
- *
- * TODO: Implement entity extraction using tree-sitter queries
  */
 export const extractEntities = (
-	_rootNode: SyntaxNode,
-	_language: Language,
-	_code: string,
+	rootNode: SyntaxNode,
+	language: Language,
+	code: string,
 ): Effect.Effect<ExtractedEntity[], ExtractError> => {
-	// TODO: Implement entity extraction
-	// 1. Load appropriate query for language
-	// 2. Run query on AST
-	// 3. Extract entities from matches
-	// 4. Fall back to node type matching if no query
-	return Effect.succeed([])
+	return Effect.gen(function* () {
+		// Try to load query for this language
+		const queryResult = yield* Effect.either(loadQuery(language))
+
+		if (queryResult._tag === 'Right' && queryResult.right !== null) {
+			// Query loaded successfully - execute it
+			const query = queryResult.right
+
+			const result = executeQueryOnTree(query, rootNode)
+
+			if (result) {
+				// Convert matches to entities
+				const entities = yield* matchesToEntities(
+					result.matches,
+					language,
+					code,
+					rootNode,
+				)
+				return entities
+			}
+		}
+
+		// No query available or query loading failed - use fallback extraction
+		const entities = yield* extractByNodeTypes(rootNode, language, code)
+		return entities
+	}).pipe(
+		Effect.catchAll((error: unknown) =>
+			Effect.fail(
+				new ExtractError(
+					`Entity extraction failed: ${error instanceof Error ? error.message : String(error)}`,
+					error,
+				),
+			),
+		),
+	)
 }
 
 /**
  * Sync version of extractEntities for public API
+ *
+ * Note: This function will use query-based extraction if the query is already cached,
+ * otherwise it falls back to node type matching. For guaranteed query-based extraction,
+ * use extractEntitiesAsync() instead.
+ *
+ * @param rootNode - The root node of the AST
+ * @param language - The programming language
+ * @param code - The source code
+ * @returns Array of extracted entities
  */
 export const extractEntitiesSync = (
-	_rootNode: SyntaxNode,
-	_language: Language,
-	_code: string,
+	rootNode: SyntaxNode,
+	language: Language,
+	code: string,
 ): ExtractedEntity[] => {
-	// TODO: Implement sync wrapper
-	return []
+	// Try to use cached query if available (loadQuerySync returns cached query or null)
+	const cachedQuery = loadQuerySync(language)
+
+	if (cachedQuery) {
+		// Query is cached - use it
+		const result = executeQueryOnTree(cachedQuery, rootNode)
+		if (result) {
+			const effect = matchesToEntities(result.matches, language, code, rootNode)
+			return Effect.runSync(effect)
+		}
+	}
+
+	// No cached query - use fallback extraction
+	const effect = extractByNodeTypes(rootNode, language, code)
+	return Effect.runSync(effect)
 }
+
+/**
+ * Extract entities async (for when query loading might be needed)
+ */
+export const extractEntitiesAsync = async (
+	rootNode: SyntaxNode,
+	language: Language,
+	code: string,
+): Promise<ExtractedEntity[]> => {
+	return Effect.runPromise(extractEntities(rootNode, language, code))
+}
+
+// Re-export useful types and functions
+export type { EntityType, ExtractedEntity } from '../types'
+export { extractDocstring, isDocComment } from './docstring'
+export {
+	ENTITY_NODE_TYPES,
+	extractByNodeTypes,
+	getEntityType,
+	NODE_TYPE_TO_ENTITY_TYPE,
+} from './fallback'
+export type { CompiledQuery, QueryLoadError } from './queries'
+export { clearQueryCache, loadQuery, loadQuerySync } from './queries'
+export { extractName, extractSignature } from './signature'
diff --git a/src/extract/queries.ts b/src/extract/queries.ts
index 530c1ef..c1dcf4c 100644
--- a/src/extract/queries.ts
+++ b/src/extract/queries.ts
@@ -1,5 +1,12 @@
 import { Effect } from 'effect'
-import type { Language } from '../types'
+import {
+	Query,
+	type Language as TSLanguage,
+	type QueryCapture as TSQueryCapture,
+	type QueryMatch as TSQueryMatch,
+} from 'web-tree-sitter'
+import { type GrammarLoadError, getLanguageGrammar } from '../parser/languages'
+import type { Language, SyntaxNode, SyntaxTree } from '../types'
 
 /**
  * Error when loading a tree-sitter query fails
@@ -13,34 +20,717 @@ export class QueryLoadError {
 	) {}
 }
 
+/**
+ * Error when executing a query fails
+ */
+export class QueryExecutionError {
+	readonly _tag = 'QueryExecutionError'
+	constructor(
+		readonly message: string,
+		readonly cause?: unknown,
+	) {}
+}
+
 /**
  * A compiled tree-sitter query
- * TODO: Use actual tree-sitter Query type when implementing
  */
-export type CompiledQuery = unknown
+export type CompiledQuery = Query
+
+/**
+ * A single capture from a query match
+ */
+export interface QueryCapture {
+	/** The capture name (e.g., "name", "item", "context") */
+	name: string
+	/** The captured AST node */
+	node: SyntaxNode
+	/** Pattern index this capture belongs to */
+	patternIndex: number
+}
+
+/**
+ * A complete match from a query, containing all captures from one pattern
+ */
+export interface QueryMatch {
+	/** Pattern index that matched */
+	patternIndex: number
+	/** All captures from this match */
+	captures: QueryCapture[]
+}
+
+/**
+ * Result of executing a query
+ */
+export interface QueryResult {
+	/** All matches from the query */
+	matches: QueryMatch[]
+	/** All captures from the query (flat list) */
+	captures: QueryCapture[]
+}
+
+// =============================================================================
+// Embedded Query Strings
+// These are embedded at build time for portability - no filesystem access needed
+// =============================================================================
+
+const TYPESCRIPT_QUERY = `; TypeScript Entity Extraction Queries
+; Adapted from Zed editor's outline.scm
+; Uses @name for entity names, @item for full entity node, @context for signature context
+
+; Namespaces/Modules
+(internal_module
+    "namespace" @context
+    name: (_) @name) @item
+
+; Enums
+(enum_declaration
+    "enum" @context
+    name: (_) @name) @item
+
+; Type Aliases
+(type_alias_declaration
+    "type" @context
+    name: (_) @name) @item
+
+; Functions
+(function_declaration
+    "async"? @context
+    "function" @context
+    name: (_) @name
+    parameters: (formal_parameters
+      "(" @context
+      ")" @context)) @item
+
+; Generator Functions
+(generator_function_declaration
+    "async"? @context
+    "function" @context
+    "*" @context
+    name: (_) @name
+    parameters: (formal_parameters
+      "(" @context
+      ")" @context)) @item
+
+; Interfaces
+(interface_declaration
+    "interface" @context
+    name: (_) @name) @item
+
+; Exported variable declarations
+(export_statement
+    (lexical_declaration
+        ["let" "const"] @context
+        (variable_declarator
+            name: (identifier) @name) @item))
+
+; Top-level variable declarations
+(program
+    (lexical_declaration
+        ["let" "const"] @context
+        (variable_declarator
+            name: (identifier) @name) @item))
+
+; Classes
+(class_declaration
+    "class" @context
+    name: (_) @name) @item
+
+; Abstract Classes
+(abstract_class_declaration
+    "abstract" @context
+    "class" @context
+    name: (_) @name) @item
+
+; Method definitions in classes
+(class_body
+    (method_definition
+        [
+            "get"
+            "set"
+            "async"
+            "*"
+            "readonly"
+            "static"
+            (override_modifier)
+            (accessibility_modifier)
+        ]* @context
+        name: (_) @name
+        parameters: (formal_parameters
+          "(" @context
+          ")" @context)) @item)
+
+; Public field definitions
+(public_field_definition
+    [
+        "declare"
+        "readonly"
+        "abstract"
+        "static"
+        (accessibility_modifier)
+    ]* @context
+    name: (_) @name) @item
+
+; Arrow functions assigned to variables (exported)
+(export_statement
+    (lexical_declaration
+        ["let" "const"] @context
+        (variable_declarator
+            name: (identifier) @name
+            value: (arrow_function)) @item))
+
+; Arrow functions assigned to variables (top-level)
+(program
+    (lexical_declaration
+        ["let" "const"] @context
+        (variable_declarator
+            name: (identifier) @name
+            value: (arrow_function)) @item))
+
+; Import declarations
+(import_statement) @item
+
+; Export declarations (re-exports)
+(export_statement
+    (export_clause)) @item
+`
+
+const JAVASCRIPT_QUERY = `; JavaScript Entity Extraction Queries
+; Adapted from Zed editor's outline.scm
+; Uses @name for entity names, @item for full entity node, @context for signature context
+
+; Functions
+(function_declaration
+    name: (identifier) @name) @item
+
+; Generator Functions
+(generator_function_declaration
+    name: (identifier) @name) @item
+
+; Classes
+(class_declaration
+    name: (identifier) @name) @item
+
+; Method definitions in classes
+(class_body
+    (method_definition
+        name: (property_identifier) @name) @item)
+
+; Top-level variable declarations
+(program
+    (lexical_declaration
+        (variable_declarator
+            name: (identifier) @name) @item))
+
+; Arrow functions assigned to variables (top-level)
+(program
+    (lexical_declaration
+        (variable_declarator
+            name: (identifier) @name
+            value: (arrow_function)) @item))
+
+; Import declarations
+(import_statement) @item
+
+; Export declarations
+(export_statement) @item
+`
+
+const PYTHON_QUERY = `; Python Entity Extraction Queries
+; Adapted from Zed editor's outline.scm
+; Uses @name for entity names, @item for full entity node, @context for signature context
+
+; Decorators (captured for context)
+(decorator) @annotation
+
+; Classes
+(class_definition
+    name: (identifier) @name) @item
+
+; Functions (including async)
+(function_definition
+    name: (identifier) @name) @item
+
+; Import statements
+(import_statement) @item
+
+; Import from statements
+(import_from_statement) @item
+`
+
+const RUST_QUERY = `; Rust Entity Extraction Queries
+; Uses @name for entity names, @item for full entity node
+
+; Structs
+(struct_item
+    name: (type_identifier) @name) @item
+
+; Enums
+(enum_item
+    name: (type_identifier) @name) @item
+
+; Traits
+(trait_item
+    name: (type_identifier) @name) @item
+
+; Impl blocks
+(impl_item) @item
+
+; Functions
+(function_item
+    name: (identifier) @name) @item
+
+; Modules
+(mod_item
+    name: (identifier) @name) @item
+
+; Type aliases
+(type_item
+    name: (type_identifier) @name) @item
+
+; Constants
+(const_item
+    name: (identifier) @name) @item
+
+; Use statements (imports)
+(use_declaration) @item
+`
+
+const GO_QUERY = `; Go Entity Extraction Queries
+; Adapted from Zed editor's outline.scm
+; Uses @name for entity names, @item for full entity node, @context for signature context
+
+; Comments (for doc extraction)
+(comment) @annotation
+
+; Type declarations
+(type_declaration
+    "type" @context
+    [
+        (type_spec
+            name: (_) @name) @item
+        (
+            "("
+            (type_spec
+                name: (_) @name) @item
+            ")"
+        )
+    ]
+)
+
+; Functions
+(function_declaration
+    "func" @context
+    name: (identifier) @name
+    parameters: (parameter_list
+      "("
+      ")")) @item
+
+; Methods
+(method_declaration
+    "func" @context
+    receiver: (parameter_list
+        "(" @context
+        (parameter_declaration
+            name: (_) @context
+            type: (_) @context)
+        ")" @context)
+    name: (field_identifier) @name
+    parameters: (parameter_list
+      "("
+      ")")) @item
+
+; Constants
+(const_declaration
+    "const" @context
+    (const_spec
+        name: (identifier) @name) @item)
+
+; Top-level variables
+(source_file
+    (var_declaration
+        "var" @context
+        [
+            (var_spec
+                name: (identifier) @name @item)
+            (var_spec_list
+                (var_spec
+                    name: (identifier) @name @item)
+            )
+        ]
+    )
+)
+
+; Interface methods
+(method_elem
+    name: (_) @name
+    parameters: (parameter_list
+      "(" @context
+      ")" @context)) @item
+
+; Struct fields
+(field_declaration
+    name: (_) @name @item)
+
+; Import declarations
+(import_declaration) @item
+
+; Package declaration
+(package_clause
+    "package" @context
+    (package_identifier) @name) @item
+`
+
+const JAVA_QUERY = `; Java Entity Extraction Queries
+; Adapted from nvim-treesitter's locals.scm
+; Uses @name for entity names, @item for full entity node, @context for signature context
+
+; Package declaration
+(package_declaration
+    "package" @context
+    (scoped_identifier) @name) @item
+
+; Import declarations
+(import_declaration) @item
+
+; Classes
+(class_declaration
+    (modifiers)? @context
+    "class" @context
+    name: (identifier) @name) @item
+
+; Interfaces
+(interface_declaration
+    (modifiers)? @context
+    "interface" @context
+    name: (identifier) @name) @item
+
+; Records (Java 14+)
+(record_declaration
+    (modifiers)? @context
+    "record" @context
+    name: (identifier) @name) @item
+
+; Enums
+(enum_declaration
+    (modifiers)? @context
+    "enum" @context
+    name: (identifier) @name) @item
+
+; Enum constants
+(enum_constant
+    name: (identifier) @name) @item
+
+; Annotation types
+(annotation_type_declaration
+    (modifiers)? @context
+    "@interface" @context
+    name: (identifier) @name) @item
+
+; Methods
+(method_declaration
+    (modifiers)? @context
+    type: (_) @context
+    name: (identifier) @name
+    parameters: (formal_parameters
+        "(" @context
+        ")" @context)) @item
+
+; Constructors
+(constructor_declaration
+    (modifiers)? @context
+    name: (identifier) @name
+    parameters: (formal_parameters
+        "(" @context
+        ")" @context)) @item
+
+; Fields
+(field_declaration
+    (modifiers)? @context
+    type: (_) @context
+    declarator: (variable_declarator
+        name: (identifier) @name)) @item
+
+; Static initializer blocks
+(static_initializer
+    "static" @context) @item
+
+; Annotation members (methods in annotations)
+(annotation_type_element_declaration
+    type: (_) @context
+    name: (identifier) @name) @item
+
+; Inner classes
+(class_body
+    (class_declaration
+        (modifiers)? @context
+        "class" @context
+        name: (identifier) @name) @item)
+
+; Inner interfaces
+(class_body
+    (interface_declaration
+        (modifiers)? @context
+        "interface" @context
+        name: (identifier) @name) @item)
+
+; Inner enums
+(class_body
+    (enum_declaration
+        (modifiers)? @context
+        "enum" @context
+        name: (identifier) @name) @item)
+`
+
+/**
+ * Query patterns by language - embedded as strings for portability
+ */
+export const QUERY_PATTERNS: Record<Language, string> = {
+	typescript: TYPESCRIPT_QUERY,
+	javascript: JAVASCRIPT_QUERY,
+	python: PYTHON_QUERY,
+	rust: RUST_QUERY,
+	go: GO_QUERY,
+	java: JAVA_QUERY,
+}
+
+// =============================================================================
+// Query Loading & Caching
+// =============================================================================
+
+/**
+ * Cache for compiled queries by language
+ */
+const queryCache: Map<Language, CompiledQuery> = new Map()
+
+/**
+ * Compile a query string for a specific language
+ *
+ * @param language - The programming language
+ * @param tsLanguage - The loaded tree-sitter language grammar
+ * @param queryString - The query pattern string
+ * @returns The compiled Query
+ */
+function compileQuery(
+	language: Language,
+	tsLanguage: TSLanguage,
+	queryString: string,
+): Effect.Effect<CompiledQuery, QueryLoadError> {
+	return Effect.try({
+		try: () => new Query(tsLanguage, queryString),
+		catch: (error: unknown) =>
+			new QueryLoadError(
+				language,
+				`Failed to compile query: ${error instanceof Error ? error.message : String(error)}`,
+				error,
+			),
+	})
+}
 
 /**
  * Load a tree-sitter query for entity extraction
  *
- * @param language - The programming language to load query for
- * @returns Effect yielding the compiled query, or null if no query exists
+ * Loads and compiles the query for the given language. Queries are cached
+ * after first compilation.
  *
- * TODO: Implement query loading from .scm files
+ * @param language - The programming language to load query for
+ * @returns Effect yielding the compiled query, or null if no query exists for the language
  */
 export const loadQuery = (
-	_language: Language,
-): Effect.Effect<CompiledQuery | null, QueryLoadError> => {
-	// TODO: Implement query loading
-	// 1. Look up query file path for language
-	// 2. Load .scm file contents
-	// 3. Compile query using tree-sitter
-	return Effect.succeed(null)
+	language: Language,
+): Effect.Effect<CompiledQuery | null, QueryLoadError | GrammarLoadError> => {
+	return Effect.gen(function* () {
+		// Check cache first
+		const cached = queryCache.get(language)
+		if (cached) {
+			return cached
+		}
+
+		// Get the query pattern for this language
+		const queryPattern = QUERY_PATTERNS[language]
+		if (!queryPattern) {
+			return null
+		}
+
+		// Load the language grammar
+		const tsLanguage = yield* getLanguageGrammar(language)
+
+		// Compile the query
+		const query = yield* compileQuery(language, tsLanguage, queryPattern)
+
+		// Cache for future use
+		queryCache.set(language, query)
+
+		return query
+	})
+}
+
+/**
+ * Load a query (public async API)
+ *
+ * @param language - The language to load the query for
+ * @returns Promise resolving to the compiled query, or null if no query exists
+ */
+export async function loadQueryAsync(
+	language: Language,
+): Promise<CompiledQuery | null> {
+	return Effect.runPromise(loadQuery(language))
+}
+
+/**
+ * Clear the query cache (useful for testing)
+ */
+export function clearQueryCache(): void {
+	queryCache.clear()
 }
 
 /**
- * Query patterns by language
- * TODO: Populate with actual query patterns
+ * Synchronously load a cached query
+ *
+ * This only returns a query if it's already been compiled and cached.
+ * Use this for sync code paths where you can't await query loading.
+ *
+ * @param language - The language to get the cached query for
+ * @returns The cached query, or null if not cached
+ */
+export function loadQuerySync(language: Language): CompiledQuery | null {
+	return queryCache.get(language) ?? null
+}
+
+// =============================================================================
+// Query Execution
+// =============================================================================
+
+/**
+ * Execute a query against a syntax tree
+ *
+ * @param query - The compiled query to execute
+ * @param tree - The syntax tree to query
+ * @param startNode - Optional node to start querying from (defaults to root)
+ * @returns Effect yielding the query result with matches and captures
+ */
+export const executeQuery = (
+	query: CompiledQuery,
+	tree: SyntaxTree,
+	startNode?: SyntaxNode,
+): Effect.Effect<QueryResult, QueryExecutionError> => {
+	return Effect.try({
+		try: () => {
+			const node = startNode ?? tree.rootNode
+
+			// Execute the query and get all matches
+			const matches = query.matches(node)
+
+			// Convert to our QueryMatch format
+			const queryMatches: QueryMatch[] = matches.map((match: TSQueryMatch) => ({
+				patternIndex: match.patternIndex,
+				captures: match.captures.map((capture: TSQueryCapture) => ({
+					name: capture.name,
+					node: capture.node,
+					patternIndex: match.patternIndex,
+				})),
+			}))
+
+			// Also collect all captures as a flat list
+			const allCaptures: QueryCapture[] = queryMatches.flatMap(
+				(match) => match.captures,
+			)
+
+			return {
+				matches: queryMatches,
+				captures: allCaptures,
+			}
+		},
+		catch: (error: unknown) =>
+			new QueryExecutionError(
+				`Query execution failed: ${error instanceof Error ? error.message : String(error)}`,
+				error,
+			),
+	})
+}
+
+/**
+ * Execute a query and get captures (public async API)
+ *
+ * @param query - The compiled query to execute
+ * @param tree - The syntax tree to query
+ * @param startNode - Optional node to start querying from
+ * @returns Promise resolving to the query result
+ */
+export async function executeQueryAsync(
+	query: CompiledQuery,
+	tree: SyntaxTree,
+	startNode?: SyntaxNode,
+): Promise<QueryResult> {
+	return Effect.runPromise(executeQuery(query, tree, startNode))
+}
+
+// =============================================================================
+// Utility Functions
+// =============================================================================
+
+/**
+ * Get all captures with a specific name from a query result
+ *
+ * @param result - The query result
+ * @param captureName - The capture name to filter by (e.g., "name", "item")
+ * @returns Array of captures matching the name
+ */
+export function getCapturesByName(
+	result: QueryResult,
+	captureName: string,
+): QueryCapture[] {
+	return result.captures.filter((capture) => capture.name === captureName)
+}
+
+/**
+ * Get all matches that have an "item" capture (entity nodes)
+ *
+ * @param result - The query result
+ * @returns Array of matches that contain entity items
+ */
+export function getEntityMatches(result: QueryResult): QueryMatch[] {
+	return result.matches.filter((match) =>
+		match.captures.some((capture) => capture.name === 'item'),
+	)
+}
+
+/**
+ * Extract the entity node and name node from a match
+ *
+ * @param match - A query match
+ * @returns Object with item and name nodes, or null if not found
+ */
+export function extractEntityFromMatch(match: QueryMatch): {
+	itemNode: SyntaxNode
+	nameNode: SyntaxNode | null
+	contextNodes: SyntaxNode[]
+	annotationNodes: SyntaxNode[]
+} | null {
+	const itemCapture = match.captures.find((c) => c.name === 'item')
+	if (!itemCapture) {
+		return null
+	}
+
+	const nameCapture = match.captures.find((c) => c.name === 'name')
+	const contextCaptures = match.captures.filter((c) => c.name === 'context')
+	const annotationCaptures = match.captures.filter(
+		(c) => c.name === 'annotation',
+	)
+
+	return {
+		itemNode: itemCapture.node,
+		nameNode: nameCapture?.node ?? null,
+		contextNodes: contextCaptures.map((c) => c.node),
+		annotationNodes: annotationCaptures.map((c) => c.node),
+	}
+}
+
+/**
+ * Check if a language has a query available
+ *
+ * @param language - The language to check
+ * @returns True if a query is available for the language
  */
-export const QUERY_PATTERNS: Partial<Record<Language, string>> = {
-	// TODO: Add query patterns for each language
+export function hasQueryForLanguage(language: Language): boolean {
+	return language in QUERY_PATTERNS
 }
diff --git a/src/extract/signature.ts b/src/extract/signature.ts
index 612ab0d..60cfb6b 100644
--- a/src/extract/signature.ts
+++ b/src/extract/signature.ts
@@ -1,6 +1,306 @@
 import { Effect } from 'effect'
 import type { EntityType, Language, SyntaxNode } from '../types'
 
+/**
+ * Body delimiters by language - the character that marks the start of the body
+ */
+export const BODY_DELIMITERS: Record<Language, string> = {
+	typescript: '{',
+	javascript: '{',
+	python: ':',
+	rust: '{',
+	go: '{',
+	java: '{',
+}
+
+/**
+ * Node types that represent identifiers/names by language
+ * Order matters - first match wins
+ */
+const NAME_NODE_TYPES: readonly string[] = [
+	'name',
+	'identifier',
+	'type_identifier',
+	'property_identifier',
+]
+
+/**
+ * Extract the name of an entity from its AST node
+ *
+ * @param node - The AST node representing the entity
+ * @param _language - The programming language (unused but kept for consistency)
+ * @returns The entity name, or null if not found
+ */
+export const extractName = (
+	node: SyntaxNode,
+	_language: Language,
+): string | null => {
+	// Try to find a named child that is an identifier
+	for (const nameType of NAME_NODE_TYPES) {
+		const nameNode = node.childForFieldName(nameType)
+		if (nameNode) {
+			return nameNode.text
+		}
+	}
+
+	// Try to find any child with a name-like type
+	for (const child of node.children) {
+		if (NAME_NODE_TYPES.includes(child.type)) {
+			return child.text
+		}
+	}
+
+	// For some languages, try the first identifier child
+	for (const child of node.children) {
+		if (child.type === 'identifier' || child.type === 'type_identifier') {
+			return child.text
+		}
+	}
+
+	return null
+}
+
+/**
+ * Find the position of the body delimiter in a signature
+ *
+ * This handles nested brackets/parens/generics to avoid matching
+ * delimiters inside parameter lists or type annotations.
+ */
+const findBodyDelimiterPos = (text: string, delimiter: string): number => {
+	// Handle nested brackets/parens before the body delimiter
+	let parenDepth = 0
+	let bracketDepth = 0
+	let angleDepth = 0
+	let inString = false
+	let stringChar = ''
+
+	for (let i = 0; i < text.length; i++) {
+		const char = text[i]
+		const prevChar = i > 0 ? text[i - 1] : ''
+
+		// Track string literals to avoid matching inside them
+		if ((char === '"' || char === "'" || char === '`') && prevChar !== '\\') {
+			if (!inString) {
+				inString = true
+				stringChar = char
+			} else if (char === stringChar) {
+				inString = false
+				stringChar = ''
+			}
+			continue
+		}
+
+		if (inString) continue
+
+		// Track nested structures
+		if (char === '(') {
+			parenDepth++
+		} else if (char === ')') {
+			parenDepth--
+		} else if (char === '[') {
+			bracketDepth++
+		} else if (char === ']') {
+			bracketDepth--
+		} else if (char === '<') {
+			// Only count as generic bracket if followed by identifier or another <
+			// This helps avoid matching comparison operators like <, <=, <<
+			const nextChar = text[i + 1] ?? ''
+			if (/[A-Za-z_<]/.test(nextChar) || nextChar === '>' || nextChar === ' ') {
+				angleDepth++
+			}
+		} else if (char === '>' && angleDepth > 0) {
+			// Only decrement if we're tracking angle brackets
+			angleDepth--
+		}
+
+		// Only match delimiter at depth 0
+		if (
+			char === delimiter &&
+			parenDepth === 0 &&
+			bracketDepth === 0 &&
+			angleDepth === 0
+		) {
+			return i
+		}
+	}
+
+	return -1
+}
+
+/**
+ * Node types that represent body/block structures
+ */
+const BODY_NODE_TYPES: readonly string[] = [
+	'block',
+	'statement_block',
+	'class_body',
+	'interface_body',
+	'enum_body',
+]
+
+/**
+ * Try to extract signature using AST body field
+ * Look for 'body' or block-like child and extract everything before it
+ * Returns null if body node not found
+ */
+const tryExtractSignatureFromBody = (
+	node: SyntaxNode,
+	code: string,
+	language: Language,
+): string | null => {
+	// Find the body/block child node
+	const bodyNode =
+		node.childForFieldName('body') ||
+		node.children.find((c) => BODY_NODE_TYPES.includes(c.type))
+
+	if (!bodyNode) {
+		return null
+	}
+
+	// Extract from node start to body start
+	let signature = code.slice(node.startIndex, bodyNode.startIndex).trim()
+
+	// For Python, remove trailing colon
+	if (language === 'python' && signature.endsWith(':')) {
+		signature = signature.slice(0, -1)
+	}
+
+	// For arrow functions, remove trailing =>
+	if (signature.endsWith('=>')) {
+		signature = signature.slice(0, -2).trim()
+	}
+
+	return cleanSignature(signature)
+}
+
+/**
+ * Extract signature for function/method entities
+ * Extract from start to opening brace '{' (or ':' for Python)
+ */
+const extractFunctionSignature = (
+	node: SyntaxNode,
+	language: Language,
+	code: string,
+): string => {
+	// Try AST-based extraction first (more reliable for languages with complex type syntax)
+	const astSignature = tryExtractSignatureFromBody(node, code, language)
+	if (astSignature) {
+		return astSignature
+	}
+
+	// Fallback to text-based extraction
+	const nodeText = code.slice(node.startIndex, node.endIndex)
+	const delimiter = BODY_DELIMITERS[language]
+	const delimPos = findBodyDelimiterPos(nodeText, delimiter)
+
+	if (delimPos === -1) {
+		// No body delimiter found - might be a declaration without body
+		// Return the full node text cleaned up
+		return cleanSignature(nodeText)
+	}
+
+	// Extract up to (but not including) the body delimiter
+	const signature = nodeText.slice(0, delimPos).trim()
+	return cleanSignature(signature)
+}
+
+/**
+ * Extract signature for class/interface entities
+ * Extract the declaration line (up to opening brace or first line)
+ */
+const extractClassSignature = (
+	node: SyntaxNode,
+	language: Language,
+	code: string,
+): string => {
+	// Try AST-based extraction first
+	const astSignature = tryExtractSignatureFromBody(node, code, language)
+	if (astSignature) {
+		return astSignature
+	}
+
+	// Fallback to text-based extraction
+	const nodeText = code.slice(node.startIndex, node.endIndex)
+	const delimiter = BODY_DELIMITERS[language]
+	const delimPos = findBodyDelimiterPos(nodeText, delimiter)
+
+	if (delimPos === -1) {
+		// No body - return first line or full text
+		const firstNewline = nodeText.indexOf('\n')
+		if (firstNewline !== -1) {
+			return cleanSignature(nodeText.slice(0, firstNewline))
+		}
+		return cleanSignature(nodeText)
+	}
+
+	// Extract up to (but not including) the opening brace
+	const signature = nodeText.slice(0, delimPos).trim()
+	return cleanSignature(signature)
+}
+
+/**
+ * Extract signature for type/enum entities
+ * Extract until '=' or '{'
+ */
+const extractTypeSignature = (
+	node: SyntaxNode,
+	language: Language,
+	code: string,
+): string => {
+	const nodeText = code.slice(node.startIndex, node.endIndex)
+
+	// For type aliases, look for '=' first
+	const equalsPos = nodeText.indexOf('=')
+	const bracePos = findBodyDelimiterPos(nodeText, '{')
+	const colonPos =
+		language === 'python' ? findBodyDelimiterPos(nodeText, ':') : -1
+
+	// Find the earliest delimiter
+	let delimPos = -1
+	if (equalsPos !== -1) delimPos = equalsPos
+	if (bracePos !== -1 && (delimPos === -1 || bracePos < delimPos))
+		delimPos = bracePos
+	if (colonPos !== -1 && (delimPos === -1 || colonPos < delimPos))
+		delimPos = colonPos
+
+	if (delimPos === -1) {
+		// No delimiter found - return first line or full text
+		const firstNewline = nodeText.indexOf('\n')
+		if (firstNewline !== -1) {
+			return cleanSignature(nodeText.slice(0, firstNewline))
+		}
+		return cleanSignature(nodeText)
+	}
+
+	const signature = nodeText.slice(0, delimPos).trim()
+	return cleanSignature(signature)
+}
+
+/**
+ * Extract signature for import/export entities
+ * Extract the full statement
+ */
+const extractImportExportSignature = (
+	node: SyntaxNode,
+	code: string,
+): string => {
+	const nodeText = code.slice(node.startIndex, node.endIndex)
+	return cleanSignature(nodeText)
+}
+
+/**
+ * Clean up a signature string:
+ * - Collapse multiple whitespace to single space
+ * - Normalize multi-line to single line
+ * - Trim leading/trailing whitespace
+ */
+const cleanSignature = (signature: string): string => {
+	return signature
+		.replace(/[\r\n]+/g, ' ') // Replace newlines with space
+		.replace(/\s+/g, ' ') // Collapse multiple whitespace
+		.trim()
+}
+
 /**
  * Extract the signature of an entity from its AST node
  *
@@ -9,38 +309,50 @@ import type { EntityType, Language, SyntaxNode } from '../types'
  * @param language - The programming language
  * @param code - The source code
  * @returns Effect yielding the signature string
- *
- * TODO: Implement signature extraction for different entity types
  */
 export const extractSignature = (
-	_node: SyntaxNode,
-	_entityType: EntityType,
-	_language: Language,
-	_code: string,
+	node: SyntaxNode,
+	entityType: EntityType,
+	language: Language,
+	code: string,
 ): Effect.Effect<string, never> => {
-	// TODO: Implement signature extraction
-	// Different strategies based on entity type:
-	// - function: extract until opening brace/colon
-	// - class: extract declaration line
-	// - interface/type: extract until opening brace or =
-	// - import/export: extract full statement
-	return Effect.succeed('')
+	return Effect.sync(() => {
+		switch (entityType) {
+			case 'function':
+			case 'method':
+				return extractFunctionSignature(node, language, code)
+
+			case 'class':
+			case 'interface':
+				return extractClassSignature(node, language, code)
+
+			case 'type':
+			case 'enum':
+				return extractTypeSignature(node, language, code)
+
+			case 'import':
+			case 'export':
+				return extractImportExportSignature(node, code)
+
+			default: {
+				// Fallback: extract first line
+				const nodeText = code.slice(node.startIndex, node.endIndex)
+				const firstNewline = nodeText.indexOf('\n')
+				if (firstNewline !== -1) {
+					return cleanSignature(nodeText.slice(0, firstNewline))
+				}
+				return cleanSignature(nodeText)
+			}
+		}
+	})
 }
 
 /**
- * Extract the name of an entity from its AST node
+ * Get the body delimiter for a language
  *
- * @param node - The AST node representing the entity
  * @param language - The programming language
- * @returns The entity name, or null if not found
- *
- * TODO: Implement name extraction
+ * @returns The character that marks the start of a body block
  */
-export const extractName = (
-	_node: SyntaxNode,
-	_language: Language,
-): string | null => {
-	// TODO: Implement name extraction
-	// Look for identifier/name child nodes based on language
-	return null
+export const getBodyDelimiter = (language: Language): string => {
+	return BODY_DELIMITERS[language]
 }
diff --git a/test/extract.test.ts b/test/extract.test.ts
new file mode 100644
index 0000000..fa5de4d
--- /dev/null
+++ b/test/extract.test.ts
@@ -0,0 +1,838 @@
+import { beforeAll, describe, expect, test } from 'bun:test'
+import { Effect } from 'effect'
+import {
+	clearQueryCache,
+	ENTITY_NODE_TYPES,
+	extractByNodeTypes,
+	extractEntitiesAsync,
+	extractEntitiesSync,
+	getEntityType,
+	loadQuery,
+	loadQuerySync,
+} from '../src/extract'
+import {
+	extractDocstring,
+	isDocComment,
+	parseDocstring,
+} from '../src/extract/docstring'
+import { extractName, extractSignature } from '../src/extract/signature'
+import { initializeParser, parseCode } from '../src/parser'
+import type { Language } from '../src/types'
+
+// ============================================================================
+// Setup
+// ============================================================================
+
+beforeAll(async () => {
+	await initializeParser()
+})
+
+// ============================================================================
+// Query Loading Tests
+// ============================================================================
+
+describe('query loading', () => {
+	beforeAll(() => {
+		clearQueryCache()
+	})
+
+	test('loadQuery loads and caches TypeScript query', async () => {
+		const query = await Effect.runPromise(loadQuery('typescript'))
+		expect(query).not.toBeNull()
+
+		// Second call should return cached
+		const cached = await Effect.runPromise(loadQuery('typescript'))
+		expect(cached).toBe(query)
+	})
+
+	test('loadQuery loads queries for all supported languages', async () => {
+		const languages: Language[] = [
+			'typescript',
+			'javascript',
+			'python',
+			'rust',
+			'go',
+			'java',
+		]
+
+		for (const lang of languages) {
+			const query = await Effect.runPromise(loadQuery(lang))
+			expect(query).not.toBeNull()
+		}
+	})
+
+	test('loadQuerySync returns null when query not cached', () => {
+		clearQueryCache()
+		const query = loadQuerySync('typescript')
+		// Not cached yet, should return null
+		expect(query).toBeNull()
+	})
+
+	test('loadQuerySync returns cached query after loadQuery', async () => {
+		clearQueryCache()
+
+		// First load with async
+		await Effect.runPromise(loadQuery('javascript'))
+
+		// Now sync should return it
+		const cached = loadQuerySync('javascript')
+		expect(cached).not.toBeNull()
+	})
+})
+
+// ============================================================================
+// Sync/Async Behavior Consistency Tests
+// ============================================================================
+
+describe('extractEntities sync/async consistency', () => {
+	test('extractEntitiesSync uses cached query when available', async () => {
+		clearQueryCache()
+
+		const code = `
+function greet(name: string): string {
+  return \`Hello, \${name}!\`
+}
+`
+		const result = await parseCode(code, 'typescript')
+		const rootNode = result.tree.rootNode
+
+		// First, preload the query
+		await Effect.runPromise(loadQuery('typescript'))
+
+		// Now sync should use the cached query
+		const entitiesSync = extractEntitiesSync(rootNode, 'typescript', code)
+
+		// Compare with async version
+		const entitiesAsync = await extractEntitiesAsync(
+			rootNode,
+			'typescript',
+			code,
+		)
+
+		// Both should find the same entities
+		expect(entitiesSync.length).toBe(entitiesAsync.length)
+		expect(entitiesSync.map((e) => e.name)).toEqual(
+			entitiesAsync.map((e) => e.name),
+		)
+	})
+
+	test('extractEntitiesSync falls back to node types when query not cached', () => {
+		clearQueryCache()
+
+		const code = `
+function test() {
+  return 1
+}
+`
+		// Parse synchronously (we need the tree)
+		const parseEffect = Effect.gen(function* () {
+			const result = yield* Effect.tryPromise(() =>
+				parseCode(code, 'typescript'),
+			)
+			return result
+		})
+
+		Effect.runPromise(parseEffect).then((result) => {
+			const rootNode = result.tree.rootNode
+			// With no cached query, should still work via fallback
+			const entities = extractEntitiesSync(rootNode, 'typescript', code)
+			expect(entities.length).toBeGreaterThan(0)
+		})
+	})
+})
+
+// ============================================================================
+// Entity Extraction Tests
+// ============================================================================
+
+describe('extractEntities', () => {
+	test('extracts TypeScript function declaration', async () => {
+		const code = `
+function greet(name: string): string {
+  return \`Hello, \${name}!\`
+}
+`
+		const result = await parseCode(code, 'typescript')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'typescript',
+			code,
+		)
+
+		expect(entities.length).toBeGreaterThan(0)
+		const fn = entities.find((e) => e.name === 'greet')
+		expect(fn).toBeDefined()
+		expect(fn?.type).toBe('function')
+		expect(fn?.signature).toContain('greet')
+	})
+
+	test('extracts TypeScript class with methods', async () => {
+		const code = `
+class Calculator {
+  add(a: number, b: number): number {
+    return a + b
+  }
+
+  subtract(a: number, b: number): number {
+    return a - b
+  }
+}
+`
+		const result = await parseCode(code, 'typescript')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'typescript',
+			code,
+		)
+
+		const cls = entities.find((e) => e.name === 'Calculator')
+		expect(cls).toBeDefined()
+		expect(cls?.type).toBe('class')
+
+		const methods = entities.filter((e) => e.type === 'method')
+		expect(methods.length).toBe(2)
+		expect(methods.map((m) => m.name)).toContain('add')
+		expect(methods.map((m) => m.name)).toContain('subtract')
+	})
+
+	test('extracts TypeScript interface', async () => {
+		const code = `
+interface User {
+  name: string
+  age: number
+}
+`
+		const result = await parseCode(code, 'typescript')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'typescript',
+			code,
+		)
+
+		const iface = entities.find((e) => e.name === 'User')
+		expect(iface).toBeDefined()
+		expect(iface?.type).toBe('interface')
+	})
+
+	test('extracts Python function with docstring', async () => {
+		const code = `
+def greet(name):
+    """Say hello to someone."""
+    return f"Hello, {name}!"
+`
+		const result = await parseCode(code, 'python')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'python',
+			code,
+		)
+
+		const fn = entities.find((e) => e.name === 'greet')
+		expect(fn).toBeDefined()
+		expect(fn?.type).toBe('function')
+		expect(fn?.docstring).toBe('Say hello to someone.')
+	})
+
+	test('extracts Python class', async () => {
+		const code = `
+class Calculator:
+    """A simple calculator."""
+
+    def add(self, a, b):
+        return a + b
+`
+		const result = await parseCode(code, 'python')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'python',
+			code,
+		)
+
+		const cls = entities.find((e) => e.name === 'Calculator')
+		expect(cls).toBeDefined()
+		expect(cls?.type).toBe('class')
+	})
+
+	test('extracts Rust function', async () => {
+		const code = `
+fn add(a: i32, b: i32) -> i32 {
+    a + b
+}
+`
+		const result = await parseCode(code, 'rust')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'rust',
+			code,
+		)
+
+		const fn = entities.find((e) => e.name === 'add')
+		expect(fn).toBeDefined()
+		expect(fn?.type).toBe('function')
+	})
+
+	test('extracts Go function', async () => {
+		const code = `
+package main
+
+func add(a, b int) int {
+    return a + b
+}
+`
+		const result = await parseCode(code, 'go')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'go',
+			code,
+		)
+
+		const fn = entities.find((e) => e.name === 'add')
+		expect(fn).toBeDefined()
+		expect(fn?.type).toBe('function')
+	})
+
+	test('extracts Java class and method', async () => {
+		const code = `
+public class Calculator {
+    public int add(int a, int b) {
+        return a + b;
+    }
+}
+`
+		const result = await parseCode(code, 'java')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'java',
+			code,
+		)
+
+		const cls = entities.find((e) => e.name === 'Calculator')
+		expect(cls).toBeDefined()
+		expect(cls?.type).toBe('class')
+	})
+
+	test('tracks parent relationships for nested entities', async () => {
+		const code = `
+class Outer {
+  inner() {
+    return 1
+  }
+}
+`
+		const result = await parseCode(code, 'typescript')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'typescript',
+			code,
+		)
+
+		const method = entities.find((e) => e.name === 'inner')
+		expect(method?.parent).toBe('Outer')
+	})
+})
+
+// ============================================================================
+// Fallback Extraction Tests (Iterative Walk)
+// ============================================================================
+
+describe('fallback extraction (iterative)', () => {
+	test('handles deeply nested code without stack overflow', async () => {
+		// Generate deeply nested functions (more reliable nesting)
+		let code = ''
+		const depth = 50
+
+		for (let i = 0; i < depth; i++) {
+			code += `function level${i}() {\n`
+		}
+		code += 'return 1\n'
+		for (let i = 0; i < depth; i++) {
+			code += '}\n'
+		}
+
+		const result = await parseCode(code, 'typescript')
+
+		// Should not throw stack overflow
+		const entities = await Effect.runPromise(
+			extractByNodeTypes(result.tree.rootNode, 'typescript', code),
+		)
+
+		// Should find nested functions (exact count may vary based on nesting support)
+		const functions = entities.filter((e) => e.type === 'function')
+		expect(functions.length).toBeGreaterThan(0)
+		// At minimum the outer function should be found
+		expect(functions.some((f) => f.name === 'level0')).toBe(true)
+	})
+
+	test('extractByNodeTypes extracts entities correctly', async () => {
+		const code = `
+function foo() { return 1 }
+class Bar {
+  baz() { return 2 }
+}
+`
+		const result = await parseCode(code, 'typescript')
+		const entities = await Effect.runPromise(
+			extractByNodeTypes(result.tree.rootNode, 'typescript', code),
+		)
+
+		expect(entities.find((e) => e.name === 'foo')).toBeDefined()
+		expect(entities.find((e) => e.name === 'Bar')).toBeDefined()
+		expect(entities.find((e) => e.name === 'baz')).toBeDefined()
+	})
+
+	test('getEntityType maps node types correctly', () => {
+		expect(getEntityType('function_declaration')).toBe('function')
+		expect(getEntityType('method_definition')).toBe('method')
+		expect(getEntityType('class_declaration')).toBe('class')
+		expect(getEntityType('interface_declaration')).toBe('interface')
+		expect(getEntityType('unknown_type')).toBeNull()
+	})
+
+	test('ENTITY_NODE_TYPES contains all supported languages', () => {
+		const languages: Language[] = [
+			'typescript',
+			'javascript',
+			'python',
+			'rust',
+			'go',
+			'java',
+		]
+
+		for (const lang of languages) {
+			expect(ENTITY_NODE_TYPES[lang]).toBeDefined()
+			expect(ENTITY_NODE_TYPES[lang].length).toBeGreaterThan(0)
+		}
+	})
+})
+
+// ============================================================================
+// Signature Extraction Tests
+// ============================================================================
+
+describe('signature extraction', () => {
+	test('extracts TypeScript function signature', async () => {
+		const code = `function greet(name: string): string {
+  return \`Hello, \${name}!\`
+}`
+		const result = await parseCode(code, 'typescript')
+		const fnNode = result.tree.rootNode.namedChildren[0]
+
+		const signature = await Effect.runPromise(
+			extractSignature(fnNode, 'function', 'typescript', code),
+		)
+
+		expect(signature).toBe('function greet(name: string): string')
+	})
+
+	test('extracts Python function signature (stops at colon)', async () => {
+		const code = `def greet(name):
+    return f"Hello, {name}!"`
+		const result = await parseCode(code, 'python')
+		const fnNode = result.tree.rootNode.namedChildren[0]
+
+		const signature = await Effect.runPromise(
+			extractSignature(fnNode, 'function', 'python', code),
+		)
+
+		expect(signature).toBe('def greet(name)')
+	})
+
+	test('handles generic type parameters correctly', async () => {
+		const code = `function identity<T>(arg: T): T {
+  return arg
+}`
+		const result = await parseCode(code, 'typescript')
+		const fnNode = result.tree.rootNode.namedChildren[0]
+
+		const signature = await Effect.runPromise(
+			extractSignature(fnNode, 'function', 'typescript', code),
+		)
+
+		// Should include the generic parameter
+		expect(signature).toContain('<T>')
+		expect(signature).toContain('identity')
+	})
+
+	test('handles comparison operators in signatures (angle bracket fix)', async () => {
+		// This tests that < in comparisons doesn't break generic tracking
+		const code = `function compare(a: number, b: number): boolean {
+  return a < b
+}`
+		const result = await parseCode(code, 'typescript')
+		const fnNode = result.tree.rootNode.namedChildren[0]
+
+		const signature = await Effect.runPromise(
+			extractSignature(fnNode, 'function', 'typescript', code),
+		)
+
+		// Should extract signature correctly without being confused by < in body
+		expect(signature).toBe('function compare(a: number, b: number): boolean')
+	})
+
+	test('extracts class signature', async () => {
+		const code = `class Calculator extends Base implements ICalc {
+  add(a: number, b: number): number {
+    return a + b
+  }
+}`
+		const result = await parseCode(code, 'typescript')
+		const classNode = result.tree.rootNode.namedChildren[0]
+
+		const signature = await Effect.runPromise(
+			extractSignature(classNode, 'class', 'typescript', code),
+		)
+
+		expect(signature).toContain('class Calculator')
+		expect(signature).toContain('extends Base')
+		expect(signature).toContain('implements ICalc')
+	})
+
+	test('cleans multi-line signatures to single line', async () => {
+		const code = `function multiLine(
+  param1: string,
+  param2: number,
+  param3: boolean
+): void {
+  console.log(param1)
+}`
+		const result = await parseCode(code, 'typescript')
+		const fnNode = result.tree.rootNode.namedChildren[0]
+
+		const signature = await Effect.runPromise(
+			extractSignature(fnNode, 'function', 'typescript', code),
+		)
+
+		// Should not contain newlines
+		expect(signature).not.toContain('\n')
+		// Should have all params
+		expect(signature).toContain('param1')
+		expect(signature).toContain('param2')
+		expect(signature).toContain('param3')
+	})
+
+	test('extractName finds identifier in node', async () => {
+		const code = `function greet() { return 1 }`
+		const result = await parseCode(code, 'typescript')
+		const fnNode = result.tree.rootNode.namedChildren[0]
+
+		const name = extractName(fnNode, 'typescript')
+		expect(name).toBe('greet')
+	})
+})
+
+// ============================================================================
+// Docstring Extraction Tests
+// ============================================================================
+
+describe('docstring extraction', () => {
+	test('extracts JSDoc for TypeScript function', async () => {
+		const code = `/**
+ * Greet someone by name.
+ * @param name The name to greet
+ */
+function greet(name: string): string {
+  return \`Hello, \${name}!\`
+}`
+		const result = await parseCode(code, 'typescript')
+		const fnNode = result.tree.rootNode.namedChildren[1] // Skip comment, get function
+
+		const docstring = await Effect.runPromise(
+			extractDocstring(fnNode, 'typescript', code),
+		)
+
+		expect(docstring).toContain('Greet someone by name')
+		expect(docstring).toContain('@param name')
+	})
+
+	test('extracts Python docstring from function body', async () => {
+		const code = `def greet(name):
+    """
+    Say hello to someone.
+
+    Args:
+        name: The person to greet
+    """
+    return f"Hello, {name}!"`
+		const result = await parseCode(code, 'python')
+		const fnNode = result.tree.rootNode.namedChildren[0]
+
+		const docstring = await Effect.runPromise(
+			extractDocstring(fnNode, 'python', code),
+		)
+
+		expect(docstring).toContain('Say hello to someone')
+		expect(docstring).toContain('Args:')
+	})
+
+	test('extracts Rust doc comment', async () => {
+		const code = `/// Add two numbers together.
+/// Returns the sum.
+fn add(a: i32, b: i32) -> i32 {
+    a + b
+}`
+		const result = await parseCode(code, 'rust')
+		// Find the function node
+		const fnNode = result.tree.rootNode.namedChildren.find(
+			(n) => n.type === 'function_item',
+		)
+
+		if (fnNode) {
+			const docstring = await Effect.runPromise(
+				extractDocstring(fnNode, 'rust', code),
+			)
+
+			expect(docstring).toContain('Add two numbers')
+		}
+	})
+
+	test('extracts Go comment', async () => {
+		const code = `// Add returns the sum of a and b.
+func Add(a, b int) int {
+    return a + b
+}`
+		const result = await parseCode(code, 'go')
+		const fnNode = result.tree.rootNode.namedChildren.find(
+			(n) => n.type === 'function_declaration',
+		)
+
+		if (fnNode) {
+			const docstring = await Effect.runPromise(
+				extractDocstring(fnNode, 'go', code),
+			)
+
+			expect(docstring).toContain('Add returns the sum')
+		}
+	})
+
+	test('extracts Javadoc', async () => {
+		const code = `/**
+ * Add two integers.
+ * @param a First number
+ * @param b Second number
+ * @return The sum
+ */
+public int add(int a, int b) {
+    return a + b;
+}`
+		const result = await parseCode(code, 'java')
+		const methodNode = result.tree.rootNode.namedChildren.find(
+			(n) => n.type === 'method_declaration',
+		)
+
+		if (methodNode) {
+			const docstring = await Effect.runPromise(
+				extractDocstring(methodNode, 'java', code),
+			)
+
+			expect(docstring).toContain('Add two integers')
+		}
+	})
+
+	test('returns null when no docstring present', async () => {
+		const code = `function noDoc() { return 1 }`
+		const result = await parseCode(code, 'typescript')
+		const fnNode = result.tree.rootNode.namedChildren[0]
+
+		const docstring = await Effect.runPromise(
+			extractDocstring(fnNode, 'typescript', code),
+		)
+
+		expect(docstring).toBeNull()
+	})
+})
+
+// ============================================================================
+// isDocComment Tests
+// ============================================================================
+
+describe('isDocComment', () => {
+	test('recognizes JSDoc comments', () => {
+		expect(isDocComment('/** This is JSDoc */', 'typescript')).toBe(true)
+		expect(isDocComment('/* Regular comment */', 'typescript')).toBe(false)
+		expect(isDocComment('// Line comment', 'typescript')).toBe(false)
+	})
+
+	test('recognizes Python docstrings', () => {
+		expect(isDocComment('"""Docstring"""', 'python')).toBe(true)
+		expect(isDocComment("'''Docstring'''", 'python')).toBe(true)
+		expect(isDocComment('r"""Raw docstring"""', 'python')).toBe(true)
+		expect(isDocComment('# Comment', 'python')).toBe(false)
+	})
+
+	test('recognizes Rust doc comments', () => {
+		expect(isDocComment('/// Doc comment', 'rust')).toBe(true)
+		expect(isDocComment('//! Inner doc', 'rust')).toBe(true)
+		expect(isDocComment('// Regular comment', 'rust')).toBe(false)
+	})
+
+	test('recognizes Go comments', () => {
+		// Go considers any // comment before a declaration as doc
+		expect(isDocComment('// Comment', 'go')).toBe(true)
+	})
+
+	test('recognizes Javadoc', () => {
+		expect(isDocComment('/** Javadoc */', 'java')).toBe(true)
+		expect(isDocComment('/* Block comment */', 'java')).toBe(false)
+	})
+})
+
+// ============================================================================
+// parseDocstring Tests
+// ============================================================================
+
+describe('parseDocstring', () => {
+	test('parses JSDoc and removes markers', () => {
+		const input = `/**
+ * This is a description.
+ * @param name The name
+ */`
+		const parsed = parseDocstring(input, 'typescript')
+
+		expect(parsed).not.toContain('/**')
+		expect(parsed).not.toContain('*/')
+		expect(parsed).toContain('This is a description')
+		expect(parsed).toContain('@param name')
+	})
+
+	test('parses Python docstring and dedents', () => {
+		const input = `"""
+    This is indented.
+    So is this.
+    """`
+		const parsed = parseDocstring(input, 'python')
+
+		expect(parsed).not.toContain('"""')
+		expect(parsed).toContain('This is indented')
+		// Should be dedented
+		expect(parsed).not.toMatch(/^\s{4}This/)
+	})
+
+	test('parses Rust doc comments and removes ///', () => {
+		const input = `/// First line.
+/// Second line.`
+		const parsed = parseDocstring(input, 'rust')
+
+		expect(parsed).not.toContain('///')
+		expect(parsed).toContain('First line')
+		expect(parsed).toContain('Second line')
+	})
+
+	test('parses Go comments and removes //', () => {
+		const input = `// First line.
+// Second line.`
+		const parsed = parseDocstring(input, 'go')
+
+		expect(parsed).not.toContain('//')
+		expect(parsed).toContain('First line')
+		expect(parsed).toContain('Second line')
+	})
+})
+
+// ============================================================================
+// Edge Cases
+// ============================================================================
+
+describe('extraction edge cases', () => {
+	test('handles anonymous functions via variable declaration', async () => {
+		// Note: anonymous functions themselves aren't extracted as entities,
+		// but top-level variable declarations are
+		const code = `const fn = function() { return 1 }`
+		const result = await parseCode(code, 'typescript')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'typescript',
+			code,
+		)
+
+		// Query extracts top-level const declarations
+		// If no entities found, that's acceptable - the function is anonymous
+		// What matters is it doesn't crash
+		expect(Array.isArray(entities)).toBe(true)
+	})
+
+	test('handles arrow functions via variable declaration', async () => {
+		// Arrow functions assigned to const are extracted as the variable
+		const code = `const add = (a: number, b: number) => a + b`
+		const result = await parseCode(code, 'typescript')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'typescript',
+			code,
+		)
+
+		// Queries should capture top-level const with arrow function value
+		// The entity would be named 'add' (the variable name)
+		expect(Array.isArray(entities)).toBe(true)
+	})
+
+	test('handles arrow functions - no crash', async () => {
+		// Arrow functions may or may not be extracted depending on query patterns
+		// The key is the system handles them without crashing
+		const code = `const add = (a: number, b: number) => a + b`
+		const result = await parseCode(code, 'typescript')
+
+		// Should not throw
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'typescript',
+			code,
+		)
+
+		// Result should be an array (may be empty if arrow function isn't captured)
+		expect(Array.isArray(entities)).toBe(true)
+	})
+
+	test('handles async functions', async () => {
+		const code = `async function fetchData(): Promise<string> {
+  return await fetch('/api')
+}`
+		const result = await parseCode(code, 'typescript')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'typescript',
+			code,
+		)
+
+		const fn = entities.find((e) => e.name === 'fetchData')
+		expect(fn).toBeDefined()
+		expect(fn?.signature).toContain('async')
+	})
+
+	test('handles export declarations', async () => {
+		const code = `export function publicFn() { return 1 }
+export default function defaultFn() { return 2 }`
+		const result = await parseCode(code, 'typescript')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'typescript',
+			code,
+		)
+
+		expect(entities.length).toBeGreaterThan(0)
+	})
+
+	test('handles empty file', async () => {
+		const code = ''
+		const result = await parseCode(code, 'typescript')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'typescript',
+			code,
+		)
+
+		expect(entities).toEqual([])
+	})
+
+	test('handles file with only comments', async () => {
+		const code = `// Just a comment
+/* Another comment */`
+		const result = await parseCode(code, 'typescript')
+		const entities = await extractEntitiesAsync(
+			result.tree.rootNode,
+			'typescript',
+			code,
+		)
+
+		expect(entities).toEqual([])
+	})
+})

From b3e0cf2b6197e96d2d84198d0978f3e92ab7d1bd Mon Sep 17 00:00:00 2001
From: Shoubhit Dash <shoubhit2005@gmail.com>
Date: Wed, 17 Dec 2025 01:36:20 +0530
Subject: [PATCH 2/2] fix: remove non-null assertions in rebuild.ts

---
 src/chunking/rebuild.ts | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/src/chunking/rebuild.ts b/src/chunking/rebuild.ts
index 6769fed..660b1d3 100644
--- a/src/chunking/rebuild.ts
+++ b/src/chunking/rebuild.ts
@@ -85,8 +85,15 @@ export const rebuildText = (window: ASTWindow, code: string): RebuiltText => {
 
 	// Normal case: slice from first node start to last node end
 	// Use startPosition/endPosition from nodes for optimized line calculation
-	const firstNode = window.nodes[0]!
-	const lastNode = window.nodes[window.nodes.length - 1]!
+	const firstNode = window.nodes[0]
+	const lastNode = window.nodes[window.nodes.length - 1]
+	if (!firstNode || !lastNode) {
+		return {
+			text: '',
+			byteRange: { start: 0, end: 0 },
+			lineRange: { start: 0, end: 0 },
+		}
+	}
 
 	const startByte = firstNode.startIndex
 	const endByte = lastNode.endIndex
@@ -114,12 +121,26 @@ const rebuildFromLineRanges = (
 	window: ASTWindow,
 	code: string,
 ): RebuiltText => {
-	const lineRanges = window.lineRanges!
+	const lineRanges = window.lineRanges
+	if (!lineRanges || lineRanges.length === 0) {
+		return {
+			text: '',
+			byteRange: { start: 0, end: 0 },
+			lineRange: { start: 0, end: 0 },
+		}
+	}
 	const lineStarts = buildLineStartsTable(code)
 
 	// Get the overall line range
-	const firstRange = lineRanges[0]!
-	const lastRange = lineRanges[lineRanges.length - 1]!
+	const firstRange = lineRanges[0]
+	const lastRange = lineRanges[lineRanges.length - 1]
+	if (!firstRange || !lastRange) {
+		return {
+			text: '',
+			byteRange: { start: 0, end: 0 },
+			lineRange: { start: 0, end: 0 },
+		}
+	}
 	const startLine = firstRange.start
 	const endLine = lastRange.end
 
@@ -127,7 +148,9 @@ const rebuildFromLineRanges = (
 	const startByte = lineStarts[startLine] ?? 0
 	// End byte is start of line after endLine, or end of file
 	const endByte =
-		endLine + 1 < lineStarts.length ? lineStarts[endLine + 1]! : code.length
+		endLine + 1 < lineStarts.length
+			? (lineStarts[endLine + 1] ?? code.length)
+			: code.length
 
 	const text = code.slice(startByte, endByte)