diff --git a/.github/meta/commit.txt b/.github/meta/commit.txt index 0476c2cb7..051068257 100644 --- a/.github/meta/commit.txt +++ b/.github/meta/commit.txt @@ -1,15 +1,29 @@ -feat: add task intent classification telemetry event +fix: tool reliability improvements for sql-classify, edit, and webfetch (#581) -Add `task_classified` event emitted at session start with keyword/regex -classification of the first user message. Categories: debug_dbt, write_sql, -optimize_query, build_model, analyze_lineage, explore_schema, migrate_sql, -manage_warehouse, finops, general. +**sql-classify.ts:** +- Fix `computeSqlFingerprint` referencing undefined `core` variable after + safe-import refactor — extract `extractMetadata` as module-level guard +- Invert fallback classifier to whitelist reads (`READ_PATTERN`) instead of + blacklisting writes — treats unknown statements as "write" for safety +- Handle multi-statement SQL in fallback by splitting on semicolons +- Strip `--` line comments in fallback (block comments already stripped) +- Fix `HARD_DENY_PATTERN` trailing `\s` → `\b` to match `TRUNCATE;` -- `classifyTaskIntent()` — pure regex matcher, zero LLM cost, <1ms -- Includes warehouse type from fingerprint cache -- Strong/weak confidence levels (1.0 vs 0.5) -- 15 unit tests covering all intent categories + edge cases +**edit.ts:** +- Add `buildNotFoundMessage` with Levenshtein nearest-match snippets for + LLM self-correction when `oldString` not found +- Fix substring matching to prefer exact equality over short-line matches -Closes AI-6029 +**webfetch.ts:** +- Add session-level URL failure cache (404/410/451) with 5-min TTL +- Add `buildFetchError` with actionable status-specific error messages +- Add `sanitizeUrl` to strip query strings from error messages +- Add URL validation via `new URL()` constructor +- Add `MAX_CACHED_URLS = 500` size cap with oldest-entry eviction + +**Tests:** 12 new tests for `buildNotFoundMessage`, `replace` error +messages, `computeSqlFingerprint`, and updated webfetch assertions. + +Closes #581 Co-Authored-By: Claude Opus 4.6 (1M context) diff --git a/packages/opencode/src/altimate/tools/sql-classify.ts b/packages/opencode/src/altimate/tools/sql-classify.ts index db3c3a318..363f6ba47 100644 --- a/packages/opencode/src/altimate/tools/sql-classify.ts +++ b/packages/opencode/src/altimate/tools/sql-classify.ts @@ -2,27 +2,69 @@ // // Uses altimate-core's AST-based getStatementTypes() for accurate classification. // Handles CTEs, string literals, procedural blocks, all dialects correctly. +// Falls back to regex-based heuristics if the napi binary fails to load. -// eslint-disable-next-line @typescript-eslint/no-explicit-any -const core: any = require("@altimateai/altimate-core") +// Safe import: napi binary may not be available on all platforms +let getStatementTypes: ((sql: string, dialect?: string | null) => any) | null = null +let extractMetadata: ((sql: string) => any) | null = null +try { + // eslint-disable-next-line @typescript-eslint/no-require-imports + const core = require("@altimateai/altimate-core") + if (typeof core?.getStatementTypes === "function") { + getStatementTypes = core.getStatementTypes + } + if (typeof core?.extractMetadata === "function") { + extractMetadata = core.extractMetadata + } +} catch { + // napi binary failed to load — will use regex fallback +} -// Categories from altimate-core that indicate write operations -const WRITE_CATEGORIES = new Set(["dml", "ddl", "dcl", "tcl"]) // Only SELECT queries are known safe. "other" (SHOW, SET, USE, etc.) is ambiguous — prompt for permission. const READ_CATEGORIES = new Set(["query"]) // Hard-deny patterns — blocked regardless of permissions const HARD_DENY_TYPES = new Set(["DROP DATABASE", "DROP SCHEMA", "TRUNCATE", "TRUNCATE TABLE"]) +// Regex fallback: conservative — only known-safe reads are whitelisted, everything else is "write" +const READ_PATTERN = /^\s*(SELECT|WITH|SHOW|EXPLAIN|DESCRIBE|DESC)\b/i +const HARD_DENY_PATTERN = + /^\s*(DROP\s+(DATABASE|SCHEMA)\b|TRUNCATE(\s+TABLE)?\b)/i + +/** + * Regex-based fallback classifier for when altimate-core is unavailable. + * Conservative: treats anything not clearly a SELECT/WITH/SHOW/EXPLAIN as "write". + * Handles multi-statement SQL by splitting on semicolons and checking each statement. + */ +function classifyFallback(sql: string): { queryType: "read" | "write"; blocked: boolean } { + const cleaned = sql + .replace(/\/\*[\s\S]*?\*\//g, "") // block comments + .replace(/--[^\n]*/g, "") // line comments + const statements = cleaned.split(";").map(s => s.trim()).filter(Boolean) + if (statements.length === 0) return { queryType: "read", blocked: false } + let queryType: "read" | "write" = "read" + let blocked = false + for (const stmt of statements) { + if (HARD_DENY_PATTERN.test(stmt)) blocked = true + if (!READ_PATTERN.test(stmt)) queryType = "write" + } + return { queryType, blocked } +} + /** * Classify a SQL string as "read" or "write" using AST parsing. * If ANY statement is a write, returns "write". */ export function classify(sql: string): "read" | "write" { - const result = core.getStatementTypes(sql) - if (!result?.categories?.length) return "read" - // Treat unknown categories (not in WRITE or READ sets) as write to fail safe - return result.categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read" + if (!sql || typeof sql !== "string") return "read" + if (!getStatementTypes) return classifyFallback(sql).queryType + try { + const result = getStatementTypes(sql) + if (!result?.categories?.length) return "read" + return result.categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read" + } catch { + return classifyFallback(sql).queryType + } } /** @@ -38,17 +80,23 @@ export function classifyMulti(sql: string): "read" | "write" { * Returns both the overall query type and whether a hard-deny pattern was found. */ export function classifyAndCheck(sql: string): { queryType: "read" | "write"; blocked: boolean } { - const result = core.getStatementTypes(sql) - if (!result?.statements?.length) return { queryType: "read", blocked: false } + if (!sql || typeof sql !== "string") return { queryType: "read", blocked: false } + if (!getStatementTypes) return classifyFallback(sql) + try { + const result = getStatementTypes(sql) + if (!result?.statements?.length) return { queryType: "read", blocked: false } - const blocked = result.statements.some((s: { statement_type: string }) => - s.statement_type && HARD_DENY_TYPES.has(s.statement_type.toUpperCase()), - ) + const blocked = result.statements.some( + (s: { statement_type: string }) => + s.statement_type && HARD_DENY_TYPES.has(s.statement_type.toUpperCase()), + ) - const categories = result.categories ?? [] - // Unknown categories (not in WRITE or READ sets) are treated as write to fail safe - const queryType = categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read" - return { queryType: queryType as "read" | "write", blocked } + const categories = result.categories ?? [] + const queryType = categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read" + return { queryType: queryType as "read" | "write", blocked } + } catch { + return classifyFallback(sql) + } } // altimate_change start — SQL structure fingerprint for telemetry (no content, only shape) @@ -66,9 +114,10 @@ export interface SqlFingerprint { /** Compute a PII-safe structural fingerprint of a SQL query. * Uses altimate-core AST parsing — local, no API calls, ~1-5ms. */ export function computeSqlFingerprint(sql: string): SqlFingerprint | null { + if (!getStatementTypes || !extractMetadata) return null try { - const stmtResult = core.getStatementTypes(sql) - const meta = core.extractMetadata(sql) + const stmtResult = getStatementTypes(sql) + const meta = extractMetadata(sql) return { statement_types: stmtResult?.types ?? [], categories: stmtResult?.categories ?? [], diff --git a/packages/opencode/src/tool/edit.ts b/packages/opencode/src/tool/edit.ts index 005e0941c..390de8381 100644 --- a/packages/opencode/src/tool/edit.ts +++ b/packages/opencode/src/tool/edit.ts @@ -629,6 +629,76 @@ export function trimDiff(diff: string): string { return trimmedLines.join("\n") } +/** + * Build a helpful error message when oldString isn't found. + * Includes a snippet of the closest-matching region so the model can self-correct. + */ +export function buildNotFoundMessage(content: string, oldString: string): string { + const base = "Could not find oldString in the file." + + // Find the first line of oldString and search for it in the file + const firstLine = oldString.split("\n")[0].trim() + if (!firstLine) return base + " The oldString appears to be empty or whitespace-only." + + const contentLines = content.split("\n") + let bestLine = -1 + let bestScore = 0 + + // Search for the line with highest similarity to the first line of oldString + for (let i = 0; i < contentLines.length; i++) { + const trimmed = contentLines[i].trim() + if (!trimmed) continue + + // Skip very short lines — they produce false similarity matches + const minLen = Math.min(trimmed.length, firstLine.length) + if (minLen < 4) continue + + // Exact equality is best — break immediately + if (trimmed === firstLine) { + bestLine = i + bestScore = 1 + break + } + + // Substring match — strong score but keep searching for exact match + if (trimmed.includes(firstLine) || firstLine.includes(trimmed)) { + if (bestScore < 0.99) { + bestLine = i + bestScore = 0.99 + } + continue + } + + // Skip if lengths are too different (>3x ratio) — not a meaningful comparison + const maxLen = Math.max(trimmed.length, firstLine.length) + if (minLen * 3 < maxLen) continue + + // Levenshtein similarity for close matches + const score = 1 - levenshtein(trimmed, firstLine) / maxLen + if (score > bestScore && score > 0.6) { + bestScore = score + bestLine = i + } + } + + if (bestLine === -1) { + return base + ` The first line of your oldString ("${firstLine.slice(0, 80)}") was not found anywhere in the file. Re-read the file before editing.` + } + + // Show a small window around the best match + const start = Math.max(0, bestLine - 1) + const end = Math.min(contentLines.length, bestLine + 4) + const snippet = contentLines + .slice(start, end) + .map((l, i) => ` ${start + i + 1} | ${l}`) + .join("\n") + + return ( + base + + ` A similar line was found at line ${bestLine + 1}. The file may have changed since you last read it.\n\nNearest match:\n${snippet}\n\nRe-read the file and use the exact current content for oldString.` + ) +} + export function replace(content: string, oldString: string, newString: string, replaceAll = false): string { if (oldString === newString) { throw new Error("No changes to apply: oldString and newString are identical.") @@ -661,9 +731,7 @@ export function replace(content: string, oldString: string, newString: string, r } if (notFound) { - throw new Error( - "Could not find oldString in the file. It must match exactly, including whitespace, indentation, and line endings.", - ) + throw new Error(buildNotFoundMessage(content, oldString)) } throw new Error("Found multiple matches for oldString. Provide more surrounding context to make the match unique.") } diff --git a/packages/opencode/src/tool/webfetch.ts b/packages/opencode/src/tool/webfetch.ts index 6c2d0a3ff..9ca924b33 100644 --- a/packages/opencode/src/tool/webfetch.ts +++ b/packages/opencode/src/tool/webfetch.ts @@ -15,6 +15,67 @@ const BROWSER_UA = // Status codes that warrant a retry with a different User-Agent const RETRYABLE_STATUSES = new Set([403, 406]) +// altimate_change start — session-level URL failure cache (#471) +// Prevents repeated fetches to URLs that already returned 404/410 in this session. +const failedUrls = new Map() +const FAILURE_CACHE_TTL = 5 * 60 * 1000 // 5 minutes + +function isUrlCachedFailure(url: string): { status: number } | null { + const entry = failedUrls.get(url) + if (!entry) return null + if (Date.now() - entry.timestamp > FAILURE_CACHE_TTL) { + failedUrls.delete(url) + return null + } + return { status: entry.status } +} + +const MAX_CACHED_URLS = 500 + +function cacheUrlFailure(url: string, status: number): void { + if (status === 404 || status === 410 || status === 451) { + if (failedUrls.size >= MAX_CACHED_URLS) { + // Evict oldest entry (Map preserves insertion order) + const oldest = failedUrls.keys().next().value + if (oldest) failedUrls.delete(oldest) + } + failedUrls.set(url, { status, timestamp: Date.now() }) + } +} + +/** Strip query string from URL to avoid leaking auth tokens in error messages. */ +function sanitizeUrl(url: string): string { + try { + const parsed = new URL(url) + return parsed.origin + parsed.pathname + } catch { + return url.split("?")[0] + } +} + +/** Build an actionable error message so the model knows whether to retry. */ +function buildFetchError(url: string, status: number, headers?: Headers): string { + const safe = sanitizeUrl(url) + switch (status) { + case 404: + return `HTTP 404: ${safe} does not exist. Do NOT retry this URL — it will fail again. Try a different URL or search for the correct page.` + case 410: + return `HTTP 410: ${safe} has been permanently removed. Do NOT retry. Find an alternative resource.` + case 403: + return `HTTP 403: Access to ${safe} is forbidden. The server rejected both bot and browser User-Agents. Try a different source.` + case 429: { + const retryAfter = headers?.get("retry-after") + const wait = retryAfter ? ` (retry after ${retryAfter})` : "" + return `HTTP 429: Rate limited by ${new URL(url).hostname}${wait}. Wait before fetching from this domain again, or use a different source.` + } + case 451: + return `HTTP 451: ${safe} is unavailable for legal reasons. Do NOT retry.` + default: + return `HTTP ${status}: Request to ${safe} failed. This may be transient — retry once if needed.` + } +} +// altimate_change end + export const WebFetchTool = Tool.define("webfetch", { description: DESCRIPTION, parameters: z.object({ @@ -26,10 +87,22 @@ export const WebFetchTool = Tool.define("webfetch", { timeout: z.number().describe("Optional timeout in seconds (max 120)").optional(), }), async execute(params, ctx) { - // Validate URL + // altimate_change start — URL validation and failure cache (#471) if (!params.url.startsWith("http://") && !params.url.startsWith("https://")) { throw new Error("URL must start with http:// or https://") } + try { + new URL(params.url) + } catch { + throw new Error(`Invalid URL: "${params.url.slice(0, 200)}" is not a valid URL. Check the format and try again.`) + } + + // Check failure cache — avoid re-fetching URLs that already returned 404/410 + const cached = isUrlCachedFailure(params.url) + if (cached) { + throw new Error(buildFetchError(params.url, cached.status)) + } + // altimate_change end await ctx.ask({ permission: "webfetch", @@ -83,19 +156,12 @@ export const WebFetchTool = Tool.define("webfetch", { response = await fetch(params.url, { signal, headers: browserHeaders }) } + // altimate_change start — actionable error messages and failure caching (#471) if (!response.ok) { - // altimate_change start — include URL domain in error for easier triage - let domain: string - try { - domain = new URL(params.url).hostname - } catch { - domain = params.url.slice(0, 60) - } - throw new Error( - `Request failed with status code: ${response.status} (${domain})`, - ) - // altimate_change end + cacheUrlFailure(params.url, response.status) + throw new Error(buildFetchError(params.url, response.status, response.headers)) } + // altimate_change end // Check content length const contentLength = response.headers.get("content-length") diff --git a/packages/opencode/test/altimate/tools/sql-classify.test.ts b/packages/opencode/test/altimate/tools/sql-classify.test.ts index 45d69d249..8c9136d24 100644 --- a/packages/opencode/test/altimate/tools/sql-classify.test.ts +++ b/packages/opencode/test/altimate/tools/sql-classify.test.ts @@ -1,5 +1,5 @@ import { describe, expect, test } from "bun:test" -import { classify, classifyMulti, classifyAndCheck } from "../../../src/altimate/tools/sql-classify" +import { classify, classifyMulti, classifyAndCheck, computeSqlFingerprint } from "../../../src/altimate/tools/sql-classify" describe("classify", () => { // --- Read queries --- @@ -254,3 +254,42 @@ describe("sql-execute permission flow", () => { expect(blocked).toBe(true) }) }) + +describe("computeSqlFingerprint", () => { + test("returns fingerprint for valid SQL", () => { + const result = computeSqlFingerprint("SELECT id, name FROM users WHERE active = true") + // If napi is available, we get a real fingerprint; if not, null + if (result !== null) { + expect(result.statement_types).toBeInstanceOf(Array) + expect(result.categories).toBeInstanceOf(Array) + expect(typeof result.table_count).toBe("number") + expect(typeof result.function_count).toBe("number") + expect(typeof result.has_subqueries).toBe("boolean") + expect(typeof result.has_aggregation).toBe("boolean") + expect(typeof result.has_window_functions).toBe("boolean") + expect(typeof result.node_count).toBe("number") + } + }) + + test("returns null for empty string", () => { + const result = computeSqlFingerprint("") + // Either null (no napi) or a valid result with empty arrays + if (result !== null) { + expect(result.statement_types).toBeInstanceOf(Array) + } + }) + + test("does not throw for invalid SQL", () => { + // Should return null or a partial result, never throw + expect(() => computeSqlFingerprint("NOT VALID SQL !!@#$")).not.toThrow() + }) + + test("returns null when napi unavailable (graceful degradation)", () => { + // This tests the guard clause — computeSqlFingerprint returns null + // when getStatementTypes/extractMetadata are null. + // In test env with napi available, this would return a real result. + const result = computeSqlFingerprint("SELECT 1") + // Either a valid result (napi loaded) or null (napi unavailable) — both are correct + expect(result === null || typeof result === "object").toBe(true) + }) +}) diff --git a/packages/opencode/test/tool/edit.test.ts b/packages/opencode/test/tool/edit.test.ts index b0ee95ff6..fcc9ac350 100644 --- a/packages/opencode/test/tool/edit.test.ts +++ b/packages/opencode/test/tool/edit.test.ts @@ -2,6 +2,7 @@ import { describe, test, expect } from "bun:test" import path from "path" import fs from "fs/promises" import { EditTool } from "../../src/tool/edit" +import { buildNotFoundMessage, replace } from "../../src/tool/edit" import { Instance } from "../../src/project/instance" import { tmpdir } from "../fixture/fixture" import { FileTime } from "../../src/file/time" @@ -635,6 +636,65 @@ describe("tool.edit", () => { }) }) + describe("buildNotFoundMessage", () => { + test("returns empty/whitespace message for blank oldString", () => { + const msg = buildNotFoundMessage("hello world", " \n ") + expect(msg).toContain("empty or whitespace-only") + }) + + test("returns not-found message when no similar line exists", () => { + const content = "function foo() {\n return 1\n}" + const msg = buildNotFoundMessage(content, "zzzzCompletely_unrelated_string_12345") + expect(msg).toContain("was not found anywhere") + expect(msg).toContain("Re-read the file") + }) + + test("finds exact match and shows snippet", () => { + const content = "line one\nconst a = 1;\nline three\nline four" + const msg = buildNotFoundMessage(content, "const a = 1;\nconst b = 2;") + expect(msg).toContain("similar line was found at line 2") + expect(msg).toContain("const a = 1;") + }) + + test("prefers exact equality over substring match on short lines", () => { + // "const" is a substring of "const a = 1;" but should not win over exact match + const content = "const\nfoo bar\nconst a = 1;\nmore stuff" + const msg = buildNotFoundMessage(content, "const a = 1;\nconst b = 2;") + // Should find line 3 (exact match of first line) not line 1 (short substring "const") + expect(msg).toContain("similar line was found at line 3") + }) + + test("finds Levenshtein-similar line", () => { + const content = "function foo() {\n return 42\n}\nfunction bar() {\n return 99\n}" + // Slightly misspelled — "rreturn" instead of "return" + const msg = buildNotFoundMessage(content, " rreturn 42") + expect(msg).toContain("similar line was found") + expect(msg).toContain("return 42") + }) + + test("skips very short lines to avoid false matches", () => { + const content = "}\n{\na\nconst realTarget = true" + const msg = buildNotFoundMessage(content, "const realTarget = false") + expect(msg).toContain("similar line was found at line 4") + }) + }) + + describe("replace error messages", () => { + test("shows nearest match when oldString not found", () => { + const content = "function hello() {\n return 'world'\n}" + expect(() => replace(content, "function helo() {", "function hello_world() {")).toThrow( + /similar line was found/, + ) + }) + + test("shows not-found message when completely unrelated", () => { + const content = "function hello() {\n return 'world'\n}" + expect(() => replace(content, "zzz_completely_unrelated_99", "new")).toThrow( + /was not found anywhere/, + ) + }) + }) + describe("concurrent editing", () => { test("serializes concurrent edits to same file", async () => { await using tmp = await tmpdir() diff --git a/packages/opencode/test/tool/webfetch.test.ts b/packages/opencode/test/tool/webfetch.test.ts index 02d887624..aefe78606 100644 --- a/packages/opencode/test/tool/webfetch.test.ts +++ b/packages/opencode/test/tool/webfetch.test.ts @@ -167,7 +167,7 @@ describe("tool.webfetch", () => { fn: async () => { const webfetch = await WebFetchTool.init() expect(webfetch.execute({ url: "https://example.com/page", format: "text" }, ctx)).rejects.toThrow( - "Request failed with status code: 500", + "HTTP 500: Request to https://example.com/page failed. This may be transient — retry once if needed.", ) expect(calls.length).toBe(1) expect(calls[0]).toContain("altimate-code") @@ -214,7 +214,7 @@ describe("tool.webfetch", () => { fn: async () => { const webfetch = await WebFetchTool.init() expect(webfetch.execute({ url: "https://example.com/blocked", format: "text" }, ctx)).rejects.toThrow( - "Request failed with status code: 403", + "HTTP 403: Access to https://example.com/blocked is forbidden. The server rejected both bot and browser User-Agents. Try a different source.", ) expect(calls.length).toBe(2) expect(calls[0]).toContain("altimate-code")