AltimateAI
diff --git a/‎.github/meta/commit.txt‎
Lines changed: 24 additions & 10 deletions b/‎.github/meta/commit.txt‎
Lines changed: 24 additions & 10 deletions
diff --git a/‎packages/opencode/src/altimate/tools/sql-classify.ts‎
Lines changed: 68 additions & 19 deletions b/‎packages/opencode/src/altimate/tools/sql-classify.ts‎
Lines changed: 68 additions & 19 deletions
diff --git a/‎packages/opencode/src/tool/edit.ts‎
Lines changed: 71 additions & 3 deletions b/‎packages/opencode/src/tool/edit.ts‎
Lines changed: 71 additions & 3 deletions
diff --git a/‎packages/opencode/src/tool/webfetch.ts‎
Lines changed: 78 additions & 12 deletions b/‎packages/opencode/src/tool/webfetch.ts‎
Lines changed: 78 additions & 12 deletions
@@ -1,15 +1,29 @@
-feat: add task intent classification telemetry event
+fix: tool reliability improvements for sql-classify, edit, and webfetch (#581)
 
-Add `task_classified` event emitted at session start with keyword/regex
-classification of the first user message. Categories: debug_dbt, write_sql,
-optimize_query, build_model, analyze_lineage, explore_schema, migrate_sql,
-manage_warehouse, finops, general.
+**sql-classify.ts:**
+- Fix `computeSqlFingerprint` referencing undefined `core` variable after
+  safe-import refactor — extract `extractMetadata` as module-level guard
+- Invert fallback classifier to whitelist reads (`READ_PATTERN`) instead of
+  blacklisting writes — treats unknown statements as "write" for safety
+- Handle multi-statement SQL in fallback by splitting on semicolons
+- Strip `--` line comments in fallback (block comments already stripped)
+- Fix `HARD_DENY_PATTERN` trailing `\s` → `\b` to match `TRUNCATE;`
 
-- `classifyTaskIntent()` — pure regex matcher, zero LLM cost, <1ms
-- Includes warehouse type from fingerprint cache
-- Strong/weak confidence levels (1.0 vs 0.5)
-- 15 unit tests covering all intent categories + edge cases
+**edit.ts:**
+- Add `buildNotFoundMessage` with Levenshtein nearest-match snippets for
+  LLM self-correction when `oldString` not found
+- Fix substring matching to prefer exact equality over short-line matches
 
-Closes AI-6029
+**webfetch.ts:**
+- Add session-level URL failure cache (404/410/451) with 5-min TTL
+- Add `buildFetchError` with actionable status-specific error messages
+- Add `sanitizeUrl` to strip query strings from error messages
+- Add URL validation via `new URL()` constructor
+- Add `MAX_CACHED_URLS = 500` size cap with oldest-entry eviction
+
+**Tests:** 12 new tests for `buildNotFoundMessage`, `replace` error
+messages, `computeSqlFingerprint`, and updated webfetch assertions.
+
+Closes #581
 
 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
@@ -2,27 +2,69 @@
 //
 // Uses altimate-core's AST-based getStatementTypes() for accurate classification.
 // Handles CTEs, string literals, procedural blocks, all dialects correctly.
+// Falls back to regex-based heuristics if the napi binary fails to load.
 
-// eslint-disable-next-line @typescript-eslint/no-explicit-any
-const core: any = require("@altimateai/altimate-core")
+// Safe import: napi binary may not be available on all platforms
+let getStatementTypes: ((sql: string, dialect?: string | null) => any) | null = null
+let extractMetadata: ((sql: string) => any) | null = null
+try {
+  // eslint-disable-next-line @typescript-eslint/no-require-imports
+  const core = require("@altimateai/altimate-core")
+  if (typeof core?.getStatementTypes === "function") {
+    getStatementTypes = core.getStatementTypes
+  }
+  if (typeof core?.extractMetadata === "function") {
+    extractMetadata = core.extractMetadata
+  }
+} catch {
+  // napi binary failed to load — will use regex fallback
+}
 
-// Categories from altimate-core that indicate write operations
-const WRITE_CATEGORIES = new Set(["dml", "ddl", "dcl", "tcl"])
 // Only SELECT queries are known safe. "other" (SHOW, SET, USE, etc.) is ambiguous — prompt for permission.
 const READ_CATEGORIES = new Set(["query"])
 
 // Hard-deny patterns — blocked regardless of permissions
 const HARD_DENY_TYPES = new Set(["DROP DATABASE", "DROP SCHEMA", "TRUNCATE", "TRUNCATE TABLE"])
 
+// Regex fallback: conservative — only known-safe reads are whitelisted, everything else is "write"
+const READ_PATTERN = /^\s*(SELECT|WITH|SHOW|EXPLAIN|DESCRIBE|DESC)\b/i
+const HARD_DENY_PATTERN =
+  /^\s*(DROP\s+(DATABASE|SCHEMA)\b|TRUNCATE(\s+TABLE)?\b)/i
+
+/**
+ * Regex-based fallback classifier for when altimate-core is unavailable.
+ * Conservative: treats anything not clearly a SELECT/WITH/SHOW/EXPLAIN as "write".
+ * Handles multi-statement SQL by splitting on semicolons and checking each statement.
+ */
+function classifyFallback(sql: string): { queryType: "read" | "write"; blocked: boolean } {
+  const cleaned = sql
+    .replace(/\/\*[\s\S]*?\*\//g, "") // block comments
+    .replace(/--[^\n]*/g, "")          // line comments
+  const statements = cleaned.split(";").map(s => s.trim()).filter(Boolean)
+  if (statements.length === 0) return { queryType: "read", blocked: false }
+  let queryType: "read" | "write" = "read"
+  let blocked = false
+  for (const stmt of statements) {
+    if (HARD_DENY_PATTERN.test(stmt)) blocked = true
+    if (!READ_PATTERN.test(stmt)) queryType = "write"
+  }
+  return { queryType, blocked }
+}
+
 /**
  * Classify a SQL string as "read" or "write" using AST parsing.
  * If ANY statement is a write, returns "write".
  */
 export function classify(sql: string): "read" | "write" {
-  const result = core.getStatementTypes(sql)
-  if (!result?.categories?.length) return "read"
-  // Treat unknown categories (not in WRITE or READ sets) as write to fail safe
-  return result.categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read"
+  if (!sql || typeof sql !== "string") return "read"
+  if (!getStatementTypes) return classifyFallback(sql).queryType
+  try {
+    const result = getStatementTypes(sql)
+    if (!result?.categories?.length) return "read"
+    return result.categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read"
+  } catch {
+    return classifyFallback(sql).queryType
+  }
 }
 
 /**
@@ -38,17 +80,23 @@ export function classifyMulti(sql: string): "read" | "write" {
  * Returns both the overall query type and whether a hard-deny pattern was found.
  */
 export function classifyAndCheck(sql: string): { queryType: "read" | "write"; blocked: boolean } {
-  const result = core.getStatementTypes(sql)
-  if (!result?.statements?.length) return { queryType: "read", blocked: false }
+  if (!sql || typeof sql !== "string") return { queryType: "read", blocked: false }
+  if (!getStatementTypes) return classifyFallback(sql)
+  try {
+    const result = getStatementTypes(sql)
+    if (!result?.statements?.length) return { queryType: "read", blocked: false }
 
-  const blocked = result.statements.some((s: { statement_type: string }) =>
-    s.statement_type && HARD_DENY_TYPES.has(s.statement_type.toUpperCase()),
-  )
+    const blocked = result.statements.some(
+      (s: { statement_type: string }) =>
+        s.statement_type && HARD_DENY_TYPES.has(s.statement_type.toUpperCase()),
+    )
 
-  const categories = result.categories ?? []
-  // Unknown categories (not in WRITE or READ sets) are treated as write to fail safe
-  const queryType = categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read"
-  return { queryType: queryType as "read" | "write", blocked }
+    const categories = result.categories ?? []
+    const queryType = categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read"
+    return { queryType: queryType as "read" | "write", blocked }
+  } catch {
+    return classifyFallback(sql)
+  }
 }
 
 // altimate_change start — SQL structure fingerprint for telemetry (no content, only shape)
@@ -66,9 +114,10 @@ export interface SqlFingerprint {
 /** Compute a PII-safe structural fingerprint of a SQL query.
  *  Uses altimate-core AST parsing — local, no API calls, ~1-5ms. */
 export function computeSqlFingerprint(sql: string): SqlFingerprint | null {
+  if (!getStatementTypes || !extractMetadata) return null
   try {
-    const stmtResult = core.getStatementTypes(sql)
-    const meta = core.extractMetadata(sql)
+    const stmtResult = getStatementTypes(sql)
+    const meta = extractMetadata(sql)
     return {
       statement_types: stmtResult?.types ?? [],
       categories: stmtResult?.categories ?? [],
 
@@ -629,6 +629,76 @@ export function trimDiff(diff: string): string {
   return trimmedLines.join("\n")
 }
 
+/**
+ * Build a helpful error message when oldString isn't found.
+ * Includes a snippet of the closest-matching region so the model can self-correct.
+ */
+export function buildNotFoundMessage(content: string, oldString: string): string {
+  const base = "Could not find oldString in the file."
+
+  // Find the first line of oldString and search for it in the file
+  const firstLine = oldString.split("\n")[0].trim()
+  if (!firstLine) return base + " The oldString appears to be empty or whitespace-only."
+
+  const contentLines = content.split("\n")
+  let bestLine = -1
+  let bestScore = 0
+
+  // Search for the line with highest similarity to the first line of oldString
+  for (let i = 0; i < contentLines.length; i++) {
+    const trimmed = contentLines[i].trim()
+    if (!trimmed) continue
+
+    // Skip very short lines — they produce false similarity matches
+    const minLen = Math.min(trimmed.length, firstLine.length)
+    if (minLen < 4) continue
+
+    // Exact equality is best — break immediately
+    if (trimmed === firstLine) {
+      bestLine = i
+      bestScore = 1
+      break
+    }
+
+    // Substring match — strong score but keep searching for exact match
+    if (trimmed.includes(firstLine) || firstLine.includes(trimmed)) {
+      if (bestScore < 0.99) {
+        bestLine = i
+        bestScore = 0.99
+      }
+      continue
+    }
+
+    // Skip if lengths are too different (>3x ratio) — not a meaningful comparison
+    const maxLen = Math.max(trimmed.length, firstLine.length)
+    if (minLen * 3 < maxLen) continue
+
+    // Levenshtein similarity for close matches
+    const score = 1 - levenshtein(trimmed, firstLine) / maxLen
+    if (score > bestScore && score > 0.6) {
+      bestScore = score
+      bestLine = i
+    }
+  }
+
+  if (bestLine === -1) {
+    return base + ` The first line of your oldString ("${firstLine.slice(0, 80)}") was not found anywhere in the file. Re-read the file before editing.`
+  }
+
+  // Show a small window around the best match
+  const start = Math.max(0, bestLine - 1)
+  const end = Math.min(contentLines.length, bestLine + 4)
+  const snippet = contentLines
+    .slice(start, end)
+    .map((l, i) => `  ${start + i + 1} | ${l}`)
+    .join("\n")
+
+  return (
+    base +
+    ` A similar line was found at line ${bestLine + 1}. The file may have changed since you last read it.\n\nNearest match:\n${snippet}\n\nRe-read the file and use the exact current content for oldString.`
+  )
+}
+
 export function replace(content: string, oldString: string, newString: string, replaceAll = false): string {
   if (oldString === newString) {
     throw new Error("No changes to apply: oldString and newString are identical.")
@@ -661,9 +731,7 @@ export function replace(content: string, oldString: string, newString: string, r
   }
 
   if (notFound) {
-    throw new Error(
-      "Could not find oldString in the file. It must match exactly, including whitespace, indentation, and line endings.",
-    )
+    throw new Error(buildNotFoundMessage(content, oldString))
   }
   throw new Error("Found multiple matches for oldString. Provide more surrounding context to make the match unique.")
 }
@@ -15,6 +15,67 @@ const BROWSER_UA =
 // Status codes that warrant a retry with a different User-Agent
 const RETRYABLE_STATUSES = new Set([403, 406])
 
+// altimate_change start — session-level URL failure cache (#471)
+// Prevents repeated fetches to URLs that already returned 404/410 in this session.
+const failedUrls = new Map<string, { status: number; timestamp: number }>()
+const FAILURE_CACHE_TTL = 5 * 60 * 1000 // 5 minutes
+
+function isUrlCachedFailure(url: string): { status: number } | null {
+  const entry = failedUrls.get(url)
+  if (!entry) return null
+  if (Date.now() - entry.timestamp > FAILURE_CACHE_TTL) {
+    failedUrls.delete(url)
+    return null
+  }
+  return { status: entry.status }
+}
+
+const MAX_CACHED_URLS = 500
+
+function cacheUrlFailure(url: string, status: number): void {
+  if (status === 404 || status === 410 || status === 451) {
+    if (failedUrls.size >= MAX_CACHED_URLS) {
+      // Evict oldest entry (Map preserves insertion order)
+      const oldest = failedUrls.keys().next().value
+      if (oldest) failedUrls.delete(oldest)
+    }
+    failedUrls.set(url, { status, timestamp: Date.now() })
+  }
+}
+
+/** Strip query string from URL to avoid leaking auth tokens in error messages. */
+function sanitizeUrl(url: string): string {
+  try {
+    const parsed = new URL(url)
+    return parsed.origin + parsed.pathname
+  } catch {
+    return url.split("?")[0]
+  }
+}
+
+/** Build an actionable error message so the model knows whether to retry. */
+function buildFetchError(url: string, status: number, headers?: Headers): string {
+  const safe = sanitizeUrl(url)
+  switch (status) {
+    case 404:
+      return `HTTP 404: ${safe} does not exist. Do NOT retry this URL — it will fail again. Try a different URL or search for the correct page.`
+    case 410:
+      return `HTTP 410: ${safe} has been permanently removed. Do NOT retry. Find an alternative resource.`
+    case 403:
+      return `HTTP 403: Access to ${safe} is forbidden. The server rejected both bot and browser User-Agents. Try a different source.`
+    case 429: {
+      const retryAfter = headers?.get("retry-after")
+      const wait = retryAfter ? ` (retry after ${retryAfter})` : ""
+      return `HTTP 429: Rate limited by ${new URL(url).hostname}${wait}. Wait before fetching from this domain again, or use a different source.`
+    }
+    case 451:
+      return `HTTP 451: ${safe} is unavailable for legal reasons. Do NOT retry.`
+    default:
+      return `HTTP ${status}: Request to ${safe} failed. This may be transient — retry once if needed.`
+  }
+}
+// altimate_change end
+
 export const WebFetchTool = Tool.define("webfetch", {
   description: DESCRIPTION,
   parameters: z.object({
@@ -26,10 +87,22 @@ export const WebFetchTool = Tool.define("webfetch", {
     timeout: z.number().describe("Optional timeout in seconds (max 120)").optional(),
   }),
   async execute(params, ctx) {
-    // Validate URL
+    // altimate_change start — URL validation and failure cache (#471)
     if (!params.url.startsWith("http://") && !params.url.startsWith("https://")) {
       throw new Error("URL must start with http:// or https://")
     }
+    try {
+      new URL(params.url)
+    } catch {
+      throw new Error(`Invalid URL: "${params.url.slice(0, 200)}" is not a valid URL. Check the format and try again.`)
+    }
+
+    // Check failure cache — avoid re-fetching URLs that already returned 404/410
+    const cached = isUrlCachedFailure(params.url)
+    if (cached) {
+      throw new Error(buildFetchError(params.url, cached.status))
+    }
+    // altimate_change end
 
     await ctx.ask({
       permission: "webfetch",
@@ -83,19 +156,12 @@ export const WebFetchTool = Tool.define("webfetch", {
         response = await fetch(params.url, { signal, headers: browserHeaders })
       }
 
+      // altimate_change start — actionable error messages and failure caching (#471)
       if (!response.ok) {
-        // altimate_change start — include URL domain in error for easier triage
-        let domain: string
-        try {
-          domain = new URL(params.url).hostname
-        } catch {
-          domain = params.url.slice(0, 60)
-        }
-        throw new Error(
-          `Request failed with status code: ${response.status} (${domain})`,
-        )
-        // altimate_change end
+        cacheUrlFailure(params.url, response.status)
+        throw new Error(buildFetchError(params.url, response.status, response.headers))
       }
+      // altimate_change end
 
       // Check content length
       const contentLength = response.headers.get("content-length")