Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions .github/meta/commit.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,29 @@
feat: add task intent classification telemetry event
fix: tool reliability improvements for sql-classify, edit, and webfetch (#581)

Add `task_classified` event emitted at session start with keyword/regex
classification of the first user message. Categories: debug_dbt, write_sql,
optimize_query, build_model, analyze_lineage, explore_schema, migrate_sql,
manage_warehouse, finops, general.
**sql-classify.ts:**
- Fix `computeSqlFingerprint` referencing undefined `core` variable after
safe-import refactor — extract `extractMetadata` as module-level guard
- Invert fallback classifier to whitelist reads (`READ_PATTERN`) instead of
blacklisting writes — treats unknown statements as "write" for safety
- Handle multi-statement SQL in fallback by splitting on semicolons
- Strip `--` line comments in fallback (block comments already stripped)
- Fix `HARD_DENY_PATTERN` trailing `\s` → `\b` to match `TRUNCATE;`

- `classifyTaskIntent()` — pure regex matcher, zero LLM cost, <1ms
- Includes warehouse type from fingerprint cache
- Strong/weak confidence levels (1.0 vs 0.5)
- 15 unit tests covering all intent categories + edge cases
**edit.ts:**
- Add `buildNotFoundMessage` with Levenshtein nearest-match snippets for
LLM self-correction when `oldString` not found
- Fix substring matching to prefer exact equality over short-line matches

Closes AI-6029
**webfetch.ts:**
- Add session-level URL failure cache (404/410/451) with 5-min TTL
- Add `buildFetchError` with actionable status-specific error messages
- Add `sanitizeUrl` to strip query strings from error messages
- Add URL validation via `new URL()` constructor
- Add `MAX_CACHED_URLS = 500` size cap with oldest-entry eviction

**Tests:** 12 new tests for `buildNotFoundMessage`, `replace` error
messages, `computeSqlFingerprint`, and updated webfetch assertions.

Closes #581

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
87 changes: 68 additions & 19 deletions packages/opencode/src/altimate/tools/sql-classify.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,69 @@
//
// Uses altimate-core's AST-based getStatementTypes() for accurate classification.
// Handles CTEs, string literals, procedural blocks, all dialects correctly.
// Falls back to regex-based heuristics if the napi binary fails to load.

// eslint-disable-next-line @typescript-eslint/no-explicit-any
const core: any = require("@altimateai/altimate-core")
// Safe import: napi binary may not be available on all platforms
let getStatementTypes: ((sql: string, dialect?: string | null) => any) | null = null
let extractMetadata: ((sql: string) => any) | null = null
try {
// eslint-disable-next-line @typescript-eslint/no-require-imports
const core = require("@altimateai/altimate-core")
if (typeof core?.getStatementTypes === "function") {
getStatementTypes = core.getStatementTypes
}
if (typeof core?.extractMetadata === "function") {
extractMetadata = core.extractMetadata
}
} catch {
// napi binary failed to load — will use regex fallback
}

// Categories from altimate-core that indicate write operations
const WRITE_CATEGORIES = new Set(["dml", "ddl", "dcl", "tcl"])
// Only SELECT queries are known safe. "other" (SHOW, SET, USE, etc.) is ambiguous — prompt for permission.
const READ_CATEGORIES = new Set(["query"])

// Hard-deny patterns — blocked regardless of permissions
const HARD_DENY_TYPES = new Set(["DROP DATABASE", "DROP SCHEMA", "TRUNCATE", "TRUNCATE TABLE"])

// Regex fallback: conservative — only known-safe reads are whitelisted, everything else is "write"
const READ_PATTERN = /^\s*(SELECT|WITH|SHOW|EXPLAIN|DESCRIBE|DESC)\b/i
const HARD_DENY_PATTERN =
/^\s*(DROP\s+(DATABASE|SCHEMA)\b|TRUNCATE(\s+TABLE)?\b)/i

/**
* Regex-based fallback classifier for when altimate-core is unavailable.
* Conservative: treats anything not clearly a SELECT/WITH/SHOW/EXPLAIN as "write".
* Handles multi-statement SQL by splitting on semicolons and checking each statement.
*/
function classifyFallback(sql: string): { queryType: "read" | "write"; blocked: boolean } {
const cleaned = sql
.replace(/\/\*[\s\S]*?\*\//g, "") // block comments
.replace(/--[^\n]*/g, "") // line comments
const statements = cleaned.split(";").map(s => s.trim()).filter(Boolean)
if (statements.length === 0) return { queryType: "read", blocked: false }
let queryType: "read" | "write" = "read"
let blocked = false
for (const stmt of statements) {
if (HARD_DENY_PATTERN.test(stmt)) blocked = true
if (!READ_PATTERN.test(stmt)) queryType = "write"
}
return { queryType, blocked }
}

/**
* Classify a SQL string as "read" or "write" using AST parsing.
* If ANY statement is a write, returns "write".
*/
export function classify(sql: string): "read" | "write" {
const result = core.getStatementTypes(sql)
if (!result?.categories?.length) return "read"
// Treat unknown categories (not in WRITE or READ sets) as write to fail safe
return result.categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read"
if (!sql || typeof sql !== "string") return "read"
if (!getStatementTypes) return classifyFallback(sql).queryType
try {
const result = getStatementTypes(sql)
if (!result?.categories?.length) return "read"
return result.categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read"
} catch {
return classifyFallback(sql).queryType
}
}

/**
Expand All @@ -38,17 +80,23 @@ export function classifyMulti(sql: string): "read" | "write" {
* Returns both the overall query type and whether a hard-deny pattern was found.
*/
export function classifyAndCheck(sql: string): { queryType: "read" | "write"; blocked: boolean } {
const result = core.getStatementTypes(sql)
if (!result?.statements?.length) return { queryType: "read", blocked: false }
if (!sql || typeof sql !== "string") return { queryType: "read", blocked: false }
if (!getStatementTypes) return classifyFallback(sql)
try {
const result = getStatementTypes(sql)
if (!result?.statements?.length) return { queryType: "read", blocked: false }

const blocked = result.statements.some((s: { statement_type: string }) =>
s.statement_type && HARD_DENY_TYPES.has(s.statement_type.toUpperCase()),
)
const blocked = result.statements.some(
(s: { statement_type: string }) =>
s.statement_type && HARD_DENY_TYPES.has(s.statement_type.toUpperCase()),
)

const categories = result.categories ?? []
// Unknown categories (not in WRITE or READ sets) are treated as write to fail safe
const queryType = categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read"
return { queryType: queryType as "read" | "write", blocked }
const categories = result.categories ?? []
const queryType = categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read"
return { queryType: queryType as "read" | "write", blocked }
} catch {
return classifyFallback(sql)
}
}

// altimate_change start — SQL structure fingerprint for telemetry (no content, only shape)
Expand All @@ -66,9 +114,10 @@ export interface SqlFingerprint {
/** Compute a PII-safe structural fingerprint of a SQL query.
* Uses altimate-core AST parsing — local, no API calls, ~1-5ms. */
export function computeSqlFingerprint(sql: string): SqlFingerprint | null {
if (!getStatementTypes || !extractMetadata) return null
try {
const stmtResult = core.getStatementTypes(sql)
const meta = core.extractMetadata(sql)
const stmtResult = getStatementTypes(sql)
const meta = extractMetadata(sql)
return {
statement_types: stmtResult?.types ?? [],
categories: stmtResult?.categories ?? [],
Expand Down
74 changes: 71 additions & 3 deletions packages/opencode/src/tool/edit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,76 @@ export function trimDiff(diff: string): string {
return trimmedLines.join("\n")
}

/**
* Build a helpful error message when oldString isn't found.
* Includes a snippet of the closest-matching region so the model can self-correct.
*/
export function buildNotFoundMessage(content: string, oldString: string): string {
const base = "Could not find oldString in the file."

// Find the first line of oldString and search for it in the file
const firstLine = oldString.split("\n")[0].trim()
if (!firstLine) return base + " The oldString appears to be empty or whitespace-only."

const contentLines = content.split("\n")
let bestLine = -1
let bestScore = 0

// Search for the line with highest similarity to the first line of oldString
for (let i = 0; i < contentLines.length; i++) {
const trimmed = contentLines[i].trim()
if (!trimmed) continue

// Skip very short lines — they produce false similarity matches
const minLen = Math.min(trimmed.length, firstLine.length)
if (minLen < 4) continue

// Exact equality is best — break immediately
if (trimmed === firstLine) {
bestLine = i
bestScore = 1
break
}

// Substring match — strong score but keep searching for exact match
if (trimmed.includes(firstLine) || firstLine.includes(trimmed)) {
if (bestScore < 0.99) {
bestLine = i
bestScore = 0.99
}
continue
}

// Skip if lengths are too different (>3x ratio) — not a meaningful comparison
const maxLen = Math.max(trimmed.length, firstLine.length)
if (minLen * 3 < maxLen) continue

// Levenshtein similarity for close matches
const score = 1 - levenshtein(trimmed, firstLine) / maxLen
if (score > bestScore && score > 0.6) {
bestScore = score
bestLine = i
}
}

if (bestLine === -1) {
return base + ` The first line of your oldString ("${firstLine.slice(0, 80)}") was not found anywhere in the file. Re-read the file before editing.`
}

// Show a small window around the best match
const start = Math.max(0, bestLine - 1)
const end = Math.min(contentLines.length, bestLine + 4)
const snippet = contentLines
.slice(start, end)
.map((l, i) => ` ${start + i + 1} | ${l}`)
.join("\n")

return (
base +
` A similar line was found at line ${bestLine + 1}. The file may have changed since you last read it.\n\nNearest match:\n${snippet}\n\nRe-read the file and use the exact current content for oldString.`
)
}

export function replace(content: string, oldString: string, newString: string, replaceAll = false): string {
if (oldString === newString) {
throw new Error("No changes to apply: oldString and newString are identical.")
Expand Down Expand Up @@ -661,9 +731,7 @@ export function replace(content: string, oldString: string, newString: string, r
}

if (notFound) {
throw new Error(
"Could not find oldString in the file. It must match exactly, including whitespace, indentation, and line endings.",
)
throw new Error(buildNotFoundMessage(content, oldString))
}
throw new Error("Found multiple matches for oldString. Provide more surrounding context to make the match unique.")
}
90 changes: 78 additions & 12 deletions packages/opencode/src/tool/webfetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,67 @@ const BROWSER_UA =
// Status codes that warrant a retry with a different User-Agent
const RETRYABLE_STATUSES = new Set([403, 406])

// altimate_change start — session-level URL failure cache (#471)
// Prevents repeated fetches to URLs that already returned 404/410 in this session.
const failedUrls = new Map<string, { status: number; timestamp: number }>()
const FAILURE_CACHE_TTL = 5 * 60 * 1000 // 5 minutes

function isUrlCachedFailure(url: string): { status: number } | null {
const entry = failedUrls.get(url)
if (!entry) return null
if (Date.now() - entry.timestamp > FAILURE_CACHE_TTL) {
failedUrls.delete(url)
return null
}
return { status: entry.status }
}

const MAX_CACHED_URLS = 500

function cacheUrlFailure(url: string, status: number): void {
if (status === 404 || status === 410 || status === 451) {
if (failedUrls.size >= MAX_CACHED_URLS) {
// Evict oldest entry (Map preserves insertion order)
const oldest = failedUrls.keys().next().value
if (oldest) failedUrls.delete(oldest)
}
failedUrls.set(url, { status, timestamp: Date.now() })
}
}

/** Strip query string from URL to avoid leaking auth tokens in error messages. */
function sanitizeUrl(url: string): string {
try {
const parsed = new URL(url)
return parsed.origin + parsed.pathname
} catch {
return url.split("?")[0]
}
}

/** Build an actionable error message so the model knows whether to retry. */
function buildFetchError(url: string, status: number, headers?: Headers): string {
const safe = sanitizeUrl(url)
switch (status) {
case 404:
return `HTTP 404: ${safe} does not exist. Do NOT retry this URL — it will fail again. Try a different URL or search for the correct page.`
case 410:
return `HTTP 410: ${safe} has been permanently removed. Do NOT retry. Find an alternative resource.`
case 403:
return `HTTP 403: Access to ${safe} is forbidden. The server rejected both bot and browser User-Agents. Try a different source.`
case 429: {
const retryAfter = headers?.get("retry-after")
const wait = retryAfter ? ` (retry after ${retryAfter})` : ""
return `HTTP 429: Rate limited by ${new URL(url).hostname}${wait}. Wait before fetching from this domain again, or use a different source.`
}
case 451:
return `HTTP 451: ${safe} is unavailable for legal reasons. Do NOT retry.`
default:
return `HTTP ${status}: Request to ${safe} failed. This may be transient — retry once if needed.`
}
}
// altimate_change end

export const WebFetchTool = Tool.define("webfetch", {
description: DESCRIPTION,
parameters: z.object({
Expand All @@ -26,10 +87,22 @@ export const WebFetchTool = Tool.define("webfetch", {
timeout: z.number().describe("Optional timeout in seconds (max 120)").optional(),
}),
async execute(params, ctx) {
// Validate URL
// altimate_change start — URL validation and failure cache (#471)
if (!params.url.startsWith("http://") && !params.url.startsWith("https://")) {
throw new Error("URL must start with http:// or https://")
}
try {
new URL(params.url)
} catch {
throw new Error(`Invalid URL: "${params.url.slice(0, 200)}" is not a valid URL. Check the format and try again.`)
}

// Check failure cache — avoid re-fetching URLs that already returned 404/410
const cached = isUrlCachedFailure(params.url)
if (cached) {
throw new Error(buildFetchError(params.url, cached.status))
}
// altimate_change end

await ctx.ask({
permission: "webfetch",
Expand Down Expand Up @@ -83,19 +156,12 @@ export const WebFetchTool = Tool.define("webfetch", {
response = await fetch(params.url, { signal, headers: browserHeaders })
}

// altimate_change start — actionable error messages and failure caching (#471)
if (!response.ok) {
// altimate_change start — include URL domain in error for easier triage
let domain: string
try {
domain = new URL(params.url).hostname
} catch {
domain = params.url.slice(0, 60)
}
throw new Error(
`Request failed with status code: ${response.status} (${domain})`,
)
// altimate_change end
cacheUrlFailure(params.url, response.status)
throw new Error(buildFetchError(params.url, response.status, response.headers))
}
// altimate_change end

// Check content length
const contentLength = response.headers.get("content-length")
Expand Down
Loading
Loading