Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 45 additions & 9 deletions packages/opencode/src/altimate/telemetry/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -475,12 +475,40 @@ export namespace Telemetry {
session_id: string
tool_name: string
tool_category: string
error_class: "parse_error" | "connection" | "timeout" | "validation" | "internal" | "permission" | "http_error" | "file_not_found" | "edit_mismatch" | "not_configured" | "resource_exhausted" | "unknown"
error_class: "parse_error" | "connection" | "timeout" | "validation" | "internal" | "permission" | "http_error" | "file_not_found" | "file_stale" | "edit_mismatch" | "not_configured" | "resource_exhausted" | "unknown"
error_message: string
input_signature: string
masked_args?: string
duration_ms: number
}
// altimate_change start — FileTime observability: drift + assertion outcome tracking
// Tracks the gap between Node.js wall-clock and filesystem mtime at read time.
// Use to monitor whether the mtime clock-source change (PR #611) is preventing
// false stale-file errors, and detect environments with significant drift.
// KQL: customEvents | where name == "filetime_drift" | extend drift = todouble(customMeasurements.drift_ms)
| {
type: "filetime_drift"
timestamp: number
session_id: string
/** Absolute difference in ms between Date.now() and filesystem mtime */
drift_ms: number
/** True if filesystem mtime is ahead of wall-clock (the problematic direction) */
mtime_ahead: boolean
}
// Tracks FileTime.assert() outcomes: "stale" when a file fails the check.
// High volume of "stale" with low delta_ms suggests tolerance is too tight.
// KQL: customEvents | where name == "filetime_assert" | extend delta = todouble(customMeasurements.delta_ms)
| {
type: "filetime_assert"
timestamp: number
session_id: string
outcome: "stale"
/** mtime - readTime in ms (how far ahead the file's mtime is) */
delta_ms: number
/** Current tolerance threshold in ms */
tolerance_ms: number
}
// altimate_change end
// altimate_change start — sql quality telemetry for issue prevention metrics
| {
type: "sql_quality"
Expand Down Expand Up @@ -761,19 +789,31 @@ export namespace Telemetry {
// altimate_change end
{ class: "timeout", keywords: ["timeout", "etimedout", "bridge timeout", "timed out"] },
{ class: "permission", keywords: ["permission", "access denied", "permission denied", "unauthorized", "forbidden", "authentication"] },
// altimate_change start — http_error before validation to prevent "HTTP 404" matching "invalid"/"missing"
{
class: "http_error",
keywords: ["status code: 4", "status code: 5", "request failed with status", "http 404", "http 410", "http 429", "http 451", "http 403"],
},
// altimate_change end
// altimate_change start — split file_stale out of validation; remove "does not exist" (was catching HTTP 404s)
{
class: "file_stale",
keywords: [
"must read file",
"has been modified since",
"before overwriting",
],
},
{
class: "validation",
keywords: [
"invalid params",
"invalid",
"missing",
"required",
"must read file",
"has been modified since",
"does not exist",
"before overwriting",
],
},
// altimate_change end
{ class: "internal", keywords: ["internal", "assertion"] },
// altimate_change start — resource_exhausted class for OOM/quota errors
{
Expand All @@ -788,10 +828,6 @@ export namespace Telemetry {
],
},
// altimate_change end
{
class: "http_error",
keywords: ["status code: 4", "status code: 5", "request failed with status"],
},
]
// altimate_change end

Expand Down
88 changes: 84 additions & 4 deletions packages/opencode/src/file/time.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,48 @@ import { Instance } from "../project/instance"
import { Log } from "../util/log"
import { Flag } from "../flag/flag"
import { Filesystem } from "../util/filesystem"
// altimate_change start — telemetry for FileTime drift measurement
import { Telemetry } from "../altimate/telemetry"
// altimate_change end

export namespace FileTime {
const log = Log.create({ service: "file.time" })

// altimate_change start — FileTime clock source change documentation
//
// CHANGE: read() now records the file's filesystem mtime instead of Date.now().
//
// WHY: The original code used `new Date()` (wall-clock) as the "last read" timestamp,
// then assert() compared it against `Filesystem.stat(file).mtime`. On WSL (NTFS-over-9P),
// networked drives, and some macOS APFS mounts, the filesystem clock drifts 400ms–1.2s
// behind Node.js's clock. This caused mtime > readTime even for the file's own write,
// triggering false "modified since last read" errors. One user hit 782 consecutive retries.
//
// FIX: Both timestamps now come from the same clock (filesystem mtime), eliminating
// cross-clock skew. The tolerance is kept at 50ms (upstream default) since same-clock
// comparisons don't need a larger window.
//
// TRADE-OFF: On coarse-resolution filesystems (HFS+ = 1s, NFS with stale cache,
// Docker overlayfs after copy-up), two writes within the same resolution window
// produce identical mtimes — so we'd miss a real external modification. This is a
// false-negative risk (missed edit) vs the false-positive risk (retry loop) we're
// fixing. Acceptable because: (a) the file gets re-read on the next attempt anyway,
// (b) HFS+ is rare (macOS defaulted to APFS since 2017), and (c) the wall-clock
// approach was actively causing 782-retry production loops on WSL.
//
// MONITORING: filetime_drift telemetry event tracks the gap between wall-clock and mtime
// at read time. If drift_ms is consistently 0, this change has no effect (good). If
// drift_ms shows large values, this change is preventing false positives (also good).
// If file_stale errors increase post-deploy, the tolerance may need adjustment.
//
// UPSTREAM: sst/opencode issues #19040, #14183, #20354 track the same problem.
// Upstream is pursuing processor-level recovery (PR #19099) rather than fixing the clock
// source. Both approaches are complementary.
//
// ROLLBACK: Set OPENCODE_DISABLE_FILETIME_CHECK=true to bypass all checks, or revert
// this change to restore `new Date()` behavior with a wider tolerance.
// altimate_change end

// Per-session read times plus per-file write locks.
// All tools that overwrite existing files should run their
// assert/read/write/update sequence inside withLock(filepath, ...)
Expand All @@ -26,7 +65,32 @@ export namespace FileTime {
log.info("read", { sessionID, file })
const { read } = state()
read[sessionID] = read[sessionID] || {}
read[sessionID][file] = new Date()
// altimate_change start — use filesystem mtime instead of wall-clock (see doc block above)
const wallClock = new Date()
const mtime = Filesystem.stat(file)?.mtime
read[sessionID][file] = mtime ?? wallClock

// Track drift between wall-clock and filesystem mtime for monitoring.
// This lets us measure the real-world impact of the clock source change
// and detect environments where drift is significant.
if (mtime) {
const driftMs = Math.abs(wallClock.getTime() - mtime.getTime())
if (driftMs > 10) {
// Only emit when drift is non-trivial (>10ms) to avoid noise
try {
Telemetry.track({
type: "filetime_drift",
timestamp: Date.now(),
session_id: sessionID,
drift_ms: driftMs,
mtime_ahead: mtime.getTime() > wallClock.getTime(),
})
} catch {
// Telemetry must never break file operations
}
}
}
// altimate_change end
}

export function get(sessionID: string, file: string) {
Expand Down Expand Up @@ -61,11 +125,27 @@ export namespace FileTime {
const time = get(sessionID, filepath)
if (!time) throw new Error(`You must read file ${filepath} before overwriting it. Use the Read tool first`)
const mtime = Filesystem.stat(filepath)?.mtime
// Allow a 50ms tolerance for Windows NTFS timestamp fuzziness / async flushing
if (mtime && mtime.getTime() > time.getTime() + 50) {
// altimate_change start — keep upstream's 50ms tolerance (sufficient now that both
// timestamps come from the same filesystem clock). Track assertion outcomes for monitoring.
const toleranceMs = 50
const deltaMs = mtime ? mtime.getTime() - time.getTime() : 0
if (mtime && deltaMs > toleranceMs) {
try {
Telemetry.track({
type: "filetime_assert",
timestamp: Date.now(),
session_id: sessionID,
outcome: "stale",
delta_ms: deltaMs,
tolerance_ms: toleranceMs,
})
} catch {
// Telemetry must never mask the stale-file error
}
throw new Error(
`File ${filepath} has been modified since it was last read.\nLast modification: ${mtime.toISOString()}\nLast read: ${time.toISOString()}\n\nPlease read the file again before modifying it.`,
`File ${filepath} has been modified since it was last read.\nLast modification: ${mtime.toISOString()}\nLast read: ${time.toISOString()}\nDelta: ${deltaMs}ms (tolerance: ${toleranceMs}ms)\n\nPlease read the file again before modifying it.`,
)
}
// altimate_change end
}
}
39 changes: 31 additions & 8 deletions packages/opencode/test/file/time.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,18 +112,19 @@ describe("file/time", () => {
fn: async () => {
FileTime.read(sessionID, filepath)

// Wait to ensure different timestamps
await new Promise((resolve) => setTimeout(resolve, 100))

// Modify file after reading
// Simulate external modification by setting mtime 5s ahead of the
// file's current mtime (read() stores mtime, assert() compares
// against current mtime — both from filesystem clock).
await fs.writeFile(filepath, "modified content", "utf-8")
const future = new Date(Date.now() + 5000)
await fs.utimes(filepath, future, future)

await expect(FileTime.assert(sessionID, filepath)).rejects.toThrow("modified since it was last read")
},
})
})

test("includes timestamps in error message", async () => {
test("includes timestamps and delta in error message", async () => {
await using tmp = await tmpdir()
const filepath = path.join(tmp.path, "file.txt")
await fs.writeFile(filepath, "content", "utf-8")
Expand All @@ -132,8 +133,9 @@ describe("file/time", () => {
directory: tmp.path,
fn: async () => {
FileTime.read(sessionID, filepath)
await new Promise((resolve) => setTimeout(resolve, 100))
await fs.writeFile(filepath, "modified", "utf-8")
const future = new Date(Date.now() + 5000)
await fs.utimes(filepath, future, future)

let error: Error | undefined
try {
Expand All @@ -144,6 +146,8 @@ describe("file/time", () => {
expect(error).toBeDefined()
expect(error!.message).toContain("Last modification:")
expect(error!.message).toContain("Last read:")
expect(error!.message).toContain("Delta:")
expect(error!.message).toContain("tolerance:")
},
})
})
Expand Down Expand Up @@ -346,9 +350,10 @@ describe("file/time", () => {

const originalStat = Filesystem.stat(filepath)

// Wait and modify
await new Promise((resolve) => setTimeout(resolve, 100))
// Simulate external modification with mtime far ahead
await fs.writeFile(filepath, "modified", "utf-8")
const future = new Date(Date.now() + 5000)
await fs.utimes(filepath, future, future)

const newStat = Filesystem.stat(filepath)
expect(newStat!.mtime.getTime()).toBeGreaterThan(originalStat!.mtime.getTime())
Expand All @@ -357,5 +362,23 @@ describe("file/time", () => {
},
})
})

test("read() stores filesystem mtime, not wall-clock time", async () => {
await using tmp = await tmpdir()
const filepath = path.join(tmp.path, "file.txt")
await fs.writeFile(filepath, "content", "utf-8")

await Instance.provide({
directory: tmp.path,
fn: async () => {
const statBefore = Filesystem.stat(filepath)
FileTime.read(sessionID, filepath)
const readTime = FileTime.get(sessionID, filepath)

// read() should store the file's mtime, not wall-clock
expect(readTime!.getTime()).toBe(statBefore!.mtime.getTime())
},
})
})
})
})
25 changes: 20 additions & 5 deletions packages/opencode/test/telemetry/telemetry.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1628,11 +1628,13 @@ describe("telemetry.classifyError", () => {
expect(Telemetry.classifyError("Invalid dialect specified")).toBe("validation")
expect(Telemetry.classifyError("Missing required field")).toBe("validation")
expect(Telemetry.classifyError("Required parameter 'query' not provided")).toBe("validation")
// altimate_change start — expanded validation patterns
expect(Telemetry.classifyError("You must read file /path/to/file before overwriting it")).toBe("validation")
expect(Telemetry.classifyError("File has been modified since it was last read")).toBe("validation")
expect(Telemetry.classifyError("error: column foo does not exist")).toBe("validation")
expect(Telemetry.classifyError("You must read file before overwriting it. Use the Read tool first")).toBe("validation")
// altimate_change start — file_stale split out from validation; "does not exist" moved to http_error
// These are now classified as file_stale, not validation
expect(Telemetry.classifyError("You must read file /path/to/file before overwriting it")).toBe("file_stale")
expect(Telemetry.classifyError("File has been modified since it was last read")).toBe("file_stale")
expect(Telemetry.classifyError("You must read file before overwriting it. Use the Read tool first")).toBe("file_stale")
// HTTP 404 "does not exist" is now http_error
expect(Telemetry.classifyError("HTTP 404: https://example.com/page does not exist")).toBe("http_error")
// altimate_change end
})

Expand All @@ -1651,12 +1653,25 @@ describe("telemetry.classifyError", () => {
expect(Telemetry.classifyError("Assertion failed: x > 0")).toBe("internal")
})

// altimate_change start — file_stale class tests
test("classifies file_stale errors", () => {
expect(Telemetry.classifyError("You must read file /path/to/file before overwriting it")).toBe("file_stale")
expect(Telemetry.classifyError("File /foo.ts has been modified since it was last read")).toBe("file_stale")
expect(Telemetry.classifyError("Read the file before overwriting it")).toBe("file_stale")
})
// altimate_change end

// altimate_change start — http_error class and priority ordering tests
test("classifies http errors", () => {
expect(Telemetry.classifyError("Request failed with status code: 404 (example.com)")).toBe("http_error")
expect(Telemetry.classifyError("Request failed with status code: 500")).toBe("http_error")
expect(Telemetry.classifyError("status code: 403")).toBe("http_error")
expect(Telemetry.classifyError("Request failed with status")).toBe("http_error")
// altimate_change start — HTTP status codes in webfetch error messages
expect(Telemetry.classifyError("HTTP 404: https://example.com does not exist. Do NOT retry")).toBe("http_error")
expect(Telemetry.classifyError("HTTP 410: https://example.com has been permanently removed")).toBe("http_error")
expect(Telemetry.classifyError("HTTP 429: Rate limited by example.com")).toBe("http_error")
// altimate_change end
})

test("priority ordering: earlier patterns win over later ones", () => {
Expand Down
8 changes: 4 additions & 4 deletions packages/opencode/test/tool/edit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -238,11 +238,11 @@ describe("tool.edit", () => {
// Read first
FileTime.read(ctx.sessionID, filepath)

// Wait a bit to ensure different timestamps
await new Promise((resolve) => setTimeout(resolve, 100))

// Simulate external modification
// Simulate external modification with mtime 5s in the future
// to exceed the 50ms tolerance in FileTime.assert()
await fs.writeFile(filepath, "modified externally", "utf-8")
const future = new Date(Date.now() + 5000)
await fs.utimes(filepath, future, future)

// Try to edit with the new content
const edit = await EditTool.init()
Expand Down
Loading