Commit 839c0d3

refactor run cache (#976)

* start on refactoring cache
* feat: 🔒 add cache key to responses and improve caching
* wrap system prompt to avoid concurrency issues
* feat: 💡 improve tracing and cache logic in chat flow
* test: 🔒 update hash test with new parameters and value
* refactor: ♻️ improve cache initialization logic
* revert system scripts
* refactor: ♻️ streamline script handling and ctx usage
* refactor: ♻️ improve trace handling for better granularity

1 parent b666ece commit 839c0d3

79 files changed, +261 -252 lines changed


packages/cli/src/nodehost.ts (+7 -8)

@@ -24,9 +24,6 @@ import {
     AZURE_AI_INFERENCE_TOKEN_SCOPES,
     MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI,
     DOT_ENV_FILENAME,
-    LARGE_MODEL_ID,
-    SMALL_MODEL_ID,
-    VISION_MODEL_ID,
 } from "../../core/src/constants"
 import { tryReadText } from "../../core/src/fs"
 import {
@@ -41,7 +38,7 @@ import {
     ModelConfiguration,
 } from "../../core/src/host"
 import { TraceOptions } from "../../core/src/trace"
-import { deleteEmptyValues, logError, logVerbose } from "../../core/src/util"
+import { logError, logVerbose } from "../../core/src/util"
 import { parseModelIdentifier } from "../../core/src/models"
 import { LanguageModel } from "../../core/src/chat"
 import { errorMessage, NotSupportedError } from "../../core/src/error"
@@ -50,7 +47,11 @@ import { shellConfirm, shellInput, shellSelect } from "./input"
 import { shellQuote } from "../../core/src/shell"
 import { uniq } from "es-toolkit"
 import { PLimitPromiseQueue } from "../../core/src/concurrency"
-import { LanguageModelConfiguration, Project, ResponseStatus } from "../../core/src/server/messages"
+import {
+    LanguageModelConfiguration,
+    Project,
+    ResponseStatus,
+} from "../../core/src/server/messages"
 import { createAzureTokenResolver } from "./azuretoken"
 import {
     createAzureContentSafetyClient,
@@ -396,15 +397,13 @@ export class NodeHost implements RuntimeHost {
         }
 
         const {
-            trace,
             label,
             cwd,
             timeout = SHELL_EXEC_TIMEOUT,
             stdin: input,
         } = options || {}
+        const trace = options?.trace?.startTraceDetails(label || command)
         try {
-            trace?.startDetails(label || command)
-
             // python3 on windows -> python
             if (command === "python3" && process.platform === "win32")
                 command = "python"
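
The exec path now asks the incoming trace for a dedicated child section via `startTraceDetails` instead of calling `startDetails` on the shared trace, so concurrently running commands no longer interleave their output in one section. A minimal sketch of the resulting pattern, with a hypothetical `runCommand` helper and a minimal `TraceLike` surface standing in for the project's `MarkdownTrace` (both are assumptions, not NodeHost's actual API):

```ts
// Minimal trace surface assumed for this sketch; the real MarkdownTrace has more.
interface TraceLike {
    startTraceDetails(title: string): TraceLike
    endDetails(): void
}

// Hypothetical exec wrapper illustrating the per-command trace section.
async function runCommand(
    command: string,
    options?: { label?: string; trace?: TraceLike }
): Promise<{ exitCode: number }> {
    // each call gets its own nested details section instead of writing
    // section markers into the caller's shared trace
    const trace = options?.trace?.startTraceDetails(options?.label || command)
    try {
        // ... spawn the process and stream its output into `trace` ...
        return { exitCode: 0 }
    } finally {
        trace?.endDetails() // assumed counterpart that closes the section
    }
}
```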

packages/cli/src/scripts.ts (+13 -9)

@@ -8,7 +8,7 @@ import {
     fixPromptDefinitions,
     createScript as coreCreateScript,
 } from "../../core/src/scripts"
-import { logInfo, logVerbose } from "../../core/src/util"
+import { deleteEmptyValues, logInfo, logVerbose } from "../../core/src/util"
 import { runtimeHost } from "../../core/src/host"
 import { RUNTIME_ERROR_CODE } from "../../core/src/constants"
 import {
@@ -27,14 +27,18 @@ export async function listScripts(options?: ScriptFilterOptions) {
     const prj = await buildProject() // Build the project to get script templates
     const scripts = filterScripts(prj.scripts, options) // Filter scripts based on options
     console.log(
-        YAMLStringify(
-            scripts.map(({ id, title, group, filename, system: isSystem }) => ({
-                id,
-                title,
-                group,
-                filename,
-                isSystem,
-            }))
+        JSON.stringify(
+            scripts.map(({ id, title, group, filename, system: isSystem }) =>
+                deleteEmptyValues({
+                    id,
+                    title,
+                    group,
+                    filename,
+                    isSystem,
+                })
+            ),
+            null,
+            2
         )
     )
 }
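
The listing switches from YAML to pretty-printed JSON, with `deleteEmptyValues` pruning each entry before serialization. Assuming that helper drops missing fields (its implementation is not shown in this commit), the output would look roughly like the following, with illustrative script ids:

```ts
// Roughly the new console output: a user script with no group omits that key,
// and a bundled system script omits filename.
[
    {
        id: "poem",
        title: "Write a poem",
        filename: "genaisrc/poem.genai.mjs",
    },
    {
        id: "system.files",
        isSystem: true,
    },
]
```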

packages/core/bundleprompts.js (+6 -3)

@@ -11,10 +11,13 @@ async function main() {
     const promptMap = {}
     const prompts = readdirSync(dir)
     for (const prompt of prompts) {
-        if (!/\.m?js$/.test(prompt)) continue
+        if (!/\.mjs$/.test(prompt)) continue
         const text = readFileSync(`${dir}/${prompt}`, "utf-8")
-        if (/\.genai\.m?js$/.test(prompt))
-            promptMap[prompt.replace(/\.genai\.m?js$/i, "")] = text
+        if (/^system\./.test(prompt)) {
+            const id = prompt.replace(/\.m?js$/i, "")
+            if (promptMap[id]) throw new Error(`duplicate prompt ${id}`)
+            promptMap[id] = text
+        }
     }
     console.log(`found ${Object.keys(promptMap).length} prompts`)
     console.debug(Object.keys(promptMap).join("\n"))
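
The bundler now only picks up `.mjs` files whose names start with `system.`, keys them by the filename minus its extension, and fails fast on duplicate ids. A quick check of the new filter against some illustrative filenames (not the repository's actual prompt list):

```ts
const files = ["system.files.mjs", "system.files.js", "poem.genai.mjs", "README.md"]
for (const file of files) {
    if (!/\.mjs$/.test(file)) continue // skips "system.files.js" and "README.md"
    if (!/^system\./.test(file)) continue // skips "poem.genai.mjs"
    console.log(file.replace(/\.m?js$/i, "")) // prints "system.files"
}
```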

packages/core/src/anthropic.ts (-41)

@@ -30,10 +30,6 @@ import {
 
 import { deleteUndefinedValues, logError, logVerbose } from "./util"
 import { resolveHttpProxyAgent } from "./proxy"
-import {
-    ChatCompletionRequestCacheKey,
-    getChatCompletionCache,
-} from "./chatcache"
 import { HttpsProxyAgent } from "https-proxy-agent"
 import { MarkdownTrace } from "./trace"
 import { createFetch, FetchType } from "./fetch"
@@ -289,40 +285,6 @@ const completerFactory = (
     const { model } = parseModelIdentifier(req.model)
     const { encode: encoder } = await resolveTokenEncoder(model)
 
-    const cache = !!cacheOrName || !!cacheName
-    const cacheStore = getChatCompletionCache(
-        typeof cacheOrName === "string" ? cacheOrName : cacheName
-    )
-    const cachedKey = cache
-        ? <ChatCompletionRequestCacheKey>{
-              ...req,
-              ...cfgNoToken,
-              model: req.model,
-              temperature: req.temperature,
-              top_p: req.top_p,
-              max_tokens: req.max_tokens,
-              logit_bias: req.logit_bias,
-          }
-        : undefined
-    trace.itemValue(`caching`, cache)
-    trace.itemValue(`cache`, cacheStore?.name)
-    const { text: cached, finishReason: cachedFinishReason } =
-        (await cacheStore.get(cachedKey)) || {}
-    if (cached !== undefined) {
-        partialCb?.({
-            tokensSoFar: estimateTokens(cached, encoder),
-            responseSoFar: cached,
-            responseChunk: cached,
-            inner,
-        })
-        trace.itemValue(`cache hit`, await cacheStore.getKeySHA(cachedKey))
-        return {
-            text: cached,
-            finishReason: cachedFinishReason,
-            cached: true,
-        }
-    }
-
     const fetch = await createFetch({
         trace,
         retries: retry,
@@ -441,9 +403,6 @@ const completerFactory = (
             `${usage.total_tokens} total, ${usage.prompt_tokens} prompt, ${usage.completion_tokens} completion`
         )
     }
-
-    if (finishReason === "stop")
-        await cacheStore.set(cachedKey, { text: chatResp, finishReason })
     return {
         text: chatResp,
         finishReason,
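
The Anthropic client no longer maintains its own `getChatCompletionCache` lookup; per the commit message, response caching and the cache key move into the shared chat flow, where one code path can serve every provider. A rough sketch of how a completer call could be wrapped there using the new `getOrUpdate` from cache.ts (a hypothetical wrapper, not the actual chat-flow code in this commit):

```ts
import { JSONLineCache } from "./cache"

interface CachedResponse {
    text: string
    finishReason: string
}

// Illustration only: provider-agnostic response caching around any completer.
async function cachedCompletion(
    cacheKey: object, // request body plus non-secret configuration
    complete: () => Promise<CachedResponse>,
    cacheName = "chat" // assumed cache name
): Promise<CachedResponse & { cached?: boolean; cacheKey?: string }> {
    const cache = JSONLineCache.byName<object, CachedResponse>(cacheName)
    const { key, value, cached } = await cache.getOrUpdate(
        cacheKey,
        complete,
        // only persist responses that completed normally
        (res) => res.finishReason === "stop"
    )
    // surfacing the key matches the "add cache key to responses" bullet
    return { ...value, cached, cacheKey: key }
}
```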

packages/core/src/cache.test.ts (+22)

@@ -36,4 +36,26 @@ describe("Cache", () => {
         assert.ok(sha)
         assert.strictEqual(typeof sha, "string")
     })
+    test("JSONLineCache getOrUpdate retrieves existing value", async () => {
+        const cache = JSONLineCache.byName<string, number>("testCache")
+        await cache.set("existingKey", 42)
+        const value = await cache.getOrUpdate(
+            "existingKey",
+            async () => 99,
+            () => true
+        )
+        assert.strictEqual(value.value, 42)
+    })
+
+    test("JSONLineCache getOrUpdate updates with new value if key does not exist", async () => {
+        const cache = JSONLineCache.byName<string, number>("testCache")
+        const value = await cache.getOrUpdate(
+            "newKey",
+            async () => 99,
+            () => true
+        )
+        assert.strictEqual(value.value, 99)
+        const cachedValue = await cache.get("newKey")
+        assert.strictEqual(cachedValue, 99)
+    })
 })
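
The new tests cover the cache-hit and cache-miss paths. The `_pending` map added in cache.ts also dedupes overlapping updates for the same key; a test along these lines could exercise that path (a sketch only, not part of this commit):

```ts
test("JSONLineCache getOrUpdate shares an in-flight update", async () => {
    const cache = JSONLineCache.byName<string, number>("testCache")
    let calls = 0
    const [first, second] = await Promise.all([
        cache.getOrUpdate("sharedKey", async () => ++calls, () => true),
        cache.getOrUpdate("sharedKey", async () => ++calls, () => true),
    ])
    // the second call is expected to reuse the pending promise
    assert.strictEqual(calls, 1)
    assert.strictEqual(first.value, 1)
    assert.strictEqual(second.value, 1)
})
```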

packages/core/src/cache.ts (+27 -2)

@@ -25,6 +25,7 @@ export class MemoryCache<K, V>
     implements WorkspaceFileCache<any, any>
 {
     protected _entries: Record<string, CacheEntry<K, V>>
+    private _pending: Record<string, Promise<V>>
 
     // Constructor is private to enforce the use of byName factory method
     protected constructor(public readonly name: string) {
@@ -53,6 +54,7 @@
     protected async initialize() {
         if (this._entries) return
         this._entries = {}
+        this._pending = {}
     }
 
     /**
@@ -104,6 +106,29 @@
         return this._entries[sha]?.val
     }
 
+    async getOrUpdate(
+        key: K,
+        updater: () => Promise<V>,
+        validator: (val: V) => boolean
+    ): Promise<{ key: string; value: V; cached?: boolean }> {
+        await this.initialize()
+        const sha = await keySHA(key)
+        if (this._entries[sha])
+            return { key: sha, value: this._entries[sha].val, cached: true }
+        if (this._pending[sha])
+            return { key: sha, value: await this._pending[sha], cached: true }
+
+        try {
+            const p = updater()
+            this._pending[sha] = p
+            const value = await p
+            if (validator(value)) await this.set(key, value)
+            return { key: sha, value, cached: false }
+        } finally {
+            delete this._pending[sha]
+        }
+    }
+
     protected async appendEntry(entry: CacheEntry<K, V>) {}
 
     /**
@@ -177,7 +202,7 @@ export class JSONLineCache<K, V> extends MemoryCache<K, V> {
      */
     override async initialize() {
         if (this._entries) return
-        this._entries = {}
+        super.initialize()
         await host.createDirectory(this.folder()) // Ensure directory exists
         const content = await tryReadText(this.path())
         const objs: CacheEntry<K, V>[] = (await JSONLTryParse(content)) ?? []
@@ -201,7 +226,7 @@
 }
 
 /**
- * Compute the SHA1 hash of a key for uniqueness.
+ * Compute the hash of a key for uniqueness.
  * Normalizes the key by converting it to a string and appending the core version.
  * @param key - The key to hash
  * @returns A promise resolving to the SHA256 hash string
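
Taken together, `getOrUpdate` gives callers a single entry point: it checks the stored entries, then any in-flight update for the same key, and only then runs the updater, persisting the result when the validator accepts it. A small usage sketch (the cache name and the `fetchAnswer` updater are made up for illustration):

```ts
const cache = JSONLineCache.byName<{ prompt: string }, string>("answers")

// stand-in for an expensive operation such as a model call
const fetchAnswer = async () => "42"

const res = await cache.getOrUpdate(
    { prompt: "meaning of life" },
    fetchAnswer,
    (text) => text.length > 0 // only cache non-empty answers
)
console.log(res.key, res.value, res.cached)
// A second call with the same key returns cached: true without invoking
// fetchAnswer again; overlapping calls share the promise stored in _pending.
```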

0 commit comments
