diff --git a/src/models/_model_map.js b/src/models/_model_map.js index c119f07ed..1bd6c821f 100644 --- a/src/models/_model_map.js +++ b/src/models/_model_map.js @@ -31,6 +31,10 @@ const apiMap = await (async () => { })(); export function selectAPI(profile) { + // RC27: Guard against undefined/null profile (e.g., missing model key in profile JSON) + if (!profile) { + throw new Error('No model specified in profile configuration.'); + } if (typeof profile === 'string' || profile instanceof String) { profile = {model: profile}; } diff --git a/src/models/cerebras.js b/src/models/cerebras.js index be902a649..d854813d3 100644 --- a/src/models/cerebras.js +++ b/src/models/cerebras.js @@ -13,7 +13,7 @@ export class Cerebras { this.client = new CerebrasSDK({ apiKey: getKey('CEREBRAS_API_KEY') }); } - async sendRequest(turns, systemMessage, stop_seq = '***') { + async sendRequest(turns, systemMessage, _stop_seq = '***') { // Format messages array const messages = strictFormat(turns); messages.unshift({ role: 'system', content: systemMessage }); @@ -55,7 +55,7 @@ export class Cerebras { return this.sendRequest(imageMessages, systemMessage); } - async embed(text) { + async embed(_text) { throw new Error('Embeddings are not supported by Cerebras.'); } } diff --git a/src/models/claude.js b/src/models/claude.js index 271c6b214..378697a3b 100644 --- a/src/models/claude.js +++ b/src/models/claude.js @@ -81,7 +81,7 @@ export class Claude { return this.sendRequest(imageMessages, systemMessage); } - async embed(text) { + async embed(_text) { throw new Error('Embeddings are not supported by Claude.'); } } diff --git a/src/models/deepseek.js b/src/models/deepseek.js index 5596fa8fc..186c5af5e 100644 --- a/src/models/deepseek.js +++ b/src/models/deepseek.js @@ -1,5 +1,5 @@ import OpenAIApi from 'openai'; -import { getKey, hasKey } from '../utils/keys.js'; +import { getKey } from '../utils/keys.js'; import { strictFormat } from '../utils/text.js'; export class DeepSeek { @@ -50,7 +50,7 @@ 
export class DeepSeek { return res; } - async embed(text) { + async embed(_text) { throw new Error('Embeddings are not supported by Deepseek.'); } } diff --git a/src/models/gemini.js b/src/models/gemini.js index 178ffff37..bcf149b7d 100644 --- a/src/models/gemini.js +++ b/src/models/gemini.js @@ -56,6 +56,11 @@ export class Gemini { } }); const response = await result.text; + this._lastUsage = result.usageMetadata ? { + prompt_tokens: result.usageMetadata.promptTokenCount || 0, + completion_tokens: result.usageMetadata.candidatesTokenCount || 0, + total_tokens: result.usageMetadata.totalTokenCount || 0, + } : null; console.log('Received.'); @@ -90,12 +95,17 @@ export class Gemini { model: this.model_name, contents: contents, safetySettings: this.safetySettings, - generationConfig: { + config: { + systemInstruction: systemMessage, ...(this.params || {}) - }, - systemInstruction: systemMessage + } }); res = await result.text; + this._lastUsage = result.usageMetadata ? { + prompt_tokens: result.usageMetadata.promptTokenCount || 0, + completion_tokens: result.usageMetadata.candidatesTokenCount || 0, + total_tokens: result.usageMetadata.totalTokenCount || 0, + } : null; console.log('Received.'); } catch (err) { console.log(err); @@ -112,13 +122,14 @@ export class Gemini { const result = await this.genAI.models.embedContent({ model: this.model_name || "gemini-embedding-001", contents: text, - }) + }); - return result.embeddings; + // @google/genai v1.x returns result.embedding.values (not result.embeddings) + return result?.embedding?.values ?? 
result?.embeddings; } } -const sendAudioRequest = async (text, model, voice, url) => { +const sendAudioRequest = async (text, model, voice, _url) => { const ai = new GoogleGenAI({apiKey: getKey('GEMINI_API_KEY')}); const response = await ai.models.generateContent({ diff --git a/src/models/glhf.js b/src/models/glhf.js index b237c8d74..4f3fb2ae1 100644 --- a/src/models/glhf.js +++ b/src/models/glhf.js @@ -1,71 +1,71 @@ -import OpenAIApi from 'openai'; -import { getKey } from '../utils/keys.js'; - -export class GLHF { - static prefix = 'glhf'; - constructor(model_name, url) { - this.model_name = model_name; - const apiKey = getKey('GHLF_API_KEY'); - if (!apiKey) { - throw new Error('API key not found. Please check keys.json and ensure GHLF_API_KEY is defined.'); - } - this.openai = new OpenAIApi({ - apiKey, - baseURL: url || "https://glhf.chat/api/openai/v1" - }); - } - - async sendRequest(turns, systemMessage, stop_seq = '***') { - // Construct the message array for the API request. - let messages = [{ role: 'system', content: systemMessage }].concat(turns); - const pack = { - model: this.model_name || "hf:meta-llama/Llama-3.1-405B-Instruct", - messages, - stop: [stop_seq] - }; - - const maxAttempts = 5; - let attempt = 0; - let finalRes = null; - - while (attempt < maxAttempts) { - attempt++; - console.log(`Awaiting glhf.chat API response... (attempt: ${attempt})`); - try { - let completion = await this.openai.chat.completions.create(pack); - if (completion.choices[0].finish_reason === 'length') { - throw new Error('Context length exceeded'); - } - let res = completion.choices[0].message.content; - // If there's an open tag without a corresponding , retry. - if (res.includes("") && !res.includes("")) { - console.warn("Partial block detected. Re-generating..."); - continue; - } - // If there's a closing tag but no opening , prepend one. 
- if (res.includes("") && !res.includes("")) { - res = "" + res; - } - finalRes = res.replace(/<\|separator\|>/g, '*no response*'); - break; // Valid response obtained. - } catch (err) { - if ((err.message === 'Context length exceeded' || err.code === 'context_length_exceeded') && turns.length > 1) { - console.log('Context length exceeded, trying again with shorter context.'); - return await this.sendRequest(turns.slice(1), systemMessage, stop_seq); - } else { - console.error(err); - finalRes = 'My brain disconnected, try again.'; - break; - } - } - } - if (finalRes === null) { - finalRes = "I thought too hard, sorry, try again"; - } - return finalRes; - } - - async embed(text) { - throw new Error('Embeddings are not supported by glhf.'); - } -} +import OpenAIApi from 'openai'; +import { getKey } from '../utils/keys.js'; + +export class GLHF { + static prefix = 'glhf'; + constructor(model_name, url) { + this.model_name = model_name; + const apiKey = getKey('GHLF_API_KEY'); + if (!apiKey) { + throw new Error('API key not found. Please check keys.json and ensure GHLF_API_KEY is defined.'); + } + this.openai = new OpenAIApi({ + apiKey, + baseURL: url || "https://glhf.chat/api/openai/v1" + }); + } + + async sendRequest(turns, systemMessage, stop_seq = '***') { + // Construct the message array for the API request. + let messages = [{ role: 'system', content: systemMessage }].concat(turns); + const pack = { + model: this.model_name || "hf:meta-llama/Llama-3.1-405B-Instruct", + messages, + stop: [stop_seq] + }; + + const maxAttempts = 5; + let attempt = 0; + let finalRes = null; + + while (attempt < maxAttempts) { + attempt++; + console.log(`Awaiting glhf.chat API response... 
(attempt: ${attempt})`); + try { + let completion = await this.openai.chat.completions.create(pack); + if (completion.choices[0].finish_reason === 'length') { + throw new Error('Context length exceeded'); + } + let res = completion.choices[0].message.content; + // If there's an open <think> tag without a corresponding </think>, retry. + if (res.includes("<think>") && !res.includes("</think>")) { + console.warn("Partial <think> block detected. Re-generating..."); + continue; + } + // If there's a closing </think> tag but no opening <think>, prepend one. + if (res.includes("</think>") && !res.includes("<think>")) { + res = "<think>" + res; + } + finalRes = res.replace(/<\|separator\|>/g, '*no response*'); + break; // Valid response obtained. + } catch (err) { + if ((err.message === 'Context length exceeded' || err.code === 'context_length_exceeded') && turns.length > 1) { + console.log('Context length exceeded, trying again with shorter context.'); + return await this.sendRequest(turns.slice(1), systemMessage, stop_seq); + } else { + console.error(err); + finalRes = 'My brain disconnected, try again.'; + break; + } + } + } + if (finalRes === null) { + finalRes = "I thought too hard, sorry, try again"; + } + return finalRes; + } + + async embed(_text) { + throw new Error('Embeddings are not supported by glhf.'); + } +} diff --git a/src/models/gpt.js b/src/models/gpt.js index 63bdaa1af..badf82fd3 100644 --- a/src/models/gpt.js +++ b/src/models/gpt.js @@ -22,8 +22,8 @@ export class GPT { } async sendRequest(turns, systemMessage, stop_seq='***') { - let messages = strictFormat(turns); - messages = messages.map(message => { + let _messages = strictFormat(turns); + _messages = _messages.map(message => { message.content += stop_seq; return message; }); diff --git a/src/models/grok.js b/src/models/grok.js index 40c63ce1c..74aa7555c 100644 --- a/src/models/grok.js +++ b/src/models/grok.js @@ -35,9 +35,14 @@ export class Grok { ///console.log('Messages:', messages); let completion = await this.openai.chat.completions.create(pack); if
(completion.choices[0].finish_reason == 'length') - throw new Error('Context length exceeded'); + throw new Error('Context length exceeded'); console.log('Received.') res = completion.choices[0].message.content; + this._lastUsage = completion.usage ? { + prompt_tokens: completion.usage.prompt_tokens || 0, + completion_tokens: completion.usage.completion_tokens || 0, + total_tokens: completion.usage.total_tokens || 0, + } : null; } catch (err) { if ((err.message == 'Context length exceeded' || err.code == 'context_length_exceeded') && turns.length > 1) { @@ -52,7 +57,7 @@ export class Grok { } } // sometimes outputs special token <|separator|>, just replace it - return res.replace(/<\|separator\|>/g, '*no response*'); + return (res ?? '').replace(/<\|separator\|>/g, '*no response*'); } async sendVisionRequest(messages, systemMessage, imageBuffer) { @@ -73,7 +78,7 @@ export class Grok { return this.sendRequest(imageMessages, systemMessage); } - async embed(text) { + async embed(_text) { throw new Error('Embeddings are not supported by Grok.'); } } diff --git a/src/models/huggingface.js b/src/models/huggingface.js index 91fbdfd77..b5985c9a5 100644 --- a/src/models/huggingface.js +++ b/src/models/huggingface.js @@ -80,7 +80,7 @@ export class HuggingFace { return finalRes; } - async embed(text) { + async embed(_text) { throw new Error('Embeddings are not supported by HuggingFace.'); } } diff --git a/src/models/hyperbolic.js b/src/models/hyperbolic.js index f483b6980..c57aca48d 100644 --- a/src/models/hyperbolic.js +++ b/src/models/hyperbolic.js @@ -1,114 +1,114 @@ -import { getKey } from '../utils/keys.js'; - -export class Hyperbolic { - static prefix = 'hyperbolic'; - constructor(modelName, apiUrl) { - this.modelName = modelName || "deepseek-ai/DeepSeek-V3"; - this.apiUrl = apiUrl || "https://api.hyperbolic.xyz/v1/chat/completions"; - - // Retrieve the Hyperbolic API key from keys.js - this.apiKey = getKey('HYPERBOLIC_API_KEY'); - if (!this.apiKey) { - throw new 
Error('HYPERBOLIC_API_KEY not found. Check your keys.js file.'); - } - } - - /** - * Sends a chat completion request to the Hyperbolic endpoint. - * - * @param {Array} turns - An array of message objects, e.g. [{role: 'user', content: 'Hi'}]. - * @param {string} systemMessage - The system prompt or instruction. - * @param {string} stopSeq - A stopping sequence, default '***'. - * @returns {Promise} - The model's reply. - */ - async sendRequest(turns, systemMessage, stopSeq = '***') { - // Prepare the messages with a system prompt at the beginning - const messages = [{ role: 'system', content: systemMessage }, ...turns]; - - // Build the request payload - const payload = { - model: this.modelName, - messages: messages, - max_tokens: 8192, - temperature: 0.7, - top_p: 0.9, - stream: false - }; - - const maxAttempts = 5; - let attempt = 0; - let finalRes = null; - - while (attempt < maxAttempts) { - attempt++; - console.log(`Awaiting Hyperbolic API response... (attempt: ${attempt})`); - console.log('Messages:', messages); - - let completionContent = null; - - try { - const response = await fetch(this.apiUrl, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}` - }, - body: JSON.stringify(payload) - }); - - if (!response.ok) { - throw new Error(`HTTP error! 
status: ${response.status}`); - } - - const data = await response.json(); - if (data?.choices?.[0]?.finish_reason === 'length') { - throw new Error('Context length exceeded'); - } - - completionContent = data?.choices?.[0]?.message?.content || ''; - console.log('Received response from Hyperbolic.'); - } catch (err) { - if ( - (err.message === 'Context length exceeded' || err.code === 'context_length_exceeded') && - turns.length > 1 - ) { - console.log('Context length exceeded, trying again with a shorter context...'); - return await this.sendRequest(turns.slice(1), systemMessage, stopSeq); - } else { - console.error(err); - completionContent = 'My brain disconnected, try again.'; - } - } - - // Check for blocks - const hasOpenTag = completionContent.includes(""); - const hasCloseTag = completionContent.includes(""); - - if ((hasOpenTag && !hasCloseTag)) { - console.warn("Partial block detected. Re-generating..."); - continue; // Retry the request - } - - if (hasCloseTag && !hasOpenTag) { - completionContent = '' + completionContent; - } - - if (hasOpenTag && hasCloseTag) { - completionContent = completionContent.replace(/[\s\S]*?<\/think>/g, '').trim(); - } - - finalRes = completionContent.replace(/<\|separator\|>/g, '*no response*'); - break; // Valid response obtained—exit loop - } - - if (finalRes == null) { - console.warn("Could not get a valid block or normal response after max attempts."); - finalRes = 'I thought too hard, sorry, try again.'; - } - return finalRes; - } - - async embed(text) { - throw new Error('Embeddings are not supported by Hyperbolic.'); - } -} +import { getKey } from '../utils/keys.js'; + +export class Hyperbolic { + static prefix = 'hyperbolic'; + constructor(modelName, apiUrl) { + this.modelName = modelName || "deepseek-ai/DeepSeek-V3"; + this.apiUrl = apiUrl || "https://api.hyperbolic.xyz/v1/chat/completions"; + + // Retrieve the Hyperbolic API key from keys.js + this.apiKey = getKey('HYPERBOLIC_API_KEY'); + if (!this.apiKey) { + throw 
new Error('HYPERBOLIC_API_KEY not found. Check your keys.js file.'); + } + } + + /** + * Sends a chat completion request to the Hyperbolic endpoint. + * + * @param {Array} turns - An array of message objects, e.g. [{role: 'user', content: 'Hi'}]. + * @param {string} systemMessage - The system prompt or instruction. + * @param {string} stopSeq - A stopping sequence, default '***'. + * @returns {Promise} - The model's reply. + */ + async sendRequest(turns, systemMessage, stopSeq = '***') { + // Prepare the messages with a system prompt at the beginning + const messages = [{ role: 'system', content: systemMessage }, ...turns]; + + // Build the request payload + const payload = { + model: this.modelName, + messages: messages, + max_tokens: 8192, + temperature: 0.7, + top_p: 0.9, + stream: false + }; + + const maxAttempts = 5; + let attempt = 0; + let finalRes = null; + + while (attempt < maxAttempts) { + attempt++; + console.log(`Awaiting Hyperbolic API response... (attempt: ${attempt})`); + console.log('Messages:', messages); + + let completionContent = null; + + try { + const response = await fetch(this.apiUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${this.apiKey}` + }, + body: JSON.stringify(payload) + }); + + if (!response.ok) { + throw new Error(`HTTP error! 
status: ${response.status}`); + } + + const data = await response.json(); + if (data?.choices?.[0]?.finish_reason === 'length') { + throw new Error('Context length exceeded'); + } + + completionContent = data?.choices?.[0]?.message?.content || ''; + console.log('Received response from Hyperbolic.'); + } catch (err) { + if ( + (err.message === 'Context length exceeded' || err.code === 'context_length_exceeded') && + turns.length > 1 + ) { + console.log('Context length exceeded, trying again with a shorter context...'); + return await this.sendRequest(turns.slice(1), systemMessage, stopSeq); + } else { + console.error(err); + completionContent = 'My brain disconnected, try again.'; + } + } + + // Check for <think> blocks + const hasOpenTag = completionContent.includes("<think>"); + const hasCloseTag = completionContent.includes("</think>"); + + if ((hasOpenTag && !hasCloseTag)) { + console.warn("Partial <think> block detected. Re-generating..."); + continue; // Retry the request + } + + if (hasCloseTag && !hasOpenTag) { + completionContent = '<think>' + completionContent; + } + + if (hasOpenTag && hasCloseTag) { + completionContent = completionContent.replace(/<think>[\s\S]*?<\/think>/g, '').trim(); + } + + finalRes = completionContent.replace(/<\|separator\|>/g, '*no response*'); + break; // Valid response obtained—exit loop + } + + if (finalRes == null) { + console.warn("Could not get a valid <think> block or normal response after max attempts."); + finalRes = 'I thought too hard, sorry, try again.'; + } + return finalRes; + } + + async embed(_text) { + throw new Error('Embeddings are not supported by Hyperbolic.'); + } +} diff --git a/src/models/mercury.js b/src/models/mercury.js index 74cd64e63..79e1df482 100644 --- a/src/models/mercury.js +++ b/src/models/mercury.js @@ -1,5 +1,5 @@ import OpenAIApi from 'openai'; -import { getKey, hasKey } from '../utils/keys.js'; +import { getKey } from '../utils/keys.js'; import { strictFormat } from '../utils/text.js'; export class Mercury { diff --git a/src/models/novita.js
b/src/models/novita.js index 18e1fc454..12dd74d84 100644 --- a/src/models/novita.js +++ b/src/models/novita.js @@ -65,7 +65,7 @@ export class Novita { return res; } - async embed(text) { + async embed(_text) { throw new Error('Embeddings are not supported by Novita AI.'); } } diff --git a/src/models/ollama.js b/src/models/ollama.js index d5b2891b6..41727a534 100644 --- a/src/models/ollama.js +++ b/src/models/ollama.js @@ -1,4 +1,6 @@ import { strictFormat } from '../utils/text.js'; +import http from 'node:http'; +import https from 'node:https'; export class Ollama { static prefix = 'ollama'; @@ -31,8 +33,14 @@ export class Ollama { }); if (apiResponse) { res = apiResponse['message']['content']; + this._lastUsage = { + prompt_tokens: apiResponse.prompt_eval_count || 0, + completion_tokens: apiResponse.eval_count || 0, + total_tokens: (apiResponse.prompt_eval_count || 0) + (apiResponse.eval_count || 0), + }; } else { res = 'No response data.'; + this._lastUsage = null; } } catch (err) { if (err.message.toLowerCase().includes('context length') && turns.length > 1) { @@ -77,17 +85,38 @@ export class Ollama { async send(endpoint, body) { const url = new URL(endpoint, this.url); - let method = 'POST'; - let headers = new Headers(); - const request = new Request(url, { method, headers, body: JSON.stringify(body) }); + const bodyStr = JSON.stringify(body); + const client = url.protocol === 'https:' ? https : http; let data = null; try { - const res = await fetch(request); - if (res.ok) { - data = await res.json(); - } else { - throw new Error(`Ollama Status: ${res.status}`); - } + data = await new Promise((resolve, reject) => { + const req = client.request({ + hostname: url.hostname, + port: url.port || (url.protocol === 'https:' ? 
443 : 11434), + path: url.pathname, + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Content-Length': Buffer.byteLength(bodyStr), + }, + timeout: 90000, // 90s timeout for local model responses + }, (res) => { + if (res.statusCode < 200 || res.statusCode >= 300) { + res.resume(); + return reject(new Error(`Ollama Status: ${res.statusCode}`)); + } + let raw = ''; + res.on('data', chunk => raw += chunk); + res.on('end', () => { + try { resolve(JSON.parse(raw)); } + catch { reject(new Error(`Ollama parse error: ${raw.slice(0, 200)}`)); } + }); + }); + req.on('error', reject); + req.on('timeout', () => { req.destroy(new Error('Ollama request timed out')); }); + req.write(bodyStr); + req.end(); + }); } catch (err) { console.error('Failed to send Ollama request.'); console.error(err); @@ -96,20 +125,35 @@ export class Ollama { } async sendVisionRequest(messages, systemMessage, imageBuffer) { - const imageMessages = [...messages]; - imageMessages.push({ - role: "user", - content: [ - { type: "text", text: systemMessage }, - { - type: "image_url", - image_url: { - url: `data:image/jpeg;base64,${imageBuffer.toString('base64')}` - } - } - ] + // Ollama uses its own image format: { role, content, images: [base64] } + let model = this.model_name || 'llava'; + let formatted = strictFormat(messages); + formatted.unshift({ role: 'system', content: systemMessage }); + // Append the vision request with image in Ollama's native format + formatted.push({ + role: 'user', + content: systemMessage, + images: [imageBuffer.toString('base64')] }); - - return this.sendRequest(imageMessages, systemMessage); + + console.log(`Awaiting vision response... 
(model: ${model})`); + let res = null; + try { + let apiResponse = await this.send(this.chat_endpoint, { + model: model, + messages: formatted, + stream: false, + ...(this.params || {}) + }); + if (apiResponse) { + res = apiResponse['message']['content']; + } else { + res = 'Vision model returned no response.'; + } + } catch (err) { + console.log('Vision request error:', err); + res = 'Vision failed, try again.'; + } + return res; } } diff --git a/src/models/openrouter.js b/src/models/openrouter.js index ca0782bc4..28f7a98ee 100644 --- a/src/models/openrouter.js +++ b/src/models/openrouter.js @@ -1,5 +1,5 @@ import OpenAIApi from 'openai'; -import { getKey, hasKey } from '../utils/keys.js'; +import { getKey } from '../utils/keys.js'; import { strictFormat } from '../utils/text.js'; export class OpenRouter { @@ -71,7 +71,7 @@ export class OpenRouter { return this.sendRequest(imageMessages, systemMessage); } - async embed(text) { + async embed(_text) { throw new Error('Embeddings are not supported by Openrouter.'); } } \ No newline at end of file diff --git a/src/models/prompter.js b/src/models/prompter.js index 6ee93b2e7..39e8e558e 100644 --- a/src/models/prompter.js +++ b/src/models/prompter.js @@ -1,14 +1,33 @@ -import { readFileSync, mkdirSync, writeFileSync} from 'fs'; +import { readFileSync, mkdirSync} from 'fs'; import { Examples } from '../utils/examples.js'; import { getCommandDocs } from '../agent/commands/index.js'; import { SkillLibrary } from "../agent/library/skill_library.js"; import { stringifyTurns } from '../utils/text.js'; import { getCommand } from '../agent/commands/index.js'; import settings from '../agent/settings.js'; +import { deepSanitize } from '../../settings.js'; import { promises as fs } from 'fs'; import path from 'path'; import { fileURLToPath } from 'url'; import { selectAPI, createModel } from './_model_map.js'; +import { EnsembleModel } from '../ensemble/controller.js'; +import { UsageTracker } from '../utils/usage_tracker.js'; + 
+// Helper function to safely write files with retry logic for Windows EBADF issues +async function safeWriteFile(filepath, content, retries = 3, delay = 100) { + for (let i = 0; i < retries; i++) { + try { + await fs.writeFile(filepath, content, 'utf8'); + return; + } catch (error) { + if (error.code === 'EBADF' && i < retries - 1) { + await new Promise(resolve => setTimeout(resolve, delay * (i + 1))); + continue; + } + throw error; + } + } +} const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); @@ -27,8 +46,10 @@ export class Prompter { base_fp = './profiles/defaults/creative.json'; } else if (settings.base_profile.includes('god_mode')) { base_fp = './profiles/defaults/god_mode.json'; + } else { + base_fp = './profiles/defaults/survival.json'; // safe fallback } - let base_profile = JSON.parse(readFileSync(base_fp, 'utf8')); + let base_profile = deepSanitize(JSON.parse(readFileSync(base_fp, 'utf8'))); // first use defaults to fill in missing values in the base profile for (let key in default_profile) { @@ -51,12 +72,16 @@ export class Prompter { this.awaiting_coding = false; // for backwards compatibility, move max_tokens to params - let max_tokens = null; + let _max_tokens = null; if (this.profile.max_tokens) - max_tokens = this.profile.max_tokens; + _max_tokens = this.profile.max_tokens; - let chat_model_profile = selectAPI(this.profile.model); - this.chat_model = createModel(chat_model_profile); + if (this.profile.ensemble) { + this.chat_model = new EnsembleModel(this.profile.ensemble, this.profile); + } else { + let chat_model_profile = selectAPI(this.profile.model); + this.chat_model = createModel(chat_model_profile); + } if (this.profile.code_model) { let code_model_profile = selectAPI(this.profile.code_model); @@ -79,25 +104,43 @@ export class Prompter { if (this.profile.embedding) { try { embedding_model_profile = selectAPI(this.profile.embedding); - } catch (e) { + } catch { embedding_model_profile = null; } } if 
(embedding_model_profile) { this.embedding_model = createModel(embedding_model_profile); } - else { + else if (typeof chat_model_profile !== 'undefined') { this.embedding_model = createModel({api: chat_model_profile.api}); } + else { + this.embedding_model = createModel({api: 'google'}); + } + + // Phase 3: give EnsembleModel access to the embedding model for ChromaDB + if (this.chat_model?.setEmbeddingModel) { + this.chat_model.setEmbeddingModel(this.embedding_model); + } this.skill_libary = new SkillLibrary(agent, this.embedding_model); + + // Minecraft wiki knowledge + this.wikiData = {}; + try { + const wikiPath = path.join(__dirname, '../../data/minecraft_wiki.json'); + this.wikiData = JSON.parse(readFileSync(wikiPath, 'utf8')); + console.log('Minecraft wiki data loaded.'); + } catch (_e) { + console.warn(`Minecraft wiki data not found: ${_e.message}`); + } + mkdirSync(`./bots/${name}`, { recursive: true }); - writeFileSync(`./bots/${name}/last_profile.json`, JSON.stringify(this.profile, null, 4), (err) => { - if (err) { - throw new Error('Failed to save profile:', err); - } - console.log("Copy profile saved."); - }); + // Save profile asynchronously with retry logic + safeWriteFile(`./bots/${name}/last_profile.json`, JSON.stringify(this.profile, null, 4)) + .catch(err => console.error('Failed to save profile:', err.message)); + + this.usageTracker = new UsageTracker(name); } getName() { @@ -109,6 +152,8 @@ export class Prompter { } async initExamples() { + this.usageTracker.load(); + try { this.convo_examples = new Examples(this.embedding_model, settings.num_examples); this.coding_examples = new Examples(this.embedding_model, settings.num_examples); @@ -151,6 +196,14 @@ export class Prompter { } if (prompt.includes('$COMMAND_DOCS')) prompt = prompt.replaceAll('$COMMAND_DOCS', getCommandDocs(this.agent)); + + if (prompt.includes('$WIKI')) { + const wikiStr = this.wikiData && Object.keys(this.wikiData).length > 0 + ? 
JSON.stringify(this.wikiData, null, 2) : '{ "error": "Wiki data not loaded" }'; + prompt = prompt.replaceAll('$WIKI', `Minecraft Wiki Knowledge (Java Edition 1.21+):\n${wikiStr}`); + } + if (prompt.includes('$CODE_DOCS')) { const code_task_content = messages.slice().reverse().find(msg => msg.role !== 'system' && msg.content.includes('!newAction(') @@ -165,6 +218,10 @@ prompt = prompt.replaceAll('$EXAMPLES', await examples.createExampleMessage(messages)); if (prompt.includes('$MEMORY')) prompt = prompt.replaceAll('$MEMORY', this.agent.history.memory); + if (prompt.includes('$LEARNINGS')) { + const summary = this.agent.learnings?.getRecentSummary(10) || ''; + prompt = prompt.replaceAll('$LEARNINGS', summary ? 'Recent action outcomes:\n' + summary : ''); + } if (prompt.includes('$TO_SUMMARIZE')) prompt = prompt.replaceAll('$TO_SUMMARIZE', stringifyTurns(to_summarize)); if (prompt.includes('$CONVO')) @@ -178,9 +235,9 @@ let goal_text = ''; for (let goal in last_goals) { if (last_goals[goal]) - goal_text += `You recently successfully completed the goal ${goal}.\n` + goal_text += `You recently successfully completed the goal ${goal}.\n`; else - goal_text += `You recently failed to complete the goal ${goal}.\n` + goal_text += `You recently failed to complete the goal ${goal}.\n`; } prompt = prompt.replaceAll('$LAST_GOALS', goal_text.trim()); } @@ -226,6 +283,7 @@ try { generation = await this.chat_model.sendRequest(messages, prompt); + this._recordUsage(this.chat_model, 'chat'); if (typeof generation !== 'string') { console.error('Error: Generated response is not a string', generation); throw new Error('Generated response is not a string'); @@ -249,9 +307,9 @@ return ''; } - if (generation?.includes('</think>')) { - const [_, afterThink] = generation.split('</think>') - generation = afterThink + if (generation?.includes('</think>')) { + const [_, afterThink] = generation.split('</think>'); + generation =
afterThink; } return generation; @@ -266,14 +324,18 @@ export class Prompter { return '```//no response```'; } this.awaiting_coding = true; - await this.checkCooldown(); - let prompt = this.profile.coding; - prompt = await this.replaceStrings(prompt, messages, this.coding_examples); - - let resp = await this.code_model.sendRequest(messages, prompt); - this.awaiting_coding = false; - await this._saveLog(prompt, messages, resp, 'coding'); - return resp; + try { + await this.checkCooldown(); + let prompt = this.profile.coding; + prompt = await this.replaceStrings(prompt, messages, this.coding_examples); + + let resp = await this.code_model.sendRequest(messages, prompt); + this._recordUsage(this.code_model, 'code'); + await this._saveLog(prompt, messages, resp, 'coding'); + return resp; + } finally { + this.awaiting_coding = false; + } } async promptMemSaving(to_summarize) { @@ -281,9 +343,10 @@ let prompt = this.profile.saving_memory; prompt = await this.replaceStrings(prompt, null, null, to_summarize); let resp = await this.chat_model.sendRequest([], prompt); + this._recordUsage(this.chat_model, 'memory'); await this._saveLog(prompt, to_summarize, resp, 'memSaving'); - if (resp?.includes('</think>')) { - const [_, afterThink] = resp.split('</think>') + if (resp?.includes('</think>')) { + const [_, afterThink] = resp.split('</think>'); resp = afterThink; } return resp; } @@ -296,6 +359,7 @@ messages.push({role: 'user', content: new_message}); prompt = await this.replaceStrings(prompt, null, null, messages); let res = await this.chat_model.sendRequest([], prompt); + this._recordUsage(this.chat_model, 'chat'); return res.trim().toLowerCase() === 'respond'; } @@ -303,7 +367,9 @@ await this.checkCooldown(); let prompt = this.profile.image_analysis; prompt = await this.replaceStrings(prompt, messages, null, null, null); - return await this.vision_model.sendVisionRequest(messages, prompt, imageBuffer); + let res = await
this.vision_model.sendVisionRequest(messages, prompt, imageBuffer); + this._recordUsage(this.vision_model, 'vision'); + return res; } async promptGoalSetting(messages, last_goals) { @@ -312,11 +378,12 @@ export class Prompter { system_message = await this.replaceStrings(system_message, messages); let user_message = 'Use the below info to determine what goal to target next\n\n'; - user_message += '$LAST_GOALS\n$STATS\n$INVENTORY\n$CONVO' + user_message += '$LAST_GOALS\n$STATS\n$INVENTORY\n$CONVO'; user_message = await this.replaceStrings(user_message, messages, null, null, last_goals); let user_messages = [{role: 'user', content: user_message}]; let res = await this.chat_model.sendRequest(user_messages, system_message); + this._recordUsage(this.chat_model, 'chat'); let goal = null; try { @@ -333,6 +400,23 @@ export class Prompter { return goal; } + _recordUsage(model, callType) { + const breakdown = model._lastUsageByModel || null; + if (Array.isArray(breakdown) && breakdown.length > 0) { + for (const entry of breakdown) { + const modelName = entry.modelName || 'unknown'; + const provider = entry.provider || 'unknown'; + const usage = entry.usage || null; + this.usageTracker.record(modelName, provider, callType, usage); + } + return; + } + const usage = model._lastUsage || null; + const modelName = model.model_name || 'unknown'; + const provider = model.constructor?.prefix || 'unknown'; + this.usageTracker.record(modelName, provider, callType, usage); + } + async _saveLog(prompt, messages, generation, tag) { if (!settings.log_all_prompts) return; diff --git a/src/models/qwen.js b/src/models/qwen.js index a768b5b07..275281fff 100644 --- a/src/models/qwen.js +++ b/src/models/qwen.js @@ -1,5 +1,5 @@ import OpenAIApi from 'openai'; -import { getKey, hasKey } from '../utils/keys.js'; +import { getKey } from '../utils/keys.js'; import { strictFormat } from '../utils/text.js'; export class Qwen { diff --git a/src/models/vllm.js b/src/models/vllm.js index 
d821983bb..8cbdaf82a 100644 --- a/src/models/vllm.js +++ b/src/models/vllm.js @@ -1,8 +1,4 @@ -// This code uses Dashscope and HTTP to ensure the latest support for the Qwen model. -// Qwen is also compatible with the OpenAI API format; - import OpenAIApi from 'openai'; -import { getKey, hasKey } from '../utils/keys.js'; import { strictFormat } from '../utils/text.js'; export class VLLM { @@ -10,25 +6,21 @@ export class VLLM { constructor(model_name, url) { this.model_name = model_name; - // Currently use self-hosted SGLang API for text generation; use OpenAI text-embedding-3-small model for simple embedding. - let vllm_config = {}; - if (url) - vllm_config.baseURL = url; - else - vllm_config.baseURL = 'http://0.0.0.0:8000/v1'; - - vllm_config.apiKey = "" + let config = {}; + config.baseURL = url || 'http://0.0.0.0:8000/v1'; + config.apiKey = ''; - this.vllm = new OpenAIApi(vllm_config); + this.vllm = new OpenAIApi(config); + this._lastUsage = null; } async sendRequest(turns, systemMessage, stop_seq = '***') { let messages = [{ 'role': 'system', 'content': systemMessage }].concat(turns); - let model = this.model_name || "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"; - + let model = this.model_name || 'google/gemma-3-12b-it'; + if (model.includes('deepseek') || model.includes('qwen')) { messages = strictFormat(messages); - } + } const pack = { model: model, @@ -36,43 +28,82 @@ export class VLLM { stop: stop_seq, }; - let res = null; - try { - console.log('Awaiting openai api response...') - // console.log('Messages:', messages); - // todo set max_tokens, temperature, top_p, etc. 
in pack - let completion = await this.vllm.chat.completions.create(pack); - if (completion.choices[0].finish_reason == 'length') - throw new Error('Context length exceeded'); - console.log('Received.') - res = completion.choices[0].message.content; - } - catch (err) { - if ((err.message == 'Context length exceeded' || err.code == 'context_length_exceeded') && turns.length > 1) { - console.log('Context length exceeded, trying again with shorter context.'); - return await this.sendRequest(turns.slice(1), systemMessage, stop_seq); - } else { - console.log(err); - res = 'My brain disconnected, try again.'; + const maxAttempts = 5; + let attempt = 0; + let finalRes = null; + + while (attempt < maxAttempts) { + attempt++; + let res = null; + try { + console.log(`Awaiting vLLM response... (model: ${model}, attempt: ${attempt})`); + let completion = await this.vllm.chat.completions.create(pack); + if (completion.choices[0].finish_reason == 'length') + throw new Error('Context length exceeded'); + console.log('Received.'); + res = completion.choices[0].message.content; + + this._lastUsage = completion.usage ? 
{ + prompt_tokens: completion.usage.prompt_tokens || 0, + completion_tokens: completion.usage.completion_tokens || 0, + total_tokens: completion.usage.total_tokens || 0, + } : null; } - } - return res; - } + catch (err) { + this._lastUsage = null; + if ((err.message == 'Context length exceeded' || err.code == 'context_length_exceeded') && turns.length > 1) { + console.log('Context length exceeded, trying again with shorter context.'); + return await this.sendRequest(turns.slice(1), systemMessage, stop_seq); + } else { + console.log(err); + res = 'My brain disconnected, try again.'; + } + } + + // Handle tags (Gemma-3 / reasoning models may produce these) + const hasOpenTag = res.includes(''); + const hasCloseTag = res.includes(''); - async saveToFile(logFile, logEntry) { - let task_id = this.agent.task.task_id; - console.log(task_id) - let logDir; - if (this.task_id === null) { - logDir = path.join(__dirname, `../../bots/${this.agent.name}/logs`); - } else { - logDir = path.join(__dirname, `../../bots/${this.agent.name}/logs/${task_id}`); + if (hasOpenTag && !hasCloseTag) { + console.warn('Partial block detected. Re-generating...'); + if (attempt < maxAttempts) continue; + } + if (hasCloseTag && !hasOpenTag) { + res = '' + res; + } + if (hasOpenTag && hasCloseTag) { + res = res.replace(/[\s\S]*?<\/think>/g, '').trim(); + } + + finalRes = res; + break; } - await fs.mkdir(logDir, { recursive: true }); + if (finalRes == null) { + console.warn('Could not get a valid response after max attempts.'); + finalRes = 'I thought too hard, sorry, try again.'; + } + return finalRes; + } - logFile = path.join(logDir, logFile); - await fs.appendFile(logFile, String(logEntry), 'utf-8'); + async embed(_text) { + throw new Error('vLLM embeddings not configured. 
Use Google gemini-embedding-001 instead.'); } -} \ No newline at end of file + async sendVisionRequest(messages, systemMessage, imageBuffer) { + const imageMessages = [...messages]; + imageMessages.push({ + role: 'user', + content: [ + { type: 'text', text: systemMessage }, + { + type: 'image_url', + image_url: { + url: `data:image/jpeg;base64,${imageBuffer.toString('base64')}` + } + } + ] + }); + return this.sendRequest(imageMessages, systemMessage); + } +}