diff --git a/package.json b/package.json index 2221ac5..c64321d 100644 --- a/package.json +++ b/package.json @@ -107,6 +107,11 @@ "default": false, "description": "Enable experimental chat feature." }, + "firecoder.experimental.chat.useGpu": { + "type": "boolean", + "default": false, + "description": "Enable GPU usage for the experimental chat model." + }, "firecoder.completion.autoMode": { "type": "string", "default": "base-small", @@ -117,25 +122,35 @@ "base-large" ], "enumDescriptions": [ - "Use if you have only CPU.", - "Use if you have standard GPU.", - "Use if you have enterprise GPU." + "Use if you have only CPU. (RAM OR VRAM required 4 GB)", + "Use if you have standard GPU. (RAM OR VRAM required 10 GB)", + "Use if you have enterprise GPU. (RAM OR VRAM required 38 GB)" ] }, - "firecoder.completion.manuallyMode": { + "firecoder.completion.autoMode.useGpu": { + "type": "boolean", + "default": false, + "description": "Enable GPU usage for the auto mode completion model." + }, + "firecoder.completion.manualMode": { "type": "string", "default": "base-small", - "markdownDescription": "Select a model for the manually mode based on your system specifications.", + "markdownDescription": "Select a model for the manual mode based on your system specifications.", "enum": [ "base-small", "base-medium", "base-large" ], "enumDescriptions": [ - "Use if you have only CPU.", - "Use if you have standard GPU.", - "Use if you have enterprise GPU." + "Use if you have only CPU. (RAM OR VRAM required 4 GB)", + "Use if you have standard GPU. (RAM OR VRAM required 10 GB)", + "Use if you have enterprise GPU. (RAM OR VRAM required 38 GB)" ] + }, + "firecoder.completion.manualMode.useGpu": { + "type": "boolean", + "default": false, + "description": "Enable GPU usage for the manual mode completion model." 
} } } diff --git a/src/common/completion/index.ts b/src/common/completion/index.ts index deeedd4..1d6df6a 100644 --- a/src/common/completion/index.ts +++ b/src/common/completion/index.ts @@ -54,7 +54,7 @@ export const getInlineCompletionProvider = ( const modelType = triggerAuto ? configuration.get("completion.autoMode") - : configuration.get("completion.manuallyMode"); + : configuration.get("completion.manualMode"); const prompt = await getPromptCompletion( document, position, diff --git a/src/common/server/index.ts b/src/common/server/index.ts index 572976b..9811899 100644 --- a/src/common/server/index.ts +++ b/src/common/server/index.ts @@ -105,16 +105,30 @@ class Server { component: "server", sendTelemetry: true, }); + const isChatModel = this.typeModel in modelsChat; + const isBaseModel = this.typeModel in modelsBase; - const useGPU = + const gpuEnabled = configuration.get("experimental.useGpu.linux.nvidia") || configuration.get("experimental.useGpu.osx.metal") || configuration.get("experimental.useGpu.windows.nvidia"); - const port = models[this.typeModel].port; + const useGPUChat = + configuration.get("experimental.chat.useGpu") && isChatModel; - const isChatModel = this.typeModel in modelsChat; - const isBaseModel = this.typeModel in modelsBase; + const useGPUCompletionAuto = + configuration.get("completion.autoMode.useGpu") && + this.typeModel === configuration.get("completion.autoMode"); + + const useGPUCompletionManual = + configuration.get("completion.manualMode.useGpu") && + this.typeModel === configuration.get("completion.manualMode"); + + const useGpu = + gpuEnabled && + (useGPUCompletionAuto || useGPUCompletionManual || useGPUChat); + + const port = models[this.typeModel].port; this.serverProcess = spawn( serverPath, @@ -126,7 +140,7 @@ class Server { ...(isChatModel ? ["--ctx-size", "16384"] : ["--ctx-size", "4096"]), ...(isBaseModel ? ["--parallel", "4"] : []), ...(isMacArm64 ? ["--nobrowser"] : []), - ...(useGPU ? 
["--n-gpu-layers", "100"] : []), + ...(useGpu ? ["--n-gpu-layers", "100"] : []), "--cont-batching", "--embedding", "--log-disable", diff --git a/src/common/utils/configuration.ts b/src/common/utils/configuration.ts index 71695cc..723cbac 100644 --- a/src/common/utils/configuration.ts +++ b/src/common/utils/configuration.ts @@ -14,12 +14,21 @@ const ConfigurationProperties = { "experimental.chat": { default: false, }, + "experimental.chat.useGpu": { + default: false, + }, "completion.autoMode": { default: "base-small", }, - "completion.manuallyMode": { + "completion.autoMode.useGpu": { + default: false, + }, + "completion.manualMode": { default: "base-small", }, + "completion.manualMode.useGpu": { + default: false, + }, } as const; interface ConfigurationPropertiesType @@ -36,12 +45,21 @@ interface ConfigurationPropertiesType "experimental.chat": { possibleValues: boolean; }; + "experimental.chat.useGpu": { + possibleValues: boolean; + }; "completion.autoMode": { possibleValues: TypeModelsBase; }; - "completion.manuallyMode": { + "completion.autoMode.useGpu": { + possibleValues: boolean; + }; + "completion.manualMode": { possibleValues: TypeModelsBase; }; + "completion.manualMode.useGpu": { + possibleValues: boolean; + }; } class Configuration { diff --git a/src/extension.ts b/src/extension.ts index de042fb..a1afc9f 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -71,7 +71,7 @@ export async function activate(context: vscode.ExtensionContext) { [ ...new Set([ configuration.get("completion.autoMode"), - configuration.get("completion.manuallyMode"), + configuration.get("completion.manualMode"), ...(isChatEnabled ? ["chat-medium" as const] : []), ]), ].map((serverType) => servers[serverType].startServer())