diff --git a/package-lock.json b/package-lock.json index 6ba5f95..7e43937 100644 --- a/package-lock.json +++ b/package-lock.json @@ -24,6 +24,7 @@ "hnswlib-node": "^1.4.2", "langchain": "^0.0.144", "next": "13.4.4", + "ollama": "^0.4.4", "postcss": "8.4.24", "react": "18.2.0", "react-dom": "18.2.0", @@ -4707,6 +4708,14 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/ollama": { + "version": "0.4.4", + "resolved": "https://registry.npmjs.org/ollama/-/ollama-0.4.4.tgz", + "integrity": "sha512-vAOZ3LM3DUE98Yy+iUJHu9Y9juKmjDAMntYoe8XqGAEkJkU5ZRkO1lwJxu5sRNU6WkEQ3MoHsvgeVIWr+cQmbg==", + "dependencies": { + "whatwg-fetch": "^3.6.20" + } + }, "node_modules/once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -6460,6 +6469,11 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" }, + "node_modules/whatwg-fetch": { + "version": "3.6.20", + "resolved": "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-3.6.20.tgz", + "integrity": "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==" + }, "node_modules/whatwg-url": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", diff --git a/package.json b/package.json index f4029e3..f3f775b 100644 --- a/package.json +++ b/package.json @@ -27,6 +27,7 @@ "hnswlib-node": "^1.4.2", "langchain": "^0.0.144", "next": "13.4.4", + "ollama": "^0.4.4", "postcss": "8.4.24", "react": "18.2.0", "react-dom": "18.2.0", diff --git a/src/app/api/qa-pg-vector/route.ts b/src/app/api/qa-pg-vector/route.ts index 3048f82..19aa758 100644 --- a/src/app/api/qa-pg-vector/route.ts +++ b/src/app/api/qa-pg-vector/route.ts @@ -3,6 +3,7 @@ import { OpenAIEmbeddings } from "langchain/embeddings/openai"; import { createClient } from "@supabase/supabase-js"; import { OpenAI } from "langchain/llms/openai"; import { Ollama } from "langchain/llms/ollama"; +import ollama from "ollama"; import dotenv from "dotenv"; import { VectorDBQAChain } from "langchain/chains"; import { StreamingTextResponse, LangChainStream } from "ai"; @@ -15,6 +16,11 @@ export async function POST(req: Request) { const { prompt } = await req.json(); const ollama_endpoint = process.env.OLLAMA_URL; const ollama_model = process.env.OLLAMA_MODEL; + //TODO - use this later + const modelfile = ` + FROM ${ollama_model} + SYSTEM "You are a helpful assistant who answers the human's questions like you are a cartoon character. And you are always super happy. + `; const privateKey = process.env.SUPABASE_PRIVATE_KEY; if (!privateKey) throw new Error(`Expected env var SUPABASE_PRIVATE_KEY`); @@ -33,11 +39,6 @@ export async function POST(req: Request) { if (ollama_endpoint) { console.info("Using Ollama"); - model = new Ollama({ - baseUrl: ollama_endpoint, - model: ollama_model ? ollama_model : "ollama", - }); - model.verbose = true; const data = await vectorSearch(client, prompt); const contextData = data.map((d: any) => d.content); @@ -46,9 +47,25 @@ export async function POST(req: Request) { Question: ${prompt} Context: ${JSON.stringify(contextData)}`; - const result = await model.call(modifiedPrompt); - return new Response(result); + const result = await ollama.generate({ + model: ollama_model as string, + prompt: modifiedPrompt, + stream: true, + }); + + const ollamReadableStream = new ReadableStream({ + async start(controller) { + for await (const chunk of result) { + console.log("chunk", chunk.response); + const buffer = new TextEncoder().encode(chunk.response); + controller.enqueue(buffer); + } + controller.close(); + }, + }); + + return new StreamingTextResponse(ollamReadableStream); } else { const vectorStore = await SupabaseVectorStore.fromExistingIndex( new OpenAIEmbeddings({ openAIApiKey: process.env.OPENAI_API_KEY }),