diff --git a/package-lock.json b/package-lock.json
index 6ba5f95..7e43937 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -24,6 +24,7 @@
         "hnswlib-node": "^1.4.2",
         "langchain": "^0.0.144",
         "next": "13.4.4",
+        "ollama": "^0.4.4",
         "postcss": "8.4.24",
         "react": "18.2.0",
         "react-dom": "18.2.0",
@@ -4707,6 +4708,14 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/ollama": {
+      "version": "0.4.4",
+      "resolved": "https://registry.npmjs.org/ollama/-/ollama-0.4.4.tgz",
+      "integrity": "sha512-vAOZ3LM3DUE98Yy+iUJHu9Y9juKmjDAMntYoe8XqGAEkJkU5ZRkO1lwJxu5sRNU6WkEQ3MoHsvgeVIWr+cQmbg==",
+      "dependencies": {
+        "whatwg-fetch": "^3.6.20"
+      }
+    },
     "node_modules/once": {
       "version": "1.4.0",
       "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
@@ -6460,6 +6469,11 @@
       "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
       "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
     },
+    "node_modules/whatwg-fetch": {
+      "version": "3.6.20",
+      "resolved": "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-3.6.20.tgz",
+      "integrity": "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg=="
+    },
     "node_modules/whatwg-url": {
       "version": "5.0.0",
       "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
diff --git a/package.json b/package.json
index f4029e3..f3f775b 100644
--- a/package.json
+++ b/package.json
@@ -27,6 +27,7 @@
     "hnswlib-node": "^1.4.2",
     "langchain": "^0.0.144",
     "next": "13.4.4",
+    "ollama": "^0.4.4",
     "postcss": "8.4.24",
     "react": "18.2.0",
     "react-dom": "18.2.0",
diff --git a/src/app/api/qa-pg-vector/route.ts b/src/app/api/qa-pg-vector/route.ts
index 3048f82..19aa758 100644
--- a/src/app/api/qa-pg-vector/route.ts
+++ b/src/app/api/qa-pg-vector/route.ts
@@ -3,6 +3,7 @@ import { OpenAIEmbeddings } from "langchain/embeddings/openai";
 import { createClient } from "@supabase/supabase-js";
 import { OpenAI } from "langchain/llms/openai";
 import { Ollama } from "langchain/llms/ollama";
+import ollama from "ollama";
 import dotenv from "dotenv";
 import { VectorDBQAChain } from "langchain/chains";
 import { StreamingTextResponse, LangChainStream } from "ai";
@@ -15,6 +16,11 @@ export async function POST(req: Request) {
   const { prompt } = await req.json();
   const ollama_endpoint = process.env.OLLAMA_URL;
   const ollama_model = process.env.OLLAMA_MODEL;
+  // TODO: this Modelfile template is not used yet; wire it in later.
+  const modelfile = `
+  FROM ${ollama_model}
+  SYSTEM "You are a helpful assistant who answers the human's questions like you are a cartoon character. And you are always super happy.
+  `;
 
   const privateKey = process.env.SUPABASE_PRIVATE_KEY;
   if (!privateKey) throw new Error(`Expected env var SUPABASE_PRIVATE_KEY`);
@@ -33,11 +39,6 @@ export async function POST(req: Request) {
 
   if (ollama_endpoint) {
     console.info("Using Ollama");
-    model = new Ollama({
-      baseUrl: ollama_endpoint,
-      model: ollama_model ? ollama_model : "ollama",
-    });
-    model.verbose = true;
     const data = await vectorSearch(client, prompt);
     const contextData = data.map((d: any) => d.content);
 
@@ -46,9 +47,25 @@ export async function POST(req: Request) {
     Question: ${prompt}
     
     Context: ${JSON.stringify(contextData)}`;
-    const result = await model.call(modifiedPrompt);
 
-    return new Response(result);
+    const result = await ollama.generate({
+      model: ollama_model as string,
+      prompt: modifiedPrompt,
+      stream: true,
+    });
+
+    const ollamReadableStream = new ReadableStream({
+      async start(controller) {
+        for await (const chunk of result) {
+          console.log("chunk", chunk.response);
+          const buffer = new TextEncoder().encode(chunk.response);
+          controller.enqueue(buffer);
+        }
+        controller.close();
+      },
+    });
+
+    return new StreamingTextResponse(ollamReadableStream);
   } else {
     const vectorStore = await SupabaseVectorStore.fromExistingIndex(
       new OpenAIEmbeddings({ openAIApiKey: process.env.OPENAI_API_KEY }),