diff --git a/examples/rag-playground/package.json b/examples/rag-playground/package.json index 0116803..c10f6cc 100644 --- a/examples/rag-playground/package.json +++ b/examples/rag-playground/package.json @@ -13,12 +13,14 @@ }, "devDependencies": { "@floating-ui/dom": "^1.6.1", + "@mlc-ai/web-llm": "^0.2.18", "@types/d3-array": "^3.2.1", "@types/d3-format": "^3.0.4", "@types/d3-random": "^3.0.3", "@types/d3-time-format": "^4.0.3", "@types/flexsearch": "^0.7.6", "@typescript-eslint/eslint-plugin": "^6.20.0", + "@webgpu/types": "^0.1.40", "@xenova/transformers": "^2.14.2", "@xiaohk/utils": "^0.0.6", "d3-array": "^3.2.4", @@ -34,6 +36,7 @@ "flexsearch": "^0.7.43", "gh-pages": "^6.1.1", "gpt-tokenizer": "^2.1.2", + "idb-keyval": "^6.2.1", "lit": "^3.1.2", "prettier": "^3.2.4", "typescript": "^5.3.3", diff --git a/examples/rag-playground/src/components/playground/playground.ts b/examples/rag-playground/src/components/playground/playground.ts index bdb837f..350853a 100644 --- a/examples/rag-playground/src/components/playground/playground.ts +++ b/examples/rag-playground/src/components/playground/playground.ts @@ -2,8 +2,23 @@ import { LitElement, css, unsafeCSS, html, PropertyValues } from 'lit'; import { customElement, property, state, query } from 'lit/decorators.js'; import { unsafeHTML } from 'lit/directives/unsafe-html.js'; import { EmbeddingModel } from '../../workers/embedding'; +import { + UserConfigManager, + UserConfig, + SupportedRemoteModel, + SupportedLocalModel, + supportedModelReverseLookup, + ModelFamily +} from './user-config'; +import { textGenGpt } from '../../llms/gpt'; +import { textGenMememo } from '../../llms/mememo-gen'; +import { textGenGemini } from '../../llms/gemini'; +import TextGenLocalWorkerInline from '../../llms/web-llm?worker&inline'; + +import type { TextGenMessage } from '../../llms/gpt'; import type { EmbeddingWorkerMessage } from '../../workers/embedding'; import type { MememoTextViewer } from '../text-viewer/text-viewer'; +import type { TextGenLocalWorkerMessage } from '../../llms/web-llm'; import '../query-box/query-box'; import '../prompt-box/prompt-box'; @@ -35,6 +50,9 @@ const datasets: Record = { } }; +const DEV_MODE = import.meta.env.DEV; +const USE_CACHE = true && DEV_MODE; + /** * Playground element. 
* @@ -63,6 +81,18 @@ export class MememoPlayground extends LitElement { @query('mememo-text-viewer') textViewerComponent: MememoTextViewer | undefined | null; + @state() + userConfigManager: UserConfigManager; + + @state() + userConfig!: UserConfig; + + @property({ attribute: false }) + textGenLocalWorker: Worker; + textGenLocalWorkerResolve = ( + value: TextGenMessage | PromiseLike<TextGenMessage> + ) => {}; + //==========================================================================|| // Lifecycle Methods || //==========================================================================|| @@ -76,6 +106,15 @@ export class MememoPlayground extends LitElement { this.embeddingWorkerMessageHandler(e); } ); + + // Initialize the local llm worker + this.textGenLocalWorker = new TextGenLocalWorkerInline(); + + // Set up the user config store + const updateUserConfig = (userConfig: UserConfig) => { + this.userConfig = userConfig; + }; + this.userConfigManager = new UserConfigManager(updateUserConfig); } /** @@ -129,6 +168,10 @@ export class MememoPlayground extends LitElement { //==========================================================================|| // Event Handlers || //==========================================================================|| + /** + * Start extracting embeddings from the user query + * @param e Event + */ userQueryRunClickHandler(e: CustomEvent) { this.userQuery = e.detail; @@ -136,6 +179,17 @@ export class MememoPlayground extends LitElement { this.getEmbedding([this.userQuery]); } + /** + * Run the prompt using external AI services or local LLM + * @param e Event + */ + promptRunClickHandler(e: CustomEvent) { + const prompt = e.detail; + + // Run the prompt + this._runPrompt(prompt); + } + semanticSearchFinishedHandler(e: CustomEvent) { this.relevantDocuments = e.detail; } @@ -164,6 +218,117 @@ export class MememoPlayground extends LitElement { //==========================================================================|| // Private Helpers || //==========================================================================|| + /** + * Run the given prompt using the preferred model + * @returns A promise of the prompt inference + */ + _runPrompt(curPrompt: string, temperature = 0.2) { + let runRequest: Promise<TextGenMessage>; + + switch (this.userConfig.preferredLLM) { + case SupportedRemoteModel['gpt-3.5']: { + runRequest = textGenGpt( + this.userConfig.llmAPIKeys[ModelFamily.openAI], + 'text-gen', + curPrompt, + temperature, + 'gpt-3.5-turbo', + USE_CACHE + ); + break; + } + + case SupportedRemoteModel['gpt-4']: { + runRequest = textGenGpt( + this.userConfig.llmAPIKeys[ModelFamily.openAI], + 'text-gen', + curPrompt, + temperature, + 'gpt-4-1106-preview', + USE_CACHE + ); + break; + } + + case SupportedRemoteModel['gemini-pro']: { + runRequest = textGenGemini( + this.userConfig.llmAPIKeys[ModelFamily.google], + 'text-gen', + curPrompt, + temperature, + USE_CACHE + ); + break; + } + + // case SupportedLocalModel['mistral-7b-v0.2']: + // case SupportedLocalModel['gpt-2']: + case SupportedLocalModel['phi-2']: + case SupportedLocalModel['llama-2-7b']: + case SupportedLocalModel['tinyllama-1.1b']: { + runRequest = new Promise<TextGenMessage>(resolve => { + this.textGenLocalWorkerResolve = resolve; + }); + const message: TextGenLocalWorkerMessage = { + command: 'startTextGen', + payload: { + apiKey: '', + prompt: curPrompt, + requestID: '', + temperature: temperature + } + }; + this.textGenLocalWorker.postMessage(message); + break; + } + + case SupportedRemoteModel['gpt-3.5-free']: { + runRequest = textGenMememo(
'text-gen', + curPrompt, + temperature, + 'gpt-3.5-free', + USE_CACHE + ); + break; + } + + default: { + console.error('Unknown case ', this.userConfig.preferredLLM); + runRequest = textGenMememo( + 'text-gen', + curPrompt, + temperature, + 'gpt-3.5-free', + USE_CACHE + ); + } + } + + runRequest.then( + message => { + switch (message.command) { + case 'finishTextGen': { + // Success + if (DEV_MODE) { + console.info( + `Finished running prompt with [${this.userConfig.preferredLLM}]` + ); + console.info(message.payload.result); + } + + const output = message.payload.result; + break; + } + + case 'error': { + console.error(message.payload.message); + } + } + }, + () => {} + ); + } //==========================================================================|| // Templates and Styles || @@ -198,7 +363,9 @@ export class MememoPlayground extends LitElement { template=${promptTemplate[Dataset.Arxiv]} userQuery=${this.userQuery} .relevantDocuments=${this.relevantDocuments} - @runButtonClicked=${(e: CustomEvent) => {}} + @runButtonClicked=${(e: CustomEvent) => { + this.promptRunClickHandler(e); + }} > diff --git a/examples/rag-playground/src/components/playground/user-config.ts b/examples/rag-playground/src/components/playground/user-config.ts new file mode 100644 index 0000000..0840607 --- /dev/null +++ b/examples/rag-playground/src/components/playground/user-config.ts @@ -0,0 +1,149 @@ +import { get, set, del, clear } from 'idb-keyval'; + +const PREFIX = 'user-config'; + +export enum SupportedLocalModel { + 'llama-2-7b' = 'Llama 2 (7B)', + // 'mistral-7b-v0.2' = 'Mistral (7B)', + 'phi-2' = 'Phi 2 (2.7B)', + 'tinyllama-1.1b' = 'TinyLlama (1.1B)' + // 'gpt-2' = 'GPT 2 (124M)' +} + +export enum SupportedRemoteModel { + 'gpt-3.5-free' = 'GPT 3.5 (free)', + 'gpt-3.5' = 'GPT 3.5', + 'gpt-4' = 'GPT 4', + 'gemini-pro' = 'Gemini Pro' +} + +export const supportedModelReverseLookup: Record< + SupportedRemoteModel | SupportedLocalModel, + keyof typeof SupportedRemoteModel | keyof typeof SupportedLocalModel +> = { + [SupportedRemoteModel['gpt-3.5-free']]: 'gpt-3.5-free', + [SupportedRemoteModel['gpt-3.5']]: 'gpt-3.5', + [SupportedRemoteModel['gpt-4']]: 'gpt-4', + [SupportedRemoteModel['gemini-pro']]: 'gemini-pro', + [SupportedLocalModel['tinyllama-1.1b']]: 'tinyllama-1.1b', + [SupportedLocalModel['llama-2-7b']]: 'llama-2-7b', + [SupportedLocalModel['phi-2']]: 'phi-2' + // [SupportedLocalModel['gpt-2']]: 'gpt-2' + // [SupportedLocalModel['mistral-7b-v0.2']]: 'mistral-7b-v0.2' +}; + +export enum ModelFamily { + google = 'Google', + openAI = 'Open AI', + local = 'Local' +} + +export const modelFamilyMap: Record< + SupportedRemoteModel | SupportedLocalModel, + ModelFamily +> = { + [SupportedRemoteModel['gpt-3.5']]: ModelFamily.openAI, + [SupportedRemoteModel['gpt-3.5-free']]: ModelFamily.openAI, + [SupportedRemoteModel['gpt-4']]: ModelFamily.openAI, + [SupportedRemoteModel['gemini-pro']]: ModelFamily.google, + [SupportedLocalModel['tinyllama-1.1b']]: ModelFamily.local, + [SupportedLocalModel['llama-2-7b']]: ModelFamily.local, + // [SupportedLocalModel['gpt-2']]: ModelFamily.local + // [SupportedLocalModel['mistral-7b-v0.2']]: ModelFamily.local + [SupportedLocalModel['phi-2']]: ModelFamily.local +}; + +export interface UserConfig { + llmAPIKeys: Record<ModelFamily, string>; + preferredLLM: SupportedRemoteModel | SupportedLocalModel; +} + +export class UserConfigManager { + restoreFinished: Promise<void>; + updateUserConfig: (userConfig: UserConfig) => void; + + #llmAPIKeys: Record<ModelFamily, string>; + #preferredLLM: SupportedRemoteModel | SupportedLocalModel;
+ + constructor(updateUserConfig: (userConfig: UserConfig) => void) { + this.updateUserConfig = updateUserConfig; + + this.#llmAPIKeys = { + [ModelFamily.openAI]: '', + [ModelFamily.google]: '', + [ModelFamily.local]: '' + }; + this.#preferredLLM = SupportedRemoteModel['gpt-3.5-free']; + this._broadcastUserConfig(); + + this.restoreFinished = this._restoreFromStorage(); + + // this._cleanStorage(); + } + + setAPIKey(modelFamily: ModelFamily, key: string) { + this.#llmAPIKeys[modelFamily] = key; + this._syncStorage().then( + () => {}, + () => {} + ); + this._broadcastUserConfig(); + } + + setPreferredLLM(model: SupportedRemoteModel | SupportedLocalModel) { + this.#preferredLLM = model; + this._syncStorage().then( + () => {}, + () => {} + ); + this._broadcastUserConfig(); + } + + /** + * Reconstruct the prompts from the local storage. + */ + async _restoreFromStorage() { + // Restore the local prompts + const config = (await get(PREFIX)) as UserConfig | undefined; + if (config) { + this.#llmAPIKeys = config.llmAPIKeys; + this.#preferredLLM = config.preferredLLM; + } + this._broadcastUserConfig(); + } + + /** + * Store the current config to local storage + */ + async _syncStorage() { + const config = this._constructConfig(); + await set(PREFIX, config); + } + + /** + * Create a copy of the user config + * @returns User config + */ + _constructConfig(): UserConfig { + const config: UserConfig = { + llmAPIKeys: this.#llmAPIKeys, + preferredLLM: this.#preferredLLM + }; + return config; + } + + /** + * Clean the local storage + */ + async _cleanStorage() { + await del(PREFIX); + } + + /** + * Update the public user config + */ + _broadcastUserConfig() { + const newConfig = this._constructConfig(); + this.updateUserConfig(newConfig); + } +} diff --git a/examples/rag-playground/src/components/prompt-box/prompt-box.css b/examples/rag-playground/src/components/prompt-box/prompt-box.css index 0f96dc9..8dbff2f 100644 --- a/examples/rag-playground/src/components/prompt-box/prompt-box.css +++ b/examples/rag-playground/src/components/prompt-box/prompt-box.css @@ -42,13 +42,12 @@ textarea { flex-direction: row; justify-content: center; align-items: center; - gap: 12px; + gap: 10px; .text-group { display: flex; - gap: 7px; + gap: 5px; align-items: baseline; - overflow: hidden; } .text { diff --git a/examples/rag-playground/src/components/prompt-box/prompt-box.ts b/examples/rag-playground/src/components/prompt-box/prompt-box.ts index acdad9f..b283289 100644 --- a/examples/rag-playground/src/components/prompt-box/prompt-box.ts +++ b/examples/rag-playground/src/components/prompt-box/prompt-box.ts @@ -95,7 +95,7 @@ export class MememoPromptBox extends LitElement { const event = new CustomEvent('runButtonClicked', { bubbles: true, composed: true, - detail: this.template + detail: this.prompt }); this.dispatchEvent(event); } @@ -125,7 +125,7 @@ export class MememoPromptBox extends LitElement { run - diff --git a/examples/rag-playground/src/components/query-box/query-box.css b/examples/rag-playground/src/components/query-box/query-box.css index 14c40ac..ccf8a6b 100644 --- a/examples/rag-playground/src/components/query-box/query-box.css +++ b/examples/rag-playground/src/components/query-box/query-box.css @@ -42,7 +42,7 @@ textarea { flex-direction: row; justify-content: center; align-items: center; - gap: 12px; + gap: 10px; .text { font-weight: 800; diff --git a/examples/rag-playground/src/llms/gemini.ts b/examples/rag-playground/src/llms/gemini.ts new file mode 100644 index 0000000..296e1bf --- /dev/null +++ 
b/examples/rag-playground/src/llms/gemini.ts @@ -0,0 +1,141 @@ +import { HarmCategory, HarmBlockThreshold } from '../types/gemini-api-types'; +import type { TextGenMessage } from './gpt'; +import type { + GeminiGenerateTextRequestBody, + GeminiGenerateTextResponseBody, + SafetySetting +} from '../types/gemini-api-types'; + +/** + * Use Gemini API to generate text based on a given prompt + * @param apiKey Gemini API key + * @param requestID Worker request ID + * @param prompt Prompt to give to the Gemini model + * @param temperature Model temperature + * @param useCache Whether to use local cache + * @param stopSequences Strings to stop the generation + * @param detail Extra string information to include (will be returned) + */ +export const textGenGemini = async ( + apiKey: string, + requestID: string, + prompt: string, + temperature: number, + useCache: boolean = false, + stopSequences: string[] = [], + detail: string = '' +) => { + // Configure safety setting to allow low-probability unsafe responses + const safetySettings: SafetySetting[] = [ + { + category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, + threshold: HarmBlockThreshold.BLOCK_ONLY_HIGH + }, + { + category: HarmCategory.HARM_CATEGORY_HARASSMENT, + threshold: HarmBlockThreshold.BLOCK_ONLY_HIGH + }, + { + category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, + threshold: HarmBlockThreshold.BLOCK_ONLY_HIGH + }, + { + category: HarmCategory.HARM_CATEGORY_HATE_SPEECH, + threshold: HarmBlockThreshold.BLOCK_ONLY_HIGH + } + ]; + + const parameter: GeminiGenerateTextRequestBody = { + contents: [ + { + parts: [ + { + text: prompt + } + ] + } + ], + safetySettings, + generationConfig: { + temperature, + stopSequences + } + }; + + // Check if the model output is cached + const cachedValue = localStorage.getItem('[gemini]' + prompt); + if (useCache && cachedValue !== null) { + console.log('Use cached output (text gen)'); + await new Promise(resolve => setTimeout(resolve, 1000)); + const message: TextGenMessage = { + command: 'finishTextGen', + payload: { + requestID, + apiKey, + result: cachedValue, + prompt: prompt, + detail: detail + } + }; + return message; + } + + const model = 'gemini-pro'; + let url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`; + const urlParam = new URLSearchParams(); + urlParam.append('key', apiKey); + url += `?${urlParam.toString()}`; + + const requestOptions: RequestInit = { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(parameter) + }; + + try { + const response = await fetch(url, requestOptions); + const data = (await response.json()) as GeminiGenerateTextResponseBody; + if (response.status !== 200) { + throw Error('Gemini API error' + JSON.stringify(data)); + } + + if (data.candidates === undefined) { + console.error( + 'Gemini API is blocked, feedback: ', + data.promptFeedback.safetyRatings, + data + ); + throw Error('Gemini API Error' + JSON.stringify(data)); + } + + // Send back the data to the main thread + const result = data.candidates[0].content.parts[0].text; + const message: TextGenMessage = { + command: 'finishTextGen', + payload: { + requestID, + apiKey, + result, + prompt: prompt, + detail: detail + } + }; + + // Also cache the model output + if (useCache && localStorage.getItem('[gemini]' + prompt) === null) { + localStorage.setItem('[gemini]' + prompt, result); + } + return message; + } catch (error) { + // Throw the error to the main thread + const message: TextGenMessage = { + command: 'error', + payload: 
{ + requestID, + originalCommand: 'startTextGen', + message: error as string + } + }; + return message; + } +}; diff --git a/examples/rag-playground/src/llms/gpt.ts b/examples/rag-playground/src/llms/gpt.ts new file mode 100644 index 0000000..a3e289c --- /dev/null +++ b/examples/rag-playground/src/llms/gpt.ts @@ -0,0 +1,131 @@ +import type { + ChatCompletionRequest, + ChatCompletion, + ChatMessage +} from '../types/gpt-types'; + +export type TextGenMessage = + | { + command: 'finishTextGen'; + payload: { + requestID: string; + apiKey: string; + result: string; + prompt: string; + detail: string; + }; + } + | { + command: 'error'; + payload: { + requestID: string; + originalCommand: string; + message: string; + }; + }; + +/** + * Use GPT API to generate text based on a given prompt + * @param apiKey GPT API key + * @param requestID Worker request ID + * @param prompt Prompt to give to the GPT model + * @param temperature Model temperature + * @param stopSequences Strings to stop the generation + * @param detail Extra string information to include (will be returned) + * @param model GPT 3.5 or GPT 4 + */ +export const textGenGpt = async ( + apiKey: string, + requestID: string, + prompt: string, + temperature: number, + model: 'gpt-3.5-turbo' | 'gpt-4-1106-preview', + useCache: boolean = false, + stopSequences: string[] = [], + detail: string = '' +) => { + // Compile the prompt into a chat format + const message: ChatMessage = { + role: 'user', + content: prompt + }; + + const body: ChatCompletionRequest = { + model, + messages: [message], + temperature, + stop: stopSequences + }; + + // Check if the model output is cached + const cachedValue = localStorage.getItem('[gpt]' + prompt); + if (useCache && cachedValue !== null) { + console.log('Use cached output (text gen)'); + await new Promise(resolve => setTimeout(resolve, 1000)); + // await new Promise(resolve => setTimeout(resolve, 100000)); + const message: TextGenMessage = { + command: 'finishTextGen', + payload: { + requestID, + apiKey, + result: cachedValue, + prompt: prompt, + detail: detail + } + }; + return message; + } + + const url = 'https://api.openai.com/v1/chat/completions'; + + const requestOptions: RequestInit = { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Access-Control-Allow-Origin': '*', + Authorization: `Bearer ${apiKey}` + }, + body: JSON.stringify(body) + }; + + try { + const response = await fetch(url, requestOptions); + const data = (await response.json()) as ChatCompletion; + if (response.status !== 200) { + throw Error('GPT API error' + JSON.stringify(data)); + } + + if (data.choices.length < 1) { + throw Error('GPT API error' + JSON.stringify(data)); + } + + // Send back the data to the main thread + const message: TextGenMessage = { + command: 'finishTextGen', + payload: { + requestID, + apiKey, + result: data.choices[0].message.content, + prompt: prompt, + detail: detail + } + }; + + // Also cache the model output + if (useCache && localStorage.getItem('[gpt]' + prompt) === null) { + localStorage.setItem('[gpt]' + prompt, data.choices[0].message.content); + } + return message; + } catch (error) { + // Throw the error to the main thread + const message: TextGenMessage = { + command: 'error', + payload: { + requestID, + originalCommand: 'startTextGen', + message: error as string + } + }; + return message; + } +}; diff --git a/examples/rag-playground/src/llms/mememo-gen.ts b/examples/rag-playground/src/llms/mememo-gen.ts new file mode 100644 index 0000000..e6baab1 --- /dev/null +++ 
b/examples/rag-playground/src/llms/mememo-gen.ts @@ -0,0 +1,138 @@ +import type { TextGenMessage } from './gpt'; +import type { ChatCompletion } from '../types/gpt-types'; +import { + SupportedRemoteModel, + SupportedLocalModel +} from '../components/playground/user-config'; + +export type PromptRunSuccessResponse = { + command: 'finishTextGen'; + completion: ChatCompletion; + payload: { + result: string; + fullPrompt: string; + detail: string; + }; +}; + +export type PromptRunErrorResponse = { + message: string; +}; + +export interface PromptRunPostBody { + prompt: string; + temperature: number; + model: keyof typeof SupportedRemoteModel | keyof typeof SupportedLocalModel; +} + +const ENDPOINT = + 'https://o2r71c10cd.execute-api.localhost.localstack.cloud:4566/prod/run'; + +/** + * Use mememo to generate text based on a given prompt + * @param requestID Worker request ID + * @param prompt Prompt prefix + * @param inputText Input text + * @param temperature Model temperature + * @param userID User ID + * @param model The model to use + * @param detail Extra string information to include (will be returned) + */ +export const textGenMememo = async ( + requestID: string, + prompt: string, + temperature: number, + model: keyof typeof SupportedRemoteModel | keyof typeof SupportedLocalModel, + useCache: boolean = false, + detail: string = '' +): Promise => { + // Check if the model output is cached + const cachedValue = localStorage.getItem('[mememo]' + prompt); + if (useCache && cachedValue !== null) { + console.log('Use cached output (text gen)'); + await new Promise(resolve => setTimeout(resolve, 1000)); + const message: TextGenMessage = { + command: 'finishTextGen', + payload: { + requestID: '', + apiKey: '', + result: cachedValue, + prompt: prompt, + detail: detail + } + }; + return message; + } + + // Run the prompt through mememo API + const body: PromptRunPostBody = { + prompt, + temperature, + model + }; + + const url = new URL(ENDPOINT); + url.searchParams.append('type', 'run'); + + const requestOptions: RequestInit = { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + credentials: 'include', + body: JSON.stringify(body) + }; + + try { + const response = await fetch(url.toString(), requestOptions); + const data = (await response.json()) as + | PromptRunSuccessResponse + | PromptRunErrorResponse; + if (response.status !== 200) { + // Throw the error to the main thread + const errorData = data as PromptRunErrorResponse; + const message: TextGenMessage = { + command: 'error', + payload: { + requestID: requestID, + originalCommand: 'startTextGen', + message: errorData.message + } + }; + return message; + } + + const successData = data as PromptRunSuccessResponse; + // Send back the data to the main thread + const message: TextGenMessage = { + command: 'finishTextGen', + payload: { + requestID: '', + apiKey: '', + result: successData.payload.result, + prompt: successData.payload.fullPrompt, + detail: detail + } + }; + + // Also cache the model output + if (useCache) { + if (localStorage.getItem('[mememo]' + prompt) === null) { + localStorage.setItem('[mememo]' + prompt, successData.payload.result); + } + } + + return message; + } catch (error) { + // Throw the error to the main thread + const message: TextGenMessage = { + command: 'error', + payload: { + requestID: requestID, + originalCommand: 'startTextGen', + message: error as string + } + }; + return message; + } +}; diff --git a/examples/rag-playground/src/llms/palm.ts b/examples/rag-playground/src/llms/palm.ts 
new file mode 100644 index 0000000..a749312 --- /dev/null +++ b/examples/rag-playground/src/llms/palm.ts @@ -0,0 +1,137 @@ +import type { TextGenMessage } from './gpt'; +import { HarmCategory, HarmBlockThreshold } from '../types/palm-api-types'; +import type { + PalmGenerateTextRequestBody, + PalmGenerateTextResponseBody, + SafetySetting +} from '../types/palm-api-types'; + +/** + * Use PaLM API to generate text based on a given prompt + * @param apiKey PaLM API key + * @param requestID Worker request ID + * @param prompt Prompt to give to the PaLM model + * @param temperature Model temperature + * @param stopSequences Strings to stop the generation + * @param detail Extra string information to include (will be returned) + */ +export const textGenPalm = async ( + apiKey: string, + requestID: string, + prompt: string, + temperature: number, + useCache: boolean = true, + stopSequences: string[] = [], + detail: string = '' +) => { + // Configure safety setting to allow low-probability unsafe responses + const safetySettings: SafetySetting[] = [ + { + category: HarmCategory.HARM_CATEGORY_DANGEROUS, + threshold: HarmBlockThreshold.BLOCK_ONLY_HIGH + }, + { + category: HarmCategory.HARM_CATEGORY_DEROGATORY, + threshold: HarmBlockThreshold.BLOCK_ONLY_HIGH + }, + { + category: HarmCategory.HARM_CATEGORY_MEDICAL, + threshold: HarmBlockThreshold.BLOCK_ONLY_HIGH + }, + { + category: HarmCategory.HARM_CATEGORY_SEXUAL, + threshold: HarmBlockThreshold.BLOCK_ONLY_HIGH + }, + { + category: HarmCategory.HARM_CATEGORY_TOXICITY, + threshold: HarmBlockThreshold.BLOCK_ONLY_HIGH + }, + { + category: HarmCategory.HARM_CATEGORY_UNSPECIFIED, + threshold: HarmBlockThreshold.BLOCK_ONLY_HIGH + }, + { + category: HarmCategory.HARM_CATEGORY_VIOLENCE, + threshold: HarmBlockThreshold.BLOCK_ONLY_HIGH + } + ]; + + const parameter: PalmGenerateTextRequestBody = { + prompt: { text: prompt }, + safetySettings, + temperature, + stopSequences + }; + + // Check if the model output is cached + const cachedValue = localStorage.getItem('[palm]' + prompt); + if (useCache && cachedValue !== null) { + console.log('Use cached output (text gen)'); + await new Promise(resolve => setTimeout(resolve, 1000)); + const message: TextGenMessage = { + command: 'finishTextGen', + payload: { + requestID, + apiKey, + result: cachedValue, + prompt: prompt, + detail: detail + } + }; + return message; + } + + const model = 'text-bison-001'; + let url = `https://generativelanguage.googleapis.com/v1beta2/models/${model}:generateText`; + const urlParam = new URLSearchParams(); + urlParam.append('key', apiKey); + url += `?${urlParam.toString()}`; + + const requestOptions: RequestInit = { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(parameter) + }; + + try { + const response = await fetch(url, requestOptions); + const data = (await response.json()) as PalmGenerateTextResponseBody; + if (response.status !== 200) { + throw Error('PaLM API error' + JSON.stringify(data)); + } + + if (data.candidates === undefined) { + console.error('PaLM API is blocked, feedback: ', data.filters[0], data); + throw Error('PaLM API Error' + JSON.stringify(data)); + } + + // Send back the data to the main thread + const message: TextGenMessage = { + command: 'finishTextGen', + payload: { + requestID, + apiKey, + result: data.candidates[0].output, + prompt: prompt, + detail: detail + } + }; + + // Also cache the model output + if (localStorage.getItem('[palm]' + prompt) === null) { + localStorage.setItem('[palm]' + prompt, 
data.candidates[0].output); + } + return message; + } catch (error) { + // Throw the error to the main thread + const message: TextGenMessage = { + command: 'error', + payload: { + requestID, + originalCommand: 'startTextGen', + message: error as string + } + }; + return message; + } +}; diff --git a/examples/rag-playground/src/llms/web-llm.ts b/examples/rag-playground/src/llms/web-llm.ts new file mode 100644 index 0000000..8a70ccb --- /dev/null +++ b/examples/rag-playground/src/llms/web-llm.ts @@ -0,0 +1,374 @@ +import * as webllm from '@mlc-ai/web-llm'; +import { SupportedLocalModel } from '../components/playground/user-config'; +import type { TextGenWorkerMessage } from '../types/common-types'; +import type { ConvTemplateConfig } from '@mlc-ai/web-llm/lib/config'; + +export type TextGenLocalWorkerMessage = + | TextGenWorkerMessage + | { + command: 'progressLoadModel'; + payload: { + progress: number; + timeElapsed: number; + }; + } + | { + command: 'startLoadModel'; + payload: { + temperature: number; + model: SupportedLocalModel; + }; + } + | { + command: 'finishLoadModel'; + payload: { + temperature: number; + model: SupportedLocalModel; + }; + }; + +//==========================================================================|| +// Worker Initialization || +//==========================================================================|| +const APP_CONFIGS: webllm.AppConfig = { + model_list: [ + { + model_url: + 'https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC/resolve/main/', + local_id: 'TinyLlama-1.1B-Chat-v0.4-q4f16_1', + model_lib_url: + 'https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/TinyLlama-1.1B-Chat-v0.4/TinyLlama-1.1B-Chat-v0.4-q4f16_1-ctx1k-webgpu.wasm' + }, + { + model_url: + 'https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/', + local_id: 'Llama-2-7b-chat-hf-q4f16_1', + model_lib_url: + 'https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf/Llama-2-7b-chat-hf-q4f16_1-ctx1k-webgpu.wasm' + }, + { + model_url: 'https://huggingface.co/mlc-ai/gpt2-q0f16-MLC/resolve/main/', + local_id: 'gpt2-q0f16', + model_lib_url: + 'https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/gpt2/gpt2-q0f16-ctx1k-webgpu.wasm' + }, + { + model_url: + 'https://huggingface.co/mlc-ai/Mistral-7B-Instruct-v0.2-q3f16_1-MLC/resolve/main/', + local_id: 'Mistral-7B-Instruct-v0.2-q3f16_1', + model_lib_url: + 'https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.2/Mistral-7B-Instruct-v0.2-q4f16_1-sw4k_cs1k-webgpu.wasm' + }, + { + model_url: + 'https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/', + local_id: 'Phi2-q4f16_1', + model_lib_url: + 'https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/phi-2/phi-2-q4f16_1-ctx2k-webgpu.wasm', + vram_required_MB: 3053.97, + low_resource_required: false, + required_features: ['shader-f16'] + } + ] +}; + +const CONV_TEMPLATES: Record< + SupportedLocalModel, + Partial<ConvTemplateConfig> +> = { + [SupportedLocalModel['tinyllama-1.1b']]: { + system: '<|im_start|><|im_end|> ', + roles: ['<|im_start|>user', '<|im_start|>assistant'], + offset: 0, + seps: ['', ''], + separator_style: 'Two', + stop_str: '<|im_end|>', + add_bos: false, + stop_tokens: [2] + }, + [SupportedLocalModel['llama-2-7b']]: { + system: '[INST] <<SYS>><</SYS>>\n\n ', + roles: ['[INST]', '[/INST]'], + offset: 0, + seps: [' ', ' '], + separator_style: 'Two', + stop_str: '[INST]', + add_bos: true, + stop_tokens: [2] + }, + [SupportedLocalModel['phi-2']]: { + system: '', + roles:
['Instruct', 'Output'], + offset: 0, + seps: ['\n'], + separator_style: 'Two', + stop_str: '<|endoftext|>', + add_bos: false, + stop_tokens: [50256] + } +}; + +const modelMap: Record<SupportedLocalModel, string> = { + [SupportedLocalModel['tinyllama-1.1b']]: 'TinyLlama-1.1B-Chat-v0.4-q4f16_1', + [SupportedLocalModel['llama-2-7b']]: 'Llama-2-7b-chat-hf-q4f16_1', + [SupportedLocalModel['phi-2']]: 'Phi2-q4f16_1' + // [SupportedLocalModel['gpt-2']]: 'gpt2-q0f16' + // [SupportedLocalModel['mistral-7b-v0.2']]: 'Mistral-7B-Instruct-v0.2-q3f16_1' +}; + +const chat = new webllm.ChatModule(); + +// To reset the temperature, WebLLM requires reloading the model. Therefore, we just +// fix the temperature for now. +let _temperature = 0.2; + +let _modelLoadingComplete: Promise<void> | null = null; + +chat.setInitProgressCallback((report: webllm.InitProgressReport) => { + // Update the main thread about the progress + console.log(report.text); + const message: TextGenLocalWorkerMessage = { + command: 'progressLoadModel', + payload: { + progress: report.progress, + timeElapsed: report.timeElapsed + } + }; + postMessage(message); +}); + +//==========================================================================|| +// Worker Event Handlers || +//==========================================================================|| + +/** + * Helper function to handle calls from the main thread + * @param e Message event + */ +self.onmessage = (e: MessageEvent<TextGenLocalWorkerMessage>) => { + switch (e.data.command) { + case 'startLoadModel': { + startLoadModel(e.data.payload.model, e.data.payload.temperature).then( + () => {}, + () => {} + ); + break; + } + + case 'startTextGen': { + startTextGen(e.data.payload.prompt, e.data.payload.temperature).then( + () => {}, + () => {} + ); + break; + } + + default: { + console.error('Worker: unknown message', e.data.command); + break; + } + } +}; + +/** + * Reload a WebLLM model + * @param model Local LLM model + * @param temperature LLM temperature for all subsequent generation + */ +const startLoadModel = async ( + model: SupportedLocalModel, + temperature: number +) => { + _temperature = temperature; + const curModel = modelMap[model]; + const chatOption: webllm.ChatOptions = { + temperature: temperature, + conv_config: CONV_TEMPLATES[model], + conv_template: 'custom' + }; + _modelLoadingComplete = chat.reload(curModel, chatOption, APP_CONFIGS); + await _modelLoadingComplete; + + try { + // Send back the data to the main thread + const message: TextGenLocalWorkerMessage = { + command: 'finishLoadModel', + payload: { + model, + temperature + } + }; + postMessage(message); + } catch (error) { + // Throw the error to the main thread + const message: TextGenLocalWorkerMessage = { + command: 'error', + payload: { + requestID: 'web-llm', + originalCommand: 'startLoadModel', + message: error as string + } + }; + postMessage(message); + } +}; + +/** + * Use Web LLM to generate text based on a given prompt + * @param prompt Prompt to give to the local model + * @param temperature Model temperature + */ +const startTextGen = async (prompt: string, temperature: number) => { + try { + if (_modelLoadingComplete) { + await _modelLoadingComplete; + } + + const response = await chat.generate(prompt); + + // Reset the chat cache to avoid memorizing previous messages + await chat.resetChat(); + + // Send back the data to the main thread + const message: TextGenLocalWorkerMessage = { + command: 'finishTextGen', + payload: { + requestID: 'web-llm', + apiKey: '', + result: response, + prompt: prompt, + detail: '' + } + }; + postMessage(message); + } catch
(error) { + // Throw the error to the main thread + const message: TextGenLocalWorkerMessage = { + command: 'error', + payload: { + requestID: 'web-llm', + originalCommand: 'startTextGen', + message: error as string + } + }; + postMessage(message); + } +}; + +//==========================================================================|| +// Module Methods || +//==========================================================================|| + +export const hasLocalModelInCache = async (model: SupportedLocalModel) => { + const curModel = modelMap[model]; + const inCache = await webllm.hasModelInCache(curModel, APP_CONFIGS); + return inCache; +}; + +// Below helper functions are from TVM +// https:github.com/mlc-ai/relax/blob/71e8089ff3d26877f4fd139e52c30cba24f23315/web/src/webgpu.ts#L36 + +// Types are from @webgpu/types +export interface GPUDeviceDetectOutput { + adapter: GPUAdapter; + adapterInfo: GPUAdapterInfo; + device: GPUDevice; +} + +/** + * DetectGPU device in the environment. + */ +export async function detectGPUDevice(): Promise< + GPUDeviceDetectOutput | undefined +> { + if (typeof navigator !== 'undefined' && navigator.gpu !== undefined) { + const adapter = await navigator.gpu.requestAdapter({ + powerPreference: 'high-performance' + }); + if (adapter == null) { + throw Error('Cannot find adapter that matches the request'); + } + const computeMB = (value: number) => { + return Math.ceil(value / (1 << 20)) + 'MB'; + }; + + // more detailed error message + const requiredMaxBufferSize = 1 << 30; + if (requiredMaxBufferSize > adapter.limits.maxBufferSize) { + throw Error( + 'Cannot initialize runtime because of requested maxBufferSize ' + + `exceeds limit. requested=${computeMB(requiredMaxBufferSize)}, ` + + `limit=${computeMB(adapter.limits.maxBufferSize)}. ` + + 'This error may be caused by an older version of the browser (e.g. Chrome 112). ' + + 'You can try to upgrade your browser to Chrome 113 or later.' + ); + } + + let requiredMaxStorageBufferBindingSize = 1 << 30; // 1GB + if ( + requiredMaxStorageBufferBindingSize > + adapter.limits.maxStorageBufferBindingSize + ) { + // If 1GB is too large, try 128MB (default size for Android) + const backupRequiredMaxStorageBufferBindingSize = 1 << 27; // 128MB + console.log( + 'Requested maxStorageBufferBindingSize exceeds limit. \n' + + `requested=${computeMB(requiredMaxStorageBufferBindingSize)}, \n` + + `limit=${computeMB(adapter.limits.maxStorageBufferBindingSize)}. \n` + + `WARNING: Falling back to ${computeMB( + backupRequiredMaxStorageBufferBindingSize + )}...` + ); + requiredMaxStorageBufferBindingSize = + backupRequiredMaxStorageBufferBindingSize; + if ( + backupRequiredMaxStorageBufferBindingSize > + adapter.limits.maxStorageBufferBindingSize + ) { + // Fail if 128MB is still too big + throw Error( + 'Cannot initialize runtime because of requested maxStorageBufferBindingSize ' + + `exceeds limit. requested=${computeMB( + backupRequiredMaxStorageBufferBindingSize + )}, ` + + `limit=${computeMB(adapter.limits.maxStorageBufferBindingSize)}. ` + ); + } + } + + const requiredMaxComputeWorkgroupStorageSize = 32 << 10; + if ( + requiredMaxComputeWorkgroupStorageSize > + adapter.limits.maxComputeWorkgroupStorageSize + ) { + throw Error( + 'Cannot initialize runtime because of requested maxComputeWorkgroupStorageSize ' + + `exceeds limit. requested=${requiredMaxComputeWorkgroupStorageSize}, ` + + `limit=${adapter.limits.maxComputeWorkgroupStorageSize}. 
` + ); + } + + const requiredFeatures: GPUFeatureName[] = []; + // Always require f16 if available + if (adapter.features.has('shader-f16')) { + requiredFeatures.push('shader-f16'); + } + + const adapterInfo = await adapter.requestAdapterInfo(); + const device = await adapter.requestDevice({ + requiredLimits: { + maxBufferSize: requiredMaxBufferSize, + maxStorageBufferBindingSize: requiredMaxStorageBufferBindingSize, + maxComputeWorkgroupStorageSize: requiredMaxComputeWorkgroupStorageSize + }, + requiredFeatures + }); + return { + adapter: adapter, + adapterInfo: adapterInfo, + device: device + }; + } else { + return undefined; + } +} diff --git a/examples/rag-playground/src/types/common-types.ts b/examples/rag-playground/src/types/common-types.ts index 507aa63..f4afa7d 100644 --- a/examples/rag-playground/src/types/common-types.ts +++ b/examples/rag-playground/src/types/common-types.ts @@ -53,3 +53,34 @@ export interface Size { width: number; height: number; } + +export type TextGenWorkerMessage = + | { + command: 'startTextGen'; + payload: { + requestID: string; + apiKey: string; + prompt: string; + temperature: number; + stopSequences?: string[]; + detail?: string; + }; + } + | { + command: 'finishTextGen'; + payload: { + requestID: string; + apiKey: string; + result: string; + prompt: string; + detail: string; + }; + } + | { + command: 'error'; + payload: { + requestID: string; + originalCommand: string; + message: string; + }; + }; diff --git a/examples/rag-playground/src/types/gemini-api-types.ts b/examples/rag-playground/src/types/gemini-api-types.ts new file mode 100644 index 0000000..f1a08c3 --- /dev/null +++ b/examples/rag-playground/src/types/gemini-api-types.ts @@ -0,0 +1,99 @@ +/** + * Type definitions for the Gemini API calls + */ + +/** + * Harm categories that would cause prompts or candidates to be blocked. + */ +export enum HarmCategory { + HARM_CATEGORY_UNSPECIFIED = 'HARM_CATEGORY_UNSPECIFIED', + HARM_CATEGORY_HATE_SPEECH = 'HARM_CATEGORY_HATE_SPEECH', + HARM_CATEGORY_SEXUALLY_EXPLICIT = 'HARM_CATEGORY_SEXUALLY_EXPLICIT', + HARM_CATEGORY_HARASSMENT = 'HARM_CATEGORY_HARASSMENT', + HARM_CATEGORY_DANGEROUS_CONTENT = 'HARM_CATEGORY_DANGEROUS_CONTENT' +} + +/** + * Threshold Threshold above which a prompt or candidate will be blocked. + * @public + */ +export enum HarmBlockThreshold { + // Threshold is unspecified. + HARM_BLOCK_THRESHOLD_UNSPECIFIED = 'HARM_BLOCK_THRESHOLD_UNSPECIFIED', + // Content with NEGLIGIBLE will be allowed. + BLOCK_LOW_AND_ABOVE = 'BLOCK_LOW_AND_ABOVE', + // Content with NEGLIGIBLE and LOW will be allowed. + BLOCK_MEDIUM_AND_ABOVE = 'BLOCK_MEDIUM_AND_ABOVE', + // Content with NEGLIGIBLE, LOW, and MEDIUM will be allowed. + BLOCK_ONLY_HIGH = 'BLOCK_ONLY_HIGH', + // All content will be allowed. + BLOCK_NONE = 'BLOCK_NONE' +} + +/** + * Probability that a prompt or candidate matches a harm category. + * @public + */ +export enum HarmProbability { + // Probability is unspecified. + HARM_PROBABILITY_UNSPECIFIED = 'HARM_PROBABILITY_UNSPECIFIED', + // Content has a negligible chance of being unsafe. + NEGLIGIBLE = 'NEGLIGIBLE', + // Content has a low chance of being unsafe. + LOW = 'LOW', + // Content has a medium chance of being unsafe. + MEDIUM = 'MEDIUM', + // Content has a high chance of being unsafe. 
+ HIGH = 'HIGH' +} + +export interface SafetySetting { + category: HarmCategory; + threshold: HarmBlockThreshold; +} + +export interface SafetyRating { + category: HarmCategory; + probability: HarmProbability; +} + +export interface GeminiGenerateTextRequestBody { + contents: { + parts: { + text: string; + }[]; + }[]; + safetySettings?: SafetySetting[]; + generationConfig?: { + stopSequences?: string[]; + temperature?: number; + maxOutputTokens?: number; + topP?: number; + topK?: number; + }; +} + +export interface GeminiGenerateTextResponseBody { + candidates: Candidate[]; + promptFeedback: PromptFeedback; +} + +interface Candidate { + content: Content; + finishReason: string; + index: number; + safetyRatings: SafetyRating[]; +} + +interface Content { + parts: Part[]; + role: string; +} + +interface Part { + text: string; +} + +interface PromptFeedback { + safetyRatings: SafetyRating[]; +} diff --git a/examples/rag-playground/src/types/gpt-types.ts b/examples/rag-playground/src/types/gpt-types.ts new file mode 100644 index 0000000..273777e --- /dev/null +++ b/examples/rag-playground/src/types/gpt-types.ts @@ -0,0 +1,56 @@ +export interface ChatCompletion { + id: string; + object: string; + created: number; + model: string; + choices: ChatCompletionChoice[]; + usage: { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + }; +} + +export interface ChatCompletionChoice { + index: number; + message: { + role: 'system' | 'user' | 'assistant' | 'function'; + content: string; + }; + finish_reason: string; +} + +export interface ChatCompletionRequest { + model: string; + messages: ChatMessage[]; + functions?: ChatFunction[]; + function_call?: string | undefined; + temperature?: number | null; + top_p?: number | null; + n?: number | null; + stream?: boolean | null; + stop?: string | string[] | null; + max_tokens?: number | null; + presence_penalty?: number | null; + frequency_penalty?: number | null; + logit_bias?: { [key: string]: number }; + user?: string; +} + +export interface ChatMessage { + role: string; + content: string | null; + name?: string; + function_call?: ChatFunctionCall; +} + +export interface ChatFunctionCall { + name: string; + arguments: string; +} + +export interface ChatFunction { + name: string; + description?: string; + parameters: unknown; +} diff --git a/examples/rag-playground/src/types/palm-api-types.ts b/examples/rag-playground/src/types/palm-api-types.ts new file mode 100644 index 0000000..d86ac23 --- /dev/null +++ b/examples/rag-playground/src/types/palm-api-types.ts @@ -0,0 +1,99 @@ +/** + * Type definitions for the PaLM API calls + */ + +export enum HarmCategory { + HARM_CATEGORY_UNSPECIFIED = 0, + HARM_CATEGORY_DEROGATORY = 1, + HARM_CATEGORY_TOXICITY = 2, + HARM_CATEGORY_VIOLENCE = 3, + HARM_CATEGORY_SEXUAL = 4, + HARM_CATEGORY_MEDICAL = 5, + HARM_CATEGORY_DANGEROUS = 6 +} + +export enum HarmBlockThreshold { + HARM_BLOCK_THRESHOLD_UNSPECIFIED = 0, + BLOCK_LOW_AND_ABOVE = 1, + BLOCK_MEDIUM_AND_ABOVE = 2, + BLOCK_ONLY_HIGH = 3, + BLOCK_NONE = 4 +} + +export enum HarmProbability { + HARM_PROBABILITY_UNSPECIFIED = 0, + NEGLIGIBLE = 1, + LOW = 2, + MEDIUM = 3, + HIGH = 4 +} + +export interface SafetySetting { + category: HarmCategory; + threshold: HarmBlockThreshold; +} + +export interface SafetyRating { + category: HarmCategory; + probability: HarmProbability; +} + +export interface CitationSource { + startIndex: number; + endIndex: number; + uri: string; + license: string; +} + +export interface CitationMetadata { + citationSources: 
CitationSource[]; +} + +export interface TextCompletion { + output: string; + safetyRatings: SafetyRating[]; + citationMetadata: CitationMetadata; +} + +export enum BlockedReason { + BLOCKED_REASON_UNSPECIFIED = 0, + SAFETY = 1, + OTHER = 2 +} + +export interface ContentFilter { + reason: BlockedReason; + message: string; +} + +export interface SafetyFeedback { + rating: SafetyRating; + setting: SafetySetting; +} + +export interface PalmGenerateTextRequestBody { + prompt: { + text: string; + }; + safetySettings?: SafetySetting[]; + stopSequences?: string[]; + temperature?: number; + candidateCount?: number; + maxOutputTokens?: number; + topP?: number; + topK?: number; +} + +export interface PalmGenerateTextResponseBody { + candidates: TextCompletion[]; + filters: ContentFilter[]; + safetyFeedback: SafetyFeedback; +} + +export interface PalmEmbedTextRequestBody { + text: string; +} + +export interface PalmEmbedTextResponseBody { + embedding: { value: number[] }; +} diff --git a/examples/rag-playground/tsconfig.json b/examples/rag-playground/tsconfig.json index b0f1228..6050d77 100644 --- a/examples/rag-playground/tsconfig.json +++ b/examples/rag-playground/tsconfig.json @@ -6,6 +6,7 @@ "module": "ESNext", "lib": ["ES2021", "DOM", "DOM.Iterable"], "skipLibCheck": true, + "types": ["@webgpu/types"], /* Bundler mode */ "moduleResolution": "bundler",
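For context, here is a minimal sketch (not part of the patch) of how the main thread might drive the web-llm worker added above. The message commands and payload shapes come from `TextGenLocalWorkerMessage` in `web-llm.ts` and `TextGenWorkerMessage` in `common-types.ts`; the standalone listener, import paths, and variable names are illustrative assumptions rather than the playground's actual handler.

```ts
// Sketch: driving the local-LLM worker from the main thread (assumed wiring).
import TextGenLocalWorkerInline from './llms/web-llm?worker&inline';
import { SupportedLocalModel } from './components/playground/user-config';
import type { TextGenLocalWorkerMessage } from './llms/web-llm';

const worker: Worker = new TextGenLocalWorkerInline();

worker.addEventListener(
  'message',
  (e: MessageEvent<TextGenLocalWorkerMessage>) => {
    switch (e.data.command) {
      case 'progressLoadModel':
        // Weights are streaming in; progress is a fraction in [0, 1].
        console.log(`Loading: ${Math.round(e.data.payload.progress * 100)}%`);
        break;

      case 'finishLoadModel': {
        // The model is ready; now it is safe to request a generation.
        const startMessage: TextGenLocalWorkerMessage = {
          command: 'startTextGen',
          payload: { apiKey: '', requestID: '', prompt: 'Hello!', temperature: 0.2 }
        };
        worker.postMessage(startMessage);
        break;
      }

      case 'finishTextGen':
        console.log('Local LLM output:', e.data.payload.result);
        break;

      case 'error':
        console.error(e.data.payload.message);
        break;
    }
  }
);

// Kick off model loading; the worker fixes the temperature at load time.
const loadMessage: TextGenLocalWorkerMessage = {
  command: 'startLoadModel',
  payload: { model: SupportedLocalModel['tinyllama-1.1b'], temperature: 0.2 }
};
worker.postMessage(loadMessage);
```

The playground itself routes the `finishTextGen` result through `textGenLocalWorkerResolve` in `playground.ts`, so this listener only illustrates the message protocol.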