chore: update deployment guide and more
wd0517 committed Aug 30, 2024
1 parent 35e5ee7 commit 73b322b
Showing 5 changed files with 61 additions and 16 deletions.
11 changes: 7 additions & 4 deletions .env.example
@@ -9,9 +9,8 @@ SECRET_KEY=
# Replace with your own sentry dsn, leave it commented if you don't want to use sentry
# SENTRY_DSN=https://[email protected]/xxxxxx

-# Replace with your own TiDB Serverless connection information,
-# currently only TiDB Serverless supports vector search.
-# You can quickly create one from https://tidbcloud.com/
+# Replace with your own TiDB cluster connection information,
+# TiDB Serverless is recommended. You can quickly create one from https://tidbcloud.com/
TIDB_HOST=xxxxx.prod.aws.tidbcloud.com
TIDB_USER=
TIDB_PASSWORD=
@@ -23,6 +22,10 @@ TIDB_SSL=true
# The default EMBEDDING_DIMS and EMBEDDING_MAX_TOKENS are set for the OpenAI text-embedding-3-small model.
# If using a different embedding model, adjust these values according to the model's specifications.
# For example:
-# maidalun1020/bce-embedding-base_v1: EMBEDDING_DIMS=768 EMBEDDING_MAX_TOKENS=512
+# openai/text-embedding-3-small: EMBEDDING_DIMS=1536, EMBEDDING_MAX_TOKENS=8191
+# maidalun1020/bce-embedding-base_v1: EMBEDDING_DIMS=768, EMBEDDING_MAX_TOKENS=512
+# BAAI/bge-m3: EMBEDDING_DIMS=1024, EMBEDDING_MAX_TOKENS=8192
EMBEDDING_DIMS=1536
# EMBEDDING_MAX_TOKENS should be equal to or smaller than the embedding model's max tokens;
# it sets the maximum size of document chunks.
EMBEDDING_MAX_TOKENS=8191
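The constraint between these two settings can be sanity-checked with a short script. This is a hypothetical helper, not part of the repository; the model table simply mirrors the examples listed above.

```python
# Hypothetical sanity check for the embedding settings in .env (not repo code).
# The table mirrors the example models listed in the comments above.
KNOWN_MODELS = {
    "openai/text-embedding-3-small": {"dims": 1536, "max_tokens": 8191},
    "maidalun1020/bce-embedding-base_v1": {"dims": 768, "max_tokens": 512},
    "BAAI/bge-m3": {"dims": 1024, "max_tokens": 8192},
}


def check_embedding_env(model: str, dims: int, max_tokens: int) -> list:
    """Return a list of problems with the configured values (empty if OK)."""
    problems = []
    spec = KNOWN_MODELS.get(model)
    if spec is None:
        return ["unknown model %r: verify values against its documentation" % model]
    if dims != spec["dims"]:
        problems.append("EMBEDDING_DIMS should be %d, got %d" % (spec["dims"], dims))
    # EMBEDDING_MAX_TOKENS must be equal to or smaller than the model's limit.
    if max_tokens > spec["max_tokens"]:
        problems.append(
            "EMBEDDING_MAX_TOKENS must not exceed %d, got %d"
            % (spec["max_tokens"], max_tokens)
        )
    return problems


print(check_embedding_env("openai/text-embedding-3-small", 1536, 8191))  # []
```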
4 changes: 2 additions & 2 deletions backend/app/rag/llm_option.py
@@ -67,9 +67,9 @@ class LLMOption(BaseModel):
default_llm_model="llama3.1",
llm_model_description="Find more in https://ollama.com/library",
default_config={
"api_base": "http://localhost:11434",
"base_url": "http://localhost:11434",
},
config_description="`api_base` is the base URL of the Ollama server, ensure it can be accessed from this server.",
config_description="`base_url` is the base URL of the Ollama server, ensure it can be accessed from this server.",
credentials_display_name="Ollama API Key",
credentials_description="Ollama doesn't require an API key; setting a dummy string here is fine",
credentials_type="str",
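The rename above (`api_base` to `base_url`) means Ollama configs saved under the old key would no longer match. A hypothetical migration helper, not part of the repository, could normalize such configs:

```python
# Hypothetical helper (not repo code): older saved Ollama configs used
# "api_base", while the updated option expects "base_url".
def migrate_ollama_config(config: dict) -> dict:
    """Return a copy of the config with "api_base" renamed to "base_url"."""
    migrated = dict(config)
    if "api_base" in migrated and "base_url" not in migrated:
        migrated["base_url"] = migrated.pop("api_base")
    return migrated


old = {"api_base": "http://localhost:11434"}
print(migrate_ollama_config(old))  # {'base_url': 'http://localhost:11434'}
```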
18 changes: 15 additions & 3 deletions docker-compose.yml
@@ -8,7 +8,7 @@ services:
- ./redis-data:/data

backend:
-image: tidbai/backend:0.2
+image: tidbai/backend:0.2.4
restart: always
depends_on:
- redis
@@ -25,7 +25,7 @@
max-file: "6"

frontend:
-image: tidbai/frontend:0.2
+image: tidbai/frontend:0.2.4
restart: always
depends_on:
- backend
@@ -40,7 +40,7 @@
max-file: "6"

background:
-image: tidbai/backend:0.2
+image: tidbai/backend:0.2.4
restart: always
depends_on:
- redis
@@ -56,3 +56,15 @@
options:
max-size: "50m"
max-file: "6"

local-embedding-reranker:
image: tidbai/local-embedding-reranker:v1
ports:
- 5001:5001
# If you are in China, you can set the following environment variable to speed up the download of models
# environment:
# - HF_ENDPOINT=https://hf-mirror.com
volumes:
- ./local-embedding-reranker:/root/.cache/huggingface
profiles:
- local-embedding-reranker
31 changes: 27 additions & 4 deletions frontend/app/src/pages/docs/deploy-with-docker.mdx
@@ -16,6 +22 @@ This document provides instructions for deploying the entire RAG using Docker Compose
cd tidb.ai
```

2. Select an embedding model for TiDB.AI.

We recommend using the OpenAI text-embedding-3-small model for TiDB.AI, but you can also use other supported embedding models.

- OpenAI
- text-embedding-3-small: EMBEDDING_DIMS=1536, EMBEDDING_MAX_TOKENS=8191
- JinaAI
- jina-clip-v1: EMBEDDING_DIMS=768, EMBEDDING_MAX_TOKENS=8192
- Find more at https://jina.ai/embeddings/
- Local Embedding Server
- BAAI/bge-m3: EMBEDDING_DIMS=1024, EMBEDDING_MAX_TOKENS=8192

<Callout>
Note: The embedding model is configured at the beginning of the deployment process. You cannot change it after deployment.
</Callout>

2. Copy and edit the `.env` file:

```bash
@@ -28,6 +44,7 @@ This document provides instructions for deploying the entire RAG using Docker Compose
- `TIDB_HOST`, `TIDB_USER`, `TIDB_PASSWORD` and `TIDB_DATABASE`: get them from your [TiDB Serverless cluster](https://tidbcloud.com/)

- Note: TiDB Serverless provides a default database named `test`; to use a different database name, create a new database in the TiDB Serverless console.
- `EMBEDDING_DIMS` and `EMBEDDING_MAX_TOKENS`: set them according to the embedding model you chose above; they cannot be changed after deployment.
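Before moving on, the `.env` file can be checked for the required TiDB settings. This is a hypothetical checker sketched for illustration, not a script shipped with the repository:

```python
# Hypothetical .env checker (not part of the repo): parses KEY=VALUE lines
# and reports required TiDB settings that are missing or left empty.
REQUIRED = ["TIDB_HOST", "TIDB_USER", "TIDB_PASSWORD", "TIDB_DATABASE"]


def parse_env(text: str) -> dict:
    """Parse simple KEY=VALUE lines, skipping blanks and # comments."""
    env = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        env[key.strip()] = value.strip()
    return env


def missing_tidb_settings(text: str) -> list:
    env = parse_env(text)
    return [key for key in REQUIRED if not env.get(key)]


sample = "TIDB_HOST=xxxxx.prod.aws.tidbcloud.com\nTIDB_USER=root\nTIDB_PASSWORD=\n"
print(missing_tidb_settings(sample))  # ['TIDB_PASSWORD', 'TIDB_DATABASE']
```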

3. Migrate the database schema:

@@ -49,6 +66,12 @@ This document provides instructions for deploying the entire RAG using Docker Compose
docker compose up
```

To use the local embedding model, start with the following command:

```bash
docker compose --profile local-embedding-reranker up
```

6. Open your browser and visit `http://localhost:3000` to access the web interface.

That's it! You can now use TiDB.AI locally. You can also go to https://tidb.ai to experience the live demo.
@@ -69,18 +92,18 @@ After you deploy the tool, you need to initialize it by following the popup

This section will help you upgrade tidb.ai to the new version.

-Suppose you want to upgrade tidb.ai from 0.1 to version 0.2
+Suppose you want to upgrade tidb.ai from 0.1.0 to version 0.2.0

1. Edit your docker-compose.yml file to use the new image version.

```yaml
services:
  backend:
-    image: tidbai/backend:0.2
+    image: tidbai/backend:0.2.0
  frontend:
-    image: tidbai/frontend:0.2
+    image: tidbai/frontend:0.2.0
  background:
-    image: tidbai/backend:0.2
+    image: tidbai/backend:0.2.0
```
2. Pull the new image:
13 changes: 10 additions & 3 deletions frontend/app/src/pages/docs/llm.mdx
@@ -20,7 +20,14 @@ After logging in with the admin account, you can configure the LLM in the admin panel.

Currently we support the following LLMs:

-- [OpenAI](https://platform.openai.com/) - Recommended
-- [Gemini](https://gemini.google.com/) - Recommended
+- [OpenAI](https://platform.openai.com/)
+- [Gemini](https://gemini.google.com/)
- OpenAI Like
- [OpenRouter](https://openrouter.ai/)
- Default config: `{"api_base": "https://openrouter.ai/api/v1/"}`
- [BigModel](https://open.bigmodel.cn/)
- Default config: `{"api_base": "https://open.bigmodel.cn/api/paas/v4/"}`
- [Bedrock](https://aws.amazon.com/bedrock/)
- [Anthropic Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude)
- [Ollama](https://ollama.com/)
- Default config: `{"api_base": "http://localhost:11434"}`
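The per-provider defaults listed above could be applied with a small lookup, sketched here as a hypothetical helper (the `api_base` values come from this list; the function is not repository code):

```python
# Hypothetical helper (not repo code) showing how the default configs listed
# above might be resolved, with user overrides taking precedence.
DEFAULT_CONFIGS = {
    "openrouter": {"api_base": "https://openrouter.ai/api/v1/"},
    "bigmodel": {"api_base": "https://open.bigmodel.cn/api/paas/v4/"},
    "ollama": {"api_base": "http://localhost:11434"},
}


def resolve_config(provider: str, overrides: dict = None) -> dict:
    """Merge a provider's default config with optional user overrides."""
    config = dict(DEFAULT_CONFIGS.get(provider, {}))
    config.update(overrides or {})
    return config


print(resolve_config("openrouter"))
# {'api_base': 'https://openrouter.ai/api/v1/'}
print(resolve_config("ollama", {"api_base": "http://my-host:11434"}))
# {'api_base': 'http://my-host:11434'}
```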
