Commit cdd6e92
Initial commit from Create Llama

Note: GitHub hides some file contents in large commits by default, so only a subset of the 61 changed files appears below.

61 files changed: +16877 -0 lines

.devcontainer/devcontainer.json (+46 lines)

```json
{
  "image": "mcr.microsoft.com/vscode/devcontainers/typescript-node:dev-20-bullseye",
  "features": {
    "ghcr.io/devcontainers-contrib/features/turborepo-npm:1": {},
    "ghcr.io/devcontainers-contrib/features/typescript:2": {},
    "ghcr.io/devcontainers/features/python:1": {
      "version": "3.11",
      "toolsToInstall": [
        "flake8",
        "black",
        "mypy",
        "poetry"
      ]
    }
  },
  "customizations": {
    "codespaces": {
      "openFiles": [
        "README.md"
      ]
    },
    "vscode": {
      "extensions": [
        "ms-vscode.typescript-language-features",
        "esbenp.prettier-vscode",
        "ms-python.python",
        "ms-python.black-formatter",
        "ms-python.vscode-flake8",
        "ms-python.vscode-pylance"
      ],
      "settings": {
        "python.formatting.provider": "black",
        "python.languageServer": "Pylance",
        "python.analysis.typeCheckingMode": "basic"
      }
    }
  },
  "containerEnv": {
    "POETRY_VIRTUALENVS_CREATE": "false"
  },
  "forwardPorts": [
    3000,
    8000
  ],
  "postCreateCommand": "npm install"
}
```

.env (+33 lines)

```shell
# The Llama Cloud API key.
# LLAMA_CLOUD_API_KEY=

# The provider for the AI models to use.
MODEL_PROVIDER=openai

# The name of the LLM model to use.
MODEL=gpt-3.5-turbo

# The name of the embedding model to use.
EMBEDDING_MODEL=text-embedding-3-large

# The dimension of the embedding model to use.
EMBEDDING_DIM=1024

# The OpenAI API key to use.
# OPENAI_API_KEY=

# Temperature for sampling from the model.
# LLM_TEMPERATURE=

# Maximum number of tokens to generate.
# LLM_MAX_TOKENS=

# The number of similar embeddings to return when retrieving documents.
TOP_K=3

# FILESERVER_URL_PREFIX is the URL prefix of the server storing the images generated by the interpreter.
FILESERVER_URL_PREFIX=http://localhost:3000/api/files

# The system prompt for the AI model.
SYSTEM_PROMPT=You are a helpful assistant who helps users with their questions.
```
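The commit wires these variables into the LlamaIndex runtime through `app/api/chat/engine/settings.ts`, which `generate.ts` below imports but whose content is hidden in this view. A minimal sketch of what such an `initSettings` might look like, assuming the `OpenAI` and `OpenAIEmbedding` classes exported by `llamaindex`:

```typescript
// Hypothetical sketch of app/api/chat/engine/settings.ts - the real file is
// hidden in this commit view, so treat shapes and defaults as assumptions.
import { OpenAI, OpenAIEmbedding, Settings } from "llamaindex";

export function initSettings() {
  // Configure the global LLM from the MODEL/LLM_* variables above.
  Settings.llm = new OpenAI({
    model: process.env.MODEL ?? "gpt-3.5-turbo",
    temperature: process.env.LLM_TEMPERATURE
      ? Number(process.env.LLM_TEMPERATURE)
      : undefined,
    maxTokens: process.env.LLM_MAX_TOKENS
      ? Number(process.env.LLM_MAX_TOKENS)
      : undefined,
  });
  // Configure the global embedding model and its dimension.
  Settings.embedModel = new OpenAIEmbedding({
    model: process.env.EMBEDDING_MODEL,
    dimensions: process.env.EMBEDDING_DIM
      ? Number(process.env.EMBEDDING_DIM)
      : undefined,
  });
}
```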

.eslintrc.json (+7 lines)

```json
{
  "extends": ["next/core-web-vitals", "prettier"],
  "rules": {
    "max-params": ["error", 4],
    "prefer-const": "error"
  }
}
```

.gitignore (+37 lines)

```gitignore
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# local env files
.env*.local

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts

tool-output/
```

Dockerfile (+16 lines)

```dockerfile
FROM node:20-alpine AS build

WORKDIR /app

# Install dependencies
COPY package.json package-lock.* ./
RUN npm install

# Build the application
COPY . .
RUN npm run build

# ====================================
FROM build AS release

CMD ["npm", "run", "start"]
```

README.md (+71 lines)

````markdown
This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Next.js](https://nextjs.org/) bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama).

## Getting Started

First, install the dependencies:

```
npm install
```

Second, generate the embeddings of the documents in the `./data` directory (if this folder exists - otherwise, skip this step):

```
npm run generate
```

Third, run the development server:

```
npm run dev
```

Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.

You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.

This project uses [`next/font`](https://nextjs.org/docs/basic-features/font-optimization) to automatically optimize and load Inter, a custom Google Font.

## Using Docker

1. Build an image for the Next.js app:

```
docker build -t <your_app_image_name> .
```

2. Generate embeddings:

Parse the data and generate the vector embeddings if the `./data` folder exists - otherwise, skip this step:

```
docker run \
  --rm \
  -v $(pwd)/.env:/app/.env \
  -v $(pwd)/config:/app/config \
  -v $(pwd)/data:/app/data \
  -v $(pwd)/cache:/app/cache \
  <your_app_image_name> \
  npm run generate
```

The `.env` and `config` mounts pass your environment variables and configuration into the container; the `cache` mount stores the vector database on your file system.

3. Start the app:

```
docker run \
  --rm \
  -v $(pwd)/.env:/app/.env \
  -v $(pwd)/config:/app/config \
  -v $(pwd)/cache:/app/cache \
  -p 3000:3000 \
  <your_app_image_name>
```

The same mounts apply here: `.env` and `config` supply your settings, and `cache` holds the vector database generated in the previous step.

## Learn More

To learn more about LlamaIndex, take a look at the following resources:

- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
- [LlamaIndexTS Documentation](https://ts.llamaindex.ai) - learn about LlamaIndex (TypeScript features).

You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!
````

app/api/chat/engine/chat.ts (+21 lines)

```typescript
import { ContextChatEngine, Settings } from "llamaindex";
import { getDataSource } from "./index";

export async function createChatEngine() {
  const index = await getDataSource();
  if (!index) {
    throw new Error(
      `StorageContext is empty - call 'npm run generate' to generate the storage first`,
    );
  }
  const retriever = index.asRetriever();
  retriever.similarityTopK = process.env.TOP_K ? parseInt(process.env.TOP_K) : 3;

  return new ContextChatEngine({
    chatModel: Settings.llm,
    retriever,
    systemPrompt: process.env.SYSTEM_PROMPT,
  });
}
```
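`createChatEngine` is consumed by the commit's chat route (`app/api/chat/route.ts`, also hidden in this view). A hedged sketch of a minimal consumer, assuming the `chat({ message, chatHistory })` signature and response shape of `ContextChatEngine`:

```typescript
// Hypothetical consumer of createChatEngine(); the commit's real route handler
// is hidden in this view, and it most likely streams instead of buffering.
import { createChatEngine } from "./engine/chat";

export async function answer(message: string): Promise<string> {
  const chatEngine = await createChatEngine();
  // Retrieves the TOP_K most similar chunks and answers with them as context.
  const { response } = await chatEngine.chat({ message, chatHistory: [] });
  return response;
}
```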

app/api/chat/engine/generate.ts (+39 lines)

```typescript
import { VectorStoreIndex } from "llamaindex";
import { storageContextFromDefaults } from "llamaindex/storage/StorageContext";

import * as dotenv from "dotenv";

import { getDocuments } from "./loader";
import { initSettings } from "./settings";
import { STORAGE_CACHE_DIR } from "./shared";

// Load environment variables from local .env file
dotenv.config();

async function getRuntime(func: any) {
  const start = Date.now();
  await func();
  const end = Date.now();
  return end - start;
}

async function generateDatasource() {
  console.log(`Generating storage context...`);
  // Split documents, create embeddings and store them in the storage context
  const ms = await getRuntime(async () => {
    const storageContext = await storageContextFromDefaults({
      persistDir: STORAGE_CACHE_DIR,
    });
    const documents = await getDocuments();
    await VectorStoreIndex.fromDocuments(documents, {
      storageContext,
    });
  });
  console.log(`Storage context successfully generated in ${ms / 1000}s.`);
}

(async () => {
  initSettings();
  await generateDatasource();
  console.log("Finished generating storage.");
})();
```
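This script and the engine files below import `STORAGE_CACHE_DIR` from `./shared`, a file hidden in this view. Judging by the `cache` volume mounts in the README's Docker commands, it presumably holds little more than:

```typescript
// Hypothetical app/api/chat/engine/shared.ts; the "./cache" value is an
// assumption inferred from the cache mounts in the README, not confirmed.
export const STORAGE_CACHE_DIR = "./cache";
```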

app/api/chat/engine/index.ts (+19 lines)

```typescript
import { SimpleDocumentStore, VectorStoreIndex } from "llamaindex";
import { storageContextFromDefaults } from "llamaindex/storage/StorageContext";
import { STORAGE_CACHE_DIR } from "./shared";

export async function getDataSource() {
  const storageContext = await storageContextFromDefaults({
    persistDir: `${STORAGE_CACHE_DIR}`,
  });

  const numberOfDocs = Object.keys(
    (storageContext.docStore as SimpleDocumentStore).toDict(),
  ).length;
  if (numberOfDocs === 0) {
    return null;
  }
  return await VectorStoreIndex.init({
    storageContext,
  });
}
```

app/api/chat/engine/loader.ts (+9 lines)

```typescript
import { SimpleDirectoryReader } from "llamaindex";

export const DATA_DIR = "./data";

export async function getDocuments() {
  return await new SimpleDirectoryReader().loadData({
    directoryPath: DATA_DIR,
  });
}
```
