Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Use LlamaIndex #32

Open
wants to merge 20 commits into
base: goku
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,30 @@ source .env
## Steps

1. Open [rag.ipynb](notebooks/rag.ipynb) to interactively go through all the concepts and run experiments.
2. Use the best configuration (in `serve.py`) from the notebook experiments to serve the LLM.
```bash
python app/main.py
2. Use the best configuration from the notebook experiments to serve the LLM.
```python
import os
import ray
from ray import serve
from app.serve import deployment

# Credentials
ray.init(
runtime_env={
"env_vars": {
"OPENAI_API_BASE": os.environ["OPENAI_API_BASE"],
"OPENAI_API_KEY": os.environ["OPENAI_API_KEY"],
"ANYSCALE_API_BASE": os.environ["ANYSCALE_API_BASE"],
"ANYSCALE_API_KEY": os.environ["ANYSCALE_API_KEY"],
"DB_CONNECTION_STRING": os.environ["DB_CONNECTION_STRING"]
}
}
)

# Serve
serve.run(deployment)
```
3. Query your service.
3. Query your service (in a new terminal).
```python
import json
import requests
Expand Down
22 changes: 0 additions & 22 deletions app/main.py

This file was deleted.

23 changes: 8 additions & 15 deletions app/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,7 @@ def __init__(
)
else:
self.embedding_model = HuggingFaceEmbeddings(
model_name=embedding_model_name,
model_kwargs=model_kwargs,
encode_kwargs=encode_kwargs,
model_name=embedding_model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

# LLM
Expand All @@ -74,21 +72,16 @@ def __init__(
self.system_content = system_content
self.assistant_content = assistant_content

# VectorDB connection
self.conn = psycopg.connect(os.environ["DB_CONNECTION_STRING"])
register_vector(self.conn)

def __call__(self, query, num_chunks=5):
# Get context
embedding = np.array(self.embedding_model.embed_query(query))
with self.conn.cursor() as cur:
cur.execute(
"SELECT * FROM document ORDER BY embedding <-> %s LIMIT %s",
(embedding, num_chunks),
)
rows = cur.fetchall()
context = [{"text": row[1]} for row in rows]
sources = [row[2] for row in rows]
with psycopg.connect(os.environ["DB_CONNECTION_STRING"]) as conn:
register_vector(conn)
with conn.cursor() as cur:
cur.execute("SELECT * FROM document ORDER BY embedding <-> %s LIMIT %s", (embedding, num_chunks))
rows = cur.fetchall()
context = [{"text": row[1]} for row in rows]
sources = [row[2] for row in rows]

# Generate response
user_content = f"query: {query}, context: {context}"
Expand Down
17 changes: 10 additions & 7 deletions app/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,15 @@


def get_secret(secret_name):
    """Resolve a secret value, preferring AWS Secrets Manager.

    Attempts to read the "ray-assistant" secret from AWS Secrets Manager
    in us-west-2 and pull ``secret_name`` out of its JSON payload. On any
    failure (boto3 not installed, missing AWS credentials, network error,
    key absent) the error is printed and the value is read from the
    process environment instead.
    """
    try:
        import boto3  # imported lazily so environments without AWS deps still work

        sm_client = boto3.client("secretsmanager", region_name="us-west-2")
        secret_payload = sm_client.get_secret_value(SecretId="ray-assistant")
        secrets = json.loads(secret_payload["SecretString"])
        return secrets[secret_name]
    except Exception as err:
        # Best-effort fallback: report the failure and use the env var.
        print(err)
        return os.environ[secret_name]


def execute_bash(command):
Expand All @@ -37,9 +41,6 @@ def execute_bash(command):

def load_index(embedding_model_name, chunk_size, chunk_overlap):
# Drop current Vector DB and prepare for new one
execute_bash(
f'''psql "{os.environ["DB_CONNECTION_STRING"]}" -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE state = 'idle in transaction';"'''
)
execute_bash(f'psql "{os.environ["DB_CONNECTION_STRING"]}" -c "DROP TABLE document;"')
execute_bash(f"sudo -u postgres psql -f ../migrations/vector-{EMBEDDING_DIMENSIONS[embedding_model_name]}.sql")
SQL_DUMP_FP = Path(EFS_DIR, "sql_dumps", f"{embedding_model_name.split('/')[-1]}_{chunk_size}_{chunk_overlap}.sql")
Expand Down Expand Up @@ -85,9 +86,11 @@ class Answer(BaseModel):
@serve.ingress(application)
class RayAssistantDeployment:
def __init__(self, chunk_size, chunk_overlap, num_chunks, embedding_model_name, llm):
# Set credentials
os.environ["DB_CONNECTION_STRING"] = get_secret("DB_CONNECTION_STRING")
openai.api_key = get_secret("ANYSCALE_API_KEY")
openai.api_base = "https://api.endpoints.anyscale.com/v1"

# Load index
load_index(
embedding_model_name=embedding_model_name,
Expand Down
4 changes: 1 addition & 3 deletions datasets/eval-dataset-v1.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
{"question": "How does autoscaling work in a Ray Serve application?", "source": "https://docs.ray.io/en/master/serve/scaling-and-resource-allocation.html#autoscaling"}
{"question": "how do I get the address of a ray node", "source": "https://docs.ray.io/en/master/ray-core/miscellaneous.html#node-information"}
{"question": "Does Ray support NCCL?", "source": "https://docs.ray.io/en/master/ray-more-libs/ray-collective.html"}
{"question": "could you give me an example of using this library for data-parallel training of CNNs on Ray?", "source": "https://docs.ray.io/en/master/ray-air/computer-vision.html#training-vision-models"}
{"question": "Is Ray integrated with DeepSpeed?", "source": "https://docs.ray.io/en/master/ray-air/examples/gptj_deepspeed_fine_tuning.html#fine-tuning-the-model-with-ray-air-a-name-train-a"}
{"question": "what will happen if I use AsyncIO's await to wait for a Ray future like `await x.remote()`", "source": "https://docs.ray.io/en/master/ray-core/actors/async_api.html#objectrefs-as-asyncio-futures"}
{"question": "How would you compare Spark, Ray, Dask?", "source": "https://docs.ray.io/en/master/data/overview.html#how-does-ray-data-compare-to-x-for-offline-inference"}
Expand Down Expand Up @@ -35,7 +34,6 @@
{"question": "how do I specify ScalingConfig for a Tuner run?", "source": "https://docs.ray.io/en/master/tune/api/doc/ray.tune.Tuner.html"}
{"question": "how to utilize ‘zero-copy’ feature ray provide for numpy?", "source": "https://docs.ray.io/en/master/ray-core/objects/serialization.html#numpy-arrays"}
{"question": "if there are O(millions) of keys that all have state, is it ok to spin up 1=1 actors? Or would it be advised to create ‘key pools’ where an actor can hold 1=many keys?", "source": "https://docs.ray.io/en/master/ray-core/patterns/too-fine-grained-tasks.html"}
{"question": "How to get the best AIR checkpoint after training without a Result object?", "source": "https://docs.ray.io/en/master/tune/api/doc/ray.air.Result.html#ray-air-result"}
{"question": "How to find the best checkpoint from the trial directory?", "source": "https://docs.ray.io/en/master/tune/api/doc/ray.tune.ExperimentAnalysis.html"}
{"question": "what are the advantage and disadvantage of using singleton Actor ?", "source": "https://docs.ray.io/en/master/ray-core/actors/named-actors.html"}
{"question": "what are the advantages of using a named actor?", "source": "https://docs.ray.io/en/master/ray-core/actors/named-actors.html"}
Expand All @@ -53,7 +51,7 @@
{"question": "how do I get the actor id of an actor", "source": "https://docs.ray.io/en/master/ray-core/api/doc/ray.runtime_context.get_runtime_context.html"}
{"question": "can ray.init() can check if ray is all-ready initiated ?", "source": "https://docs.ray.io/en/master/ray-core/api/doc/ray.init.html"}
{"question": "What does the `compute=actor` argument do within `ray.data.map_batches` ?", "source": "https://docs.ray.io/en/master/data/api/doc/ray.data.Dataset.map_batches.html"}
{"question": "how do I use wandb logger with accelerateTrainer?", "source": "https://docs.ray.io/en/master/ray-air/api/doc/ray.air.integrations.wandb.WandbLoggerCallback.html"}
{"question": "how do I use wandb logger with accelerateTrainer?", "source": "https://docs.ray.io/en/master/tune/examples/tune-wandb.html"}
{"question": "What will be implicitly put into object store?", "source": "https://docs.ray.io/en/master/ray-core/objects.html#objects"}
{"question": "How do I kill or cancel a ray task that I already started?", "source": "https://docs.ray.io/en/master/ray-core/fault_tolerance/tasks.html#cancelling-misbehaving-tasks"}
{"question": "how to send extra arguments in dataset.map_batches function?", "source": "https://docs.ray.io/en/master/data/api/doc/ray.data.Dataset.map_batches.html#ray-data-dataset-map-batches"}
Expand Down
Loading