Skip to content

Commit

Permalink
Use milvus gpu with GPU_IVF_FLAT flag (#243)
Browse files Browse the repository at this point in the history
* Use GPU flag for document ingestion as default

* Port milvus changes to release branch
  • Loading branch information
nv-pranjald authored Nov 17, 2024
1 parent fc80791 commit a3f956a
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 4 deletions.
9 changes: 8 additions & 1 deletion RAG/examples/local_deploy/docker-compose-vectordb.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ services:

milvus:
container_name: milvus-standalone
image: milvusdb/milvus:v2.4.12
image: milvusdb/milvus:v2.4.15-gpu
command: ["milvus", "run", "standalone"]
environment:
ETCD_ENDPOINTS: etcd:2379
Expand All @@ -74,6 +74,13 @@ services:
depends_on:
- "etcd"
- "minio"
deploy:
resources:
reservations:
devices:
- driver: nvidia
capabilities: ["gpu"]
device_ids: ['${VECTORSTORE_GPU_DEVICE_ID:-0}']
profiles: ["nemo-retriever", "milvus", ""]

elasticsearch:
Expand Down
2 changes: 1 addition & 1 deletion RAG/src/chain_server/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class VectorStoreConfig(ConfigWizard):
"nprobe", default=16, help_txt="Number of units to query", # IVF Flat milvus
)
index_type: str = configfield(
"index_type", default="IVF_FLAT", help_txt="Index of the vector db", # IVF Flat for milvus
"index_type", default="GPU_IVF_FLAT", help_txt="Index of the vector db", # IVF Flat for milvus
)


Expand Down
8 changes: 6 additions & 2 deletions RAG/src/chain_server/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,13 +314,17 @@ def create_vectorstore_langchain(document_embedder: "Embeddings", collection_nam
)
elif config.vector_store.name == "milvus":
logger.info(f"Using milvus collection: {collection_name}")
# The vectorstore URL can be set through the APP_VECTORSTORE_URL environment variable; it must be in http://ip:port format.
if not collection_name:
collection_name = os.getenv('COLLECTION_NAME', "vector_db")
logger.info(f"Using milvus collection: {collection_name}")
url = urlparse(config.vector_store.url)
vectorstore = Milvus(
document_embedder,
connection_args={"host": url.hostname, "port": url.port},
collection_name=collection_name,
auto_id=True,
index_params={"index_type": config.vector_store.index_type, "metric_type": "L2", "nlist": config.vector_store.nlist},
search_params={"nprobe": config.vector_store.nprobe},
auto_id = True
)
else:
raise ValueError(f"{config.vector_store.name} vector database is not supported")
Expand Down

0 comments on commit a3f956a

Please sign in to comment.