forked from NVIDIA/GenerativeAIExamples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompose.env
71 lines (50 loc) · 2.32 KB
/
compose.env
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Absolute path to the local copy of the model weights.
# NOTE: a relative path should not be used here.
export MODEL_DIRECTORY='/home/nvidia/llama2_13b_chat_hf_v1/'
# How many GPUs the nemollm inference ms needs in order to deploy the model.
export NUM_GPU=1
# Uncomment and set the device ID to choose which GPU the vector database uses.
# export VECTORSTORE_GPU_DEVICE_ID=0
# Fill this out if you don't have a GPU. Leave this empty if you have a local GPU.
# The value is taken from the NVIDIA_API_KEY already present in the calling
# environment and re-exported. The `:-` default makes an unset variable expand
# to the empty string, so sourcing this file under `set -u` (nounset) does not
# abort with an "unbound variable" error when no key has been provided.
export NVIDIA_API_KEY="${NVIDIA_API_KEY:-}"
# Uncomment to turn on activation aware quantization for the LLM.
# export QUANTIZATION="int4_awq"
# Model architecture, e.g. llama or gptnext (nemotron models use gptnext).
export MODEL_ARCHITECTURE='llama'
# Name of the model in use; it is only shown on rag-playground.
# export MODEL_NAME="Llama-2-13b-chat-hf"
# [OPTIONAL] Upper limit on the number of input tokens.
# export MODEL_MAX_INPUT_LENGTH=3000
# [OPTIONAL] How many GPUs are made available to the inference server.
# export INFERENCE_GPU_COUNT="all"
# [OPTIONAL] Base directory under which all persistent volumes are created.
# export DOCKER_VOLUME_DIRECTORY="."
# Full path of the model store directory that holds the nemo embedding model.
export EMBEDDING_MODEL_DIRECTORY='/home/nvidia/nv-embed-qa_v4'
# Name of the nemo embedding model.
export EMBEDDING_MODEL_NAME='NV-Embed-QA'
export EMBEDDING_MODEL_CKPT_NAME='NV-Embed-QA-4.nemo'
# Uncomment to pick the GPU id used by the nemo embedding ms.
# export EMBEDDING_MS_GPU_ID=0
# Parameters for PGVector; update these when using the PGVector vector store
# export POSTGRES_PASSWORD=password
# export POSTGRES_USER=postgres
# export POSTGRES_DB=api
# Update this line when using an external PGVector Vector store
# export POSTGRES_HOST_IP=pgvector
# export POSTGRES_PORT_NUMBER=5432
### Riva Parameters:
# URI of the Riva Speech API: IP address/hostname and port of the Riva server.
export RIVA_API_URI=''
# [OPTIONAL] Key for the Riva Speech API.
# Enter one here if a key is needed to access the Riva API.
export RIVA_API_KEY=''
# [OPTIONAL] Riva function ID.
# Enter one here if a function ID is needed to access the Riva API.
export RIVA_FUNCTION_ID=''
# Sample rate for TTS, in Hz.
export TTS_SAMPLE_RATE=48000
# Path to the OpenTelemetry collector's config file.
export OPENTELEMETRY_CONFIG_FILE='./configs/otel-collector-config.yaml'
# Path to the Jaeger config file.
export JAEGER_CONFIG_FILE='./configs/jaeger.yaml'