# community/rag-developer-chatbot/compose.env
# (The file path above is kept as a comment: left as a bare line, the shell
# would try to execute it as a command whenever this file is sourced.)

# Full path to the local copy of the model weights.
# NOTE: This must be an absolute path, not a relative path.
export MODEL_DIRECTORY="/home/nvidia/llama2_13b_chat_hf_v1/"

# How many GPUs the nemollm inference ms needs in order to deploy the model.
export NUM_GPU='1'

# Uncomment and set a device ID to control which GPU the vector database uses.
# export VECTORSTORE_GPU_DEVICE_ID='0'

# NVIDIA API key. Fill this out if you don't have a GPU; leave it empty if you
# have a local GPU.
# The value is passed through from the NVIDIA_API_KEY variable of the calling
# environment. The `:-` default makes an unset variable expand to an empty
# string, so sourcing this file also works under `set -u`; the quotes keep the
# value intact in shells that word-split arguments to `export`.
export NVIDIA_API_KEY="${NVIDIA_API_KEY:-}"

# Uncomment to enable activation-aware quantization for the LLM.
# export QUANTIZATION='int4_awq'

# Architecture of the model, e.g. llama or gptnext (use gptnext for nemotron).
export MODEL_ARCHITECTURE='llama'

# Name of the model being used; only shown on rag-playground.
# export MODEL_NAME='Llama-2-13b-chat-hf'

# [OPTIONAL] Maximum number of input tokens.
# export MODEL_MAX_INPUT_LENGTH='3000'

# [OPTIONAL] Number of GPUs to make available to the inference server.
# export INFERENCE_GPU_COUNT='all'

# [OPTIONAL] Base directory inside which all persistent volumes are created.
# export DOCKER_VOLUME_DIRECTORY='.'

# Full path to the model store directory that holds the nemo embedding model.
export EMBEDDING_MODEL_DIRECTORY='/home/nvidia/nv-embed-qa_v4'

# Name of the nemo embedding model, and the name of its checkpoint file.
export EMBEDDING_MODEL_NAME='NV-Embed-QA'
export EMBEDDING_MODEL_CKPT_NAME='NV-Embed-QA-4.nemo'

# Uncomment to choose the GPU ID that the nemo embedding ms will use.
# export EMBEDDING_MS_GPU_ID='0'

# Parameters for PGVector; uncomment and update these when using the PGVector vector store
# export POSTGRES_PASSWORD=password
# export POSTGRES_USER=postgres
# export POSTGRES_DB=api

# Uncomment and update these lines when using an external PGVector vector store
# export POSTGRES_HOST_IP=pgvector
# export POSTGRES_PORT_NUMBER=5432

### Riva Parameters:

# Riva Speech API URI: IP address/hostname and port of the Riva server.
export RIVA_API_URI=''

# [OPTIONAL] Riva Speech API key.
# Enter a key here if one is required to access the Riva API.
export RIVA_API_KEY=''

# [OPTIONAL] Riva function ID.
# Enter a function ID here if one is required to access the Riva API.
export RIVA_FUNCTION_ID=''

# Sample rate for TTS, in Hz.
export TTS_SAMPLE_RATE='48000'

# Config file for the OpenTelemetry collector.
export OPENTELEMETRY_CONFIG_FILE='./configs/otel-collector-config.yaml'

# Config file for Jaeger.
export JAEGER_CONFIG_FILE='./configs/jaeger.yaml'