Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,5 @@ FROM base AS http

COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

# Amazon SageMaker compatible image
FROM http AS sagemaker
COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]

# Default image
FROM http

ENTRYPOINT ["text-embeddings-router"]
CMD ["--json-output"]
60 changes: 7 additions & 53 deletions Dockerfile-cuda-all
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ FROM base-builder AS builder

ARG GIT_SHA
ARG DOCKER_LABEL
ARG VERTEX="false"

# sccache specific variables
ARG SCCACHE_GHA_ENABLED
Expand All @@ -51,39 +50,19 @@ COPY --from=planner /usr/src/recipe.json recipe.json

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
cargo chef cook --release --features google --recipe-path recipe.json && sccache -s; \
else \
cargo chef cook --release --recipe-path recipe.json && sccache -s; \
fi;
cargo chef cook --release --recipe-path recipe.json && sccache -s;

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \
else \
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \
fi;
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s;

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \
else \
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \
fi;
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s;

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \
else \
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \
fi;
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s;

COPY backends backends
COPY core core
Expand All @@ -93,34 +72,19 @@ COPY Cargo.lock ./

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F google && sccache -s; \
else \
CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s; \
fi;
CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s;

RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \
else \
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \
fi;
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s;

RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \
else \
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \
fi;
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s;

RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90

Expand All @@ -142,16 +106,6 @@ COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local
COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80
COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90

# Amazon SageMaker compatible image
FROM base AS sagemaker

COPY --chmod=775 sagemaker-entrypoint-cuda-all.sh entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]

# Default image
FROM base

COPY --chmod=775 cuda-all-entrypoint.sh entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]
Expand Down
14 changes: 6 additions & 8 deletions cuda-all-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
#!/bin/bash

if ! command -v nvidia-smi &> /dev/null; then
if ! command -v nvidia-smi &>/dev/null; then
echo "Error: 'nvidia-smi' command not found."
exit 1
fi

compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')

if [ ${compute_cap} -eq 75 ]
then
if [ ${compute_cap} -eq 75 ]; then
exec text-embeddings-router-75 "$@"
elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 90 ]
then
elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 90 ]; then
exec text-embeddings-router-80 "$@"
elif [ ${compute_cap} -eq 90 ]
then
elif [ ${compute_cap} -eq 90 ]; then
exec text-embeddings-router-90 "$@"
else
echo "cuda compute cap ${compute_cap} is not supported"; exit 1
echo "cuda compute cap ${compute_cap} is not supported"
exit 1
fi
81 changes: 22 additions & 59 deletions sagemaker-entrypoint-cuda-all.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
#!/bin/bash

if ! command -v nvidia-smi &>/dev/null; then
echo "Error: 'nvidia-smi' command not found."
exit 1
fi

# Function to compare version numbers
verlte() {
[ "$1" = "$2" ] && return 1 || [ "$2" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]
}

# CUDA compat libs logic
if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then
CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d"." -f 3-)
echo "CUDA compat package requires Nvidia driver ≤${CUDA_COMPAT_MAX_DRIVER_VERSION}"
cat /proc/driver/nvidia/version
NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true)
NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module \([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true)
echo "Current installed Nvidia driver version is ${NVIDIA_DRIVER_VERSION}"
if [ $(verlte "$CUDA_COMPAT_MAX_DRIVER_VERSION" "$NVIDIA_DRIVER_VERSION") ]; then
echo "Setup CUDA compatibility libs path to LD_LIBRARY_PATH"
Expand All @@ -21,71 +28,27 @@ else
echo "Skip CUDA compat libs setup as package not found"
fi

# Model variables check
if [[ -z "${HF_MODEL_ID}" ]]; then
echo "HF_MODEL_ID must be set"
exit 1
echo "HF_MODEL_ID must be set"
exit 1
fi
export MODEL_ID="${HF_MODEL_ID}"

if [[ -n "${HF_MODEL_REVISION}" ]]; then
export REVISION="${HF_MODEL_REVISION}"
fi

if ! command -v nvidia-smi &> /dev/null; then
echo "Error: 'nvidia-smi' command not found."
exit 1
fi

# Query GPU name using nvidia-smi
gpu_name=$(nvidia-smi --query-gpu=gpu_name --format=csv | awk 'NR==2')
if [ $? -ne 0 ]; then
echo "Error: $gpu_name"
echo "Query gpu_name failed"
else
echo "Query gpu_name succeeded. Printing output: $gpu_name"
export REVISION="${HF_MODEL_REVISION}"
fi

# Function to get compute capability based on GPU name
get_compute_cap() {
gpu_name="$1"

# Check if the GPU name contains "A10G"
if [[ "$gpu_name" == *"A10G"* ]]; then
echo "86"
# Check if the GPU name contains "A100"
elif [[ "$gpu_name" == *"A100"* ]]; then
echo "80"
# Check if the GPU name contains "H100"
elif [[ "$gpu_name" == *"H100"* ]]; then
echo "90"
# Cover Nvidia T4
elif [[ "$gpu_name" == *"T4"* ]]; then
echo "75"
# Cover Nvidia L4
elif [[ "$gpu_name" == *"L4"* ]]; then
echo "89"
else
echo "80" # Default compute capability
fi
}

if [[ -z "${CUDA_COMPUTE_CAP}" ]]
then
compute_cap=$(get_compute_cap "$gpu_name")
echo "the compute_cap is $compute_cap"
else
compute_cap=$CUDA_COMPUTE_CAP
fi
compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')

if [[ ${compute_cap} -eq 75 ]]
then
text-embeddings-router-75 --port 8080 --json-output
elif [[ ${compute_cap} -ge 80 && ${compute_cap} -lt 90 ]]
then
text-embeddings-router-80 --port 8080 --json-output
elif [[ ${compute_cap} -eq 90 ]]
then
text-embeddings-router-90 --port 8080 --json-output
# Router selection logic
if [ ${compute_cap} -eq 75 ]; then
exec text-embeddings-router-75 --port 8080 --json-output
elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 90 ]; then
exec text-embeddings-router-80 --port 8080 --json-output
elif [ ${compute_cap} -eq 90 ]; then
exec text-embeddings-router-90 --port 8080 --json-output
else
echo "cuda compute cap ${compute_cap} is not supported"; exit 1
echo "cuda compute cap ${compute_cap} is not supported"
exit 1
fi
8 changes: 4 additions & 4 deletions sagemaker-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#!/bin/bash

if [[ -z "${HF_MODEL_ID}" ]]; then
echo "HF_MODEL_ID must be set"
exit 1
echo "HF_MODEL_ID must be set"
exit 1
fi
export MODEL_ID="${HF_MODEL_ID}"

if [[ -n "${HF_MODEL_REVISION}" ]]; then
export REVISION="${HF_MODEL_REVISION}"
export REVISION="${HF_MODEL_REVISION}"
fi

text-embeddings-router --port 8080 --json-output
exec text-embeddings-router --port 8080 --json-output
Loading