- ARG CUDA_ARCH_LIST="75-real;80-real;86-real;89-real;90-real"
- ARG OMPI_VERSION="4.1.7rc1"
-
- # Build dependencies resolver stage
- FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
- WORKDIR /usr/src/text-generation-inference/backends/trtllm
-
- FROM chef AS planner
- COPY Cargo.lock Cargo.lock
- COPY Cargo.toml Cargo.toml
- COPY rust-toolchain.toml rust-toolchain.toml
- COPY router router
- COPY benchmark/ benchmark/
- COPY backends/ backends/
- COPY launcher/ launcher/
- RUN cargo chef prepare --recipe-path recipe.json
+ ARG cuda_arch_list="75-real;80-real;86-real;89-real;90-real"
+ ARG ompi_version="4.1.7rc1"
+ ARG build_type=release
+ ARG is_gha_build=false
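+ # NOTE: ARGs declared before the first FROM are global; each build stage below has to redeclare the ones it uses.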

# CUDA dependent dependencies resolver stage
FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder
@@ -26,8 +14,11 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
g++-14 \
git \
git-lfs \
+ lld \
libssl-dev \
libucx-dev \
+ libasan8 \
+ libubsan1 \
ninja-build \
pkg-config \
pipx \
@@ -43,9 +34,9 @@ ENV TENSORRT_INSTALL_PREFIX=/usr/local/tensorrt

# Install OpenMPI
FROM cuda-builder AS mpi-builder
- ARG OMPI_VERSION
+ ARG ompi_version

- ENV OMPI_TARBALL_FILENAME="openmpi-$OMPI_VERSION.tar.bz2"
+ ENV OMPI_TARBALL_FILENAME="openmpi-$ompi_version.tar.bz2"
RUN wget "https://download.open-mpi.org/release/open-mpi/v4.1/$OMPI_TARBALL_FILENAME" -P /opt/src && \
mkdir /usr/src/mpi && \
tar -xf "/opt/src/$OMPI_TARBALL_FILENAME" -C /usr/src/mpi --strip-components=1 && \
@@ -65,34 +56,56 @@ RUN chmod +x /opt/install_tensorrt.sh && \
FROM cuda-builder AS tgi-builder
WORKDIR /usr/src/text-generation-inference

+ # Bring the global ARGs into this stage's scope
+ ARG is_gha_build
+ ARG build_type
+
# Install Rust
+ ENV PATH="/root/.cargo/bin:$PATH"
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y && \
chmod -R a+w /root/.rustup && \
- chmod -R a+w /root/.cargo
+ chmod -R a+w /root/.cargo && \
+ cargo install sccache --locked
+
+ # sccache-specific args, until we find a better, more generic way...
+ ARG aws_access_key_id
+ ARG aws_secret_access_key
+ ARG aws_session_token
+ ARG sccache_bucket
+ ARG sccache_s3_key_prefix
+ ARG sccache_region
+
+ ENV AWS_ACCESS_KEY_ID=$aws_access_key_id
+ ENV AWS_SECRET_ACCESS_KEY=$aws_secret_access_key
+ ENV AWS_SESSION_TOKEN=$aws_session_token
+ ENV SCCACHE_BUCKET=$sccache_bucket
+ ENV SCCACHE_S3_KEY_PREFIX=$sccache_s3_key_prefix
+ ENV SCCACHE_REGION=$sccache_region
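+ # Assumption: these values are only populated on CI; for local builds they stay empty and the setup_sccache.py call below is expected to handle the non-GHA case.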

- ENV PATH="/root/.cargo/bin:$PATH"
- RUN cargo install cargo-chef
-
- # Cache dependencies
- COPY --from=planner /usr/src/text-generation-inference/backends/trtllm/recipe.json .
- RUN cargo chef cook --release --recipe-path recipe.json
-
- # Build actual TGI
- ARG CUDA_ARCH_LIST
- ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt:$CMAKE_PREFIX_PATH"
ENV LD_LIBRARY_PATH="/usr/local/mpi/lib:$LD_LIBRARY_PATH"
ENV PKG_CONFIG_PATH="/usr/local/mpi/lib/pkgconfig:$PKG_CONFIG_PATH"
+ ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt:$CMAKE_PREFIX_PATH"
+
+ ENV USE_LLD_LINKER=ON
+ ENV CUDA_ARCH_LIST=${cuda_arch_list}
+ ENV IS_GHA_BUILD=${is_gha_build}
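+ # Presumably read by the backend's build script: CUDA_ARCH_LIST selects the GPU targets for the CMake/TensorRT-LLM build, and USE_LLD_LINKER pairs with the lld package installed above.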

COPY Cargo.lock Cargo.lock
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY router router
- COPY backends/trtllm backends/trtllm
+ COPY backends backends
+ COPY benchmark benchmark
+ COPY launcher launcher
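+ # The whole workspace is copied now (not just backends/trtllm) because the build below runs from the workspace root with --package.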
COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
+
RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
- cd backends/trtllm && \
- CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX cargo build --release
+ python3 backends/trtllm/scripts/setup_sccache.py --is-gha-build ${is_gha_build} && \
+ CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX \
+ RUSTC_WRAPPER=sccache \
+ cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm && \
+ sccache --show-stats
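+ # RUSTC_WRAPPER=sccache routes every rustc invocation through the cache; sccache --show-stats prints hit/miss counts so cache effectiveness shows up in the build log.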

FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS runtime
RUN apt update && apt install -y libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
@@ -116,6 +129,28 @@ FROM runtime

LABEL co.huggingface.vendor="Hugging Face Inc."
LABEL org.opencontainers.image.authors="[email protected]"
+ LABEL org.opencontainers.image.title="Text-Generation-Inference TensorRT-LLM Backend"

ENTRYPOINT ["./text-generation-launcher"]
CMD ["--executor-worker", "/usr/local/tgi/bin/executorWorker"]
+
+ # This stage is used only for CI/CD
+ FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS ci-runtime
+ RUN apt update && apt install -y libasan8 libubsan1 libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \
+ rm -rf /var/lib/{apt,dpkg,cache,log}/ && \
+ pipx ensurepath && \
+ pipx install --include-deps transformers tokenizers
+
+ WORKDIR /usr/local/tgi/bin
+
+ ENV PATH=/root/.local/share/pipx/venvs/transformers/bin/:$PATH
+ ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:/usr/local/mpi/lib:/usr/local/tensorrt/lib:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
+ ENV TOKENIZERS_PARALLELISM=false
+ ENV OMPI_MCA_plm_rsh_agent=""
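+ # Clearing OMPI_MCA_plm_rsh_agent presumably keeps OpenMPI from trying to launch ranks over ssh, which is absent in the container.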
+
+ COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
+ COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
+ COPY --from=tgi-builder /usr/local/tgi /usr/local/tgi
+
+ # We copy from target/debug instead of target/release: this assumes the CI build passes build_type=dev, since cargo's dev profile writes its artifacts to target/debug
+ COPY --from=tgi-builder /usr/src/text-generation-inference/target/debug/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher
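+
+ # Hypothetical CI invocation for this stage (file name and bucket values are placeholders):
+ # docker build -f Dockerfile_trtllm --target ci-runtime \
+ #     --build-arg build_type=dev --build-arg is_gha_build=true \
+ #     --build-arg sccache_bucket=... --build-arg sccache_region=... .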