-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: Dockerfile.gpu-cuda
107 lines (91 loc) · 4.48 KB
/
Dockerfile.gpu-cuda
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Use an NVIDIA CUDA devel image (with CUDA 12.2) — the devel variant is needed
# because CUDA extensions (llama.cpp, AutoAWQ, exllamav2) are compiled below.
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
# Disable interactive prompts during apt operations.
# ARG (not ENV) keeps DEBIAN_FRONTEND build-time only, so it does not leak
# into the runtime environment of containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive
# Support a wide range of CUDA architectures (Pascal 6.0 through Hopper 9.0)
# when the pip/build steps below compile CUDA kernels.
ENV TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0"
# -------------------------------------------------------------------------------
# Install system-level dependencies (sorted alphabetically for diffability).
# --no-install-recommends keeps the image lean; ca-certificates is therefore
# listed explicitly because the HTTPS git clones and downloads later in this
# build depend on it. The apt lists are removed in the same layer so they
# never persist in the image.
# -------------------------------------------------------------------------------
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    curl \
    git \
    libffi-dev \
    libssl-dev \
    ninja-build \
    python3 \
    python3-pip \
    wget \
    && rm -rf /var/lib/apt/lists/*
# -------------------------------------------------------------------------------
# Set the working directory (created automatically if missing).
# -------------------------------------------------------------------------------
WORKDIR /app
# -------------------------------------------------------------------------------
# Create a writable cache directory for Hugging Face and set environment variables.
# NOTE(review): chmod -R 777 is world-writable by design — presumably because
# the target platform (e.g. HF Spaces) runs the container under an arbitrary,
# non-root UID that must still write to the cache. Confirm before tightening;
# if a fixed runtime user is introduced, prefer `--chown` / targeted perms.
# HOME is pointed at /app so tools that write under $HOME land in a writable dir.
# -------------------------------------------------------------------------------
RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache
ENV HF_HOME=/app/.cache
ENV HOME=/app
# -------------------------------------------------------------------------------
# Copy only the requirements file first to leverage the Docker layer cache:
# the dependency layers below are rebuilt only when this file changes, not on
# every application-source change.
# -------------------------------------------------------------------------------
COPY ./app/requirements.gpu-cuda.txt /app/
# -------------------------------------------------------------------------------
# Upgrade pip (pinned for reproducible builds).
# --no-cache-dir keeps the pip download cache out of the image layers.
# -------------------------------------------------------------------------------
RUN python3 -m pip install --no-cache-dir --upgrade pip==25.0
# -------------------------------------------------------------------------------
# Force-install torch first so that auto-gptq's metadata generation finds it.
# -------------------------------------------------------------------------------
RUN python3 -m pip install --no-cache-dir torch==2.6.0
# -------------------------------------------------------------------------------
# Install Python dependencies from the requirements file.
# -------------------------------------------------------------------------------
RUN python3 -m pip install --no-cache-dir -r requirements.gpu-cuda.txt
# -------------------------------------------------------------------------------
# Clone and build llama_cpp (for GGUF quantization).
# --depth=1 keeps the clone shallow — only the working tree is needed to build.
# NOTE(review): the ref is unpinned (tracks master), so builds are not
# reproducible; consider pinning a release tag via `--branch <tag>`.
# -------------------------------------------------------------------------------
RUN git clone --depth=1 https://github.com/ggerganov/llama.cpp.git /app/llama_cpp
WORKDIR /app/llama_cpp
# Configure and build out-of-source with cmake -B/-S (no manual mkdir/cd).
# The CUDA stub library is linked so the build succeeds on a build host that
# has the CUDA toolkit but no GPU driver installed.
RUN cmake -B build -S . \
        -DGGML_CUDA=ON \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_EXE_LINKER_FLAGS="-L/usr/local/cuda/lib64/stubs -lcuda" \
        -G Ninja && \
    cmake --build build -j"$(nproc)"
# -------------------------------------------------------------------------------
# Clone and build AutoAWQ (for AWQ quantization), pinned to v0.2.4.
# Cloning the tag directly (--branch + --depth=1) fetches only that revision
# and replaces the clone-then-checkout dance with one reproducible step.
# CMAKE_BUILD_PARALLEL_LEVEL is set per-command instead of exported so it
# does not leak into anything else in the chain.
# -------------------------------------------------------------------------------
WORKDIR /app
RUN git clone --depth=1 --branch v0.2.4 https://github.com/casper-hansen/AutoAWQ.git /app/AutoAWQ && \
    cd /app/AutoAWQ && \
    CMAKE_BUILD_PARALLEL_LEVEL="$(nproc)" python3 -m pip install --no-cache-dir -e .
# -------------------------------------------------------------------------------
# Clone and install exllamav2 (for EXL2 quantization).
# The trailing `cd /app` of the original was dropped: a RUN step's working
# directory never persists to the next instruction, so it was a no-op.
# NOTE(review): this clone is unpinned (tracks master) — pin a release tag
# for reproducible builds once a known-good version is chosen.
# -------------------------------------------------------------------------------
RUN git clone --depth=1 https://github.com/turboderp-org/exllamav2.git /app/exllamav2 && \
    cd /app/exllamav2 && \
    CMAKE_BUILD_PARALLEL_LEVEL="$(nproc)" python3 -m pip install --no-cache-dir -e .
# -------------------------------------------------------------------------------
# Copy the rest of the project files (done last, so source changes do not
# invalidate the cached dependency layers above).
# -------------------------------------------------------------------------------
COPY ./app /app
# -------------------------------------------------------------------------------
# Reset the working directory to /app.
# -------------------------------------------------------------------------------
WORKDIR /app
# -------------------------------------------------------------------------------
# Expose the port for the Gradio UI (documentation only; publish with -p/--publish).
# -------------------------------------------------------------------------------
EXPOSE 7860
# -------------------------------------------------------------------------------
# Let orchestrators detect a wedged container. curl is available from the apt
# layer above. A long start period is allowed because model loading can take
# a while before the UI begins answering requests.
# -------------------------------------------------------------------------------
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
  CMD curl -fsS http://localhost:7860/ || exit 1
# -------------------------------------------------------------------------------
# Run the application. Exec (JSON-array) form keeps python3 as PID 1 so it
# receives SIGTERM from `docker stop`.
# NOTE(review): no USER directive — the container runs as root unless the
# target platform (e.g. HF Spaces) injects its own runtime UID; confirm and
# consider adding a non-root user otherwise.
# -------------------------------------------------------------------------------
CMD ["python3", "app.py"]