-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile.cpu
82 lines (71 loc) · 3.53 KB
/
Dockerfile.cpu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# -------------------------------------------------------------------------------
# Base image: plain Ubuntu 22.04, since this variant runs on CPU only.
# -------------------------------------------------------------------------------
FROM ubuntu:22.04
# -------------------------------------------------------------------------------
# Disable interactive apt/debconf prompts during the build.
# Declared as a build ARG rather than ENV: the value is still visible to the RUN
# steps of this stage, but it is not baked into the environment of containers
# started from the image.
# -------------------------------------------------------------------------------
ARG DEBIAN_FRONTEND=noninteractive
# -------------------------------------------------------------------------------
# System packages: build tooling (build-essential, cmake, ninja-build), fetch
# tools (git, curl, wget), Python 3 with pip, and the libssl/libffi development
# packages. The apt package lists are removed in the same layer so they are not
# shipped in the image.
# -------------------------------------------------------------------------------
RUN apt-get update \
    && apt-get install -y \
        build-essential \
        cmake \
        curl \
        git \
        libffi-dev \
        libssl-dev \
        ninja-build \
        python3 \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*
# -------------------------------------------------------------------------------
# Relative paths in the following instructions resolve against /app.
# -------------------------------------------------------------------------------
WORKDIR /app
# -------------------------------------------------------------------------------
# Create a world-writable cache directory, then point both HF_HOME and HOME at
# locations under /app.
# -------------------------------------------------------------------------------
RUN mkdir -p /app/.cache \
    && chmod -R 777 /app/.cache
ENV HF_HOME=/app/.cache \
    HOME=/app
# -------------------------------------------------------------------------------
# Copy only the requirements file first, so the dependency layers below stay
# cached until requirements.cpu.txt itself changes.
# -------------------------------------------------------------------------------
COPY ./app/requirements.cpu.txt /app/
# -------------------------------------------------------------------------------
# Upgrade pip to a pinned version.
# --no-cache-dir (here and in the steps below) keeps pip's download/wheel cache
# out of the image layers.
# -------------------------------------------------------------------------------
RUN python3 -m pip install --no-cache-dir --upgrade pip==25.0
# -------------------------------------------------------------------------------
# Force-install torch first so that auto-gptq's metadata generation finds it.
# NOTE(review): the default PyPI torch wheel for Linux appears to bundle CUDA
# libraries; for a CPU-only image a CPU wheel index may be much smaller -- confirm
# against requirements.cpu.txt before changing.
# -------------------------------------------------------------------------------
RUN python3 -m pip install --no-cache-dir torch==2.6.0
# -------------------------------------------------------------------------------
# Install the rest of the Python dependencies.
# -------------------------------------------------------------------------------
RUN python3 -m pip install --no-cache-dir -r requirements.cpu.txt
# -------------------------------------------------------------------------------
# Clone and build llama.cpp (for GGUF quantization).
# The repository is cloned to /app/llama_cpp and built in Release mode with the
# Ninja generator, using one build job per available CPU core.
# NOTE(review): the clone is unpinned, so every uncached build picks up whatever
# the repository's default branch currently contains -- consider pinning a tag
# or commit for reproducible builds.
# NOTE(review): -march=native targets the CPU of the build machine; the resulting
# binaries may not run on hosts with a different/older CPU -- confirm that build
# and run hosts match.
# -------------------------------------------------------------------------------
RUN git clone https://github.com/ggerganov/llama.cpp.git /app/llama_cpp
WORKDIR /app/llama_cpp
# Each continued line must end in exactly one backslash: a doubled "\ \" leaves an
# escaped space in the shell command and corrupts the arguments passed to cmake.
# -S/-B configure into ./build without needing "mkdir build && cd build".
RUN cmake -S . -B build -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_CXX_FLAGS="-O3 -march=native -flto" \
    && ninja -C build -j"$(nproc)"
# -------------------------------------------------------------------------------
# Switch back to /app and bring in the application sources from ./app.
# -------------------------------------------------------------------------------
WORKDIR /app
COPY app/ ./
# -------------------------------------------------------------------------------
# Document the listening port (7860, e.g. for a Gradio UI) and start app.py
# with python3 as the container's default command.
# -------------------------------------------------------------------------------
EXPOSE 7860/tcp
CMD ["python3", "app.py"]