# Dockerfile.cpu

# -------------------------------------------------------------------------------
# Use a plain Ubuntu image for CPU-only mode.
# -------------------------------------------------------------------------------
FROM ubuntu:22.04

# -------------------------------------------------------------------------------
# Disable interactive prompts during the build only.
# ARG values are visible to the RUN steps below but, unlike ENV, are not
# persisted into the environment of the running container.
# -------------------------------------------------------------------------------
ARG DEBIAN_FRONTEND=noninteractive

# -------------------------------------------------------------------------------
# Install required system dependencies.
# Recommended packages are skipped to keep the image small, so the ones the
# build relies on are listed explicitly: ca-certificates (HTTPS for git, curl
# and wget) and python3-dev (headers for compiling Python extensions).
# The apt package lists are removed in the same layer so they never reach the
# image.
# -------------------------------------------------------------------------------
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    curl \
    git \
    libffi-dev \
    libssl-dev \
    ninja-build \
    python3 \
    python3-dev \
    python3-pip \
    wget \
 && rm -rf /var/lib/apt/lists/*

# -------------------------------------------------------------------------------
# Work from /app (WORKDIR creates the directory if it does not exist).
# -------------------------------------------------------------------------------
WORKDIR /app

# -------------------------------------------------------------------------------
# Set HOME and HF_HOME, then create the cache directory HF_HOME points at and
# make it readable, writable and executable by every user.
# -------------------------------------------------------------------------------
ENV HOME=/app \
    HF_HOME=/app/.cache
RUN mkdir -p "${HF_HOME}" \
 && chmod -R 777 "${HF_HOME}"

# -------------------------------------------------------------------------------
# Copy only the requirements file first, so the dependency layers below stay
# cached until the requirements themselves change.
# -------------------------------------------------------------------------------
COPY ./app/requirements.cpu.txt /app/

# -------------------------------------------------------------------------------
# Upgrade pip to a pinned version.
# --no-cache-dir is used on every pip call: with HOME=/app, pip's default cache
# would otherwise be written under /app/.cache and baked into the image layers.
# -------------------------------------------------------------------------------
RUN python3 -m pip install --no-cache-dir --upgrade pip==25.0

# -------------------------------------------------------------------------------
# Force-install torch first so that auto-gptq's metadata generation finds it.
# NOTE(review): the default Linux torch wheel may bundle GPU libraries; if a
# CPU-only wheel is intended, consider installing from the PyTorch CPU wheel
# index -- confirm against requirements.cpu.txt.
# -------------------------------------------------------------------------------
RUN python3 -m pip install --no-cache-dir torch==2.6.0

# -------------------------------------------------------------------------------
# Install the rest of the Python dependencies.
# -------------------------------------------------------------------------------
RUN python3 -m pip install --no-cache-dir -r requirements.cpu.txt

# -------------------------------------------------------------------------------
# Clone and build llama_cpp (for GGUF quantization).
# This section shallow-clones the llama.cpp repository and builds it in Release
# mode with Ninja. Override LLAMA_CPP_REF (a branch or tag name) at build time
# to pin the build to a known revision:
#   docker build --build-arg LLAMA_CPP_REF=<tag> ...
# NOTE: -march=native tunes the binaries for the CPU of the build host, so the
# resulting image may not run on machines with an older or different CPU.
# -------------------------------------------------------------------------------
ARG LLAMA_CPP_REF=master
RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" \
        https://github.com/ggerganov/llama.cpp.git /app/llama_cpp
WORKDIR /app/llama_cpp
# Configure into ./build (created by cmake), then compile using all available
# CPU cores.
RUN cmake -S . -B build -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_CXX_FLAGS="-O3 -march=native -flto" \
 && cmake --build build --parallel "$(nproc)"

# -------------------------------------------------------------------------------
# Bring in the application sources and run from /app.
# -------------------------------------------------------------------------------
COPY app/ /app/
WORKDIR /app

# -------------------------------------------------------------------------------
# Document the listening port (for a Gradio UI, for example) and define the
# default command started by the container.
# -------------------------------------------------------------------------------
EXPOSE 7860/tcp
CMD ["python3", "app.py"]