
Commit 2d3e014

smaller dockerfile
1 parent f8fc3fd commit 2d3e014

File tree: 1 file changed (+35, -77 lines)

Dockerfile (+35 -77)
@@ -1,98 +1,56 @@
-FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder
-ARG CPU_ONLY=false
+FROM python:3.12-slim-bookworm

-WORKDIR /app
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgl1 libglib2.0-0 curl wget git procps \
+    && rm -rf /var/lib/apt/lists/*

-# Install build dependencies
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends libgl1 libglib2.0-0 && \
-    rm -rf /var/lib/apt/lists/*
+# Copy UV from official image
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

-# Enable bytecode compilation and set proper link mode for cache mounting
 ENV UV_COMPILE_BYTECODE=1 \
     UV_LINK_MODE=copy \
-    HF_HOME=/app/.cache/huggingface \
-    TORCH_HOME=/app/.cache/torch \
-    PYTHONPATH=/app \
+    UV_SYSTEM_PYTHON=1 \
+    HF_HOME=/tmp/ \
+    TORCH_HOME=/tmp/ \
     OMP_NUM_THREADS=4

-# Copy dependency files and README
-COPY pyproject.toml uv.lock README.md ./
+WORKDIR /app

-# Install dependencies but not the project itself
+RUN echo "# Docling API" > README.md
+
+# Install dependencies first (for better layer caching)
+COPY pyproject.toml uv.lock ./
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv sync --frozen --no-install-project

-# Copy the rest of the project
-COPY . .
-
-# Better GPU detection: Check both architecture and if NVIDIA is available
-RUN ARCH=$(uname -m) && \
-    if [ "$CPU_ONLY" = "true" ] || [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ] || ! command -v nvidia-smi >/dev/null 2>&1; then \
-        USE_GPU=false; \
-    else \
-        USE_GPU=true; \
-    fi && \
-    echo "Detected GPU availability: $USE_GPU" && \
-    # For PyTorch installation with architecture detection
-    uv pip uninstall -y torch torchvision torchaudio || true && \
-    if [ "$USE_GPU" = "false" ]; then \
-        # For CPU or ARM architectures or no NVIDIA
-        echo "Installing PyTorch for CPU" && \
-        uv pip install --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu; \
+ARG CPU_ONLY=false
+RUN if [ "$CPU_ONLY" = "true" ]; then \
+        uv pip install --system --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu; \
     else \
-        # For x86_64 with GPU support
-        echo "Installing PyTorch with CUDA support" && \
-        uv pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121; \
+        uv pip install --system --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121; \
     fi

-# Install the project in non-editable mode
+# Install required packages
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --no-editable
-
-# Download models for the pipeline
-RUN uv run python -c "from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; artifacts_path = StandardPdfPipeline.download_models_hf(force=True)"
+    uv pip install --system --no-cache-dir docling easyocr

-# Pre-download EasyOCR models with better GPU detection
-RUN ARCH=$(uname -m) && \
-    if [ "$CPU_ONLY" = "true" ] || [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ] || ! command -v nvidia-smi >/dev/null 2>&1; then \
-        echo "Downloading EasyOCR models for CPU" && \
-        uv run python -c "import easyocr; reader = easyocr.Reader(['fr', 'de', 'es', 'en', 'it', 'pt'], gpu=False); print('EasyOCR CPU models downloaded successfully')"; \
-    else \
-        echo "Downloading EasyOCR models with GPU support" && \
-        uv run python -c "import easyocr; reader = easyocr.Reader(['fr', 'de', 'es', 'en', 'it', 'pt'], gpu=True); print('EasyOCR GPU models downloaded successfully')"; \
-    fi
-
-# Production stage
-FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
-WORKDIR /app
-
-# Install runtime dependencies
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends redis-server libgl1 libglib2.0-0 curl && \
-    rm -rf /var/lib/apt/lists/*
+# Download models in a single step
+RUN python -c 'from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; \
+    from easyocr import Reader; \
+    artifacts_path = StandardPdfPipeline.download_models_hf(force=True); \
+    reader = Reader(["fr", "de", "es", "en", "it", "pt"], gpu=True); \
+    print("Models downloaded successfully")'

-# Set environment variables
-ENV HF_HOME=/app/.cache/huggingface \
-    TORCH_HOME=/app/.cache/torch \
-    PYTHONPATH=/app \
-    OMP_NUM_THREADS=4 \
-    UV_COMPILE_BYTECODE=1
-
-# Create a non-root user
-RUN useradd --create-home app && \
-    mkdir -p /app && \
-    chown -R app:app /app /tmp
-
-# Copy the virtual environment from the builder stage
-COPY --from=builder --chown=app:app /app/.venv /app/.venv
-ENV PATH="/app/.venv/bin:$PATH"
+# Copy the application code
+COPY . .

-# Copy necessary files for the application
-COPY --chown=app:app . .
+# Final dependency sync
+RUN --mount=type=cache,target=/root/.cache/uv uv sync --frozen

-# Switch to non-root user
-USER app
+# Remove cache to save space
+RUN rm -rf /root/.cache/uv

 EXPOSE 8080
-CMD ["uvicorn", "main:app", "--port", "8080", "--host", "0.0.0.0"]
+
+CMD ["uv", "run", "uvicorn", "--port", "8080", "--host", "0.0.0.0", "main:app"]

0 commit comments
