# syntax=docker/dockerfile:1
FROM python:3.12-slim-bookworm

# System libraries required by docling/easyocr (libgl1, libglib2.0-0) plus
# curl for probes. NOTE(review): wget/git/procps look like debug conveniences —
# confirm they are needed at runtime before keeping them in the prod image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        git \
        libgl1 \
        libglib2.0-0 \
        procps \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Bring in the uv binaries from the official image.
# Pinned to a concrete version instead of :latest (hadolint DL3007) so
# rebuilds are reproducible; bump deliberately.
COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /uvx /bin/

# UV_COMPILE_BYTECODE=1  : precompile .pyc at install time for faster startup.
# UV_LINK_MODE=copy      : needed with cache mounts (no hardlinks across mounts).
# UV_SYSTEM_PYTHON=1     : make `uv pip` target the system interpreter.
# HF_HOME / TORCH_HOME   : model cache locations. NOTE(review): /tmp may be
#   mounted tmpfs by the runtime/orchestrator, discarding the models baked in
#   at build time below — confirm the deployment preserves the image's /tmp.
ENV UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    UV_SYSTEM_PYTHON=1 \
    HF_HOME=/tmp/ \
    TORCH_HOME=/tmp/ \
    OMP_NUM_THREADS=4

WORKDIR /app

# pyproject.toml references a README; create a stub so `uv sync` can resolve
# the project before the real sources are copied in.
RUN echo "# Docling API" > README.md

# Install locked dependencies first, without the project itself, so this
# layer stays cached until pyproject.toml/uv.lock change.
COPY pyproject.toml uv.lock ./
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-install-project

# Select the PyTorch build. Declared this late on purpose: toggling the flag
# must not invalidate the dependency layers above.
# NOTE(review): these wheels go into the SYSTEM interpreter while `uv sync`
# manages /app/.venv — verify which environment `uv run` resolves at runtime,
# otherwise the CPU/CUDA choice here may be shadowed by the lockfile's torch.
ARG CPU_ONLY=false
RUN if [ "$CPU_ONLY" = "true" ]; then \
        uv pip install --system --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu; \
    else \
        uv pip install --system --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121; \
    fi

# Document-conversion stack. NOTE(review): unpinned — pin docling/easyocr
# versions for reproducible builds.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system --no-cache-dir docling easyocr

# Pre-download the docling pipeline weights and EasyOCR language models so
# the first request does not pay the download cost. gpu=True merely warns and
# falls back to CPU when no GPU is visible during the build; the downloaded
# weights are the same either way.
RUN python -c 'from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; \
    from easyocr import Reader; \
    artifacts_path = StandardPdfPipeline.download_models_hf(force=True); \
    reader = Reader(["fr", "de", "es", "en", "it", "pt"], gpu=True); \
    print("Models downloaded successfully")'

# Application code last — source edits do not bust the dependency layers.
# Relies on .dockerignore to keep .git, .venv, caches, and .env files out.
COPY . .

# Install the project itself into the managed environment.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen

# (Removed `RUN rm -rf /root/.cache/uv`: deleting files in a later layer can
# never shrink earlier layers, and the uv cache lives in build-time cache
# mounts, so it was never part of the image to begin with.)

# NOTE(review): this image runs as root. Adding a non-root USER naively would
# orphan the EasyOCR weights downloaded above under /root/.EasyOCR; if a
# non-root user is required, redirect the EasyOCR model directory (e.g. via
# Reader(model_storage_directory=...)) and chown the app tree, then add USER.

# Documentation only — the app must still be published with -p/--publish.
EXPOSE 8080

CMD ["uv", "run", "uvicorn", "--port", "8080", "--host", "0.0.0.0", "main:app"]