diff --git a/dockerfiles/Dockerfile_cu130 b/dockerfiles/Dockerfile_cu130
new file mode 100644
index 000000000..2ae19d112
--- /dev/null
+++ b/dockerfiles/Dockerfile_cu130
@@ -0,0 +1,115 @@
+FROM nvidia/cuda:13.0.3-cudnn-devel-ubuntu24.04 AS base
+
+WORKDIR /app
+
+# Non-interactive apt, UTF-8 locale, and an extra library search path.
+ENV DEBIAN_FRONTEND=noninteractive
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+ENV LD_LIBRARY_PATH=/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
+
+# System packages: shell tools, compilers, RDMA/NUMA headers and media/GTK libraries.
+RUN apt-get update && apt-get install -y vim tmux zip unzip bzip2 wget git git-lfs build-essential libibverbs-dev ca-certificates \
+    curl iproute2 libsm6 libxext6 kmod ccache libnuma-dev libssl-dev flex bison libgtk-3-dev libpango1.0-dev \
+    libsoup2.4-dev libnice-dev libopus-dev libvpx-dev libx264-dev libsrtp2-dev libglib2.0-dev libdrm-dev libjpeg-dev libpng-dev \
+    && apt-get clean && rm -rf /var/lib/apt/lists/* && git lfs install
+
+# Miniforge with Python 3.11. Download, install and delete the installer in a
+# single layer so the installer script is not kept in the image.
+RUN curl -fsSL -o /app/miniconda.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh" && \
+    chmod +x /app/miniconda.sh && \
+    bash /app/miniconda.sh -b -p /app/miniconda && \
+    rm /app/miniconda.sh && \
+    /app/miniconda/bin/conda install python=3.11 -y && \
+    /app/miniconda/bin/conda clean -ya
+ENV PATH="/app/miniconda/bin:$PATH"
+
+RUN conda install conda-forge::ffmpeg=8.0.0 -y && conda clean --all -y
+
+# Build and developer tooling.
+RUN pip install --no-cache-dir packaging ninja cmake scikit-build-core uv meson ruff pre-commit fastapi uvicorn requests -U
+
+# PyTorch stack from the cu130 wheel index.
+RUN pip install --no-cache-dir torch==2.11.0 torchvision==0.26.0 torchaudio==2.11.0 --index-url https://download.pytorch.org/whl/cu130
+
+# vLLM: editable install from a fresh clone.
+RUN git clone https://github.com/vllm-project/vllm.git && cd vllm \
+    && python use_existing_torch.py && pip install --no-cache-dir -r requirements/build/cuda.txt \
+    && pip install --no-cache-dir --no-build-isolation -v -e .
+
+# sgl-kernel from the sglang repository.
+RUN git clone https://github.com/sgl-project/sglang.git && cd sglang/sgl-kernel \
+    && make build && make clean
+
+# Python runtime dependencies. Specifiers containing ">" are quoted; unquoted,
+# the shell treats ">=0.27.0" as a redirection of pip's output to a file.
+RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
+    imageio-ffmpeg einops loguru qtorch ftfy av decord matplotlib debugpy soundfile jsonschema pymongo modelscope \
+    Pillow peft omegaconf mooncake-transfer-engine torchao scipy bson aio-pika "asyncpg>=0.27.0" "aioboto3>=12.0.0" \
+    PyJWT alibabacloud_dypnsapi20170525==1.2.2 redis==6.4.0 tos zmq
+
+# FlashAttention: top-level package, the hopper/ build, and the flash_attn/cute package.
+RUN git clone https://github.com/Dao-AILab/flash-attention.git --recursive
+
+RUN cd flash-attention && python setup.py install && rm -rf build
+
+RUN cd flash-attention/hopper && python setup.py install && rm -rf build
+
+RUN cd flash-attention && pip install --no-cache-dir -e "flash_attn/cute[dev,cu13]"
+
+# SageAttention (ModelTC repository), editable install.
+RUN git clone https://github.com/ModelTC/SageAttention.git --depth 1
+
+RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir --no-build-isolation -v -e .
+
+# sageattention3_blackwell package from the SageAttention-1104 repository.
+RUN git clone https://github.com/ModelTC/SageAttention-1104.git --depth 1
+
+RUN cd SageAttention-1104/sageattention3_blackwell && python setup.py install && rm -rf build
+
+# MagiAttention, editable install.
+RUN git clone https://github.com/SandAI-org/MagiAttention.git --recursive
+
+RUN cd MagiAttention && MAGI_ATTENTION_BUILD_COMPUTE_CAPABILITY="90,100" pip install --no-cache-dir --no-build-isolation -v -e .
+
+# lightx2v_kernel wheel, built against a CUTLASS checkout.
+COPY lightx2v_kernel /app/lightx2v_kernel
+
+# MAX_JOBS and CMAKE_BUILD_PARALLEL_LEVEL are both written as environment
+# prefixes of `uv build`; with `&&` between them MAX_JOBS would only be an
+# unexported shell variable that the build never sees.
+RUN git clone https://github.com/NVIDIA/cutlass.git --depth 1 && cd /app/lightx2v_kernel && MAX_JOBS=32 CMAKE_BUILD_PARALLEL_LEVEL=4 \
+    uv build --wheel \
+    -Cbuild-dir=build . \
+    -Ccmake.define.CUTLASS_PATH=/app/cutlass \
+    --verbose \
+    --color=always \
+    --no-build-isolation \
+    && pip install dist/*whl --force-reinstall --no-deps \
+    && rm -rf /app/lightx2v_kernel && rm -rf /app/cutlass
+
+# rdma-core built from source and installed under /usr/local.
+RUN git clone --depth 1 https://github.com/linux-rdma/rdma-core.git && \
+    cd rdma-core/ && \
+    mkdir build-ibv57 && \
+    cd build-ibv57 && \
+    cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local -DNO_MAN_PAGES=1 && \
+    make -j"$(nproc)" && \
+    make install && \
+    ldconfig
+
+# for base docker
+RUN git clone https://github.com/ModelTC/q8_kernels.git --depth 1
+RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
+
+RUN git clone https://github.com/ModelTC/SpargeAttn-Fix.git --depth 1
+RUN cd SpargeAttn-Fix && TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0" pip install --no-cache-dir --no-build-isolation -v -e .
+
+# for 5090 docker
+# RUN git clone https://github.com/ModelTC/LTX-Video-Q8-Kernels.git --depth 1
+# RUN cd LTX-Video-Q8-Kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
+
+# RUN git clone https://github.com/ModelTC/SpargeAttn.git --depth 1
+# RUN cd SpargeAttn && TORCH_CUDA_ARCH_LIST="12.0" pip install --no-cache-dir --no-build-isolation -v -e .
+
+WORKDIR /workspace