diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh index 97347d5e5fe..5b46e62067f 100755 --- a/.ci/docker/build.sh +++ b/.ci/docker/build.sh @@ -67,13 +67,6 @@ case "${IMAGE_NAME}" in # From https://developer.android.com/ndk/downloads ANDROID_NDK_VERSION=r28c ;; - executorch-ubuntu-22.04-cuda-windows) - LINTRUNNER="" - GCC_VERSION=11 - CUDA_WINDOWS_CROSS_COMPILE=yes - CUDA_VERSION=12.8 - SKIP_PYTORCH=yes - ;; *) echo "Invalid image name ${IMAGE_NAME}" exit 1 @@ -108,8 +101,6 @@ docker build \ --build-arg "MEDIATEK_SDK=${MEDIATEK_SDK:-}" \ --build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \ --build-arg "SKIP_PYTORCH=${SKIP_PYTORCH:-}" \ - --build-arg "CUDA_WINDOWS_CROSS_COMPILE=${CUDA_WINDOWS_CROSS_COMPILE:-}" \ - --build-arg "CUDA_VERSION=${CUDA_VERSION:-}" \ -f "${OS}"/Dockerfile \ "$@" \ . diff --git a/.ci/docker/common/install_cuda.sh b/.ci/docker/common/install_cuda.sh deleted file mode 100644 index 8464fba0747..00000000000 --- a/.ci/docker/common/install_cuda.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# Install Linux CUDA toolkit -# This installs nvcc and other CUDA development tools needed for compiling CUDA code - -set -ex - -# CUDA version must be specified (e.g., 12.8) -CUDA_VERSION="${CUDA_VERSION:?CUDA_VERSION must be set}" - -# Convert version format (e.g., 12.8 -> 12-8 for package names) -CUDA_VERSION_DASH=$(echo "${CUDA_VERSION}" | tr '.' '-') - -# Add NVIDIA package repository -apt-get update -apt-get install -y --no-install-recommends \ - gnupg2 \ - ca-certificates \ - wget - -# Download and install the CUDA keyring -wget -q "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb" -O /tmp/cuda-keyring.deb -dpkg -i /tmp/cuda-keyring.deb -rm /tmp/cuda-keyring.deb - -apt-get update - -# Install CUDA toolkit (nvcc and development libraries) -# We install a minimal set of packages needed for compilation: -# - cuda-nvcc: The CUDA compiler -# - cuda-cudart-dev: CUDA runtime development files -# - cuda-nvrtc-dev: CUDA runtime compilation library -# - libcublas-dev: cuBLAS development files -# - libcusparse-dev: cuSPARSE development files -# - libcufft-dev: cuFFT development files -apt-get install -y --no-install-recommends \ - "cuda-nvcc-${CUDA_VERSION_DASH}" \ - "cuda-cudart-dev-${CUDA_VERSION_DASH}" \ - "cuda-nvrtc-dev-${CUDA_VERSION_DASH}" \ - "libcublas-dev-${CUDA_VERSION_DASH}" \ - "libcusparse-dev-${CUDA_VERSION_DASH}" \ - "libcufft-dev-${CUDA_VERSION_DASH}" - -# Clean up -apt-get clean -rm -rf /var/lib/apt/lists/* - -# Verify installation -/usr/local/cuda-${CUDA_VERSION}/bin/nvcc --version - -echo "CUDA ${CUDA_VERSION} toolkit installation complete" -echo "CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}" diff --git a/.ci/docker/common/install_cuda_windows_cross_compile.sh b/.ci/docker/common/install_cuda_windows_cross_compile.sh deleted file mode 100644 index 19f41cf32aa..00000000000 --- a/.ci/docker/common/install_cuda_windows_cross_compile.sh +++ /dev/null @@ -1,149 +0,0 @@ -#!/bin/bash -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# Install mingw-w64 cross-compiler and Windows CUDA toolkit for cross-compilation - -set -ex - -INSTALL_DIR="${WINDOWS_CUDA_INSTALL_DIR:-/opt/cuda-windows}" - -# Mapping of CUDA versions to their corresponding driver versions for Windows installers -# Source: https://developer.nvidia.com/cuda-toolkit-archive -declare -A CUDA_DRIVER_MAP=( - ["12.6"]="12.6.3:561.17" - ["12.8"]="12.8.1:572.61" - ["12.9"]="12.9.1:576.57" -) - -install_mingw() { - echo "Installing mingw-w64 cross-compiler..." - - apt-get update - # Install the POSIX threads version of mingw-w64 which supports C++11 threading - # primitives (std::mutex, std::condition_variable, std::shared_mutex). - # The default win32 threads version does not support these. - apt-get install -y --no-install-recommends \ - g++-mingw-w64-x86-64-posix \ - mingw-w64-tools \ - p7zip-full \ - wget - - # Verify installation shows POSIX threads - x86_64-w64-mingw32-g++ --version - - # Cleanup - apt-get clean - rm -rf /var/lib/apt/lists/* - - echo "mingw-w64 installation complete (POSIX threads version)" -} - -get_torch_cuda_version() { - # Query PyTorch for its CUDA version using conda environment - conda run -n "py_${PYTHON_VERSION}" python3 -c "import torch; print(torch.version.cuda)" 2>/dev/null || echo "" -} - -install_windows_cuda() { - # Get CUDA version from torch - TORCH_CUDA_VERSION=$(get_torch_cuda_version) - - if [ -z "${TORCH_CUDA_VERSION}" ] || [ "${TORCH_CUDA_VERSION}" = "None" ]; then - echo "ERROR: Could not detect CUDA version from PyTorch." - echo "Make sure PyTorch with CUDA support is installed before running this script." - exit 1 - fi - - echo "Detected PyTorch CUDA version: ${TORCH_CUDA_VERSION}" - - # Extract major.minor version (e.g., "12.8" from "12.8.1" or "12.8") - CUDA_MAJOR_MINOR=$(echo "${TORCH_CUDA_VERSION}" | cut -d. -f1,2) - - # Look up the full version and driver version - if [ -z "${CUDA_DRIVER_MAP[${CUDA_MAJOR_MINOR}]}" ]; then - echo "ERROR: CUDA version ${CUDA_MAJOR_MINOR} is not in the known version map." - echo "Known versions: ${!CUDA_DRIVER_MAP[*]}" - exit 1 - fi - - CUDA_INFO="${CUDA_DRIVER_MAP[${CUDA_MAJOR_MINOR}]}" - CUDA_VERSION=$(echo "${CUDA_INFO}" | cut -d: -f1) - CUDA_DRIVER_VERSION=$(echo "${CUDA_INFO}" | cut -d: -f2) - - echo "Using CUDA ${CUDA_VERSION} with driver ${CUDA_DRIVER_VERSION}" - - echo "Installing Windows CUDA toolkit ${CUDA_VERSION}..." - - mkdir -p "${INSTALL_DIR}" - cd "${INSTALL_DIR}" - - CUDA_INSTALLER="cuda_${CUDA_VERSION}_${CUDA_DRIVER_VERSION}_windows.exe" - CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_VERSION}/local_installers/${CUDA_INSTALLER}" - - # Check if already downloaded and extracted - if [ -d "${INSTALL_DIR}/extracted/cuda_cudart" ]; then - echo "Windows CUDA toolkit already installed, skipping download..." - return 0 - fi - - echo "Downloading CUDA installer from ${CUDA_URL}..." - wget -q "${CUDA_URL}" -O "${CUDA_INSTALLER}" - - echo "Extracting CUDA toolkit..." - 7z x "${CUDA_INSTALLER}" -o"extracted" -y - - # Fix permissions so ci-user can access the files - chmod -R a+rX "${INSTALL_DIR}" - - # Clean up installer to save space - rm -f "${CUDA_INSTALLER}" - - echo "Windows CUDA toolkit installation complete" - echo "WINDOWS_CUDA_HOME=${INSTALL_DIR}/extracted/cuda_cudart/cudart" -} - -# Parse command line arguments -INSTALL_MINGW=false -INSTALL_CUDA=false - -while [[ $# -gt 0 ]]; do - case $1 in - --mingw) - INSTALL_MINGW=true - shift - ;; - --cuda) - INSTALL_CUDA=true - shift - ;; - --all) - INSTALL_MINGW=true - INSTALL_CUDA=true - shift - ;; - *) - echo "Unknown option: $1" - echo "Usage: $0 [--mingw] [--cuda] [--all]" - exit 1 - ;; - esac -done - -# Default to installing everything if no options specified -if [ "${INSTALL_MINGW}" = false ] && [ "${INSTALL_CUDA}" = false ]; then - INSTALL_MINGW=true - INSTALL_CUDA=true -fi - -if [ "${INSTALL_MINGW}" = true ]; then - install_mingw -fi - -if [ "${INSTALL_CUDA}" = true ]; then - install_windows_cuda -fi - -echo "Installation complete" diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile index 6b223cda3c3..b7478df5489 100644 --- a/.ci/docker/ubuntu/Dockerfile +++ b/.ci/docker/ubuntu/Dockerfile @@ -98,23 +98,5 @@ ARG QNN_SDK ARG MEDIATEK_SDK -ARG CUDA_WINDOWS_CROSS_COMPILE -ARG CUDA_VERSION -COPY ./common/install_cuda.sh install_cuda.sh -COPY ./common/install_cuda_windows_cross_compile.sh install_cuda_windows_cross_compile.sh -COPY ./common/utils.sh utils.sh -RUN if [ -n "${CUDA_WINDOWS_CROSS_COMPILE}" ]; then \ - CUDA_VERSION=${CUDA_VERSION} bash ./install_cuda.sh && \ - bash ./install_cuda_windows_cross_compile.sh; \ - fi -RUN rm -f install_cuda.sh install_cuda_windows_cross_compile.sh utils.sh -# Set up CUDA environment for Linux compilation (nvcc, etc.) -ENV CUDA_HOME=/usr/local/cuda -ENV PATH=${CUDA_HOME}/bin:${PATH} -# Ensure system libstdc++ is found before conda's (GLIBCXX_3.4.30 compatibility) -ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} -# Windows CUDA for cross-compilation -ENV WINDOWS_CUDA_HOME=/opt/cuda-windows/extracted/cuda_cudart/cudart - USER ci-user CMD ["bash"] diff --git a/.ci/scripts/export_model_artifact.sh b/.ci/scripts/export_model_artifact.sh index 188f375202f..3c173b0ea2a 100755 --- a/.ci/scripts/export_model_artifact.sh +++ b/.ci/scripts/export_model_artifact.sh @@ -58,13 +58,11 @@ OUTPUT_DIR="${4:-.}" case "$DEVICE" in cuda) ;; - cuda-windows) - ;; metal) ;; *) echo "Error: Unsupported device '$DEVICE'" - echo "Supported devices: cuda, cuda-windows, metal" + echo "Supported devices: cuda, metal" exit 1 ;; esac @@ -149,7 +147,7 @@ if [ -n "$MAX_SEQ_LEN" ]; then fi DEVICE_ARG="" -if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then +if [ "$DEVICE" = "cuda" ]; then DEVICE_ARG="--device cuda" fi @@ -171,15 +169,8 @@ if [ -n "$PREPROCESSOR_OUTPUT" ]; then --output_file $PREPROCESSOR_OUTPUT fi -# Determine blob file name - cuda and cuda-windows both use aoti_cuda_blob.ptd -if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then - BLOB_FILE="aoti_cuda_blob.ptd" -else - BLOB_FILE="aoti_${DEVICE}_blob.ptd" -fi - test -f model.pte -test -f $BLOB_FILE +test -f aoti_${DEVICE}_blob.ptd if [ -n "$PREPROCESSOR_OUTPUT" ]; then test -f $PREPROCESSOR_OUTPUT fi @@ -188,7 +179,7 @@ echo "::endgroup::" echo "::group::Store $MODEL_NAME Artifacts" mkdir -p "${OUTPUT_DIR}" mv model.pte "${OUTPUT_DIR}/" -mv $BLOB_FILE "${OUTPUT_DIR}/" +mv aoti_${DEVICE}_blob.ptd "${OUTPUT_DIR}/" if [ -n "$PREPROCESSOR_OUTPUT" ]; then mv $PREPROCESSOR_OUTPUT "${OUTPUT_DIR}/" fi diff --git a/.github/workflows/cuda-windows.yml b/.github/workflows/cuda-windows.yml deleted file mode 100644 index eda2d4bf680..00000000000 --- a/.github/workflows/cuda-windows.yml +++ /dev/null @@ -1,91 +0,0 @@ -# Test ExecuTorch CUDA Windows Cross-Compilation Export -# This workflow tests model export targeting CUDA Windows using optimum-executorch. -# It runs on a Linux machine with CUDA and uses the executorch-ubuntu-22.04-cuda-windows -# Docker image which has mingw and Windows CUDA SDK pre-installed for cross-compilation. - -name: Test CUDA Windows Export - -on: - pull_request: - push: - branches: - - main - - release/* - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} - cancel-in-progress: false - -jobs: - export-model-cuda-windows-artifact: - name: export-model-cuda-windows-artifact - # Skip this job if the pull request is from a fork (HuggingFace secrets are not available) - if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request' - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - secrets: inherit - strategy: - fail-fast: false - matrix: - model: - - repo: "mistralai" - name: "Voxtral-Mini-3B-2507" - - repo: "openai" - name: "whisper-small" - - repo: "openai" - name: "whisper-large-v3-turbo" - - repo: "google" - name: "gemma-3-4b-it" - quant: - - "non-quantized" - - "quantized-int4-weight-only" - exclude: - # TODO: enable int4-weight-only on gemma3. - - model: - repo: "google" - name: "gemma-3-4b-it" - quant: "quantized-int4-weight-only" - with: - timeout: 90 - secrets-env: EXECUTORCH_HF_TOKEN - runner: linux.g5.4xlarge.nvidia.gpu - gpu-arch-type: cuda - gpu-arch-version: 12.8 - docker-image: ci-image:executorch-ubuntu-22.04-cuda-windows - submodules: recursive - upload-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-windows-${{ matrix.quant }} - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - set -eux - - echo "::group::Fix libstdc++ GLIBCXX version" - # The executorch pybindings require GLIBCXX_3.4.30 which conda's libstdc++ doesn't have. - # Replace conda's libstdc++ with the system version to fix ImportError. - # Verify system version has GLIBCXX_3.4.30 - strings /usr/lib/x86_64-linux-gnu/libstdc++.so.6 | grep GLIBCXX_3.4.30 - # Backup and replace conda's version - mv /opt/conda/lib/libstdc++.so.6 /opt/conda/lib/libstdc++.so.6.bak || true - ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /opt/conda/lib/libstdc++.so.6 - echo "::endgroup::" - - echo "::group::Verify pre-installed dependencies" - x86_64-w64-mingw32-g++ --version - nvcc --version - echo "WINDOWS_CUDA_HOME=${WINDOWS_CUDA_HOME}" - ls -la "${WINDOWS_CUDA_HOME}" - echo "::endgroup::" - - echo "::group::Setup ExecuTorch" - PYTHON_EXECUTABLE=python ./install_executorch.sh - echo "::endgroup::" - - echo "::group::Setup Huggingface" - pip install -U "huggingface_hub[cli]<1.0" accelerate - huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN - OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt) - pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION} - echo "::endgroup::" - - source .ci/scripts/export_model_artifact.sh cuda-windows "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}" diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml index 0fa4d3685f7..e3b72a6bcd6 100644 --- a/.github/workflows/docker-builds.yml +++ b/.github/workflows/docker-builds.yml @@ -46,8 +46,6 @@ jobs: include: - docker-image-name: executorch-ubuntu-22.04-gcc11-aarch64 runner: linux.arm64.2xlarge - - docker-image-name: executorch-ubuntu-22.04-cuda-windows - runner: linux.g5.4xlarge.nvidia.gpu runs-on: [self-hosted, "${{ matrix.runner }}"] env: