Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .ci/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ case "${IMAGE_NAME}" in
# From https://developer.android.com/ndk/downloads
ANDROID_NDK_VERSION=r28c
;;
executorch-ubuntu-22.04-cuda-windows)
LINTRUNNER=""
GCC_VERSION=11
CUDA_WINDOWS_CROSS_COMPILE=yes
CUDA_VERSION=12.8
SKIP_PYTORCH=yes
;;
*)
echo "Invalid image name ${IMAGE_NAME}"
exit 1
Expand Down Expand Up @@ -101,6 +108,8 @@ docker build \
--build-arg "MEDIATEK_SDK=${MEDIATEK_SDK:-}" \
--build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
--build-arg "SKIP_PYTORCH=${SKIP_PYTORCH:-}" \
--build-arg "CUDA_WINDOWS_CROSS_COMPILE=${CUDA_WINDOWS_CROSS_COMPILE:-}" \
--build-arg "CUDA_VERSION=${CUDA_VERSION:-}" \
-f "${OS}"/Dockerfile \
"$@" \
.
57 changes: 57 additions & 0 deletions .ci/docker/common/install_cuda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Install Linux CUDA toolkit
# This installs nvcc and other CUDA development tools needed for compiling CUDA code

set -ex

# CUDA version must be specified (e.g., 12.8)
CUDA_VERSION="${CUDA_VERSION:?CUDA_VERSION must be set}"

# Debian package names use dashes instead of dots (e.g., 12.8 -> 12-8).
CUDA_VERSION_DASH="${CUDA_VERSION//./-}"

# Prerequisites for fetching and trusting the NVIDIA apt repository.
apt-get update
apt-get install -y --no-install-recommends \
  gnupg2 \
  ca-certificates \
  wget

# Register NVIDIA's repository signing key via the cuda-keyring package.
keyring_deb="/tmp/cuda-keyring.deb"
wget -q "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb" -O "${keyring_deb}"
dpkg -i "${keyring_deb}"
rm "${keyring_deb}"

apt-get update

# Minimal package set needed for CUDA compilation:
#   cuda-nvcc       - the CUDA compiler
#   cuda-cudart-dev - CUDA runtime development files
#   cuda-nvrtc-dev  - CUDA runtime compilation library
#   libcublas-dev   - cuBLAS development files
#   libcusparse-dev - cuSPARSE development files
#   libcufft-dev    - cuFFT development files
cuda_packages=(
  "cuda-nvcc-${CUDA_VERSION_DASH}"
  "cuda-cudart-dev-${CUDA_VERSION_DASH}"
  "cuda-nvrtc-dev-${CUDA_VERSION_DASH}"
  "libcublas-dev-${CUDA_VERSION_DASH}"
  "libcusparse-dev-${CUDA_VERSION_DASH}"
  "libcufft-dev-${CUDA_VERSION_DASH}"
)
apt-get install -y --no-install-recommends "${cuda_packages[@]}"

# Trim apt caches to keep the image small.
apt-get clean
rm -rf /var/lib/apt/lists/*

# Smoke-test: nvcc must exist under the versioned install prefix.
"/usr/local/cuda-${CUDA_VERSION}/bin/nvcc" --version

echo "CUDA ${CUDA_VERSION} toolkit installation complete"
echo "CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}"
149 changes: 149 additions & 0 deletions .ci/docker/common/install_cuda_windows_cross_compile.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Install mingw-w64 cross-compiler and Windows CUDA toolkit for cross-compilation

set -ex

# Extraction target for the Windows CUDA toolkit; callers may override it
# via the WINDOWS_CUDA_INSTALL_DIR environment variable.
INSTALL_DIR="${WINDOWS_CUDA_INSTALL_DIR:-/opt/cuda-windows}"

# Mapping of CUDA versions to their corresponding driver versions for Windows installers
# Source: https://developer.nvidia.com/cuda-toolkit-archive
# Key: major.minor CUDA version reported by PyTorch.
# Value format: "<full toolkit version>:<bundled Windows driver version>",
# which together name NVIDIA's Windows installer download.
declare -A CUDA_DRIVER_MAP=(
  ["12.6"]="12.6.3:561.17"
  ["12.8"]="12.8.1:572.61"
  ["12.9"]="12.9.1:576.57"
)

#######################################
# Install the mingw-w64 cross-compiler used to build Windows binaries on Linux.
# Globals:   none
# Arguments: none
# Outputs:   progress messages and the compiler version banner to stdout
#######################################
install_mingw() {
  echo "Installing mingw-w64 cross-compiler..."

  apt-get update
  # Use the POSIX-threads flavor of mingw-w64: it supplies the C++11
  # threading primitives (std::mutex, std::condition_variable,
  # std::shared_mutex) that the default win32-threads flavor lacks.
  local -a packages=(
    g++-mingw-w64-x86-64-posix
    mingw-w64-tools
    p7zip-full
    wget
  )
  apt-get install -y --no-install-recommends "${packages[@]}"

  # Print the compiler banner so the build log records the threads flavor.
  x86_64-w64-mingw32-g++ --version

  # Drop apt caches to keep the image lean.
  apt-get clean
  rm -rf /var/lib/apt/lists/*

  echo "mingw-w64 installation complete (POSIX threads version)"
}

#######################################
# Report the CUDA version PyTorch was built against.
# Globals:   PYTHON_VERSION (read) - selects the "py_<ver>" conda env
# Outputs:   the version string (e.g. "12.8") to stdout, or an empty string
#            when torch is missing or the query fails
#######################################
get_torch_cuda_version() {
  local query='import torch; print(torch.version.cuda)'
  conda run -n "py_${PYTHON_VERSION}" python3 -c "${query}" 2>/dev/null || echo ""
}

#######################################
# Download and extract the Windows CUDA toolkit matching PyTorch's CUDA build.
# Globals:   CUDA_DRIVER_MAP (read) - version -> "toolkit:driver" map
#            INSTALL_DIR (read)     - extraction target directory
# Outputs:   progress messages to stdout
# Returns:   0 on success or cache hit; exits 1 on detection/lookup failure
#
# Fixes vs. original: all working variables are now function-local — the
# original assigned CUDA_VERSION globally, shadowing the CUDA_VERSION build
# arg used by the sibling Linux install script — and command substitutions
# are split from declarations so their exit codes are not masked. The
# already-installed check now runs before mkdir/cd so a cache hit has no
# side effects.
#######################################
install_windows_cuda() {
  # Cross-compilation must use the same CUDA release that the installed
  # PyTorch was built against; detect it first.
  local torch_cuda_version
  torch_cuda_version=$(get_torch_cuda_version)

  if [ -z "${torch_cuda_version}" ] || [ "${torch_cuda_version}" = "None" ]; then
    echo "ERROR: Could not detect CUDA version from PyTorch."
    echo "Make sure PyTorch with CUDA support is installed before running this script."
    exit 1
  fi

  echo "Detected PyTorch CUDA version: ${torch_cuda_version}"

  # Extract major.minor version (e.g., "12.8" from "12.8.1" or "12.8")
  local cuda_major_minor
  cuda_major_minor=$(echo "${torch_cuda_version}" | cut -d. -f1,2)

  # Look up the full toolkit version and matching Windows driver version.
  if [ -z "${CUDA_DRIVER_MAP[${cuda_major_minor}]:-}" ]; then
    echo "ERROR: CUDA version ${cuda_major_minor} is not in the known version map."
    echo "Known versions: ${!CUDA_DRIVER_MAP[*]}"
    exit 1
  fi

  # Map entry format is "<full toolkit version>:<Windows driver version>".
  local cuda_info cuda_version cuda_driver_version
  cuda_info="${CUDA_DRIVER_MAP[${cuda_major_minor}]}"
  cuda_version=${cuda_info%%:*}
  cuda_driver_version=${cuda_info##*:}

  echo "Using CUDA ${cuda_version} with driver ${cuda_driver_version}"
  echo "Installing Windows CUDA toolkit ${cuda_version}..."

  # Skip the large download when a previous build layer already extracted it.
  if [ -d "${INSTALL_DIR}/extracted/cuda_cudart" ]; then
    echo "Windows CUDA toolkit already installed, skipping download..."
    return 0
  fi

  mkdir -p "${INSTALL_DIR}"
  cd "${INSTALL_DIR}"

  local cuda_installer="cuda_${cuda_version}_${cuda_driver_version}_windows.exe"
  local cuda_url="https://developer.download.nvidia.com/compute/cuda/${cuda_version}/local_installers/${cuda_installer}"

  echo "Downloading CUDA installer from ${cuda_url}..."
  wget -q "${cuda_url}" -O "${cuda_installer}"

  # The Windows installer is a self-extracting archive; unpack it with 7z
  # rather than executing it (it is a Windows binary).
  echo "Extracting CUDA toolkit..."
  7z x "${cuda_installer}" -o"extracted" -y

  # Fix permissions so ci-user can access the files
  chmod -R a+rX "${INSTALL_DIR}"

  # Clean up installer to save space
  rm -f "${cuda_installer}"

  echo "Windows CUDA toolkit installation complete"
  echo "WINDOWS_CUDA_HOME=${INSTALL_DIR}/extracted/cuda_cudart/cudart"
}

# ---------------------------------------------------------------------------
# Command-line handling: each flag enables one component; --all enables both.
# ---------------------------------------------------------------------------
INSTALL_MINGW=false
INSTALL_CUDA=false

while [[ $# -gt 0 ]]; do
  case "$1" in
    --mingw)
      INSTALL_MINGW=true
      ;;
    --cuda)
      INSTALL_CUDA=true
      ;;
    --all)
      INSTALL_MINGW=true
      INSTALL_CUDA=true
      ;;
    *)
      echo "Unknown option: $1"
      echo "Usage: $0 [--mingw] [--cuda] [--all]"
      exit 1
      ;;
  esac
  shift
done

# Running with no flags at all installs every component.
if [ "${INSTALL_MINGW}" = false ] && [ "${INSTALL_CUDA}" = false ]; then
  INSTALL_MINGW=true
  INSTALL_CUDA=true
fi

if [ "${INSTALL_MINGW}" = true ]; then
  install_mingw
fi

if [ "${INSTALL_CUDA}" = true ]; then
  install_windows_cuda
fi

echo "Installation complete"
18 changes: 18 additions & 0 deletions .ci/docker/ubuntu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,23 @@ ARG QNN_SDK

ARG MEDIATEK_SDK

# Build args selecting the optional CUDA Windows cross-compilation toolchain.
ARG CUDA_WINDOWS_CROSS_COMPILE
ARG CUDA_VERSION
# Installer scripts are copied in, run conditionally, then removed below.
COPY ./common/install_cuda.sh install_cuda.sh
COPY ./common/install_cuda_windows_cross_compile.sh install_cuda_windows_cross_compile.sh
COPY ./common/utils.sh utils.sh
# Only the cuda-windows image variant sets CUDA_WINDOWS_CROSS_COMPILE, so
# other variants skip both installers and this layer is a no-op for them.
RUN if [ -n "${CUDA_WINDOWS_CROSS_COMPILE}" ]; then \
    CUDA_VERSION=${CUDA_VERSION} bash ./install_cuda.sh && \
    bash ./install_cuda_windows_cross_compile.sh; \
    fi
RUN rm -f install_cuda.sh install_cuda_windows_cross_compile.sh utils.sh
# Set up CUDA environment for Linux compilation (nvcc, etc.)
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=${CUDA_HOME}/bin:${PATH}
# Ensure system libstdc++ is found before conda's (GLIBCXX_3.4.30 compatibility)
# NOTE(review): if LD_LIBRARY_PATH is unset at build time this leaves a
# trailing ':' (an empty search-path entry) — confirm that is acceptable.
ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
# Windows CUDA for cross-compilation
ENV WINDOWS_CUDA_HOME=/opt/cuda-windows/extracted/cuda_cudart/cudart

USER ci-user
CMD ["bash"]
17 changes: 13 additions & 4 deletions .ci/scripts/export_model_artifact.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,13 @@ OUTPUT_DIR="${4:-.}"
case "$DEVICE" in
cuda)
;;
cuda-windows)
;;
metal)
;;
*)
echo "Error: Unsupported device '$DEVICE'"
echo "Supported devices: cuda, metal"
echo "Supported devices: cuda, cuda-windows, metal"
exit 1
;;
esac
Expand Down Expand Up @@ -147,7 +149,7 @@ if [ -n "$MAX_SEQ_LEN" ]; then
fi

DEVICE_ARG=""
if [ "$DEVICE" = "cuda" ]; then
if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
DEVICE_ARG="--device cuda"
fi

Expand All @@ -169,8 +171,15 @@ if [ -n "$PREPROCESSOR_OUTPUT" ]; then
--output_file $PREPROCESSOR_OUTPUT
fi

# Determine blob file name - cuda and cuda-windows both use aoti_cuda_blob.ptd
if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
BLOB_FILE="aoti_cuda_blob.ptd"
else
BLOB_FILE="aoti_${DEVICE}_blob.ptd"
fi

test -f model.pte
test -f aoti_${DEVICE}_blob.ptd
test -f $BLOB_FILE
if [ -n "$PREPROCESSOR_OUTPUT" ]; then
test -f $PREPROCESSOR_OUTPUT
fi
Expand All @@ -179,7 +188,7 @@ echo "::endgroup::"
echo "::group::Store $MODEL_NAME Artifacts"
mkdir -p "${OUTPUT_DIR}"
mv model.pte "${OUTPUT_DIR}/"
mv aoti_${DEVICE}_blob.ptd "${OUTPUT_DIR}/"
mv $BLOB_FILE "${OUTPUT_DIR}/"
if [ -n "$PREPROCESSOR_OUTPUT" ]; then
mv $PREPROCESSOR_OUTPUT "${OUTPUT_DIR}/"
fi
Expand Down
91 changes: 91 additions & 0 deletions .github/workflows/cuda-windows.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Test ExecuTorch CUDA Windows Cross-Compilation Export
# This workflow tests model export targeting CUDA Windows using optimum-executorch.
# It runs on a Linux machine with CUDA and uses the executorch-ubuntu-22.04-cuda-windows
# Docker image which has mingw and Windows CUDA SDK pre-installed for cross-compilation.

name: Test CUDA Windows Export

on:
  pull_request:
  push:
    branches:
      - main
      - release/*

# NOTE(review): cancel-in-progress is false, so superseded runs for the same
# PR queue up instead of being cancelled — confirm this is intentional.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: false

jobs:
  export-model-cuda-windows-artifact:
    name: export-model-cuda-windows-artifact
    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    secrets: inherit
    strategy:
      fail-fast: false
      matrix:
        # Each model below is exported once per quantization mode (minus the
        # explicit exclusions).
        model:
          - repo: "mistralai"
            name: "Voxtral-Mini-3B-2507"
          - repo: "openai"
            name: "whisper-small"
          - repo: "openai"
            name: "whisper-large-v3-turbo"
          - repo: "google"
            name: "gemma-3-4b-it"
        quant:
          - "non-quantized"
          - "quantized-int4-weight-only"
        exclude:
          # TODO: enable int4-weight-only on gemma3.
          - model:
              repo: "google"
              name: "gemma-3-4b-it"
            quant: "quantized-int4-weight-only"
    with:
      timeout: 90
      secrets-env: EXECUTORCH_HF_TOKEN
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: 12.8
      docker-image: ci-image:executorch-ubuntu-22.04-cuda-windows
      submodules: recursive
      upload-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-windows-${{ matrix.quant }}
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux

        echo "::group::Fix libstdc++ GLIBCXX version"
        # The executorch pybindings require GLIBCXX_3.4.30 which conda's libstdc++ doesn't have.
        # Replace conda's libstdc++ with the system version to fix ImportError.
        # Verify system version has GLIBCXX_3.4.30
        strings /usr/lib/x86_64-linux-gnu/libstdc++.so.6 | grep GLIBCXX_3.4.30
        # Backup and replace conda's version
        mv /opt/conda/lib/libstdc++.so.6 /opt/conda/lib/libstdc++.so.6.bak || true
        ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /opt/conda/lib/libstdc++.so.6
        echo "::endgroup::"

        echo "::group::Verify pre-installed dependencies"
        x86_64-w64-mingw32-g++ --version
        nvcc --version
        echo "WINDOWS_CUDA_HOME=${WINDOWS_CUDA_HOME}"
        ls -la "${WINDOWS_CUDA_HOME}"
        echo "::endgroup::"

        echo "::group::Setup ExecuTorch"
        PYTHON_EXECUTABLE=python ./install_executorch.sh
        echo "::endgroup::"

        echo "::group::Setup Huggingface"
        pip install -U "huggingface_hub[cli]<1.0" accelerate
        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
        pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
        echo "::endgroup::"

        source .ci/scripts/export_model_artifact.sh cuda-windows "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"
Loading
Loading