dmlc · hcho3 · Dec 9, 2025 · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025
diff --git a/.github/workflows/containers.yml b/.github/workflows/containers.yml
@@ -61,6 +61,8 @@ jobs:
             runner: linux-arm64-cpu
           - image_repo: xgb-ci.manylinux_2_28_aarch64
             runner: linux-arm64-cpu
+          - image_repo: xgb-ci.gpu_build_cuda13_rockylinux8_aarch64
+            runner: linux-arm64-cpu
     steps:
       - name: Workflow trigger information
         run: |

diff --git a/containers/ci_container.yml b/containers/ci_container.yml
@@ -22,6 +22,14 @@ xgb-ci.gpu_build_cuda13_rockylinux8:
   build_args:
     CUDA_VERSION: "13.0.0"
     NCCL_VERSION: *nccl_version
+    ARCH: x86_64
+
+xgb-ci.gpu_build_cuda13_rockylinux8_aarch64:
+  container_def: gpu_build_cuda13_rockylinux8
+  build_args:
+    CUDA_VERSION: "13.0.0"
+    NCCL_VERSION: *nccl_version
+    ARCH: aarch64
 
 xgb-ci.gpu_build_r_rockylinux8:
   container_def: gpu_build_r_rockylinux8

diff --git a/containers/dockerfile/Dockerfile.gpu_build_cuda13_rockylinux8 b/containers/dockerfile/Dockerfile.gpu_build_cuda13_rockylinux8
@@ -2,7 +2,8 @@ ARG CUDA_VERSION=notset
 FROM nvcr.io/nvidia/cuda:$CUDA_VERSION-devel-rockylinux8
 ARG CUDA_VERSION
 ARG NCCL_VERSION
-ARG MINIFORGE_VERSION=25.3.1-0
+ARG ARCH=x86_64
+ARG MINIFORGE_VERSION=25.11.0-1
 ARG CMAKE_VERSION=4.1.0
 
 SHELL ["/bin/bash", "-c"]
@@ -16,30 +17,36 @@ ENV GOSU_VERSION=1.10
 
 # Install all basic requirements
 RUN \
-    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/D42D0685.pub | sed '/^Version/d' \
+    # The CUDA repo URL uses "sbsa" rather than "aarch64" as its ARM path component
+    { [ "$ARCH" = "aarch64" ] && export CUDA_REPO_ARCH="sbsa" || export CUDA_REPO_ARCH="x86_64"; } && \
+    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/${CUDA_REPO_ARCH}/D42D0685.pub | sed '/^Version/d' \
         > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
     dnf -y update && \
     dnf -y install dnf-plugins-core && \
     dnf config-manager --set-enabled powertools && \
     dnf install -y tar unzip wget xz git which ninja-build gcc-toolset-10-gcc gcc-toolset-10-binutils gcc-toolset-10-gcc-c++ && \
     # Miniforge
-    wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/$MINIFORGE_VERSION/Miniforge3-$MINIFORGE_VERSION-Linux-x86_64.sh && \
+    wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/$MINIFORGE_VERSION/Miniforge3-$MINIFORGE_VERSION-Linux-${ARCH}.sh && \
     bash conda.sh -b -p /opt/miniforge && \
     /opt/miniforge/bin/python -m pip install awscli && \
     # CMake
-    wget -nv -O cmake.sh https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.sh && \
+    wget -nv -O cmake.sh https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${ARCH}.sh && \
     bash cmake.sh --skip-license --prefix=/usr
 
 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
+    # Same ARCH -> CUDA repo path mapping as above; exports do not persist across RUN instructions
+    { [ "$ARCH" = "aarch64" ] && export CUDA_REPO_ARCH="sbsa" || export CUDA_REPO_ARCH="x86_64"; } && \
     export NCCL_VERSION=$NCCL_VERSION && \
-    dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && \
+    dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/${CUDA_REPO_ARCH}/cuda-rhel8.repo && \
     dnf -y update && \
     dnf install -y libnccl-${NCCL_VERSION}+cuda13.0 libnccl-devel-${NCCL_VERSION}+cuda13.0 libnccl-static-${NCCL_VERSION}+cuda13.0
 
 # Install lightweight sudo (not bound to TTY)
 RUN set -ex; \
-    wget -nv -nc -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
+    # gosu release assets use "arm64"/"amd64" in their file names, not "aarch64"/"x86_64"
+    { [ "$ARCH" = "aarch64" ] && export GOSU_ARCH="arm64" || export GOSU_ARCH="amd64"; } && \
+    wget -nv -nc -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-${GOSU_ARCH}" && \
     chmod +x /usr/local/bin/gosu && \
     gosu nobody true
 

diff --git a/containers/dockerfile/Dockerfile.gpu_build_r_rockylinux8 b/containers/dockerfile/Dockerfile.gpu_build_r_rockylinux8
@@ -18,7 +18,7 @@ ENV GOSU_VERSION=1.10
 
 # Install all basic requirements
 RUN \
-    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/D42D0685.pub | sed '/^Version/d' \
+    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/D42D0685.pub | sed '/^Version/d' \
         > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
     dnf -y update && \
     dnf -y install dnf-plugins-core && \

diff --git a/containers/dockerfile/Dockerfile.gpu_build_rockylinux8 b/containers/dockerfile/Dockerfile.gpu_build_rockylinux8
@@ -17,7 +17,7 @@ ENV GOSU_VERSION=1.10
 
 # Install all basic requirements
 RUN \
-    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/D42D0685.pub | sed '/^Version/d' \
+    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/D42D0685.pub | sed '/^Version/d' \
         > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
     dnf -y update && \
     dnf -y install dnf-plugins-core && \
@@ -34,7 +34,7 @@ RUN \
 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
     export NCCL_VERSION=$NCCL_VERSION && \
-    dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
+    dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && \
     dnf -y update && \
     dnf install -y libnccl-${NCCL_VERSION}+cuda12.9 libnccl-devel-${NCCL_VERSION}+cuda12.9
 

diff --git a/containers/dockerfile/Dockerfile.jvm_gpu_build b/containers/dockerfile/Dockerfile.jvm_gpu_build
@@ -36,7 +36,7 @@ RUN \
 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
     export NCCL_VERSION=$NCCL_VERSION && \
-    dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
+    dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && \
     dnf -y update && \
     dnf install -y libnccl-${NCCL_VERSION}+cuda12.9 libnccl-devel-${NCCL_VERSION}+cuda12.9 libnccl-static-${NCCL_VERSION}+cuda12.9
 

diff --git a/vm_images/linux-arm64/bootstrap.sh b/vm_images/linux-arm64/bootstrap.sh
@@ -31,6 +31,21 @@ sudo systemctl is-active --quiet docker.service || sudo systemctl start docker.s
 sudo systemctl is-enabled --quiet docker.service || sudo systemctl enable docker.service
 sleep 10  # Docker daemon takes time to come up after installing
 sudo docker info
+
+## Install NVIDIA Container Toolkit
+# -f makes curl fail on HTTP errors instead of piping an error page into gpg or the apt source list
+curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
+  && curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
+    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
+    sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
+sudo apt-get update
+sudo apt-get install -y nvidia-container-toolkit
+sudo nvidia-ctk runtime configure --runtime=docker
+sudo systemctl restart docker
+
+sleep 10  # Docker daemon takes time to come up after restarting
+# Smoke test: check that a container started with --gpus all can run nvidia-smi
+sudo docker run --rm --gpus all ubuntu nvidia-smi
 sudo systemctl stop docker
 
 ## Install AWS CLI v2

diff --git a/vm_images/linux-arm64/install_drivers.sh b/vm_images/linux-arm64/install_drivers.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Installs basic build tools and the nvidia-open-580 driver package from
+# NVIDIA's Ubuntu 24.04 "sbsa" CUDA apt repository.
+set -euo pipefail
+
+# Download into a private scratch directory that is removed on exit, so a
+# failed or repeated run never leaves behind (or picks up) a stale .deb.
+workdir="$(mktemp -d)"
+trap 'rm -rf "${workdir}"' EXIT
+
+## Install basic tools
+echo 'debconf debconf/frontend select Noninteractive' | sudo debconf-set-selections
+sudo apt-get update
+sudo apt-get install -y cmake git build-essential wget ca-certificates curl unzip
+
+## Install CUDA Driver 580
+# cuda-keyring is installed first so that the apt-get update below can pick up
+# NVIDIA's packages.
+keyring_deb="cuda-keyring_1.1-1_all.deb"
+wget -nv -O "${workdir}/${keyring_deb}" \
+  "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/${keyring_deb}"
+sudo dpkg -i "${workdir}/${keyring_deb}"
+sudo apt-get update
+sudo apt-get -y install nvidia-open-580
diff --git a/vm_images/linux-arm64/linux-arm64.pkr.hcl b/vm_images/linux-arm64/linux-arm64.pkr.hcl
@@ -9,7 +9,7 @@ packer {
 
 locals {
   ami_name_prefix = "xgboost-ci"
-  image_name      = "RunsOn worker with Ubuntu 24.04 ARM64"
+  image_name      = "RunsOn worker with Ubuntu 24.04 ARM64 + CUDA driver 580"
   region          = "us-west-2"
   timestamp       = regex_replace(timestamp(), "[- TZ:]", "")
   volume_size     = 40
@@ -33,7 +33,7 @@ source "amazon-ebs" "runs-on-linux-arm64" {
   ami_virtualization_type     = "hvm"
   associate_public_ip_address = true
   communicator                = "ssh"
-  instance_type               = "c6g.4xlarge"
+  instance_type               = "g5g.xlarge"
   region                      = "${local.region}"
   ssh_timeout                 = "10m"
   ssh_username                = "ubuntu"
@@ -63,6 +63,17 @@ build {
   sources = ["source.amazon-ebs.runs-on-linux-arm64"]
 
   provisioner "shell" {
-    script = "bootstrap.sh"
+    script      = "install_drivers.sh"
+    pause_after = "30s"
+  }
+
+  provisioner "shell" {
+    expect_disconnect = true
+    inline            = ["echo 'Reboot VM'", "sudo reboot"]
+  }
+
+  provisioner "shell" {
+    pause_before = "1m0s"
+    script       = "bootstrap.sh"
   }
 }