diff --git a/.ci/docker/README.md b/.ci/docker/README.md
new file mode 100644
index 000000000..9995dd684
--- /dev/null
+++ b/.ci/docker/README.md
@@ -0,0 +1,17 @@
+# Docker images for GitHub CI and CD
+
+This directory contains everything needed to build the Docker images
+that are used in our CI tests.
+
+## Docker CI builds
+
+* `pytorch/manylinux2_28-builder:xpu-main` -- the PyTorch CI/CD image can be used directly
+
+## Docker CI tests
+
+If this image is also used for builds, Intel® Deep Learning Essentials must be installed as well;
+refer to [Intel® Deep Learning Essentials](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?packages=dl-essentials&dl-essentials-os=linux&dl-lin=offline)
+```bash
+# Build a specific image for tests
+docker build --build-arg UBUNTU_VERSION=22.04 --file ubuntu/Dockerfile --build-arg XPU_DRIVER_TYPE=LTS2 .
+```
diff --git a/.ci/docker/common/install_xpu.sh b/.ci/docker/common/install_xpu.sh
new file mode 100644
index 000000000..b2d3938c8
--- /dev/null
+++ b/.ci/docker/common/install_xpu.sh
@@ -0,0 +1,210 @@
+#!/bin/bash
+set -xe
+# Script used in CI and CD pipeline; reads XPU_DRIVER_TYPE (lts, lts2, anything else = rolling)
+
+# Intel® software for general purpose GPU capabilities.
+# Refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
+
+# Users should update to the latest version as it becomes available
+
+function install_ubuntu() {
+    . /etc/os-release
+    if [ "${XPU_DRIVER_TYPE,,}" == "lts" ]; then
+        if [[ ! " jammy " =~ " ${VERSION_CODENAME} " ]]; then
+            echo "Ubuntu version ${VERSION_CODENAME} with ${XPU_DRIVER_TYPE} not supported"
+            exit 1
+        fi
+    else
+        if [[ ! " jammy noble " =~ " ${VERSION_CODENAME} " ]]; then
+            echo "Ubuntu version ${VERSION_CODENAME} with ${XPU_DRIVER_TYPE} not supported"
+            exit 1
+        fi
+    fi
+
+    apt-get update -y
+    apt-get install -y gpg-agent wget
+    # To add the online network package repository for the GPU Driver
+    wget -qO - https://repositories.intel.com/gpu/intel-graphics.key \
+        | gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
+    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] \
+        https://repositories.intel.com/gpu/ubuntu ${VERSION_CODENAME}${XPU_DRIVER_VERSION} unified" \
+        | tee /etc/apt/sources.list.d/intel-gpu-${VERSION_CODENAME}.list
+
+    # Update the packages list and repository index
+    apt-get update
+
+    # The xpu-smi packages
+    apt-get install -y flex bison xpu-smi
+    if [ "${XPU_DRIVER_TYPE,,}" == "lts" ]; then
+        # Compute and Media Runtimes
+        apt-get install -y \
+            intel-opencl-icd intel-level-zero-gpu level-zero \
+            intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
+            libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
+            libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
+            mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
+        # Development Packages
+        apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
+    else # rolling or lts2 driver
+        if [ "${VERSION_CODENAME}" == "jammy" ];then
+            apt-get install -y \
+                intel-opencl-icd libze-intel-gpu1 libze1 \
+                intel-media-va-driver-non-free libmfx-gen1 libvpl2 \
+                libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
+                libglapi-mesa libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
+                mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo intel-ocloc
+        else
+            apt-get install -y \
+                intel-opencl-icd libze-intel-gpu1 libze1 \
+                intel-media-va-driver-non-free libmfx-gen1 libvpl2 \
+                libegl-mesa0 libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
+                libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
+                mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo intel-ocloc
+        fi
+        apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev libze-dev
+    fi
+
+    # Cleanup
+    apt-get autoclean && apt-get clean
+    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+}
+
+function install_rhel() {
+    . /etc/os-release
+    if [ "${XPU_DRIVER_TYPE,,}" == "lts" ]; then
+        if [[ "${ID}" == "rhel" ]]; then
+            if [[ ! " 8.8 8.10 9.2 9.4 9.5 " =~ " ${VERSION_ID} " ]]; then
+                echo "RHEL version ${VERSION_ID} with ${XPU_DRIVER_TYPE} not supported"
+                exit 1
+            fi
+        elif [[ "${ID}" == "almalinux" ]]; then
+            # Workaround for almalinux8, which is used by quay.io/pypa/manylinux_2_28_x86_64
+            VERSION_ID="8.8"
+        fi
+    elif [ "${XPU_DRIVER_TYPE,,}" == "lts2" ]; then
+        if [[ "${ID}" == "rhel" ]]; then
+            if [[ ! " 8.10 9.4 9.6 10.0 " =~ " ${VERSION_ID} " ]]; then
+                echo "RHEL version ${VERSION_ID} with ${XPU_DRIVER_TYPE} not supported"
+                exit 1
+            fi
+        elif [[ "${ID}" == "almalinux" ]]; then
+            # Workaround for almalinux8, which is used by quay.io/pypa/manylinux_2_28_x86_64
+            VERSION_ID="8.10"
+        fi
+    else # rolling driver
+        if [[ "${ID}" == "rhel" ]]; then
+            if [[ ! " 8.10 9.4 9.6 " =~ " ${VERSION_ID} " ]]; then
+                echo "RHEL version ${VERSION_ID} with ${XPU_DRIVER_TYPE} not supported"
+                exit 1
+            fi
+        elif [[ "${ID}" == "almalinux" ]]; then
+            # Workaround for almalinux8, which is used by quay.io/pypa/manylinux_2_28_x86_64
+            VERSION_ID="8.10"
+        fi
+    fi
+
+    dnf install -y 'dnf-command(config-manager)'
+    # To add the online network package repository for the GPU Driver
+    dnf config-manager --add-repo \
+        https://repositories.intel.com/gpu/rhel/${VERSION_ID}${XPU_DRIVER_VERSION}/unified/intel-gpu-${VERSION_ID}.repo
+
+    # Install Intel Support Packages (optional: XPU_PACKAGES is not set by this script, so skip when empty)
+    if [ -n "${XPU_PACKAGES}" ]; then yum install -y ${XPU_PACKAGES}; fi
+    # The xpu-smi packages
+    dnf install -y flex bison xpu-smi
+    # Compute and Media Runtimes
+    if [ "${XPU_DRIVER_TYPE,,}" == "lts" ]; then
+        dnf install --skip-broken -y \
+            intel-opencl intel-media libmfxgen1 libvpl2 \
+            level-zero intel-level-zero-gpu mesa-dri-drivers mesa-vulkan-drivers \
+            mesa-vdpau-drivers mesa-libEGL mesa-libgbm mesa-libGL \
+            mesa-libxatracker libvpl-tools intel-metrics-discovery \
+            intel-metrics-library intel-igc-core intel-igc-cm \
+            libva libva-utils intel-gmmlib libmetee intel-gsc intel-ocloc
+    else
+        dnf install --skip-broken -y \
+            intel-opencl intel-media libmfxgen1 libvpl2 \
+            level-zero intel-level-zero-gpu mesa-dri-drivers mesa-vulkan-drivers \
+            mesa-vdpau-drivers libdrm mesa-libEGL mesa-libgbm mesa-libGL \
+            mesa-libxatracker libvpl-tools intel-metrics-discovery \
+            intel-metrics-library intel-igc-core intel-igc-cm \
+            libva libva-utils intel-gmmlib libmetee intel-gsc intel-ocloc
+    fi
+    # Development packages
+    dnf install -y --refresh intel-igc-opencl-devel level-zero-devel intel-gsc-devel libmetee-devel
+    dnf install --enablerepo epel -y hwinfo clinfo
+
+    # Cleanup
+    dnf clean all
+    rm -rf /var/cache/yum
+    rm -rf /var/lib/yum/yumdb
+    rm -rf /var/lib/yum/history
+}
+
+function install_sles() {
+    . /etc/os-release
+    VERSION_SP=${VERSION_ID//./sp}
+    if [ "${XPU_DRIVER_TYPE,,}" == "lts" ]; then
+        if [[ ! " 15sp4 15sp5 15sp6 " =~ " ${VERSION_SP} " ]]; then
+            echo "SLES version ${VERSION_ID} with ${XPU_DRIVER_TYPE} not supported"
+            exit 1 # a bare `exit` would return echo's status (0) and hide the failure
+        fi
+    elif [ "${XPU_DRIVER_TYPE,,}" == "lts2" ]; then
+        if [[ ! " 15sp4 15sp5 15sp6 15sp7 " =~ " ${VERSION_SP} " ]]; then # trailing space needed: entries match as " <ver> "
+            echo "SLES version ${VERSION_ID} with ${XPU_DRIVER_TYPE} not supported"
+            exit 1 # a bare `exit` would return echo's status (0) and hide the failure
+        fi
+    else # rolling
+        if [[ ! " 15sp4 15sp5 15sp6 " =~ " ${VERSION_SP} " ]]; then
+            echo "SLES version ${VERSION_ID} with ${XPU_DRIVER_TYPE} not supported"
+            exit 1 # a bare `exit` would return echo's status (0) and hide the failure
+        fi
+    fi
+
+    # To add the online network package repository for the GPU Driver
+    zypper addrepo -f -r \
+        https://repositories.intel.com/gpu/sles/${VERSION_SP}${XPU_DRIVER_VERSION}/unified/intel-gpu-${VERSION_SP}.repo
+    rpm --import https://repositories.intel.com/gpu/intel-graphics.key
+
+    # The xpu-smi packages
+    zypper install -y lsb-release flex bison xpu-smi
+    # Compute and Media Runtimes
+    zypper install -y \
+        intel-level-zero-gpu level-zero intel-gsc intel-opencl intel-ocloc \
+        intel-media-driver libigfxcmrt7 libvpl2 libvpl-tools libmfxgen1
+    if [ "${XPU_DRIVER_TYPE,,}" == "lts" ]; then
+        zypper install -y libmfx1
+    fi
+    # Development packages
+    zypper install -y libigdfcl-devel intel-igc-cm libigfxcmrt-devel level-zero-devel
+    zypper install -y clinfo libOpenCL1 libva-utils hwinfo
+
+}
+
+# Default use GPU driver rolling releases
+XPU_DRIVER_VERSION=""
+if [ "${XPU_DRIVER_TYPE,,}" == "lts" ]; then
+    # Use GPU driver LTS releases
+    XPU_DRIVER_VERSION="/lts/2350"
+elif [ "${XPU_DRIVER_TYPE,,}" == "lts2" ]; then
+    # Use GPU driver LTS2 releases
+    XPU_DRIVER_VERSION="/lts/2523"
+fi
+
+# The installation depends on the base OS
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+case "$ID" in
+    ubuntu)
+        install_ubuntu
+    ;;
+    rhel|almalinux)
+        install_rhel
+    ;;
+    sles)
+        install_sles
+    ;;
+    *)
+        echo "Unable to determine OS..."
+        exit 1
+    ;;
+esac
diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile
new file mode 100644
index 000000000..ac2557e03
--- /dev/null
+++ b/.ci/docker/ubuntu/Dockerfile
@@ -0,0 +1,23 @@
+ARG UBUNTU_VERSION
+
+FROM ubuntu:${UBUNTU_VERSION}
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# install Intel GPU driver LTS2, refer to https://dgpu-docs.intel.com/driver/installation-lts2.html
+ARG XPU_DRIVER_TYPE
+ENV XPU_DRIVER_TYPE=${XPU_DRIVER_TYPE}
+
+COPY ./common/install_xpu.sh install_xpu.sh
+RUN bash ./install_xpu.sh && rm -f install_xpu.sh
+
+# install extra packages for pytorch benchmark
+RUN apt-get update && \
+    apt-get install -y wget curl sudo git unzip zip gh numactl rsync jq && \
+    apt-get install -y gcc g++ cmake libgl1 zlib1g-dev libglib2.0-dev && \
+    apt-get install -y libnl-genl-3-200 && \
+    apt-get autoclean && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+
+USER jenkins
+CMD ["bash"]
diff --git a/.github/scripts/lintrunner.sh b/.github/scripts/lintrunner.sh
index 12a9ac6e6..91ed49da4 100755
--- a/.github/scripts/lintrunner.sh
+++ b/.github/scripts/lintrunner.sh
@@ -24,7 +24,7 @@ if ! command -v lintrunner &> /dev/null; then
 fi
 
 # Ignoring errors in one specific run
-export SHELLCHECK_OPTS="-e SC2154 -e SC2086 -e SC1091 -e SC2046"
+export SHELLCHECK_OPTS="-e SC2154 -e SC2086 -e SC1091 -e SC2046 -e SC2076"
 
 # This has already been cached in the docker image
 lintrunner init 2> /dev/null
diff --git a/.github/workflows/_linux_accelerate.yml b/.github/workflows/_linux_accelerate.yml
index fbaa0c3f0..a8e17ba0b 100644
--- a/.github/workflows/_linux_accelerate.yml
+++ b/.github/workflows/_linux_accelerate.yml
@@ -92,7 +92,7 @@ jobs:
     runs-on: ${{ needs.prepare.outputs.runner_id }}
     needs: prepare
     container:
-      image: mengfeili/intel-pvc-driver:1146-1136
+      image: intelgpu/ubuntu-22.04-lts2:2523.31
       volumes:
         - ${{ github.workspace }}:${{ github.workspace }}
       options: --device=/dev/mem --device=/dev/dri --group-add video --group-add ${{ needs.prepare.outputs.render_id }}
diff --git a/.github/workflows/_linux_e2e.yml b/.github/workflows/_linux_e2e.yml
index 08f88462e..bb1f66c61 100644
--- a/.github/workflows/_linux_e2e.yml
+++ b/.github/workflows/_linux_e2e.yml
@@ -78,7 +78,7 @@ jobs:
     needs: runner
     timeout-minutes: 3600
     container:
-      image: mengfeili/intel-pvc-driver:1146-1136
+      image: intelgpu/ubuntu-22.04-lts2:2523.31
       volumes:
         - ${{ github.workspace }}:${{ github.workspace }}
       options: --device=/dev/mem --device=/dev/dri --group-add video --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --shm-size=8g
diff --git a/.github/workflows/_linux_op_benchmark.yml b/.github/workflows/_linux_op_benchmark.yml
index e8474d7ab..fa01b43a8 100644
--- a/.github/workflows/_linux_op_benchmark.yml
+++ b/.github/workflows/_linux_op_benchmark.yml
@@ -53,7 +53,7 @@ jobs:
     runs-on: ${{ needs.runner.outputs.runner_id }}
     timeout-minutes: 900
     container:
-      image: mengfeili/intel-pvc-driver:1146-1136
+      image: intelgpu/ubuntu-22.04-lts2:2523.31
       volumes:
         - ${{ github.workspace }}:${{ github.workspace }}
       options: --device=/dev/mem --device=/dev/dri --group-add video --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --shm-size=8g
diff --git a/.github/workflows/_linux_test_image.yml b/.github/workflows/_linux_test_image.yml
new file mode 100644
index 000000000..a26b56eb0
--- /dev/null
+++ b/.github/workflows/_linux_test_image.yml
@@ -0,0 +1,73 @@
+name: Linux Image for Tests
+
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - '.ci/docker/common/install_xpu.sh'
+      - '.ci/docker/ubuntu/Dockerfile'
+      - '.github/workflows/_linux_test_image.yml'
+  workflow_call:
+    inputs:
+      runner:
+        type: string
+        default: 'ubuntu-24.04'
+        description: Runner label
+      driver:
+        required: true
+        type: string
+        default: 'lts2'
+        description: Driver version, lts, lts2 or rolling
+      ubuntu:
+        required: true
+        type: string
+        default: '22.04'
+        description: Ubuntu version, 22.04 or 24.04
+      tag:
+        required: true
+        type: string
+        default: 'intelgpu/ubuntu-22.04-lts2:2523.31'
+        description: Image name and tag to build, and to push when push_to_hub is true
+      push_to_hub:
+        type: boolean
+        default: false
+        description: Whether push image to docker hub or not
+
+permissions: read-all
+
+defaults:
+  run:
+    shell: bash -xe {0}
+env:
+  GH_TOKEN: ${{ github.token }}
+  DOCKER_REGISTRY_AUTH_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }}
+
+jobs:
+  build-image:
+    runs-on: ${{ inputs.runner != '' && inputs.runner || 'ubuntu-24.04' }}
+    if: ${{ github.event.pull_request.draft == false }}
+    steps:
+      - name: Cleanup
+        run: |
+          if systemctl is-active --quiet docker; then
+              echo "Docker daemon is running...";
+          else
+              echo "Starting docker deamon..." && sudo sh -c "systemctl start docker";
+          fi
+          docker system prune -af || true
+      - name: Checkout torch-xpu-ops
+        uses: actions/checkout@v4
+      - name: Build image
+        run: |
+          cd .ci/docker
+          docker build . -t ${{ inputs.tag != '' && inputs.tag || 'intelgpu/ubuntu-22.04-lts2:latest' }} \
+            --build-arg UBUNTU_VERSION=${{ inputs.ubuntu != '' && inputs.ubuntu || '22.04' }} \
+            --build-arg XPU_DRIVER_TYPE=${{ inputs.driver != '' && inputs.driver || 'lts2' }} \
+            -f ubuntu/Dockerfile
+          docker images
+      - name: Push image
+        if: ${{ inputs.push_to_hub }}
+        run: |
+          echo "$DOCKER_REGISTRY_AUTH_TOKEN" | docker login -u intelgpu --password-stdin
+          docker push ${{ inputs.tag != '' && inputs.tag || 'intelgpu/ubuntu-22.04-lts2:latest' }}
diff --git a/.github/workflows/_linux_transformers.yml b/.github/workflows/_linux_transformers.yml
index 67e208283..e975039db 100644
--- a/.github/workflows/_linux_transformers.yml
+++ b/.github/workflows/_linux_transformers.yml
@@ -145,7 +145,7 @@ jobs:
     needs: prepare
     runs-on: ${{ needs.prepare.outputs.runner_id }}
     container:
-      image: mengfeili/intel-pvc-driver:1146-1136
+      image: intelgpu/ubuntu-22.04-lts2:2523.31
       volumes:
         - ${{ github.workspace }}:${{ github.workspace }}
       options: --device=/dev/mem --device=/dev/dri --group-add video --group-add ${{ needs.prepare.outputs.render_id }}
diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml
index e895a959b..a2d659f11 100644
--- a/.github/workflows/_linux_ut.yml
+++ b/.github/workflows/_linux_ut.yml
@@ -61,7 +61,7 @@ jobs:
     if: ${{ ! contains(inputs.ut, 'distributed') }}
     runs-on: ${{ needs.runner.outputs.runner_id }}
     container:
-      image: mengfeili/intel-pvc-driver:1146-1136
+      image: intelgpu/ubuntu-22.04-lts2:2523.31
       volumes:
         - ${{ github.workspace }}:${{ github.workspace }}
       options: --device=/dev/mem --device=/dev/dri --group-add video --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --shm-size=8g
diff --git a/.github/workflows/bisect_search.yml b/.github/workflows/bisect_search.yml
index d45c0a83f..cdb9029a1 100644
--- a/.github/workflows/bisect_search.yml
+++ b/.github/workflows/bisect_search.yml
@@ -65,7 +65,7 @@ jobs:
     needs: get_runner
     runs-on: ${{ needs.get_runner.outputs.runner_id }}
     container:
-      image: mengfeili/intel-pvc-driver:1146-1136
+      image: intelgpu/ubuntu-22.04-lts2:2523.31
       volumes:
         - ${{ github.workspace }}:${{ github.workspace }}
       options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g