intel · zhangxiaoli73 · Sep 15, 2025 · Sep 22, 2025 · Sep 4, 2025 · Sep 6, 2025
diff --git a/.github/actions/get-runner/action.yml b/.github/actions/get-runner/action.yml
@@ -16,8 +16,6 @@ outputs:
   pytest_extra_args:
     value: ${{ steps.runner.outputs.pytest_extra_args }}
 
-permissions: read-all
-
 runs:
   using: composite
   steps:
@@ -59,10 +57,17 @@ runs:
           if(gpu==1 && $0~/Platform/){gpu=0}; if(gpu==1){print $0}; if($0~/Platform.*Graphics/){gpu=1}
         }' |wc -l)"
         cpus_per_xpu="$(echo |awk -v c="${cpu_num}" -v x="${xpu_num}" '{printf c/x}')"
-        pytest_extra_args="$(echo |awk -v x="${xpu_num}" -v cx="${cpus_per_xpu}" '{
+        pytest_extra_args="$(echo |awk -v x="${xpu_num}" -v z="${ZE_AFFINITY_MASK}" -v cx="${cpus_per_xpu}" '{
           if (x > 0) {
+            split(z, xpu_list, ",");
             for (i=0;i<x;i++) {
-              printf(" --tx popen//env:ZE_AFFINITY_MASK=%d//env:OMP_NUM_THREADS=%d//python=\"numactl -l -C %d-%d python\"", i, cx, i*cx, (i+1)*cx-1);
+              # Worker i takes the i-th ZE_AFFINITY_MASK entry when a mask is set
+              # (awk split() arrays start at 1); otherwise it takes ordinal i.
+              ze = (z != "") ? xpu_list[i+1] : i;
+              # %s keeps sub-device entries such as 0.1 intact; %d would
+              # truncate them to the root device index.
+              printf(" --tx popen//env:ZE_AFFINITY_MASK=%s//env:OMP_NUM_THREADS=%d//python=\"numactl -l -C %d-%d python\"",
+                      ze, cx, i*cx, (i+1)*cx-1);
             }
           }else {
             printf(" -n 1 ");
@@ -82,8 +87,5 @@ runs:
         cd ${RUNNER_WORKSPACE}/..
         if [ "${PWD}" != "/" ];then
           ls -al
-          sudo chmod 777 -R torch-xpu-ops _temp _actions _tool || true
-          # mount HOME dir to use caches to save time
-          rm -rf _temp && mkdir _temp
-          ln -sf ${HOME} _temp/_github_home
+          sudo chmod 777 -R . || true
         fi
diff --git a/.github/actions/linux-e2etest/action.yml b/.github/actions/linux-e2etest/action.yml
@@ -25,10 +25,14 @@ inputs:
 runs:
   using: composite
   steps:
+    - name: Check Python
+      shell: bash -xe {0}
+      run: |
+        which python && python -V
+        which pip && pip list
     - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
       shell: bash -x {0}
       run: |
-        pip list |grep -E 'intel|torch'
         cp ./.github/scripts/inductor_xpu_test.sh ./pytorch
         cd ./pytorch
         # check param
@@ -62,10 +66,16 @@ runs:
                 contains "accuracy,performance" $scenario
                 $contains_status
                 if [ "${MODEL_ONLY_NAME}" == "" ];then
-                  for xpu_id in $(seq 0 $[ ${xpu_num} - 1 ])
+                  for var in $(seq 0 $[ ${xpu_num} - 1 ])
                   do
-                    cpu_list="$(echo "${cpus_per_xpu} ${xpu_id}" |awk '{printf("%d-%d", $1*$2, $1*$2+$1-1)}')"
-                    numactl --localalloc --physcpubind=${cpu_list} bash -x inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${xpu_id} static ${xpu_num} ${xpu_id} &
+                    cpu_list="$(echo "${cpus_per_xpu} ${var}" |awk '{printf("%d-%d", $1*$2, $1*$2+$1-1)}')"
+                    # Device for this worker: the matching ZE_AFFINITY_MASK entry if a mask is set, else the loop ordinal
+                    xpu_id=${var}
+                    if [ "${ZE_AFFINITY_MASK}" != "" ];then
+                      xpu_list=($(echo ${ZE_AFFINITY_MASK} |sed 's/,/ /g'))
+                      xpu_id=${xpu_list[${var}]}  # bash arrays are 0-based (unlike awk split), so index with var directly
+                    fi
+                    numactl --localalloc --physcpubind=${cpu_list} bash -x inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${xpu_id} static ${xpu_num} ${var} &
                   done
                 else
                   for test_model in $(echo ${MODEL_ONLY_NAME} |sed 's/,/ /g')

diff --git a/.github/actions/linux-testenv/action.yml b/.github/actions/linux-testenv/action.yml
@@ -18,8 +18,6 @@ inputs:
     default: 'None'
     description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma
 
-permissions: read-all
-
 runs:
   using: composite
   steps:
@@ -113,18 +111,23 @@ runs:
     - name: Install E2E Requirements
       shell: bash -xe {0}
       run: |
-        cd pytorch
-        TIMM_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/timm.txt 2> /dev/null || echo 'v1.0.14')"
-        TORCHBENCH_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/torchbench.txt 2> /dev/null || echo 'e03a63be')"
+        # common
         pip install pandas psutil scipy pyyaml
-        if [[ "${{ inputs.suite }}" == *"timm_models"* ]];then
-          pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@${TIMM_COMMIT_ID}
+        cd pytorch
+        if [[ "${{ inputs.suite }}" == *"huggingface"* ]];then
+          pip install -r .ci/docker/ci_commit_pins/huggingface-requirements.txt || pip install transformers==4.54.0 soxr==0.5.0
+          TRANSFORMERS_VERSION_ID="$(python -c 'import os; os.chdir("/tmp"); import transformers; print(transformers.__version__)')"
+        elif [[ "${{ inputs.suite }}" == *"timm_models"* ]];then
+          TIMM_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/timm.txt 2> /dev/null || echo 'v1.0.14')"
+          pip install git+https://github.com/huggingface/pytorch-image-models@${TIMM_COMMIT_ID}
         elif [[ "${{ inputs.suite }}" == *"torchbench"* ]];then
+          TORCHBENCH_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/torchbench.txt 2> /dev/null || echo 'e03a63be')"
           cd ../
-          rm -rf ./e2e-benchmark
-          git clone https://github.com/pytorch/benchmark e2e-benchmark
-          cd e2e-benchmark
+          rm -rf ./benchmark
+          git clone https://github.com/pytorch/benchmark benchmark
+          cd benchmark
           git checkout ${TORCHBENCH_COMMIT_ID}
+          sed -i 's/^ *pynvml.*//' requirements.txt
           pip install -r requirements.txt
           if [ "${{ github.event_name }}" == "pull_request" ];then
             while read line
@@ -134,10 +137,11 @@ runs:
           else
             python install.py --continue_on_fail
           fi
-          pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@${TIMM_COMMIT_ID}
           # for dlrm
           pip install pyre-extensions
           curl -fsSL https://raw.githubusercontent.com/facebookresearch/dlrm/refs/heads/torchrec-dlrm/requirements.txt |xargs pip install --no-deps
+          # for soft_actor_critic, temp fix
+          pip install git+https://github.com/nocoding03/gym@fix-np
           cd ../pytorch
         elif [[ "${{ inputs.suite }}" == *"pt2e"* ]];then
           cd ../
@@ -146,20 +150,17 @@ runs:
           git clone -b yifeng/pt2e_xpu https://github.com/zxd1997066/benchmark pt2e-performance
           cd pt2e-performance
           TORCHBENCH_COMMIT_ID="$(git rev-parse --short  HEAD)"
+          sed -i 's/^ *pynvml.*//' requirements.txt
           pip install -r requirements.txt
           python install.py --continue_on_fail
-          pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@${TIMM_COMMIT_ID}
           # for dlrm
           pip install pyre-extensions
           curl -fsSL https://raw.githubusercontent.com/facebookresearch/dlrm/refs/heads/torchrec-dlrm/requirements.txt |xargs pip install --no-deps
           cd ../pytorch
         else
           pip install -r ./.ci/docker/requirements-ci.txt
-          pip install -U pytest-timeout pytest-xdist
+          pip install -U pytest pytest-timeout pytest-xdist
         fi
-        # transformers
-        pip install -r .ci/docker/ci_commit_pins/huggingface-requirements.txt || pip install transformers==4.54.0 soxr==0.5.0
-        TRANSFORMERS_VERSION_ID="$(python -c 'import os; os.chdir("/tmp"); import transformers; print(transformers.__version__)')"
         # install the corresponding torchao
         pip uninstall -y torchao
         if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" -c) -ne 0 ];then

diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml
@@ -6,11 +6,14 @@ inputs:
     type: string
     description: Which ut to launch
 
-permissions: read-all
-
 runs:
   using: composite
   steps:
+    - name: Check Python
+      shell: bash -xe {0}
+      run: |
+        which python && python -V
+        which pip && pip list
     - name: op_regression
       shell: timeout 3600 bash -xe {0}
       if: ${{ inputs.ut_name == 'op_regression' || inputs.ut_name == 'basic' }}
@@ -20,6 +23,8 @@ runs:
         cd pytorch/third_party/torch-xpu-ops/test/regressions
         pytest --junit-xml=${{ github.workspace }}/ut_log/op_regression.xml \
           2> ${log_dir}/op_regression_test_error.log |tee ${log_dir}/op_regression_test.log
+        echo -e "File Path: cd pytorch/third_party/torch-xpu-ops/test/regressions" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_regression.log
+        echo -e "Reproduce Command: pytest -sv failed_case" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_regression.log
     - name: op_regression_dev1
       shell: timeout 300 bash -xe {0}
       if: ${{ inputs.ut_name == 'op_regression_dev1' || inputs.ut_name == 'basic' }}
@@ -30,6 +35,8 @@ runs:
         timeout 180 pytest test_operation_on_device_1.py \
           --junit-xml=${{ github.workspace }}/ut_log/op_regression_dev1.xml \
           2> ${log_dir}/op_regression_dev1_test_error.log |tee ${log_dir}/op_regression_dev1_test.log
+        echo -e "File Path: cd pytorch/third_party/torch-xpu-ops/test/regressions" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_regression_dev1.log
+        echo -e "Reproduce Command: pytest -sv failed_case" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_regression_dev1.log
     - name: op_transformers
       shell: timeout 3600 bash -xe {0}
       if: ${{ inputs.ut_name == 'op_transformers' || inputs.ut_name == 'basic' }}
@@ -41,6 +48,8 @@ runs:
         pytest test/test_transformers.py -k xpu \
           --junit-xml=${{ github.workspace }}/ut_log/op_transformers.xml \
           2> ${log_dir}/op_transformers_test_error.log |tee ${log_dir}/op_transformers_test.log
+        echo -e "File Path: cd pytorch" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_transformers.log
+        echo -e "Reproduce Command: pytest -sv test/failed_case -k xpu" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_transformers.log
     - name: op_extended
       shell: timeout 3600 bash -xe {0}
       if: ${{ inputs.ut_name == 'op_extended' || inputs.ut_name == 'basic' }}
@@ -53,6 +62,8 @@ runs:
           2> ${log_dir}/op_extended_test_error.log |tee ${log_dir}/op_extended_test.log
         ls -al
         cp *.xml ${{ github.workspace }}/ut_log
+        echo -e "File Path: cd pytorch/third_party/torch-xpu-ops/test/xpu/extended" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_extended.log
+        echo -e "Reproduce Command: pytest -sv failed_case" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_extended.log
     - name: op_ut
       shell: timeout 18000 bash -xe {0}
       if: ${{ inputs.ut_name == 'op_ut' }}
@@ -89,6 +100,8 @@ runs:
           tee ${{ github.workspace }}/ut_log/op_ut/op_ut_with_only_test.log
         ls -al
         cp *.xml ${{ github.workspace }}/ut_log
+        echo -e "File Path: cd pytorch/third_party/torch-xpu-ops/test/xpu" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_ut.log
+        echo -e "Reproduce Command: pytest -sv failed_case" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_ut.log
     - name: torch_xpu
       shell: timeout 3600 bash -xe {0}
       if: ${{ inputs.ut_name == 'torch_xpu' }}
@@ -129,7 +142,6 @@ runs:
         python test/profiling/llama.py | \
           tee ${{ github.workspace }}/ut_log/xpu_profiling/llama.log
         python .github/scripts/llama_summary.py -i ${{ github.workspace }}/ut_log/xpu_profiling/llama.log -o ${{ github.workspace }}/ut_log/xpu_profiling/llama_summary.csv
-        bash .github/scripts/check_baseline.sh .github/scripts/llama_baseline.csv ${{ github.workspace }}/ut_log/xpu_profiling/llama_summary.csv
 
         # All xpu ut under test/profiler
         cd ../../test/profiler
@@ -147,10 +159,6 @@ runs:
       if: ${{ inputs.ut_name == 'xpu_distributed' }}
       run: |
         xpu-smi topology -m
-        sudo rm -rf ${{ github.workspace }}/ptrace_scope.bk
-        sudo cp /proc/sys/kernel/yama/ptrace_scope ${{ github.workspace }}/ptrace_scope.bk
-        cat ${{ github.workspace }}/ptrace_scope.bk
-        echo "0" |sudo tee /proc/sys/kernel/yama/ptrace_scope
         mkdir -p ut_log/xpu_distributed
         cd pytorch/third_party/torch-xpu-ops/test/xpu
         XCCL_ENABLE=$(python -c "import torch;print(torch.distributed.is_xccl_available())")
@@ -161,3 +169,39 @@ runs:
         python run_distributed.py \
           2> ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test_error.log | \
           tee ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test.log
+
+    # Summary
+    - name: UT Test Results Summary
+      shell: timeout 180 bash -xe {0}
+      run: |
+        pip install junitparser
+        python ./.github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true
+        # Check the failure logs
+        if ls ${{ github.workspace }}/failures*.log 1> /dev/null 2>&1; then
+          echo -e "Exist Failure logs"
+          echo "Found Failure logs as below: "
+          for file in ${{ github.workspace }}/failures*.log; do
+            echo "  - $file"
+            cp "$file" ${{ github.workspace }}/ut_log
+          done
+          echo -e "Failure logs Copied"
+        else
+          echo -e "No Failure logs"
+        fi
+        # Copied the passed logs
+        if ls passed*.log 1> /dev/null 2>&1; then
+          cp passed*.log ${{ github.workspace }}/ut_log
+          echo -e "Passed logs Copied"
+        else
+          echo -e "No Passed logs"
+        fi
+        # Copied the Summary logs
+        if ls category*.log 1> /dev/null 2>&1; then
+          cp category*.log ${{ github.workspace }}/ut_log
+          echo -e "Category logs Copied"
+        else
+          echo -e "No Category logs"
+        fi
+        if [ -e ut_failure_list.csv ];then
+            cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv || true
+        fi
diff --git a/.github/actions/pt2e/action.yml b/.github/actions/pt2e/action.yml
@@ -20,17 +20,20 @@ inputs:
 runs:
   using: composite
   steps:
+    - name: Check Python
+      shell: bash -xe {0}
+      run: |
+        which python && python -V
+        which pip && pip list
     - name: Prepare dataset
       shell: bash -xe {0}
       run: |
         # dataset
-        if [ ! -d ${HOME}/datasets/imagenet ];then
-          rm -rf ${HOME}/datasets/imagenet
-          mkdir -p ${HOME}/datasets/imagenet
-          cd ${HOME}/datasets/imagenet
-          wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar
-          tar -xf ILSVRC2012_img_val.tar
+        if [ ! -d ${dataset_dir} ];then
+          rm -rf ${dataset_dir} && mkdir -p ${dataset_dir} && cd ${dataset_dir}
           wget -O valprep.sh https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh
+          wget -q https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar
+          tar -xf ILSVRC2012_img_val.tar
           bash valprep.sh
         fi
     - name: PT2E Test (${{ inputs.dt }}  ${{ inputs.scenario }})
@@ -41,7 +44,7 @@ runs:
         echo "Mode,Model,Dtype,Result" |tee ${pt2e_logs_dir}/summary.csv
         if [[ "${{ inputs.scenario }}" == *"accuracy"* ]];then
           models="alexnet,mnasnet1_0,mobilenet_v2,mobilenet_v3_large,resnet152,resnet18,resnet50,resnext50_32x4d,shufflenet_v2_x1_0,squeezenet1_1,vgg16"
-          cmd_line=" python pt2e-accuracy/scripts/modelbench/quant/inductor_quant_acc.py --device xpu --dataset_dir ${HOME}/datasets/imagenet "
+          cmd_line=" python pt2e-accuracy/scripts/modelbench/quant/inductor_quant_acc.py --device xpu --dataset_dir ${dataset_dir} "
           for model_name in $(echo $models |sed 's/,/ /g')
           do
             if [[ "${{ inputs.dt }}" == *"float32"* ]];then

diff --git a/.github/ci_expected_accuracy/rolling/inductor_timm_models_training.csv b/.github/ci_expected_accuracy/rolling/inductor_timm_models_training.csv
@@ -52,7 +52,7 @@ sebotnet33ts_256,pass,pass,pass,pass,pass
 selecsls42b,pass,pass,pass,pass,pass
 spnasnet_100,pass,pass,pass,pass,pass
 # https://github.com/intel/torch-xpu-ops/issues/1768
-swin_base_patch4_window7_224,pass,pass,pass,pass,pass
+swin_base_patch4_window7_224,pass,fail_accuracy,fail_accuracy,pass,pass
 swsl_resnext101_32x16d,pass,pass,pass,pass,pass
 tf_efficientnet_b0,pass,pass,pass,pass,pass
 tf_mixnet_l,pass,pass,pass,pass,pass