Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions .github/actions/get-runner/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ outputs:
pytest_extra_args:
value: ${{ steps.runner.outputs.pytest_extra_args }}

permissions: read-all

runs:
using: composite
steps:
Expand Down Expand Up @@ -59,10 +57,17 @@ runs:
if(gpu==1 && $0~/Platform/){gpu=0}; if(gpu==1){print $0}; if($0~/Platform.*Graphics/){gpu=1}
}' |wc -l)"
cpus_per_xpu="$(echo |awk -v c="${cpu_num}" -v x="${xpu_num}" '{printf c/x}')"
pytest_extra_args="$(echo |awk -v x="${xpu_num}" -v cx="${cpus_per_xpu}" '{
pytest_extra_args="$(echo |awk -v x="${xpu_num}" -v z="${ZE_AFFINITY_MASK}" -v cx="${cpus_per_xpu}" '{
if (x > 0) {
split(z, xpu_list, ",");
for (i=0;i<x;i++) {
printf(" --tx popen//env:ZE_AFFINITY_MASK=%d//env:OMP_NUM_THREADS=%d//python=\"numactl -l -C %d-%d python\"", i, cx, i*cx, (i+1)*cx-1);
if (z != "") {
ze = xpu_list[i+1];
} else {
ze = i;
}
printf(" --tx popen//env:ZE_AFFINITY_MASK=%d//env:OMP_NUM_THREADS=%d//python=\"numactl -l -C %d-%d python\"",
ze, cx, i*cx, (i+1)*cx-1);
}
}else {
printf(" -n 1 ");
Expand All @@ -82,8 +87,5 @@ runs:
cd ${RUNNER_WORKSPACE}/..
if [ "${PWD}" != "/" ];then
ls -al
sudo chmod 777 -R torch-xpu-ops _temp _actions _tool || true
# mount HOME dir to use caches to save time
rm -rf _temp && mkdir _temp
ln -sf ${HOME} _temp/_github_home
sudo chmod 777 -R . || true
fi
18 changes: 14 additions & 4 deletions .github/actions/linux-e2etest/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,14 @@ inputs:
runs:
using: composite
steps:
# Diagnostic step: print which python and pip executables are on PATH, the
# Python version, and the installed package list before the E2E test step.
# The shell runs with -e, so the step fails if either command chain fails.
- name: Check Python
shell: bash -xe {0}
run: |
which python && python -V
which pip && pip list
- name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
shell: bash -x {0}
run: |
pip list |grep -E 'intel|torch'
cp ./.github/scripts/inductor_xpu_test.sh ./pytorch
cd ./pytorch
# check param
Expand Down Expand Up @@ -62,10 +66,16 @@ runs:
contains "accuracy,performance" $scenario
$contains_status
if [ "${MODEL_ONLY_NAME}" == "" ];then
for xpu_id in $(seq 0 $[ ${xpu_num} - 1 ])
for var in $(seq 0 $[ ${xpu_num} - 1 ])
do
cpu_list="$(echo "${cpus_per_xpu} ${xpu_id}" |awk '{printf("%d-%d", $1*$2, $1*$2+$1-1)}')"
numactl --localalloc --physcpubind=${cpu_list} bash -x inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${xpu_id} static ${xpu_num} ${xpu_id} &
cpu_list="$(echo "${cpus_per_xpu} ${var}" |awk '{printf("%d-%d", $1*$2, $1*$2+$1-1)}')"
# Map the loop index ${var} (0 .. xpu_num-1) to the device id passed to the test script.
# When ZE_AFFINITY_MASK is set it is split on commas into a bash array. Bash
# arrays are zero-based, so worker ${var} takes element ${var}; indexing with
# var+1 would skip the first device and leave the last worker with an empty id.
# NOTE(review): assumes xpu_num equals the number of mask entries - confirm.
if [ "${ZE_AFFINITY_MASK}" != "" ];then
  xpu_list=($(echo ${ZE_AFFINITY_MASK} |sed 's/,/ /g'))
  xpu_id=${xpu_list[${var}]}
else
  # No mask: device ids are simply 0 .. xpu_num-1.
  xpu_id=${var}
fi
numactl --localalloc --physcpubind=${cpu_list} bash -x inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${xpu_id} static ${xpu_num} ${var} &
done
else
for test_model in $(echo ${MODEL_ONLY_NAME} |sed 's/,/ /g')
Expand Down
33 changes: 17 additions & 16 deletions .github/actions/linux-testenv/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ inputs:
default: 'None'
description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma

permissions: read-all

runs:
using: composite
steps:
Expand Down Expand Up @@ -113,18 +111,23 @@ runs:
- name: Install E2E Requirements
shell: bash -xe {0}
run: |
cd pytorch
TIMM_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/timm.txt 2> /dev/null || echo 'v1.0.14')"
TORCHBENCH_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/torchbench.txt 2> /dev/null || echo 'e03a63be')"
# common
pip install pandas psutil scipy pyyaml
if [[ "${{ inputs.suite }}" == *"timm_models"* ]];then
pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@${TIMM_COMMIT_ID}
cd pytorch
if [[ "${{ inputs.suite }}" == *"huggingface"* ]];then
pip install -r .ci/docker/ci_commit_pins/huggingface-requirements.txt || pip install transformers==4.54.0 soxr==0.5.0
TRANSFORMERS_VERSION_ID="$(python -c 'import os; os.chdir("/tmp"); import transformers; print(transformers.__version__)')"
elif [[ "${{ inputs.suite }}" == *"timm_models"* ]];then
TIMM_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/timm.txt 2> /dev/null || echo 'v1.0.14')"
pip install git+https://github.com/huggingface/pytorch-image-models@${TIMM_COMMIT_ID}
elif [[ "${{ inputs.suite }}" == *"torchbench"* ]];then
TORCHBENCH_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/torchbench.txt 2> /dev/null || echo 'e03a63be')"
cd ../
rm -rf ./e2e-benchmark
git clone https://github.com/pytorch/benchmark e2e-benchmark
cd e2e-benchmark
rm -rf ./benchmark
git clone https://github.com/pytorch/benchmark benchmark
cd benchmark
git checkout ${TORCHBENCH_COMMIT_ID}
sed -i 's/^ *pynvml.*//' requirements.txt
pip install -r requirements.txt
if [ "${{ github.event_name }}" == "pull_request" ];then
while read line
Expand All @@ -134,10 +137,11 @@ runs:
else
python install.py --continue_on_fail
fi
pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@${TIMM_COMMIT_ID}
# for dlrm
pip install pyre-extensions
curl -fsSL https://raw.githubusercontent.com/facebookresearch/dlrm/refs/heads/torchrec-dlrm/requirements.txt |xargs pip install --no-deps
# for soft_actor_critic, temp fix
pip install git+https://github.com/nocoding03/gym@fix-np
cd ../pytorch
elif [[ "${{ inputs.suite }}" == *"pt2e"* ]];then
cd ../
Expand All @@ -146,20 +150,17 @@ runs:
git clone -b yifeng/pt2e_xpu https://github.com/zxd1997066/benchmark pt2e-performance
cd pt2e-performance
TORCHBENCH_COMMIT_ID="$(git rev-parse --short HEAD)"
sed -i 's/^ *pynvml.*//' requirements.txt
pip install -r requirements.txt
python install.py --continue_on_fail
pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@${TIMM_COMMIT_ID}
# for dlrm
pip install pyre-extensions
curl -fsSL https://raw.githubusercontent.com/facebookresearch/dlrm/refs/heads/torchrec-dlrm/requirements.txt |xargs pip install --no-deps
cd ../pytorch
else
pip install -r ./.ci/docker/requirements-ci.txt
pip install -U pytest-timeout pytest-xdist
pip install -U pytest pytest-timeout pytest-xdist
fi
# transformers
pip install -r .ci/docker/ci_commit_pins/huggingface-requirements.txt || pip install transformers==4.54.0 soxr==0.5.0
TRANSFORMERS_VERSION_ID="$(python -c 'import os; os.chdir("/tmp"); import transformers; print(transformers.__version__)')"
# install the corresponding torchao
pip uninstall -y torchao
if [ $(echo "${{ inputs.pytorch }}" |grep -w "release_wheel" -c) -ne 0 ];then
Expand Down
58 changes: 51 additions & 7 deletions .github/actions/linux-uttest/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@ inputs:
type: string
description: Which ut to launch

permissions: read-all

runs:
using: composite
steps:
# Diagnostic step: print which python and pip executables are on PATH, the
# Python version, and the installed package list before the unit-test steps.
# The shell runs with -e, so the step fails if either command chain fails.
- name: Check Python
shell: bash -xe {0}
run: |
which python && python -V
which pip && pip list
- name: op_regression
shell: timeout 3600 bash -xe {0}
if: ${{ inputs.ut_name == 'op_regression' || inputs.ut_name == 'basic' }}
Expand All @@ -20,6 +23,8 @@ runs:
cd pytorch/third_party/torch-xpu-ops/test/regressions
pytest --junit-xml=${{ github.workspace }}/ut_log/op_regression.xml \
2> ${log_dir}/op_regression_test_error.log |tee ${log_dir}/op_regression_test.log
echo -e "File Path: cd pytorch/third_party/torch-xpu-ops/test/regressions" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_regression.log
echo -e "Reproduce Command: pytest -sv failed_case" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_regression.log
- name: op_regression_dev1
shell: timeout 300 bash -xe {0}
if: ${{ inputs.ut_name == 'op_regression_dev1' || inputs.ut_name == 'basic' }}
Expand All @@ -30,6 +35,8 @@ runs:
timeout 180 pytest test_operation_on_device_1.py \
--junit-xml=${{ github.workspace }}/ut_log/op_regression_dev1.xml \
2> ${log_dir}/op_regression_dev1_test_error.log |tee ${log_dir}/op_regression_dev1_test.log
echo -e "File Path: cd pytorch/third_party/torch-xpu-ops/test/regressions" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_regression_dev1.log
echo -e "Reproduce Command: pytest -sv failed_case" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_regression_dev1.log
- name: op_transformers
shell: timeout 3600 bash -xe {0}
if: ${{ inputs.ut_name == 'op_transformers' || inputs.ut_name == 'basic' }}
Expand All @@ -41,6 +48,8 @@ runs:
pytest test/test_transformers.py -k xpu \
--junit-xml=${{ github.workspace }}/ut_log/op_transformers.xml \
2> ${log_dir}/op_transformers_test_error.log |tee ${log_dir}/op_transformers_test.log
echo -e "File Path: cd pytorch" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_transformers.log
echo -e "Reproduce Command: pytest -sv test/failed_case -k xpu" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_transformers.log
- name: op_extended
shell: timeout 3600 bash -xe {0}
if: ${{ inputs.ut_name == 'op_extended' || inputs.ut_name == 'basic' }}
Expand All @@ -53,6 +62,8 @@ runs:
2> ${log_dir}/op_extended_test_error.log |tee ${log_dir}/op_extended_test.log
ls -al
cp *.xml ${{ github.workspace }}/ut_log
echo -e "File Path: cd pytorch/third_party/torch-xpu-ops/test/xpu/extended" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_extended.log
echo -e "Reproduce Command: pytest -sv failed_case" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_extended.log
- name: op_ut
shell: timeout 18000 bash -xe {0}
if: ${{ inputs.ut_name == 'op_ut' }}
Expand Down Expand Up @@ -89,6 +100,8 @@ runs:
tee ${{ github.workspace }}/ut_log/op_ut/op_ut_with_only_test.log
ls -al
cp *.xml ${{ github.workspace }}/ut_log
echo -e "File Path: cd pytorch/third_party/torch-xpu-ops/test/xpu" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_ut.log
echo -e "Reproduce Command: pytest -sv failed_case" | tee -a ${{ github.workspace }}/ut_log/reproduce_op_ut.log
- name: torch_xpu
shell: timeout 3600 bash -xe {0}
if: ${{ inputs.ut_name == 'torch_xpu' }}
Expand Down Expand Up @@ -129,7 +142,6 @@ runs:
python test/profiling/llama.py | \
tee ${{ github.workspace }}/ut_log/xpu_profiling/llama.log
python .github/scripts/llama_summary.py -i ${{ github.workspace }}/ut_log/xpu_profiling/llama.log -o ${{ github.workspace }}/ut_log/xpu_profiling/llama_summary.csv
bash .github/scripts/check_baseline.sh .github/scripts/llama_baseline.csv ${{ github.workspace }}/ut_log/xpu_profiling/llama_summary.csv

# All xpu ut under test/profiler
cd ../../test/profiler
Expand All @@ -147,10 +159,6 @@ runs:
if: ${{ inputs.ut_name == 'xpu_distributed' }}
run: |
xpu-smi topology -m
sudo rm -rf ${{ github.workspace }}/ptrace_scope.bk
sudo cp /proc/sys/kernel/yama/ptrace_scope ${{ github.workspace }}/ptrace_scope.bk
cat ${{ github.workspace }}/ptrace_scope.bk
echo "0" |sudo tee /proc/sys/kernel/yama/ptrace_scope
mkdir -p ut_log/xpu_distributed
cd pytorch/third_party/torch-xpu-ops/test/xpu
XCCL_ENABLE=$(python -c "import torch;print(torch.distributed.is_xccl_available())")
Expand All @@ -161,3 +169,39 @@ runs:
python run_distributed.py \
2> ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test_error.log | \
tee ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test.log

# Summary
# Post-test step: feed the JUnit XML results in ut_log to check-ut.py, append
# its output to the job summary, and collect the failure / passed / category
# logs into ut_log. The whole step is limited to 180 seconds by `timeout`.
- name: UT Test Results Summary
shell: timeout 180 bash -xe {0}
run: |
# NOTE(review): junitparser is presumably required by check-ut.py - confirm.
pip install junitparser
# stdout is appended to the job summary; `|| true` keeps a non-zero exit of
# check-ut.py from aborting the step under `bash -e`.
python ./.github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true
# Copy any failures*.log from the workspace root into ut_log.
# `ls <glob>` with its output discarded is used only as an "any match?" test.
if ls ${{ github.workspace }}/failures*.log 1> /dev/null 2>&1; then
echo -e "Exist Failure logs"
echo "Found Failure logs as below: "
for file in ${{ github.workspace }}/failures*.log; do
echo " - $file"
cp "$file" ${{ github.workspace }}/ut_log
done
echo -e "Failure logs Copied"
else
echo -e "No Failure logs"
fi
# Copy any passed*.log from the current working directory into ut_log.
if ls passed*.log 1> /dev/null 2>&1; then
cp passed*.log ${{ github.workspace }}/ut_log
echo -e "Passed logs Copied"
else
echo -e "No Passed logs"
fi
# Copy any category*.log from the current working directory into ut_log.
if ls category*.log 1> /dev/null 2>&1; then
cp category*.log ${{ github.workspace }}/ut_log
echo -e "Category logs Copied"
else
echo -e "No Category logs"
fi
# Copy the failure list CSV if it exists; a failed copy is tolerated.
if [ -e ut_failure_list.csv ];then
cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv || true
fi
17 changes: 10 additions & 7 deletions .github/actions/pt2e/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,20 @@ inputs:
runs:
using: composite
steps:
# Diagnostic step: print which python and pip executables are on PATH, the
# Python version, and the installed package list before the dataset and test steps.
# The shell runs with -e, so the step fails if either command chain fails.
- name: Check Python
shell: bash -xe {0}
run: |
which python && python -V
which pip && pip list
- name: Prepare dataset
shell: bash -xe {0}
run: |
# dataset
if [ ! -d ${HOME}/datasets/imagenet ];then
rm -rf ${HOME}/datasets/imagenet
mkdir -p ${HOME}/datasets/imagenet
cd ${HOME}/datasets/imagenet
wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar
tar -xf ILSVRC2012_img_val.tar
if [ ! -d ${dataset_dir} ];then
rm -rf ${dataset_dir} && mkdir -p ${dataset_dir} && cd ${dataset_dir}
wget -O valprep.sh https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh
wget -q https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar
tar -xf ILSVRC2012_img_val.tar
bash valprep.sh
fi
- name: PT2E Test (${{ inputs.dt }} ${{ inputs.scenario }})
Expand All @@ -41,7 +44,7 @@ runs:
echo "Mode,Model,Dtype,Result" |tee ${pt2e_logs_dir}/summary.csv
if [[ "${{ inputs.scenario }}" == *"accuracy"* ]];then
models="alexnet,mnasnet1_0,mobilenet_v2,mobilenet_v3_large,resnet152,resnet18,resnet50,resnext50_32x4d,shufflenet_v2_x1_0,squeezenet1_1,vgg16"
cmd_line=" python pt2e-accuracy/scripts/modelbench/quant/inductor_quant_acc.py --device xpu --dataset_dir ${HOME}/datasets/imagenet "
cmd_line=" python pt2e-accuracy/scripts/modelbench/quant/inductor_quant_acc.py --device xpu --dataset_dir ${dataset_dir} "
for model_name in $(echo $models |sed 's/,/ /g')
do
if [[ "${{ inputs.dt }}" == *"float32"* ]];then
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ sebotnet33ts_256,pass,pass,pass,pass,pass
selecsls42b,pass,pass,pass,pass,pass
spnasnet_100,pass,pass,pass,pass,pass
# https://github.com/intel/torch-xpu-ops/issues/1768
swin_base_patch4_window7_224,pass,pass,pass,pass,pass
swin_base_patch4_window7_224,pass,fail_accuracy,fail_accuracy,pass,pass
swsl_resnext101_32x16d,pass,pass,pass,pass,pass
tf_efficientnet_b0,pass,pass,pass,pass,pass
tf_mixnet_l,pass,pass,pass,pass,pass
Expand Down
Loading