@@ -160,7 +160,7 @@ jobs:
160160
161161 torch_test_files = [f for f in all_tests+all_tests_models if (not input_test_files_list or f in input_test_files_list) and f not in transformers_test_files and 'mlx' not in f]
162162
163- torch_test_files = [test for test in torch_test_files if re.match(f'{TEST_REGEX}', test)]
163+ torch_test_files = [test for test in torch_test_files if re.match(f'{TEST_REGEX}', test) and 'ipex' not in test and 'xpu' not in test ]
164164 transformers_test_files = [test for test in transformers_test_files if re.match(f'{TEST_REGEX}', test)]
165165
166166 m4_test_files = [f for f in all_tests if ('mlx' in f or 'apple' in f) and (f.strip().removesuffix('.py') in input_test_files_list if input_test_files_list else True)]
@@ -320,204 +320,12 @@ jobs:
320320 if : always()
321321 run : rm -rf ./* .[^.] .??* # pip cache purge && uv cache clean &&
322322
323- legacy :
324- needs :
325- - build
326- - list-test-files
327- - check-vm
328- runs-on : [ self-hosted, xeon5 ]
329- if : always() && !cancelled() && (needs.build.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.transformers-files != '[]'
330- container :
331- image : ${{ needs.check-vm.outputs.ip }}:5000/nvidia/cuda:${{ needs.check-vm.outputs.cuda_version }}-ubuntu22.04
332- volumes :
333- - /home/ci/models:/monster/data/model
334- - /home/ci/models/huggingface:/github/home/.cache/huggingface
335- - /home/ci/models/pyenv:/opt/pyenv
336- strategy :
337- fail-fast : false
338- max-parallel : ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 20 }}
339- matrix :
340- test_script : ${{ fromJSON(needs.list-test-files.outputs.transformers-files) }}
341- steps :
342- - name : Checkout Codes
343- uses : actions/checkout@v5
344- with :
345- repository : ${{ github.event.inputs.repo }}
346- ref : ${{ github.event.inputs.ref }}
347-
348- - name : Fetch PR by number
349- if : ${{ github.event.inputs.pr_number != 0 }}
350- run : |
351- PR_NUMBER=${{ github.event.inputs.pr_number }}
352- echo "pr number $PR_NUMBER"
353- git config --global --add safe.directory $(pwd)
354- git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
355- git checkout pr-${PR_NUMBER}
356-
357- - name : Print Env
358- run : |
359- python_version=${{ env.PYTHON_VERSION }}
360- if [[ "$python_version" != *"."* ]]; then
361- python_version="${python_version/3/3.}"
362- fi
363- test_name=${{ matrix.test_script }}
364- test_name=${test_name//\//_}
365- env_name="cu${{ needs.check-vm.outputs.cuda_version }}_torch${{ env.TORCH_VERSION }}_py${python_version}_test_${test_name}"
366-
367- if [ -d "$(pyenv root)/versions/$env_name" ]; then
368- echo "env exists, skip"
369- pyenv local $env_name
370- pyenv activate $env_name
371- else
372- echo "creating venv..."
373- pyenv virtualenv "$python_version" "$env_name"
374- pyenv local $env_name
375- pyenv activate $env_name
376- bash -c "$(curl -L http://${RUNNER}/scripts/env/init_compiler_no_env.sh)" @ ${{ needs.check-vm.outputs.cuda_version }} ${{ env.TORCH_VERSION }} $python_version
377- fi
378-
379- pyenv local $env_name
380- pyenv activate $env_name
381-
382- echo "== pyenv =="
383- pyenv versions
384- echo "== python =="
385- python --version
386- echo "== nvcc =="
387- nvcc --version
388- echo "== torch =="
389- pip show torch || true
390- echo "== pip list =="
391- pip list
392-
393- # - name: Install requirements
394- # run: |
395- # bash -c "$(curl -L http://${RUNNER}/scripts/env/init_compiler_no_env.sh)" @ ${{ needs.check-vm.outputs.cuda_version }} ${{ env.TORCH_VERSION }} $python_version
396-
397- - name : Download source from local
398- continue-on-error : true
399- run : |
400- curl -s -O http://$RUNNER/whl/${{ env.repo }}/${{ github.run_id }}/gptqmodel_source.tar.gz
401- ls -ahl .
402- sha256=$(sha256sum $file_name)
403- echo "sha256=$sha256"
404- echo "SOURCE_DOWNLOADED=1" >> $GITHUB_ENV
405-
406- # - name: Download source from github
407- # if: env.SOURCE_DOWNLOADED == '' && !cancelled()
408- # uses: actions/download-artifact@v5
409- # with:
410- # name: source
411- # path: dist
412- # run-id: ${{ github.run_id }}
413-
414- # - name: Uncompress source
415- # continue-on-error: true
416- # run: |
417- # find . -mindepth 1 ! -name "gptqmodel_source.tar.gz" -exec rm -rf {} +
418- # ls -ahl .
419- # tar -zxf gptqmodel_source.tar.gz
420-
421- - name : Download wheel from local
422- continue-on-error : true
423- run : |
424- file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://$RUNNER/gpu/whl/download")
425-
426- echo "file_name=$file_name"
427-
428- if echo "$file_name" | grep -q "gptqmodel"; then
429- mkdir dist || true
430- cd dist
431- curl -s -O http://$RUNNER/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
432- ls -ahl .
433- sha256=$(sha256sum $file_name)
434- echo "sha256=$sha256"
435- echo "WHL_DOWNLOADED=1" >> $GITHUB_ENV
436- fi
437-
438- - name : Download artifact from github
439- if : env.WHL_DOWNLOADED == '' && !cancelled()
440- uses : actions/download-artifact@v5
441- with :
442- name : whl
443- path : dist
444- run-id : ${{ needs.check-vm.outputs.run_id }}
445-
446- - name : Install wheel
447- run : |
448- pip install uv -U
449- uv pip install -r requirements.txt
450- echo "===== install optimum bitblas parameterized uvicorn ====="
451- uv pip install optimum bitblas==0.0.1.dev13 parameterized uvicorn -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
452- echo "===== install dist/whl ====="
453- uv pip install dist/*.whl -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
454- echo "===== init test env ====="
455- echo "===== install transformers==4.38.2 typing-extensions numpy==1.26.4 peft==0.13.2 ====="
456- uv pip install transformers==4.38.2 typing-extensions numpy==1.26.4 peft==0.13.2 -U -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
457- if [ "${{ matrix.test_script }}" == "models/test_xverse" ]; then
458- echo "===== install tokenizers==0.15.2 ====="
459- uv pip install tokenizers==0.15.2 -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
460- fi
461- if [ "${{ matrix.test_script }}" == "test_quant_formats" ] || [ "${{ matrix.test_script }}" == "test_perplexity" ]; then
462- echo "===== install auto_round ====="
463- uv pip install auto_round -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
464- fi
465-
466- echo "== pip list =="
467- pip list
468-
469- - name : Find suitable GPU
470- if : ${{ !contains(matrix.test_script, 'ipex') && !cancelled() }}
471- run : |
472- timestamp=$(date +%s%3N)
473- gpu_id=-1
474-
475- url="http://$XEON5/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }}"
476- echo "$url"
477- while [ "$gpu_id" -lt 0 ]; do
478- gpu_id=$(curl -s "$url")
479-
480- if [ "$gpu_id" -lt 0 ]; then
481- echo "http://$XEON5/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }} returned $gpu_id"
482- echo "No available GPU, waiting 5 seconds..."
483- curl http://$XEON5/gpu/status2
484- sleep 5
485- else
486- echo "Allocated GPU ID: $gpu_id"
487- fi
488- done
489- if [[ ! "$gpu_id" =~ ^[0-9]+$ ]]; then
490- echo "gpu_id: $gpu_id is not a number"
491- fi
492- echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV
493- echo "STEP_TIMESTAMP=$timestamp" >> $GITHUB_ENV
494- echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"
495- curl http://$XEON5/gpu/status2
496-
497- - name : Run tests
498- if : ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
499- run : |
500- start_time=$(date +%s)
501- pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> $GITHUB_ENV; exit 1; }
502- execution_time=$(( $(date +%s) - start_time ))
503- echo "$((execution_time / 60))m $((execution_time % 60))s"
504- curl "http://$RUNNER/gpu/log_test_vram?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&range=$execution_time&unit=second&name=${{ matrix.test_script }}"
505-
506- - name : Release GPU
507- if : always() && !contains(matrix.test_script, 'ipex')
508- run : curl -X GET "http://$XEON5/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}"
509-
510- - name : Clean cache
511- if : always()
512- run : rm -rf ./* .[^.] .??* # pip cache purge && uv cache clean &&
513-
514323 torch :
515324 needs :
516325 - build
517326 - list-test-files
518327 - check-vm
519328 runs-on : [ self-hosted, xeon5 ]
520- if : always() && !cancelled() && (needs.build.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.torch-files != '[]'
521329 container :
522330 image : ${{ needs.check-vm.outputs.ip }}:5000/nvidia/cuda:${{ needs.check-vm.outputs.cuda_version }}-ubuntu22.04
523331 options : --device /dev/dri --ipc=host --runtime=nvidia --gpus all
@@ -531,6 +339,7 @@ jobs:
531339 max-parallel : ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 20 }}
532340 matrix :
533341 test_script : ${{ fromJSON(needs.list-test-files.outputs.torch-files) }}
342+ if : always() && !cancelled() && (needs.build.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.torch-files != '[]'
534343 steps :
535344 - name : Checkout Codes
536345 uses : actions/checkout@v5
@@ -712,12 +521,6 @@ jobs:
712521 uv pip install numpy==1.26.3
713522 fi
714523
715-
716- if [[ "${{ matrix.test_script }}" != "models/test_qwen2_vl" ]]; then
717- echo "===== uninstall torchvision ====="
718- uv pip uninstall torchvision || true
719- fi
720-
721524 echo "===== install -r requirements.txt ====="
722525 uv pip install -r requirements.txt
723526
@@ -790,7 +593,6 @@ jobs:
790593 container :
791594 image : modelcloud/gptqmodel:alpine-ci-v1
792595 needs :
793- - legacy
794596 - torch
795597 steps :
796598 - name : Print statistics
0 commit comments