
Commit d9bd9f1

[CI] remove legacy tests & skip intel tests & disable flash_attn for some models (#1722)
* [CI] remove legacy tests & skip intel tests
* [CI] move if down
* [CI] filter in py
* [CI] remove legacy dep
* [CI] skip tests if ipex was not available
* fix models don't support flash_attn
1 parent: 5ba8809

File tree: 13 files changed, 23 additions and 207 deletions

.github/workflows/unit_tests.yml

Lines changed: 2 additions & 200 deletions
@@ -160,7 +160,7 @@ jobs:
 
           torch_test_files = [f for f in all_tests+all_tests_models if (not input_test_files_list or f in input_test_files_list) and f not in transformers_test_files and 'mlx' not in f]
 
-          torch_test_files = [test for test in torch_test_files if re.match(f'{TEST_REGEX}', test)]
+          torch_test_files = [test for test in torch_test_files if re.match(f'{TEST_REGEX}', test) and 'ipex' not in test and 'xpu' not in test]
           transformers_test_files = [test for test in transformers_test_files if re.match(f'{TEST_REGEX}', test)]
 
           m4_test_files = [f for f in all_tests if ('mlx' in f or 'apple' in f) and (f.strip().removesuffix('.py') in input_test_files_list if input_test_files_list else True)]
@@ -320,204 +320,12 @@ jobs:
         if: always()
         run: rm -rf ./* .[^.] .??* # pip cache purge && uv cache clean &&
 
-  legacy:
-    needs:
-      - build
-      - list-test-files
-      - check-vm
-    runs-on: [ self-hosted, xeon5 ]
-    if: always() && !cancelled() && (needs.build.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.transformers-files != '[]'
-    container:
-      image: ${{ needs.check-vm.outputs.ip }}:5000/nvidia/cuda:${{ needs.check-vm.outputs.cuda_version }}-ubuntu22.04
-      volumes:
-        - /home/ci/models:/monster/data/model
-        - /home/ci/models/huggingface:/github/home/.cache/huggingface
-        - /home/ci/models/pyenv:/opt/pyenv
-    strategy:
-      fail-fast: false
-      max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 20 }}
-      matrix:
-        test_script: ${{ fromJSON(needs.list-test-files.outputs.transformers-files) }}
-    steps:
-      - name: Checkout Codes
-        uses: actions/checkout@v5
-        with:
-          repository: ${{ github.event.inputs.repo }}
-          ref: ${{ github.event.inputs.ref }}
-
-      - name: Fetch PR by number
-        if: ${{ github.event.inputs.pr_number != 0 }}
-        run: |
-          PR_NUMBER=${{ github.event.inputs.pr_number }}
-          echo "pr number $PR_NUMBER"
-          git config --global --add safe.directory $(pwd)
-          git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
-          git checkout pr-${PR_NUMBER}
-
-      - name: Print Env
-        run: |
-          python_version=${{ env.PYTHON_VERSION }}
-          if [[ "$python_version" != *"."* ]]; then
-            python_version="${python_version/3/3.}"
-          fi
-          test_name=${{ matrix.test_script }}
-          test_name=${test_name//\//_}
-          env_name="cu${{ needs.check-vm.outputs.cuda_version }}_torch${{ env.TORCH_VERSION }}_py${python_version}_test_${test_name}"
-
-          if [ -d "$(pyenv root)/versions/$env_name" ]; then
-            echo "env exists, skip"
-            pyenv local $env_name
-            pyenv activate $env_name
-          else
-            echo "creating venv..."
-            pyenv virtualenv "$python_version" "$env_name"
-            pyenv local $env_name
-            pyenv activate $env_name
-            bash -c "$(curl -L http://${RUNNER}/scripts/env/init_compiler_no_env.sh)" @ ${{ needs.check-vm.outputs.cuda_version }} ${{ env.TORCH_VERSION }} $python_version
-          fi
-
-          pyenv local $env_name
-          pyenv activate $env_name
-
-          echo "== pyenv =="
-          pyenv versions
-          echo "== python =="
-          python --version
-          echo "== nvcc =="
-          nvcc --version
-          echo "== torch =="
-          pip show torch || true
-          echo "== pip list =="
-          pip list
-
-      # - name: Install requirements
-      #   run: |
-      #     bash -c "$(curl -L http://${RUNNER}/scripts/env/init_compiler_no_env.sh)" @ ${{ needs.check-vm.outputs.cuda_version }} ${{ env.TORCH_VERSION }} $python_version
-
-      - name: Download source from local
-        continue-on-error: true
-        run: |
-          curl -s -O http://$RUNNER/whl/${{ env.repo }}/${{ github.run_id }}/gptqmodel_source.tar.gz
-          ls -ahl .
-          sha256=$(sha256sum $file_name)
-          echo "sha256=$sha256"
-          echo "SOURCE_DOWNLOADED=1" >> $GITHUB_ENV
-
-      # - name: Download source from github
-      #   if: env.SOURCE_DOWNLOADED == '' && !cancelled()
-      #   uses: actions/download-artifact@v5
-      #   with:
-      #     name: source
-      #     path: dist
-      #     run-id: ${{ github.run_id }}
-
-      # - name: Uncompress source
-      #   continue-on-error: true
-      #   run: |
-      #     find . -mindepth 1 ! -name "gptqmodel_source.tar.gz" -exec rm -rf {} +
-      #     ls -ahl .
-      #     tar -zxf gptqmodel_source.tar.gz
-
-      - name: Download wheel from local
-        continue-on-error: true
-        run: |
-          file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://$RUNNER/gpu/whl/download")
-
-          echo "file_name=$file_name"
-
-          if echo "$file_name" | grep -q "gptqmodel"; then
-            mkdir dist || true
-            cd dist
-            curl -s -O http://$RUNNER/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
-            ls -ahl .
-            sha256=$(sha256sum $file_name)
-            echo "sha256=$sha256"
-            echo "WHL_DOWNLOADED=1" >> $GITHUB_ENV
-          fi
-
-      - name: Download artifact from github
-        if: env.WHL_DOWNLOADED == '' && !cancelled()
-        uses: actions/download-artifact@v5
-        with:
-          name: whl
-          path: dist
-          run-id: ${{ needs.check-vm.outputs.run_id }}
-
-      - name: Install wheel
-        run: |
-          pip install uv -U
-          uv pip install -r requirements.txt
-          echo "===== install optimum bitblas parameterized uvicorn ====="
-          uv pip install optimum bitblas==0.0.1.dev13 parameterized uvicorn -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
-          echo "===== install dist/whl ====="
-          uv pip install dist/*.whl -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
-          echo "===== init test env ====="
-          echo "===== install transformers==4.38.2 typing-extensions numpy==1.26.4 peft==0.13.2 ====="
-          uv pip install transformers==4.38.2 typing-extensions numpy==1.26.4 peft==0.13.2 -U -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
-          if [ "${{ matrix.test_script }}" == "models/test_xverse" ]; then
-            echo "===== install tokenizers==0.15.2 ====="
-            uv pip install tokenizers==0.15.2 -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
-          fi
-          if [ "${{ matrix.test_script }}" == "test_quant_formats" ] || [ "${{ matrix.test_script }}" == "test_perplexity" ]; then
-            echo "===== install auto_round ====="
-            uv pip install auto_round -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
-          fi
-
-          echo "== pip list =="
-          pip list
-
-      - name: Find suitable GPU
-        if: ${{ !contains(matrix.test_script, 'ipex') && !cancelled() }}
-        run: |
-          timestamp=$(date +%s%3N)
-          gpu_id=-1
-
-          url="http://$XEON5/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }}"
-          echo "$url"
-          while [ "$gpu_id" -lt 0 ]; do
-            gpu_id=$(curl -s "$url")
-
-            if [ "$gpu_id" -lt 0 ]; then
-              echo "http://$XEON5/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }} returned $gpu_id"
-              echo "No available GPU, waiting 5 seconds..."
-              curl http://$XEON5/gpu/status2
-              sleep 5
-            else
-              echo "Allocated GPU ID: $gpu_id"
-            fi
-          done
-          if [[ ! "$gpu_id" =~ ^[0-9]+$ ]]; then
-            echo "gpu_id: $gpu_id is not a number"
-          fi
-          echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV
-          echo "STEP_TIMESTAMP=$timestamp" >> $GITHUB_ENV
-          echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"
-          curl http://$XEON5/gpu/status2
-
-      - name: Run tests
-        if: ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
-        run: |
-          start_time=$(date +%s)
-          pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> $GITHUB_ENV; exit 1; }
-          execution_time=$(( $(date +%s) - start_time ))
-          echo "$((execution_time / 60))m $((execution_time % 60))s"
-          curl "http://$RUNNER/gpu/log_test_vram?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&range=$execution_time&unit=second&name=${{ matrix.test_script }}"
-
-      - name: Release GPU
-        if: always() && !contains(matrix.test_script, 'ipex')
-        run: curl -X GET "http://$XEON5/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}"
-
-      - name: Clean cache
-        if: always()
-        run: rm -rf ./* .[^.] .??* #pip cache purge && uv cache clean &&
-
   torch:
     needs:
       - build
       - list-test-files
       - check-vm
     runs-on: [ self-hosted, xeon5 ]
-    if: always() && !cancelled() && (needs.build.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.torch-files != '[]'
     container:
       image: ${{ needs.check-vm.outputs.ip }}:5000/nvidia/cuda:${{ needs.check-vm.outputs.cuda_version }}-ubuntu22.04
       options: --device /dev/dri --ipc=host --runtime=nvidia --gpus all
@@ -531,6 +339,7 @@ jobs:
       max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 20 }}
       matrix:
         test_script: ${{ fromJSON(needs.list-test-files.outputs.torch-files) }}
+    if: always() && !cancelled() && (needs.build.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.torch-files != '[]'
     steps:
       - name: Checkout Codes
         uses: actions/checkout@v5
@@ -712,12 +521,6 @@ jobs:
             uv pip install numpy==1.26.3
           fi
 
-
-          if [[ "${{ matrix.test_script }}" != "models/test_qwen2_vl" ]]; then
-            echo "===== uninstall torchvision ====="
-            uv pip uninstall torchvision || true
-          fi
-
          echo "===== install -r requirements.txt ====="
           uv pip install -r requirements.txt
 
@@ -790,7 +593,6 @@ jobs:
     container:
       image: modelcloud/gptqmodel:alpine-ci-v1
     needs:
-      - legacy
       - torch
     steps:
       - name: Print statistics
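
Note on the first hunk: test selection now happens in Python ("[CI] filter in py" above), and the rewritten line both applies TEST_REGEX and drops Intel tests by filename. A minimal standalone sketch of that selection logic, using placeholder file names and a placeholder TEST_REGEX value:

    import re

    # Placeholder inputs standing in for the workflow's collected test files.
    TEST_REGEX = r".*"
    torch_test_files = [
        "test_bits",
        "test_ipex",        # dropped: Intel IPEX backend test
        "test_kernel_xpu",  # dropped: Intel XPU device test
        "models/test_bloom",
    ]

    # Mirrors the new workflow line: keep regex matches, minus anything ipex/xpu.
    torch_test_files = [
        test for test in torch_test_files
        if re.match(f"{TEST_REGEX}", test) and "ipex" not in test and "xpu" not in test
    ]

    print(torch_test_files)  # ['test_bits', 'models/test_bloom']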

tests/models/test_bloom.py

Lines changed: 1 addition & 0 deletions
@@ -23,6 +23,7 @@ class TestBloom(ModelTest):
     NATIVE_ARC_CHALLENGE_ACC = 0.2201
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.2440
     TORCH_DTYPE = torch.float16
+    USE_FLASH_ATTN = False
 
     def test_bloom(self):
         self.quant_lm_eval()
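
The same one-line USE_FLASH_ATTN = False override appears in each model test that follows: BLOOM and the other listed architectures do not support flash_attn, so the suite must not request it. The diff does not show how the ModelTest base class consumes the flag; a plausible sketch, assuming it maps onto Transformers' attn_implementation load argument (load_eval_model is a hypothetical name):

    from transformers import AutoModelForCausalLM

    class ModelTest:
        USE_FLASH_ATTN = True  # subclasses such as TestBloom override this to False

        def load_eval_model(self, model_id, **kwargs):
            # flash_attention_2 requires the flash-attn package and a supported
            # architecture; models without support use the default attention path.
            if self.USE_FLASH_ATTN:
                kwargs.setdefault("attn_implementation", "flash_attention_2")
            return AutoModelForCausalLM.from_pretrained(model_id, **kwargs)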

tests/models/test_chatglm.py

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@ class TestChatGlm(ModelTest):
     NATIVE_ARC_CHALLENGE_ACC = 0.3319
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3729
     TRUST_REMOTE_CODE = True
+    USE_FLASH_ATTN = False
 
     def test_chatglm(self):
         self.quant_lm_eval()

tests/models/test_codegen.py

Lines changed: 1 addition & 0 deletions
@@ -23,6 +23,7 @@ class TestCodeGen(ModelTest):
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.2005
     TRUST_REMOTE_CODE = True
     USE_VLLM = False
+    USE_FLASH_ATTN = False
 
     def test_codegen(self):
         self.quant_lm_eval()

tests/models/test_ernie4_5.py

Lines changed: 1 addition & 0 deletions
@@ -23,6 +23,7 @@ class TestErnie4_5(ModelTest):
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3183
     TRUST_REMOTE_CODE = True
     EVAL_BATCH_SIZE = 6
+    USE_FLASH_ATTN = False
 
     def test_exaone(self):
         self.quant_lm_eval()

tests/models/test_longllama.py

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ class TestLongLlama(ModelTest):
     TRUST_REMOTE_CODE = True
     QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.5
     USE_VLLM = False
+    USE_FLASH_ATTN = False
 
     def test_longllama(self):
         self.quant_lm_eval()

tests/models/test_mpt.py

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ class TestMpt(ModelTest):
     APPLY_CHAT_TEMPLATE = False
     TRUST_REMOTE_CODE = False
     EVAL_BATCH_SIZE = 6
+    USE_FLASH_ATTN = False
 
     def test_mpt(self):
         self.quant_lm_eval()

tests/models/test_ovis_1_6_llama.py

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@ class TestOvis1_6_Llama(ModelTest):
     TRUST_REMOTE_CODE = True
     APPLY_CHAT_TEMPLATE = False
     EVAL_BATCH_SIZE = 1
+    USE_FLASH_ATTN = False
 
     def test_ovis_1_6(self):
         model, tokenizer = self.quantModel(self.NATIVE_MODEL_ID, trust_remote_code=self.TRUST_REMOTE_CODE,

tests/models/test_telechat2.py

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ class TestTeleChat_2(ModelTest):
     TRUST_REMOTE_CODE = True
     EVAL_BATCH_SIZE = 6
     USE_VLLM = False
+    USE_FLASH_ATTN = False
 
 
     def test_telechat2(self):

tests/test_bits.py

Lines changed: 3 additions & 2 deletions
@@ -29,7 +29,7 @@
 from gptqmodel.nn_modules.qlinear.bitblas import BitBLASQuantLinear  # noqa: E402
 from gptqmodel.nn_modules.qlinear.exllama import ExllamaQuantLinear  # noqa: E402
 from gptqmodel.nn_modules.qlinear.exllamav2 import ExllamaV2QuantLinear  # noqa: E402
-from gptqmodel.nn_modules.qlinear.ipex import IPEXQuantLinear  # noqa: E402
+from gptqmodel.nn_modules.qlinear.ipex import IPEXQuantLinear, HAS_IPEX  # noqa: E402
 from gptqmodel.nn_modules.qlinear.marlin import MarlinQuantLinear  # noqa: E402
 from gptqmodel.nn_modules.qlinear.torch import TorchQuantLinear  # noqa: E402
 from gptqmodel.nn_modules.qlinear.tritonv2 import TritonV2QuantLinear  # noqa: E402
@@ -49,10 +49,11 @@ class TestBits(unittest.TestCase):
         BACKEND.TRITON: TritonV2QuantLinear,
         BACKEND.TORCH: TorchQuantLinear,
         BACKEND.BITBLAS: BitBLASQuantLinear,
-        BACKEND.IPEX: IPEXQuantLinear,
         BACKEND.MARLIN: MarlinQuantLinear,
         BACKEND.EXLLAMA_EORA: ExllamaEoraQuantLinear,
     }
+    if HAS_IPEX:
+        QLINEAR_DICT[BACKEND.IPEX] = IPEXQuantLinear
 
     QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.2
     QUANT_ARC_MAX_POSITIVE_DELTA_CEIL_PERCENT = 0.2
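
Registering the IPEX backend only when HAS_IPEX is true lets QLINEAR_DICT build cleanly on machines without Intel extensions, which is what the "skip tests if ipex was not available" part of the commit refers to. The flag is imported from gptqmodel.nn_modules.qlinear.ipex; a sketch of the guarded-import pattern such a flag usually follows (the exact detection logic inside that module is an assumption):

    # Assumed shape of HAS_IPEX in gptqmodel/nn_modules/qlinear/ipex.py:
    # record availability at import time instead of raising.
    try:
        import intel_extension_for_pytorch  # noqa: F401
        HAS_IPEX = True
    except ImportError:
        HAS_IPEX = False

Call sites can then branch on the flag, as the dict registration above does, or skip whole test cases with unittest.skipUnless(HAS_IPEX, "IPEX not installed").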
