diff --git a/tests/full_tests/ci_gsm8k_tests.sh b/tests/full_tests/ci_gsm8k_tests.sh
index 87604e502..822592629 100644
--- a/tests/full_tests/ci_gsm8k_tests.sh
+++ b/tests/full_tests/ci_gsm8k_tests.sh
@@ -265,6 +265,14 @@ run_qwen2_5_vl_unified_attn_test() {
     echo "✅ Test multimodal-support + unified attention with qwen2.5-vl-7b passed."
 }
 
+# Multimodal-support with qwen3-vl
+run_qwen3_vl_test() {
+    echo "➡️ Testing Qwen3-VL-32B..."
+    VLLM_SKIP_WARMUP=true VLLM_CONTIGUOUS_PA=False PT_HPU_LAZY_MODE=0 \
+    python -u "${VLLM_GAUDI_PREFIX}/tests/models/language/generation/generation_mm.py" --model-card-path "${VLLM_GAUDI_PREFIX}/tests/full_tests/model_cards/qwen3-vl-32b.yaml"
+    echo "✅ Test multimodal-support with qwen3-vl-32b passed."
+}
+
 # Spec decode with ngram
 run_spec_decode_ngram_test() {
     echo "➡️ Testing Spec-decode with ngram..."
diff --git a/tests/full_tests/model_cards/qwen3-vl-32b.yaml b/tests/full_tests/model_cards/qwen3-vl-32b.yaml
new file mode 100644
index 000000000..8ba2a767b
--- /dev/null
+++ b/tests/full_tests/model_cards/qwen3-vl-32b.yaml
@@ -0,0 +1,15 @@
+model_name: "Qwen/Qwen3-VL-32B-Instruct-FP8"
+
+description: >
+  Qwen3-VL 32B Instruct model (FP8) validated for image
+  multimodal inference using vLLM built-in test assets.
+
+test_config:
+  - modality: image
+    extra_engine_args:
+      mm_processor_kwargs:
+        min_pixels: 784
+        max_pixels: 1003520
+    input_data_config:
+      num_prompts: 4
+      media_source: default