diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 0a91cd2c..aabeb8aa 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -119,6 +119,8 @@ jobs:
             uv.lock
             tests/**/*.py
             vllm_spyre/**/*.py
+            vllm_spyre/config/known_model_configs.json
+            vllm_spyre/config/supported_configs.yaml
 
       - name: "Install PyTorch 2.7.1"
         if: steps.changed-src-files.outputs.any_changed == 'true'
@@ -157,6 +159,7 @@ jobs:
 
       - name: "Standardize HF model names for caching"
         id: standardize-names
+        if: steps.changed-src-files.outputs.any_changed == 'true'
         run: |
           # replace '/' characters in HF_MODEL with '--' for GHA cache keys and
           # in model file names in local HF hub cache
diff --git a/docs/user_guide/supported_models.md b/docs/user_guide/supported_models.md
index 3725a2ae..13bdaaa9 100644
--- a/docs/user_guide/supported_models.md
+++ b/docs/user_guide/supported_models.md
@@ -35,6 +35,7 @@ configurations.
 | [BAAI/BGE-Reranker (v2-m3)][] | 1 | 8192 | 1 |
 | [BAAI/BGE-Reranker (Large)][] | 1 | 512 | 1 |
 | [BAAI/BGE-Reranker (Large)][] | 1 | 512 | 64 |
+| [Multilingual-E5-large][] | 1 | 512 | 64 |
 
 [Granite-3.3-8b]: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct
 [Granite-3.3-8b (FP8)]: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct
@@ -42,6 +43,7 @@ configurations.
 [Granite-Embedding-278m (Multilingual)]: https://huggingface.co/ibm-granite/granite-embedding-278m-multilingual
 [BAAI/BGE-Reranker (v2-m3)]: https://huggingface.co/BAAI/bge-reranker-v2-m3
 [BAAI/BGE-Reranker (Large)]: https://huggingface.co/BAAI/bge-reranker-large
+[Multilingual-E5-large]: https://huggingface.co/intfloat/multilingual-e5-large
 
 ## Runtime Validation
diff --git a/tests/download_model_configs.py b/tests/download_model_configs.py
old mode 100644
new mode 100755
index 7ed1210b..a90d140c
--- a/tests/download_model_configs.py
+++ b/tests/download_model_configs.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 import os
 from pathlib import Path
 from urllib.request import urlretrieve
diff --git a/tests/fixtures/model_configs/intfloat/multilingual-e5-large/config.json b/tests/fixtures/model_configs/intfloat/multilingual-e5-large/config.json
new file mode 100644
index 00000000..2868fc29
--- /dev/null
+++ b/tests/fixtures/model_configs/intfloat/multilingual-e5-large/config.json
@@ -0,0 +1,27 @@
+{
+  "architectures": [
+    "XLMRobertaModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "xlm-roberta",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "output_past": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.2",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 250002
+}
diff --git a/tests/utils/test_model_config_validator.py b/tests/utils/test_model_config_validator.py
index dc38455e..f86d8f0d 100644
--- a/tests/utils/test_model_config_validator.py
+++ b/tests/utils/test_model_config_validator.py
@@ -212,9 +212,12 @@ def test_find_model_by_config(monkeypatch, caplog):
     assert model_config.model != model_id
 
     models_found = find_known_models_by_model_config(model_config)
-    assert len(models_found) == 1
+    assert len(models_found) == 1, \
+        (f"More than one model found. Need to add more distinguishing"
+         f" parameters for models `{models_found}` in file"
+         f" `vllm_spyre/config/known_model_configs.json`")
     assert models_found[0] == model_id
 
     validate(model_config)
-    assert f"Model '{model_config.model}' is not a known model"
+    assert f"Model '{model_config.model}' is not a known" in caplog.text
     assert f"Found model '{model_id}'" in caplog.text
diff --git a/vllm_spyre/config/known_model_configs.json b/vllm_spyre/config/known_model_configs.json
index be3fae3c..5feb84ee 100644
--- a/vllm_spyre/config/known_model_configs.json
+++ b/vllm_spyre/config/known_model_configs.json
@@ -1,5 +1,6 @@
 {
   "BAAI/bge-reranker-large": {
+    "architectures": ["XLMRobertaForSequenceClassification"],
     "model_type": "xlm-roberta",
     "max_position_embeddings": 514,
     "num_hidden_layers": 24,
@@ -39,5 +40,11 @@
     "model_type": "roberta",
     "num_hidden_layers": 24,
     "vocab_size": 50265
+  },
+  "intfloat/multilingual-e5-large": {
+    "architectures": ["XLMRobertaModel"],
+    "model_type": "xlm-roberta",
+    "num_hidden_layers": 24,
+    "vocab_size": 250002
   }
-}
\ No newline at end of file
+}
diff --git a/vllm_spyre/config/supported_configs.yaml b/vllm_spyre/config/supported_configs.yaml
index 2327f65a..b4f4a119 100644
--- a/vllm_spyre/config/supported_configs.yaml
+++ b/vllm_spyre/config/supported_configs.yaml
@@ -31,6 +31,10 @@
   configs: [
     { cb: False, tp_size: 1, warmup_shapes: [[512, 0, 64]] },
   ]
+- model: "intfloat/multilingual-e5-large"
+  configs: [
+    { cb: False, tp_size: 1, warmup_shapes: [[512, 0, 64]] },
+  ]
 - model: "BAAI/bge-reranker-v2-m3"
   configs: [
     { cb: False, tp_size: 1, warmup_shapes: [[8192, 0, 1]] },
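
A note on the `architectures` fields added to `vllm_spyre/config/known_model_configs.json`: without them, `intfloat/multilingual-e5-large` and `BAAI/bge-reranker-large` look alike (both `xlm-roberta` with 24 hidden layers), which is the collision the new assertion message in `test_model_config_validator.py` warns about. The sketch below is illustrative only; `match_known_models` is a hypothetical helper, not vllm-spyre's actual `find_known_models_by_model_config` implementation, but it shows the field-matching idea under which the extra key disambiguates the two entries.

# Hypothetical, self-contained sketch (not vllm-spyre's real implementation).
import json
from pathlib import Path


def match_known_models(hf_config: dict, known_configs_path: Path) -> list[str]:
    """Return the ids of known models whose listed fields all match hf_config.

    Assumes each entry in known_model_configs.json lists only the fields
    needed to tell models apart, so a model matches when every listed
    field agrees with the HF config.
    """
    known = json.loads(known_configs_path.read_text())
    return [
        model_id
        for model_id, fields in known.items()
        if all(hf_config.get(key) == value for key, value in fields.items())
    ]


if __name__ == "__main__":
    # The new fixture should now match exactly one entry: "architectures"
    # separates XLMRobertaModel (multilingual-e5-large) from
    # XLMRobertaForSequenceClassification (bge-reranker-large).
    fixture = Path(
        "tests/fixtures/model_configs/intfloat/multilingual-e5-large/config.json")
    hf_config = json.loads(fixture.read_text())
    print(match_known_models(
        hf_config, Path("vllm_spyre/config/known_model_configs.json")))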