3 changes: 3 additions & 0 deletions .github/workflows/test.yml
@@ -119,6 +119,8 @@ jobs:
uv.lock
tests/**/*.py
vllm_spyre/**/*.py
vllm_spyre/config/known_model_configs.json
vllm_spyre/config/supported_configs.yaml

- name: "Install PyTorch 2.7.1"
if: steps.changed-src-files.outputs.any_changed == 'true'
@@ -157,6 +159,7 @@ jobs:

- name: "Standardize HF model names for caching"
id: standardize-names
if: steps.changed-src-files.outputs.any_changed == 'true'
run: |
# replace '/' characters in HF_MODEL with '--' for GHA cache keys and
# in model file names in local HF hub cache
2 changes: 2 additions & 0 deletions docs/user_guide/supported_models.md
@@ -35,13 +35,15 @@ configurations.
| [BAAI/BGE-Reranker (v2-m3)][] | 1 | 8192 | 1 |
| [BAAI/BGE-Reranker (Large)][] | 1 | 512 | 1 |
| [BAAI/BGE-Reranker (Large)][] | 1 | 512 | 64 |
| [Multilingual-E5-large][] | 1 | 512 | 64 |

[Granite-3.3-8b]: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct
[Granite-3.3-8b (FP8)]: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct
[Granite-Embedding-125m (English)]: https://huggingface.co/ibm-granite/granite-embedding-125m-english
[Granite-Embedding-278m (Multilingual)]: https://huggingface.co/ibm-granite/granite-embedding-278m-multilingual
[BAAI/BGE-Reranker (v2-m3)]: https://huggingface.co/BAAI/bge-reranker-v2-m3
[BAAI/BGE-Reranker (Large)]: https://huggingface.co/BAAI/bge-reranker-large
[Multilingual-E5-large]: https://huggingface.co/intfloat/multilingual-e5-large

## Runtime Validation

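As a usage note for the new table row (tensor-parallel size 1, max length 512, batch size 64, matching the warmup shape added to supported_configs.yaml later in this diff): a minimal sketch of serving the model with that shape. The VLLM_SPYRE_* variable names and the embedding API calls are assumptions based on common vllm-spyre/vLLM usage, not something this PR adds.

# Hypothetical launch sketch for the newly documented configuration.
# The VLLM_SPYRE_* variable names are assumed, not taken from this diff.
import os

os.environ["VLLM_SPYRE_WARMUP_PROMPT_LENS"] = "512"
os.environ["VLLM_SPYRE_WARMUP_NEW_TOKENS"] = "0"   # embedding models generate no tokens
os.environ["VLLM_SPYRE_WARMUP_BATCH_SIZES"] = "64"

from vllm import LLM

llm = LLM(model="intfloat/multilingual-e5-large",
          task="embed",
          tensor_parallel_size=1)
outputs = llm.embed(["A short example sentence."])
print(len(outputs[0].outputs.embedding))  # 1024, the model's hidden_size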
2 changes: 2 additions & 0 deletions tests/download_model_configs.py
100644 → 100755
@@ -1,3 +1,5 @@
#!/usr/bin/env python3

import os
from pathlib import Path
from urllib.request import urlretrieve
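The mode change to 100755 together with the new shebang makes the script directly runnable. For orientation, a minimal sketch of a downloader built only from the imports visible above (os, Path, urlretrieve); the URL pattern, directory layout, and model list are illustrative assumptions, not the script's actual contents.

#!/usr/bin/env python3
# Sketch only: fetch a Hugging Face config.json with the stdlib.
import os
from pathlib import Path
from urllib.request import urlretrieve

MODELS = ["intfloat/multilingual-e5-large"]  # hypothetical list

def download_config(model_id: str, dest_root: Path) -> Path:
    # Hugging Face serves raw files at <repo>/resolve/<revision>/<filename>
    url = f"https://huggingface.co/{model_id}/resolve/main/config.json"
    dest = dest_root / model_id.replace("/", "--") / "config.json"
    dest.parent.mkdir(parents=True, exist_ok=True)
    urlretrieve(url, dest)
    return dest

if __name__ == "__main__":
    root = Path(os.environ.get("MODEL_CONFIG_DIR", "model_configs"))
    for model in MODELS:
        print(download_config(model, root))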
@@ -0,0 +1,27 @@
{
  "architectures": [
    "XLMRobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.55.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}
7 changes: 5 additions & 2 deletions tests/utils/test_model_config_validator.py
@@ -212,9 +212,12 @@ def test_find_model_by_config(monkeypatch, caplog):
    assert model_config.model != model_id

    models_found = find_known_models_by_model_config(model_config)
    assert len(models_found) == 1
    assert len(models_found) == 1, \
        (f"More than one model found. Need to add more distinguishing"
         f" parameters for models `{models_found}` in file"
         f" `vllm_spyre/config/known_model_configs.json`")
    assert models_found[0] == model_id

    validate(model_config)
    assert f"Model '{model_config.model}' is not a known model"
    assert f"Model '{model_config.model}' is not a known" in caplog.text
    assert f"Found model '{model_id}'" in caplog.text
9 changes: 8 additions & 1 deletion vllm_spyre/config/known_model_configs.json
@@ -1,5 +1,6 @@
{
  "BAAI/bge-reranker-large": {
    "architectures": ["XLMRobertaForSequenceClassification"],
    "model_type": "xlm-roberta",
    "max_position_embeddings": 514,
    "num_hidden_layers": 24,
@@ -39,5 +40,11 @@
    "model_type": "roberta",
    "num_hidden_layers": 24,
    "vocab_size": 50265
  },
  "intfloat/multilingual-e5-large": {
    "architectures": ["XLMRobertaModel"],
    "model_type": "xlm-roberta",
    "num_hidden_layers": 24,
    "vocab_size": 250002
  }
  }
}
4 changes: 4 additions & 0 deletions vllm_spyre/config/supported_configs.yaml
@@ -31,6 +31,10 @@
  configs: [
    { cb: False, tp_size: 1, warmup_shapes: [[512, 0, 64]] },
  ]
- model: "intfloat/multilingual-e5-large"
  configs: [
    { cb: False, tp_size: 1, warmup_shapes: [[512, 0, 64]] },
  ]
- model: "BAAI/bge-reranker-v2-m3"
  configs: [
    { cb: False, tp_size: 1, warmup_shapes: [[8192, 0, 1]] },
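Finally, a minimal sketch of how an entry like the one added above could be consulted during runtime validation. It assumes the file is a top-level list of model entries and that a warmup shape is ordered (prompt length, new tokens, batch size); neither assumption comes from this diff.

# Sketch only: check whether a requested configuration is listed as supported.
import yaml

def is_supported(model: str, cb: bool, tp_size: int,
                 warmup_shape: list[int],
                 path: str = "vllm_spyre/config/supported_configs.yaml") -> bool:
    with open(path) as f:
        entries = yaml.safe_load(f)
    for entry in entries:
        if entry["model"] != model:
            continue
        for cfg in entry["configs"]:
            if (cfg["cb"] == cb and cfg["tp_size"] == tp_size
                    and warmup_shape in cfg["warmup_shapes"]):
                return True
    return False

# The new entry above would make this return True:
# is_supported("intfloat/multilingual-e5-large", cb=False, tp_size=1,
#              warmup_shape=[512, 0, 64])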