diff --git a/python/ctranslate2/converters/transformers.py b/python/ctranslate2/converters/transformers.py index 6924ac637..737899dbc 100644 --- a/python/ctranslate2/converters/transformers.py +++ b/python/ctranslate2/converters/transformers.py @@ -1331,6 +1331,8 @@ def get_vocabulary(self, model, tokenizer): extra_ids = model.config.vocab_size - len(tokens) for i in range(extra_ids): tokens.append("<extra_id_%d>" % i) + if model.config.vocab_size < len(tokens): + tokens = tokens[: model.config.vocab_size] return tokens