|
9 | 9 | from transformers import PretrainedConfig
|
10 | 10 |
|
11 | 11 | from vllm.logger import init_logger
|
| 12 | +from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS |
12 | 13 | from vllm.transformers_utils.config import get_config, get_hf_text_config
|
13 | 14 | from vllm.utils import (get_cpu_memory, get_nvcc_cuda_version, is_cpu, is_hip,
|
14 | 15 | is_neuron)
|
@@ -118,8 +119,8 @@ def _verify_tokenizer_mode(self) -> None:
|
118 | 119 | self.tokenizer_mode = tokenizer_mode
|
119 | 120 |
|
120 | 121 | def _verify_quantization(self) -> None:
121 |     | -    supported_quantization = ["awq", "gptq", "squeezellm", "marlin"]
122 |     | -    rocm_not_supported_quantization = ["awq", "marlin"]
    | 122 | +    supported_quantization = [*QUANTIZATION_METHODS]
    | 123 | +    rocm_supported_quantization = ["gptq", "squeezellm"]
123 | 124 | if self.quantization is not None:
|
124 | 125 | self.quantization = self.quantization.lower()
|
125 | 126 |
|
@@ -155,7 +156,7 @@ def _verify_quantization(self) -> None:
|
155 | 156 | f"Unknown quantization method: {self.quantization}. Must "
|
156 | 157 | f"be one of {supported_quantization}.")
|
157 | 158 | if is_hip(
158 |     | -    ) and self.quantization in rocm_not_supported_quantization:
    | 159 | +    ) and self.quantization not in rocm_supported_quantization:
159 | 160 | raise ValueError(
|
160 | 161 | f"{self.quantization} quantization is currently not "
|
161 | 162 | f"supported in ROCm.")
|
|
0 commit comments