Commit: AWQ 4-bit quantization support (#2508)
* Add an AWQ linear layer from AutoAWQ and/or llm-awq
* Add a generic converter for Llama-like models from HF, with or without AWQ quantization
Showing 6 changed files with 582 additions and 16 deletions.
New file added (38 lines):

```python
import torch.nn as nn


def replace_awq_linear(
    model, module_to_convert=[], w_bit=4, group_size=128, q_type="llm_awq"
):
    # Select the AWQ linear implementation for the requested backend.
    if q_type == "llm_awq":
        try:
            from awq.quantize.qmodule import WQLinear
        except ImportError:
            raise ImportError("Install llm-awq to use awq")
        AWQLin = WQLinear
    elif q_type in ["aawq_gemm", "aawq_gemv"]:
        try:
            from awq.modules.linear import WQLinear_GEMM, WQLinear_GEMV
        except ImportError:
            raise ImportError("Install AutoAWQ to use awq")
        if q_type == "aawq_gemm":
            AWQLin = WQLinear_GEMM
        else:
            AWQLin = WQLinear_GEMV
    else:
        raise ValueError(f"Unsupported AWQ q_type: {q_type}")

    # Walk the module tree, recursing into containers so that nested
    # nn.Linear layers are reached as well.
    for name, module in model.named_children():
        if len(list(module.children())) > 0:
            replace_awq_linear(module, module_to_convert, w_bit, group_size, q_type)

        # Replace each targeted nn.Linear with an AWQ linear of the same
        # shape; the quantized weights are filled in when the checkpoint
        # is loaded afterwards.
        if isinstance(module, nn.Linear) and name in module_to_convert:
            model._modules[name] = AWQLin(
                w_bit=w_bit,
                group_size=group_size,
                in_features=module.in_features,
                out_features=module.out_features,
                bias=module.bias is not None,
                dev=module.weight.device,
            )
    return model
```
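For reference, a minimal usage sketch (not part of the commit): it assumes AutoAWQ is installed, and the toy module and layer names below are made up for illustration. Only the layer listed in `module_to_convert` is swapped for an AWQ placeholder.

```python
# Hypothetical usage sketch, not from the commit; module names are illustrative.
import torch.nn as nn

class TinyBlock(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_keys = nn.Linear(4096, 4096, bias=False)  # will be replaced
        self.up_proj = nn.Linear(4096, 11008, bias=False)     # left untouched

model = TinyBlock()
model = replace_awq_linear(
    model,
    module_to_convert=["linear_keys"],
    w_bit=4,
    group_size=128,
    q_type="aawq_gemm",  # AutoAWQ GEMM kernel; requires AutoAWQ installed
)
# model.linear_keys is now a WQLinear_GEMM shell; its int4 weights are
# expected to be loaded from an AWQ checkpoint afterwards.
print(type(model.linear_keys))
```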