feat: remove the dependency on FusedMoE (#2153)
Showing 7 changed files with 1,602 additions and 7 deletions.
@@ -0,0 +1,44 @@
from contextlib import contextmanager
from typing import Any, Dict, Optional

# Import the kernel module for its side effects and re-export the public API.
import sglang.srt.layers.triton_fused_moe.fused_moe  # noqa
from sglang.srt.layers.triton_fused_moe.fused_moe import (
    fused_experts,
    fused_topk,
    get_config_file_name,
    grouped_topk,
)
from sglang.srt.layers.triton_fused_moe.layer import (
    FusedMoE,
    FusedMoEMethodBase,
    FusedMoeWeightScaleSupported,
)

# Module-level kernel configuration; None means "use the default/tuned config".
_config: Optional[Dict[str, Any]] = None


@contextmanager
def override_config(config):
    # Temporarily replace the global kernel configuration, restoring the
    # previous value when the context exits (even if an exception is raised).
    global _config
    old_config = _config
    _config = config
    try:
        yield
    finally:
        _config = old_config


def get_config() -> Optional[Dict[str, Any]]:
    return _config


__all__ = [
    "FusedMoE",
    "FusedMoEMethodBase",
    "FusedMoeWeightScaleSupported",
    "override_config",
    "get_config",
    "fused_moe",
    "fused_topk",
    "fused_experts",
    "get_config_file_name",
    "grouped_topk",
]
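
As a quick illustration (not part of the diff), override_config can pin a hand-tuned kernel configuration for a block of calls. The dictionary keys below are illustrative Triton tile parameters, not values taken from this commit:

from sglang.srt.layers.triton_fused_moe import get_config, override_config

# Hypothetical tuning parameters for a single batch size (illustrative only).
my_config = {
    "BLOCK_SIZE_M": 64,
    "BLOCK_SIZE_N": 64,
    "BLOCK_SIZE_K": 32,
    "GROUP_SIZE_M": 8,
    "num_warps": 4,
    "num_stages": 2,
}

with override_config(my_config):
    assert get_config() is my_config  # kernels launched here see my_config
assert get_config() is None  # previous value restored on exit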
@@ -0,0 +1,10 @@
This directory contains tuned configurations for different settings of the fused_moe kernel.
For each combination of
- E (number of experts)
- N (intermediate size)
- device_name (torch.cuda.get_device_name())
the JSON file contains a mapping from M (batch size) to the chosen configuration.

The example configurations provided are for the Mixtral model for TP2 on H100
and TP4 on A100. Mixtral has intermediate size N = 14336, i.e. for TP2 we have
N = 7168 and for TP4 we have N = 3584.
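
For concreteness, a minimal sketch of how such a file can be used, assuming the scheme described above; the parameter names and the nearest-key fallback are assumptions for illustration, not copied from this commit:

import json

# Illustrative contents of one config file: keys are batch sizes M (as
# strings, per JSON), values are Triton launch parameters for that M.
example_config = {
    "1": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64,
          "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 2},
    "64": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32,
           "GROUP_SIZE_M": 8, "num_warps": 8, "num_stages": 3},
}

def lookup(config: dict, m: int) -> dict:
    # Pick the tuned entry whose batch size is closest to m (hypothetical
    # fallback strategy for batch sizes with no exact entry).
    best = min((int(k) for k in config), key=lambda k: abs(k - m))
    return config[str(best)]

print(lookup(example_config, 48))  # -> the "64" entry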