Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/tilegym/ops/cutile/mla.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ def _mla_sm90_autotune_configs():
for tm in [64, 128, 256]:
for tn in [64, 128]:
yield SimpleNamespace(TILE_M=tm, TILE_N=tn, num_ctas=1, occupancy=1)
for tm in [64, 128]:
for tn in [64, 128]:
yield SimpleNamespace(TILE_M=tm, TILE_N=tn, num_ctas=1, occupancy=2)


@ct.kernel
Expand Down
18 changes: 18 additions & 0 deletions src/tilegym/suites/liger/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,32 @@
# Import unified interface
from .ops import cross_entropy
from .ops import fused_linear_jsd
from .ops import fused_neighborhood_attention
from .ops import geglu
from .ops import group_norm
from .ops import jsd
from .ops import kl_div
from .ops import layer_norm
from .ops import llama4_rope
from .ops import multi_token_attention
from .ops import qwen2vl_mrope
from .ops import rope
from .ops import sparsemax
from .ops import tiled_mlp

# Names exported by `from <package> import *`: one entry per op imported
# from `.ops` above, kept in alphabetical order.
__all__ = [
"cross_entropy",
"fused_linear_jsd",
"fused_neighborhood_attention",
"geglu",
"group_norm",
"jsd",
"kl_div",
"layer_norm",
"llama4_rope",
"multi_token_attention",
"qwen2vl_mrope",
"rope",
"sparsemax",
"tiled_mlp",
]
32 changes: 32 additions & 0 deletions src/tilegym/suites/liger/cutile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,56 @@

from . import cross_entropy # noqa: F401
from . import fused_linear_jsd # noqa: F401
from . import fused_neighborhood_attention # noqa: F401
from . import geglu # noqa: F401
from . import group_norm # noqa: F401
from . import jsd # noqa: F401
from . import kl_div # noqa: F401
from . import layer_norm # noqa: F401
from . import llama4_rope # noqa: F401
from . import multi_token_attention # noqa: F401
from . import qwen2vl_mrope # noqa: F401
from . import rope # noqa: F401
from . import sparsemax # noqa: F401
from . import tiled_mlp # noqa: F401
from .cross_entropy import CrossEntropyCuTileFunction # noqa: F401
from .fused_linear_jsd import FusedLinearJSDCuTileFunction # noqa: F401
from .geglu import GEGLUCuTileFunction # noqa: F401
from .group_norm import GroupNormCuTileFunction # noqa: F401
from .jsd import JSDCuTileFunction # noqa: F401
from .kl_div import KLDivCuTileFunction # noqa: F401
from .layer_norm import LayerNormCuTileFunction # noqa: F401
from .llama4_rope import Llama4RopeCuTileFunction # noqa: F401
from .multi_token_attention import MultiTokenAttentionCuTileFunction # noqa: F401
from .qwen2vl_mrope import Qwen2VLMRopeCuTileFunction # noqa: F401
from .rope import RopeCuTileFunction # noqa: F401
from .sparsemax import SparsemaxCuTileFunction # noqa: F401

# Names exported by `from <package> import *`. Two groups, each kept in
# alphabetical order so new entries have one obvious place to go:
#   1. the *CuTileFunction classes re-exported from the op submodules;
#   2. the op submodules themselves.
# NOTE(review): `fused_neighborhood_attention` and `tiled_mlp` are exported
# as submodules only; no *CuTileFunction class is imported for them in this
# file — confirm that is intentional.
__all__ = [
    # Re-exported *CuTileFunction classes.
    "CrossEntropyCuTileFunction",
    "FusedLinearJSDCuTileFunction",
    "GEGLUCuTileFunction",
    "GroupNormCuTileFunction",
    "JSDCuTileFunction",
    "KLDivCuTileFunction",
    "LayerNormCuTileFunction",
    "Llama4RopeCuTileFunction",
    "MultiTokenAttentionCuTileFunction",
    "Qwen2VLMRopeCuTileFunction",
    "RopeCuTileFunction",
    "SparsemaxCuTileFunction",
    # Op submodules.
    "cross_entropy",
    "fused_linear_jsd",
    "fused_neighborhood_attention",
    "geglu",
    "group_norm",
    "jsd",
    "kl_div",
    "layer_norm",
    "llama4_rope",
    "multi_token_attention",
    "qwen2vl_mrope",
    "rope",
    "sparsemax",
    "tiled_mlp",
]
Loading
Loading