Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
316 changes: 0 additions & 316 deletions benchmark/matmul/benchmark_matmul_intrinsic.py

This file was deleted.

2 changes: 1 addition & 1 deletion docs/deeplearning_operators/matmul.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ Below is a simplified code snippet for a 1024 x 1024 x 1024 matrix multiplicatio
```python
import tilelang
import tilelang.language as T
- from tilelang.intrinsics import make_mma_swizzle_layout
+ from tilelang.cuda.intrinsics import make_mma_swizzle_layout

def matmul(M, N, K, block_M, block_N, block_K, dtype="float16", accum_dtype="float"):
@T.prim_func
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
import tilelang.language as T
from tilelang import tvm as tvm
from tvm import DataType
- from tilelang.intrinsics.mma_layout import (
+ from tilelang.cuda.intrinsics.layout.mma_layout import (
make_mma_swizzle_layout as make_swizzle_layout,
)
import numpy as np

- from tilelang.intrinsics.mma_macro_generator import (
+ from tilelang.cuda.intrinsics.macro.mma_macro_generator import (
INT4TensorCoreIntrinEmitter,
)
from tilelang.transform import simplify_prim_func
Expand Down
4 changes: 2 additions & 2 deletions examples/dequantize_gemm/example_dequant_gemm_fine_grained.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ def tl_matmul_with_ladder_weight_only_transform_block_reduce_int4(
accum_dtype,
transform_b,
):
- from tilelang.intrinsics.mma_layout import make_mma_swizzle_layout as make_swizzle_layout
- from tilelang.intrinsics.mma_macro_generator import (
+ from tilelang.cuda.intrinsics.layout.mma_layout import make_mma_swizzle_layout as make_swizzle_layout
+ from tilelang.cuda.intrinsics.macro.mma_macro_generator import (
TensorCoreIntrinEmitterWithLadderTransform,
)

Expand Down
2 changes: 1 addition & 1 deletion examples/gemm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ Below is a more advanced snippet that showcases how to apply memory layouts, ena
import tilelang.language as T
# `make_mma_swizzle_layout` is a python-defined layout function
# that helps align data for MMA (Matrix Multiply-Accumulate) operations.
- from tilelang.intrinsics import make_mma_swizzle_layout as make_swizzle_layout
+ from tilelang.cuda.intrinsics import make_mma_swizzle_layout as make_swizzle_layout

def matmul(M, N, K, block_M, block_N, block_K, dtype=T.float16, accum_dtype=T.float):
@T.prim_func
Expand Down
4 changes: 2 additions & 2 deletions examples/gemm/example_gemm_intrinsics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from tvm import DataType
import tilelang
import tilelang.language as T
- from tilelang.intrinsics import get_swizzle_layout
- from tilelang.intrinsics.mma_macro_generator import (
+ from tilelang.cuda.intrinsics import get_swizzle_layout
+ from tilelang.cuda.intrinsics.macro.mma_macro_generator import (
TensorCoreIntrinEmitter,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import tilelang.language as T
from tilelang.tileop.base import GemmWarpPolicy
from tilelang.layout import make_swizzled_layout
- from tilelang.intrinsics.mfma_macro_generator import MatrixCorePreshuffleIntrinEmitter
+ from tilelang.rocm.intrinsics.mfma_macro_generator import MatrixCorePreshuffleIntrinEmitter
from tilelang.utils import determine_fp8_type

tilelang.testing.set_random_seed(0)
Expand Down
Loading
Loading