Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions src/target/codegen_npuir_dev.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2682,10 +2682,14 @@ void CodeGenTileLangNPUIRDEV::VpadCodegen(const CallNode *op) {

void CodeGenTileLangNPUIRDEV::VflipCodegen(const CallNode *op) {
tvm::tl::NpuirFlip npuirop(op->args, this->vmap);
Value src = GenSubviewFromRegion(npuirop.src, npuirop.src_range);
Value dst = GenSubviewFromRegion(npuirop.dst, npuirop.dst_range);
builder.create<mlir::hivm::VFlipOp>(builder.getUnknownLoc(), TypeRange{}, src,
dst, npuirop.axis);
Value src = GetVarValue(npuirop.src);
Value dst = GetVarValue(npuirop.dst);

auto loc = builder.getUnknownLoc();
auto srcTy = src.getType().cast<RankedTensorType>();

auto flipOp = builder.create<mlir::hfusion::FlipOp>(loc, srcTy, src, static_cast<uint64_t>(npuirop.axis));
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If a negative axis is passed upstream (e.g., axis=-1 in Python), the implicit int64_t → u_int64_t conversion would produce a huge unsigned value. Add negative axis legalization in the NpuirFlip constructor (converting negative to positive), or add an ICHECK(npuirop.axis >= 0) assertion in VflipCodegen .

SetVarValue(npuirop.dst, flipOp.getResult());
}

void CodeGenTileLangNPUIRDEV::Nd2NzCodegen(const CallNode *op) {
Expand Down
70 changes: 70 additions & 0 deletions testing/npuir/shape_manipulation_ops/test_vflip_dev.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import os
import pytest
import torch
import torch_npu

import tilelang
import tilelang.language as T

# Drop any previously compiled kernels so this test compiles fresh code.
tilelang.cache.clear_cache()
# Select the Ascend "Developer" compilation mode before any kernel is built.
os.environ["TILELANG_ASCEND_MODE"] = "Developer"


@pytest.fixture(
    params=[
        # (shape, dtype, flip axis) — both axes over several shapes/dtypes.
        ((4, 64), "float16", 0),
        ((64, 64), "float16", 0),
        ((128, 64), "float32", 0),
        ((64, 256), "float32", 0),
        ((4, 64), "float16", 1),
        ((64, 64), "float16", 1),
        ((128, 64), "float32", 1),
        ((64, 256), "float32", 1),
    ]
)
def flip_case(request):
    """Parametrized (shape, dtype, axis) tuple for the flip kernel test."""
    return request.param


def flip_kernel(M, N, axis, dtype):
    """Build a TileLang prim_func that writes flip(A, axis) into B.

    Args:
        M, N: tensor dimensions.
        axis: axis to reverse (0 or 1).
        dtype: element dtype name, e.g. "float16" or "float32".

    Returns:
        The prim_func ``main(A, B)`` ready for ``tilelang.compile``.
    """
    BLOCK_SIZE = 1  # single kernel block: the whole tensor is processed at once

    @T.prim_func
    def main(A: T.Tensor((M, N), dtype), B: T.Tensor((M, N), dtype)):
        with T.Kernel(BLOCK_SIZE, is_npu=True) as (cid, _):
            A_VEC = T.alloc_shared((M, N), dtype)
            B_VEC = T.alloc_shared((M, N), dtype)

            T.copy(A, A_VEC)
            # NOTE(review): a reviewer suggested using T.flip instead of
            # T.npuir_flip — confirm which API is current before switching.
            T.npuir_flip(A_VEC, B_VEC, axis)
            T.copy(B_VEC, B)

    return main


def generate_tensor(shape, dtype, clear=False, positive=False):
    """Create a torch tensor of `shape` whose dtype is named by `dtype`.

    Args:
        shape: tensor shape tuple.
        dtype: dtype name, e.g. "float32"; random fill supports only
            "float32"/"float16".
        clear: if True, return an all-zeros tensor (any torch dtype name).
        positive: if True, make every element strictly positive (abs + 0.1).

    Raises:
        ValueError: for a non-clear request with an unsupported dtype.
    """
    # getattr resolves the dtype name safely — no eval() on a string.
    if clear:
        return torch.zeros(shape, dtype=getattr(torch, dtype))
    if dtype in ("float32", "float16"):
        t = torch.randn(size=shape, dtype=getattr(torch, dtype))
        if positive:
            t = torch.abs(t) + 0.1
        return t
    raise ValueError('Invalid parameter "dtype" is found : {}'.format(dtype))


def test_flip_kernel(flip_case):
    """Compile the flip kernel for one case and check it against torch.flip."""
    shape, dtype, axis = flip_case

    kernel = tilelang.compile(flip_kernel(*shape, axis, dtype), target="npuir")

    src_npu = generate_tensor(shape, dtype).npu()
    dst_npu = generate_tensor(shape, dtype, clear=True).npu()

    # Reference result computed on CPU before launching the NPU kernel.
    expected = torch.flip(src_npu.cpu(), dims=[axis])
    kernel(src_npu, dst_npu)

    assert torch.allclose(dst_npu.cpu(), expected, rtol=1e-5, atol=1e-5)