Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion src/flag_gems/runtime/backend/_mthreads/ops/arange.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from flag_gems.utils import triton_lang_extension as tle

logger = logging.getLogger(
f'flag_gems.runtime.backend._mthreads.ops.{__name__.split(".")[-1]}'
f"flag_gems.runtime.backend._mthreads.ops.{__name__.split('.')[-1]}"
)

device_ = runtime.device
Expand Down Expand Up @@ -140,6 +140,19 @@ def arange_start(
else:
device = torch.device(device)

# Handle int64 dtype with float parameters - convert to int
if dtype is torch.int64:
if (
isinstance(start, float)
or isinstance(end, float)
or isinstance(step, float)
):
start = int(start) if isinstance(start, float) else start
end = int(end) if isinstance(end, float) else end
step = int(step) if isinstance(step, float) else step
if step == 0:
raise RuntimeError("step must be nonzero")

is_float_dtype = torch.is_floating_point(torch.tensor(0, dtype=dtype))
use_int64 = dtype == torch.int64
size = _compute_size(start, end, step, is_float_dtype)
Expand Down
2 changes: 1 addition & 1 deletion src/flag_gems/runtime/backend/_mthreads/ops/repeat.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ def repeat(inp: torch.Tensor, sizes) -> torch.Tensor:
assert (
sizes_shape[i] >= 0
), f"the number of repetitions per dimension out of range (expected to >= 0) but got {sizes_shape[i]}"
if sizes_shape[i] == 0:
if in0_shape[i] * sizes_shape[i] == 0:
is_empty = True
out_shape.append(in0_shape[i] * sizes_shape[i])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from flag_gems.utils.tensor_wrapper import StridedBuffer

logger = logging.getLogger(
f'flag_gems.runtime.backend._mthreads.ops.{__name__.split(".")[-1]}'
f"flag_gems.runtime.backend._mthreads.ops.{__name__.split('.')[-1]}"
)


Expand Down Expand Up @@ -437,6 +437,9 @@ def fused_repeat_interleave_dim0(inp, repeats, dim):
def repeat_interleave_self_tensor(inp, repeats, dim=None, *, output_size=None):
logger.debug("GEMS_MTHREADS REPEAT_INTERLEAVE_SELF_TENSOR")

if repeats.numel() == 0:
return inp.clone()

if dim is None:
inp = inp.flatten()
dim = 0
Expand Down
20 changes: 20 additions & 0 deletions src/flag_gems/runtime/backend/_mthreads/tune_configs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,26 @@ cross_entropy_loss_sum_and_scale:
- 256
- 1024

conj_physical:
- META:
BLOCK_SIZE: 64
num_warps: 8
- META:
BLOCK_SIZE: 128
num_warps: 8
- META:
BLOCK_SIZE: 256
num_warps: 8
- META:
BLOCK_SIZE: 512
num_warps: 8
- META:
BLOCK_SIZE: 1024
num_warps: 8
- META:
BLOCK_SIZE: 2048
num_warps: 8

upsample_nearest2d:
- gen: true
param_map:
Expand Down
Loading