Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 63 additions & 42 deletions tileops/manifest/elementwise_binary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,9 @@ AddFwdOp:
# Output follows PyTorch broadcasting; numel uses the broadcast shape.
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.add_fwd_roofline"

Expand All @@ -178,8 +179,9 @@ SubFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.sub_fwd_roofline"

Expand All @@ -204,8 +206,9 @@ MulFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.mul_fwd_roofline"

Expand Down Expand Up @@ -233,8 +236,9 @@ DivFwdOp:
- "output.shape == broadcast_shapes(input.shape, other.shape)"
- "rounding_mode is None or rounding_mode in ('trunc', 'floor')"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.div_fwd_roofline"

Expand All @@ -259,8 +263,9 @@ RemainderFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.remainder_fwd_roofline"

Expand All @@ -285,8 +290,9 @@ PowFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, exponent.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], exponent_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], exponent_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.pow_fwd_roofline"

Expand All @@ -311,8 +317,9 @@ FloorDivideFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.floor_divide_fwd_roofline"

Expand Down Expand Up @@ -342,8 +349,9 @@ LerpFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, end.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], end_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], end_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.lerp_fwd_roofline"

Expand All @@ -368,8 +376,9 @@ MaximumFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.maximum_fwd_roofline"

Expand All @@ -394,8 +403,9 @@ MinimumFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.minimum_fwd_roofline"

Expand Down Expand Up @@ -424,8 +434,9 @@ EqFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.eq_fwd_roofline"

Expand All @@ -450,8 +461,9 @@ NeFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.ne_fwd_roofline"

Expand All @@ -476,8 +488,9 @@ GtFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.gt_fwd_roofline"

Expand All @@ -502,8 +515,9 @@ LtFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.lt_fwd_roofline"

Expand All @@ -528,8 +542,9 @@ GeFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.ge_fwd_roofline"

Expand All @@ -554,8 +569,9 @@ LeFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.le_fwd_roofline"

Expand Down Expand Up @@ -584,8 +600,9 @@ LogicalAndFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [bool, float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [bool, float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.logical_and_fwd_roofline"

Expand All @@ -610,8 +627,9 @@ LogicalOrFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [bool, float16, bfloat16, float32], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [bool, float16, bfloat16, float32], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.logical_or_fwd_roofline"

Expand Down Expand Up @@ -640,8 +658,9 @@ BitwiseAndFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [bool, int32, int64], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [bool, int32, int64], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.bitwise_and_fwd_roofline"

Expand All @@ -666,8 +685,9 @@ BitwiseOrFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [bool, int32, int64], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [bool, int32, int64], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.bitwise_or_fwd_roofline"

Expand All @@ -692,8 +712,9 @@ BitwiseXorFwdOp:
shape_rules:
- "output.shape == broadcast_shapes(input.shape, other.shape)"

workloads: []

workloads:
- {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [bool, int32, int64], label: hidden-state-prefill}
- {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [bool, int32, int64], label: cnn-feat-broadcast}
roofline:
func: "tileops.perf.formulas.bitwise_xor_fwd_roofline"

Expand Down
Loading