diff --git a/tileops/manifest/elementwise_binary.yaml b/tileops/manifest/elementwise_binary.yaml index efa858c1..186b6300 100644 --- a/tileops/manifest/elementwise_binary.yaml +++ b/tileops/manifest/elementwise_binary.yaml @@ -150,8 +150,9 @@ AddFwdOp: # Output follows PyTorch broadcasting; numel uses the broadcast shape. - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.add_fwd_roofline" @@ -178,8 +179,9 @@ SubFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.sub_fwd_roofline" @@ -204,8 +206,9 @@ MulFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.mul_fwd_roofline" @@ -233,8 +236,9 @@ DivFwdOp: - "output.shape == broadcast_shapes(input.shape, other.shape)" - "rounding_mode is None or rounding_mode in ('trunc', 'floor')" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.div_fwd_roofline" @@ -259,8 +263,9 @@ RemainderFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.remainder_fwd_roofline" @@ -285,8 +290,9 @@ PowFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, exponent.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], exponent_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], exponent_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.pow_fwd_roofline" @@ -311,8 +317,9 @@ FloorDivideFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.floor_divide_fwd_roofline" @@ -342,8 +349,9 @@ LerpFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, end.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], end_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], end_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.lerp_fwd_roofline" @@ -368,8 +376,9 @@ MaximumFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.maximum_fwd_roofline" @@ -394,8 +403,9 @@ MinimumFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.minimum_fwd_roofline" @@ -424,8 +434,9 @@ EqFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.eq_fwd_roofline" @@ -450,8 +461,9 @@ NeFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.ne_fwd_roofline" @@ -476,8 +488,9 @@ GtFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.gt_fwd_roofline" @@ -502,8 +515,9 @@ LtFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.lt_fwd_roofline" @@ -528,8 +542,9 @@ GeFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.ge_fwd_roofline" @@ -554,8 +569,9 @@ LeFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.le_fwd_roofline" @@ -584,8 +600,9 @@ LogicalAndFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [bool, float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [bool, float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.logical_and_fwd_roofline" @@ -610,8 +627,9 @@ LogicalOrFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [bool, float16, bfloat16, float32], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [bool, float16, bfloat16, float32], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.logical_or_fwd_roofline" @@ -640,8 +658,9 @@ BitwiseAndFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [bool, int32, int64], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [bool, int32, int64], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.bitwise_and_fwd_roofline" @@ -666,8 +685,9 @@ BitwiseOrFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [bool, int32, int64], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [bool, int32, int64], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.bitwise_or_fwd_roofline" @@ -692,8 +712,9 @@ BitwiseXorFwdOp: shape_rules: - "output.shape == broadcast_shapes(input.shape, other.shape)" - workloads: [] - + workloads: + - {input_shape: [2048, 4096], other_shape: [2048, 4096], dtypes: [bool, int32, int64], label: hidden-state-prefill} + - {input_shape: [16, 256, 56, 56], other_shape: [256, 1, 1], dtypes: [bool, int32, int64], label: cnn-feat-broadcast} roofline: func: "tileops.perf.formulas.bitwise_xor_fwd_roofline"