[Mosaic GPU][NFC] Move tests that don't require a GPU to platform-agnostic tests.

allanrenucci · Google-ML-Automation · commit 1df8d4cf1771 · 2025-11-03T09:46:58.000-08:00
PiperOrigin-RevId: 827538093
diff --git a/tests/mosaic/gpu_dialect_test.py b/tests/mosaic/gpu_dialect_test.py
@@ -1435,6 +1435,59 @@ def test_memref_transforms_with_transpose(self):
       strides, _ = ty_transformed.get_strides_and_offset()
       self.assertEqual(strides, [512, 4096, 1, 16])
 
+  def test_optimized_gmem_transfers_are_not_supported(self):
+    """as_gpu_kernel must reject a vector load marked `optimized`."""
+    def body(ctx, input, output, scratch):
+      del ctx, output, scratch  # Only `input` is used.
+      src_ty = ir.MemRefType(input.type)
+      c0 = arith.constant(ir.IndexType.get(), 0)
+      vec_ty = ir.VectorType.get(src_ty.shape, src_ty.element_type)
+      # Load the whole ref, starting at index 0 along every dimension.
+      load_op = vector.LoadOp(vec_ty, input, [c0] * len(src_ty.shape))
+      load_op.attributes["optimized"] = ir.BoolAttr.get(True)
+      wgmma = layouts.to_layout_attr(mgpu.WGMMA_LAYOUT)
+      mgpu.dialect.layout_cast(load_op.result, wgmma)
+
+    shape, dtype = (128, 128), jnp.bfloat16
+    kernel_kwargs = dict(
+        grid=(1, 1, 1),
+        block=(128, 1, 1),
+        in_shape=jax.ShapeDtypeStruct(shape, dtype),
+        out_shape=jax.ShapeDtypeStruct(shape, dtype),
+        smem_scratch_shape=(),
+        thread_semantics=mgpu.LoweringSemantics.Warpgroup,
+    )
+    expected = "Only optimized transfers to SMEM supported"
+    # The error is expected while the kernel is being built.
+    with self.assertRaisesRegex(NotImplementedError, expected):
+      mgpu.as_gpu_kernel(body, **kernel_kwargs)
+
+  def test_inconsistent_collective_attributes_in_kernel_raise(self):
+    """Mixing collective and non-collective tmem_alloc ops must raise."""
+    def body(ctx, out, smem_ptr):
+      del ctx, out  # Only `smem_ptr` is used.
+      tmem_ty = ir.MemRefType.get(
+          (128, 128), ir.BF16Type.get(), memory_space=mgpu_utils.tmem()
+      )
+      # Same type and scratch ref; the two allocs differ only in `collective`.
+      for collective in (False, True):
+        mgpu.dialect.tmem_alloc(tmem_ty, smem_ptr, collective=collective)
+
+    expected = (
+        "Collective attributes are inconsistent across operations in the"
+        " kernel"
+    )
+    kernel_kwargs = dict(
+        grid=(1, 1, 1),
+        block=(128, 1, 1),
+        in_shape=(),
+        out_shape=(jax.ShapeDtypeStruct((), jnp.int32),),
+        smem_scratch_shape=jax.ShapeDtypeStruct((), jnp.int32),
+        thread_semantics=mgpu.LoweringSemantics.Warpgroup,
+    )
+    with self.assertRaisesRegex(ValueError, expected):
+      mgpu.as_gpu_kernel(body, **kernel_kwargs)
+
 
 if __name__ == "__main__":
   parameterized.absltest.main(testLoader=jtu.JaxTestLoader())
diff --git a/tests/mosaic/gpu_layout_inference_test.py b/tests/mosaic/gpu_layout_inference_test.py
@@ -889,6 +889,19 @@ def test_layout_cast_of_non_splat_constant_to_splat_raises(self):
     ):
       mgpu.infer_layout(self.module)
 
+  def test_layout_of_wgmma_layout_to_wgmma_row_layout_raises(self):
+    """Chained casts to WGMMA then WGMMA_ROW must make infer_layout raise."""
+    with ir.InsertionPoint(self.module.body):
+      (value,) = undefs(ir.VectorType.get((128, 128), ir.F32Type.get()))
+      first = layouts.to_layout_attr(fa.WGMMA_LAYOUT)
+      second = layouts.to_layout_attr(fa.WGMMA_ROW_LAYOUT)
+      # Chain the casts: the second one consumes the result of the first.
+      mgpu.dialect.layout_cast(mgpu.dialect.layout_cast(value, first), second)
+
+    expected = "user-provided layout casts are unsatisfiable"
+    with self.assertRaisesRegex(ValueError, expected):
+      mgpu.infer_layout(self.module)
+
   def test_infer_layout_for_tmem_alloc_by_default(self):
     f32 = ir.F32Type.get()
     i32 = ir.IntegerType.get_signless(32)
diff --git a/tests/mosaic/gpu_test.py b/tests/mosaic/gpu_test.py
@@ -4000,28 +4000,6 @@ def body(ctx, param: ir.Value, result: ir.Value, smem: list[ir.Value]):
     param = self.prng.uniform(-1, 1, shape).astype(dtype)
     self.assertArraysEqual(kernel(param), param)
 
-  def test_optimized_gmem_transfers_are_not_supported(self):
-    def body(ctx, input, output, scratch):
-      del ctx, output, scratch
-      reg = vector_load(input, optimized=True)
-      layout = layouts.to_layout_attr(fa.WGMMA_LAYOUT)
-      reg = mgpu_dialect.layout_cast(reg, layout)
-
-    shape = (128, 128)
-    dtype = jnp.bfloat16
-    with self.assertRaisesRegex(
-        NotImplementedError, "Only optimized transfers to SMEM supported"
-    ):
-      mgpu.as_gpu_kernel(
-          body,
-          grid=(1, 1, 1),
-          block=(128, 1, 1),
-          in_shape=jax.ShapeDtypeStruct(shape, dtype),
-          out_shape=jax.ShapeDtypeStruct(shape, dtype),
-          smem_scratch_shape=(),
-          thread_semantics=mgpu.LoweringSemantics.Warpgroup,
-      )
-
   def test_pointwise_kernel(self):
     def add(ctx, a, b, result, smem):
       del ctx, smem
@@ -4321,33 +4299,6 @@ def body(ctx, result_gmem_ref, scratch):
         kernel(), jax.lax.broadcast_in_dim(x, output_shape, bcast_dims)
     )
 
-  def test_bad_layout_cast_raises_in_inference(self):
-    shape = (128, 128)
-    def body(ctx, out, _):
-      del ctx, out
-      f32 = ir.F32Type.get()
-      x = vector.broadcast(
-          ir.VectorType.get(shape, f32), arith.constant(f32, 0.0)
-      )
-      wgmma_layout = layouts.to_layout_attr(fa.WGMMA_LAYOUT)
-      wgmma_row_layout = layouts.to_layout_attr(fa.WGMMA_ROW_LAYOUT)
-      lc1 = mgpu_dialect.layout_cast(x, wgmma_layout)
-      mgpu_dialect.layout_cast(lc1, wgmma_row_layout)
-
-    dtype = jnp.float32
-    with self.assertRaisesRegex(
-        ValueError, "user-provided layout casts are unsatisfiable"
-    ):
-      mgpu.as_gpu_kernel(
-          body,
-          grid=(1, 1, 1),
-          block=(128, 1, 1),
-          in_shape=(),
-          out_shape=jax.ShapeDtypeStruct(shape, dtype),
-          smem_scratch_shape=(),
-          thread_semantics=mgpu.LoweringSemantics.Warpgroup,
-      )
-
   @parameterized.parameters(
       (jnp.float32, 5.0, 2.0, vector.CombiningKind.ADD),
       (jnp.float32, 5.0, 2.0, vector.CombiningKind.MAXIMUMF),
@@ -5240,32 +5191,6 @@ def matmul(ctx, a_gmem, b_gmem, result_gmem, scratch):
         rtol=rtol,
     )
 
-  def test_inconsistent_collective_attributes_in_kernel_raise(self):
-    def body(ctx, out, smem_ptr):
-      del ctx, out
-      ref_ty = ir.MemRefType.get(
-          (128, 128),
-          ir.BF16Type.get(),
-          memory_space=utils.tmem(),
-      )
-      mgpu_dialect.tmem_alloc(ref_ty, smem_ptr, collective=False)
-      mgpu_dialect.tmem_alloc(ref_ty, smem_ptr, collective=True)
-
-    with self.assertRaisesRegex(
-        ValueError,
-        "Collective attributes are inconsistent across operations in the"
-        " kernel",
-    ):
-      mgpu.as_gpu_kernel(
-          body,
-          grid=(1, 1, 1),
-          block=(128, 1, 1),
-          in_shape=(),
-          out_shape=(jax.ShapeDtypeStruct((), jnp.int32),),
-          smem_scratch_shape=jax.ShapeDtypeStruct((), jnp.int32),
-          thread_semantics=mgpu.LoweringSemantics.Warpgroup,
-      )
-
   def test_slice_tmem(self):
     def tmem_type(ref: ir.Value):
       return ir.MemRefType.get(