|
| 1 | +/* |
| 2 | + * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | + * All rights reserved. |
| 4 | + * |
| 5 | + * This source code is licensed under the BSD-style license found in the |
| 6 | + * LICENSE file in the root directory of this source tree. |
| 7 | + */ |
| 8 | + |
| 9 | +// Device-free unit test for the pure 2D workgroup-count fold that lifts the |
| 10 | +// 65535 per-dim dispatch cap. Exercises the fold arithmetic only — no GPU. |
| 11 | + |
| 12 | +#include <executorch/backends/webgpu/runtime/WebGPUUtils.h> |
| 13 | + |
| 14 | +#include <gtest/gtest.h> |
| 15 | + |
| 16 | +#include <cmath> |
| 17 | +#include <cstdint> |
| 18 | + |
| 19 | +using executorch::backends::webgpu::utils::fold_workgroup_count_2d; |
| 20 | +using executorch::backends::webgpu::utils::WgCount; |
| 21 | + |
| 22 | +namespace { |
| 23 | + |
| 24 | +constexpr uint32_t kMax = 65535u; |
| 25 | + |
| 26 | +// count <= max -> {count, 1}: the 1D fast path, byte-identical to the old path. |
| 27 | +TEST(DispatchFold, FastPath1D) { |
| 28 | + for (uint32_t count : {1u, kMax - 1u, kMax}) { |
| 29 | + const WgCount got = fold_workgroup_count_2d(count, kMax, "test"); |
| 30 | + EXPECT_EQ(got.x, count); |
| 31 | + EXPECT_EQ(got.y, 1u); |
| 32 | + } |
| 33 | +} |
| 34 | + |
| 35 | +// count > max -> near-square {x, y}: fits the per-dim cap, covers every |
| 36 | +// workgroup, and stays near-square so few invocations are inactive (launched - |
| 37 | +// count is O(sqrt(count)); a flat {max, div_up} split would idle up to ~half). |
| 38 | +TEST(DispatchFold, NearSquareFold) { |
| 39 | + // Includes prefill-scale QK counts (Hq*ceil(S/4)*ceil(ctx/4)/wg) that fold: |
| 40 | + // 131072 = S=2048 (32*512*512/64); 2097152 = large-S stress. |
| 41 | + for (uint32_t count : |
| 42 | + {kMax + 1u, 2u * kMax, 2u * kMax + 1u, 131072u, 2097152u}) { |
| 43 | + const WgCount got = fold_workgroup_count_2d(count, kMax, "test"); |
| 44 | + const uint64_t launched = static_cast<uint64_t>(got.x) * got.y; |
| 45 | + const uint32_t root = |
| 46 | + static_cast<uint32_t>(std::ceil(std::sqrt(static_cast<double>(count)))); |
| 47 | + EXPECT_LE(got.x, kMax) << "count=" << count; |
| 48 | + EXPECT_LE(got.y, kMax) << "count=" << count; |
| 49 | + EXPECT_GE(launched, count) << "count=" << count; |
| 50 | + EXPECT_LT(launched - count, 2ull * root) |
| 51 | + << "count=" << count << " launched=" << launched; |
| 52 | + } |
| 53 | +} |
| 54 | + |
| 55 | +// count > max^2 needs a 3rd dispatch dimension -> throws (out of scope). |
| 56 | +TEST(DispatchFold, ThrowsWhenNeeds3rdDimension) { |
| 57 | + EXPECT_ANY_THROW(fold_workgroup_count_2d(kMax * kMax + 1u, kMax, "test")); |
| 58 | +} |
| 59 | + |
| 60 | +} // namespace |
0 commit comments