Skip to content

Commit 0ec4904

Browse files
committed
Explicitly skipping local_laplacian tests for GPU targets
1 parent 997f082 commit 0ec4904

File tree

2 files changed

+54
-9
lines changed

2 files changed

+54
-9
lines changed

apps/local_laplacian/CMakeLists.txt

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,12 @@ add_halide_generator(local_laplacian.generator
1818

1919
set(_local_laplacian_autoscheduler_params autoscheduler.experimental_gpu_schedule=1)
2020

21-
if(Halide_TARGET MATCHES "cuda")
21+
if(Halide_TARGET MATCHES "cuda|metal|opencl|vulkan")
2222
# Last level cache size estimate of the Nvidia GPU on the Buildbot. Hand
2323
# tuned to pass the Buildbot tests.
2424
list(APPEND _local_laplacian_autoscheduler_params
2525
autoscheduler.last_level_cache_size=10000
2626
)
27-
elseif(Halide_TARGET MATCHES "metal|opencl|vulkan")
28-
# The pipeline is shared GPU memory bounded. Limit the parallelism to
29-
# minimal value (=32) to cap the shared GPU memory size.
30-
list(APPEND _local_laplacian_autoscheduler_params
31-
autoscheduler.last_level_cache_size=1000
32-
autoscheduler.parallelism=32
33-
)
3427
endif()
3528

3629
# Filters
@@ -58,5 +51,9 @@ if (EXISTS ${IMAGE})
5851
set_tests_properties(local_laplacian_process PROPERTIES
5952
LABELS local_laplacian
6053
PASS_REGULAR_EXPRESSION "Success!"
61-
SKIP_REGULAR_EXPRESSION "\\[SKIP\\]")
54+
SKIP_REGULAR_EXPRESSION "\\[SKIP\\]"
55+
# Pass in the keyword "metal" etc to skip the test
56+
# explicitly. Buildbot can print a nice test report
57+
# for all skipped tests.
58+
ENVIRONMENT "HL_TARGET=${Halide_TARGET}")
6259
endif ()

apps/local_laplacian/process.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include <chrono>
22
#include <cstdio>
3+
#include <cstdlib>
4+
#include <regex>
35

46
#include "local_laplacian.h"
57
#ifndef NO_AUTO_SCHEDULE
@@ -13,13 +15,59 @@
1315
using namespace Halide::Runtime;
1416
using namespace Halide::Tools;
1517

18+
namespace {

// Result of inspecting the HL_TARGET environment variable.
//
// NOTE: the enumerator names are historical. "cuda" is one of the keywords
// that yields NOT_CUDA, and any target naming none of the keywords (e.g. a
// CPU-only target) yields IS_CUDA.
enum DeviceState {
    IS_CUDA,              // HL_TARGET is set and contains none of the GPU keywords.
    NOT_CUDA,             // HL_TARGET contains cuda, metal, vulkan or opencl.
    ENV_VARIABLE_ABSENT,  // HL_TARGET is not set at all.
};

// Decides whether the test should run, based on the HL_TARGET environment
// variable.
//
// Returns:
//   ENV_VARIABLE_ABSENT  HL_TARGET is unset; a warning is printed to stdout.
//   NOT_CUDA             HL_TARGET contains "cuda", "metal", "vulkan" or
//                        "opencl"; a line starting with "[SKIP]" is printed
//                        to stdout.
//   IS_CUDA              HL_TARGET is set but names none of those keywords.
DeviceState ensure_cuda_device() {
    const char *const target_name = std::getenv("HL_TARGET");
    if (!target_name) {
        printf("Warning: Environment variable HL_TARGET not specified. "
               "Proceeding to the tests...\n");
        return ENV_VARIABLE_ABSENT;
    }

    // Plain substring match: any of these keywords anywhere in the target
    // string counts.
    const std::regex gpu_keywords{"cuda|metal|vulkan|opencl"};
    const bool names_gpu_api = std::regex_search(target_name, gpu_keywords);
    if (!names_gpu_api) {
        return IS_CUDA;
    }

    // note(antonysigma): failures observed when the test is NOT skipped:
    //
    //   OpenCL: CL_INVALID_WORK_GROUP_SIZE clEnqueueNDRangeKernel failed
    //
    //   Metal (API Validation Enabled, 2025-07-17):
    //     -[MTLDebugComputeCommandEncoder _validateThreadsPerThreadgroup:]:1266:
    //     failed assertion `(threadsPerThreadgroup.width(62) *
    //     threadsPerThreadgroup.height(32) * threadsPerThreadgroup.depth(1))(1984)
    //     must be <= 1024. (device threadgroup size limit)'
    //
    //   Vulkan: vkQueueWaitIdle returned VK_ERROR_DEVICE_LOST
    printf("[SKIP] Mullapudi2016 experimental GPU schedules "
           "over-estimates the gpu_threads where thread count per block "
           "is not an multiple of 32. Target = %s. Skipping...\n",
           target_name);
    return NOT_CUDA;
}

}  // namespace
1660
int main(int argc, char **argv) {
1761
if (argc < 7) {
1862
printf("Usage: ./process input.png levels alpha beta timing_iterations output.png\n"
1963
"e.g.: ./process input.png 8 1 1 10 output.png\n");
2064
return 1;
2165
}
2266

67+
if (ensure_cuda_device() == NOT_CUDA) {
68+
return 0;
69+
}
70+
2371
// Input may be a PNG8
2472
Buffer<uint16_t, 3> input = load_and_convert_image(argv[1]);
2573

0 commit comments

Comments
 (0)