|
1 | 1 | #include <chrono>
|
2 | 2 | #include <cstdio>
|
| 3 | +#include <cstdlib> |
| 4 | +#include <regex> |
3 | 5 |
|
4 | 6 | #include "local_laplacian.h"
|
5 | 7 | #ifndef NO_AUTO_SCHEDULE
|
|
13 | 15 | using namespace Halide::Runtime;
|
14 | 16 | using namespace Halide::Tools;
|
15 | 17 |
|
namespace {

/// Result of inspecting the HL_TARGET environment variable.
///
/// NOTE(review): the enumerator names do not literally describe the values
/// they are returned for (see the per-enumerator notes). They are kept
/// unchanged because callers compare against them by name.
enum DeviceState {
    /// HL_TARGET is set and contains none of "cuda", "metal", "vulkan" or
    /// "opencl"; the caller may proceed.
    IS_CUDA,
    /// HL_TARGET contains "cuda", "metal", "vulkan" or "opencl"; the caller
    /// is expected to skip the run.
    NOT_CUDA,
    /// HL_TARGET is not set in the environment.
    ENV_VARIABLE_ABSENT,
};

/// Decides, from the HL_TARGET environment variable, whether this process
/// should run or be skipped.
///
/// @return ENV_VARIABLE_ABSENT when HL_TARGET is unset (a warning is printed),
///         NOT_CUDA when HL_TARGET mentions one of the GPU APIs listed in the
///         pattern below (a "[SKIP]" line is printed), and IS_CUDA otherwise.
///
/// NOTE(review): "cuda" is part of the skip pattern, so a CUDA target yields
/// NOT_CUDA as well -- confirm that skipping CUDA is intended.
DeviceState ensure_cuda_device() {
    const char *const hl_target = std::getenv("HL_TARGET");
    if (hl_target == nullptr) {
        printf("Warning: Environment variable HL_TARGET not specified. "
               "Proceeding to the tests...\n");
        return ENV_VARIABLE_ABSENT;
    }

    // Substring match: the API name may appear anywhere in the target string.
    if (std::regex_search(hl_target, std::regex{"cuda|metal|vulkan|opencl"})) {
        // note(antonysigma): Error messages if we don't skip the test:
        //
        // OpenCL error: CL_INVALID_WORK_GROUP_SIZE clEnqueueNDRangeKernel
        // failed
        //
        // 2025-07-17 17:24:32.170 local_laplacian_process[63513:6587844] Metal
        // API Validation Enabled -[MTLDebugComputeCommandEncoder
        // _validateThreadsPerThreadgroup:]:1266: failed assertion
        // `(threadsPerThreadgroup.width(62) * threadsPerThreadgroup.height(32)
        // * threadsPerThreadgroup.depth(1))(1984) must be <= 1024. (device
        // threadgroup size limit)'
        //
        // Vulkan: vkQueueWaitIdle returned VK_ERROR_DEVICE_LOST
        printf("[SKIP] Mullapudi2016 experimental GPU schedules "
               "generates the gpu_threads where thread count per block "
               "is not an multiple of 32. Target = %s. Skipping...\n",
               hl_target);

        return NOT_CUDA;
    }

    return IS_CUDA;
}

}  // namespace
| 59 | + |
16 | 60 | int main(int argc, char **argv) {
|
17 | 61 | if (argc < 7) {
|
18 | 62 | printf("Usage: ./process input.png levels alpha beta timing_iterations output.png\n"
|
19 | 63 | "e.g.: ./process input.png 8 1 1 10 output.png\n");
|
20 | 64 | return 1;
|
21 | 65 | }
|
22 | 66 |
|
| 67 | + if (ensure_cuda_device() == NOT_CUDA) { |
| 68 | + return 0; |
| 69 | + } |
| 70 | + |
23 | 71 | // Input may be a PNG8
|
24 | 72 | Buffer<uint16_t, 3> input = load_and_convert_image(argv[1]);
|
25 | 73 |
|
|
0 commit comments