Skip to content

Commit 997f082

Browse files
committed
Hardcode input/output dimensions of local laplacian
1 parent 56bff19 commit 997f082

File tree

2 files changed

+14
-1
lines changed

2 files changed

+14
-1
lines changed

apps/harris/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ add_halide_generator(harris.generator SOURCES harris_generator.cpp)
 16  16
 17  17    set(_harris_autoscheduler_params autoscheduler.experimental_gpu_schedule=1)
 18  18
 19      - if(Halide_TARGET MATCHES "opencl|metal")
     19  + if(Halide_TARGET MATCHES "opencl|metal|cuda|vulkan")
 20  20    # Set last_level_cache per GPU block to an extremely small value. This
 21  21    # eliminates all `.compute_at` in the generated schedules, which in turn
 22  22    # eliminates all GPU shared memory allocations.

apps/local_laplacian/local_laplacian_generator.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,19 @@ class LocalLaplacian : public Halide::Generator<LocalLaplacian> {
 97   97    // Provide estimates on the pipeline output
 98   98    output.set_estimates({{0, 1536}, {0, 2560}, {0, 3}});
 99   99
     100  + // Hardcode the input and output dimensions to suppress the OpenCL/Metal
     101  + // launch failure:
     102  + //
     103  + // OpenCL error: CL_INVALID_WORK_GROUP_SIZE clEnqueueNDRangeKernel
     104  + // failed
     105  + input.dim(0).set_bounds(0, 1536).set_stride(1);
     106  + input.dim(1).set_bounds(0, 2560).set_stride(1536);
     107  + input.dim(2).set_bounds(0, 3).set_stride(1536 * 2560);
     108  +
     109  + output.dim(0).set_bounds(0, 1536).set_stride(1);
     110  + output.dim(1).set_bounds(0, 2560).set_stride(1536);
     111  + output.dim(2).set_bounds(0, 3).set_stride(1536 * 2560);
     112  +
100  113    /* THE SCHEDULE */
101  114    if (using_autoscheduler()) {
102  115    // Nothing.

0 commit comments

Comments
 (0)