@@ -18,25 +18,20 @@ KERNEL(col_to_im_opt)(const __global INPUT0_TYPE* input,
18
18
const uint pads_begin [2 ] = {PAD_BEGIN_SIZE_X , PAD_BEGIN_SIZE_Y };
19
19
const uint pads_end [2 ] = {PAD_END_SIZE_X , PAD_END_SIZE_Y };
20
20
21
- const uint num_blocks = INPUT0_SIZE_Y ;
22
- const uint kernel_product = KERNEL_SIZE_X * KERNEL_SIZE_Y ;
23
- const uint channels_per_column = INPUT0_FEATURE_NUM ;
24
- const uint channel_count = channels_per_column / kernel_product ;
25
-
26
21
const uint batch_count = INPUT0_BATCH_NUM ;
27
22
const uint batch = get_global_id (2 );
28
23
29
- // printf("batch(%d) num_blocks(%u) output(%u, %u), channel (%u, %u) original_height(%u) original_width(%u) \n",
24
+ // printf("batch(%d) num_blocks(%u) output(%u, %u), kernel (%u, %u) original_height(%u) original_width(%u) \n",
30
25
// batch, num_blocks, (uint)OUT_SIZE_X, (uint)OUT_SIZE_Y, (uint)KERNEL_SIZE_X, (uint)KERNEL_SIZE_Y, ORIG_HEIGHT, ORIG_WIDTH);
31
26
32
27
// for (uint batch = 0; batch < batch_count; ++batch) {
33
- for (uint column = 0 ; column < channels_per_column ; ++ column ) {
28
+ for (uint column = 0 ; column < NUM_ELEMENTS_FOR_BLOCK ; ++ column ) {
34
29
const uint width_offset = column % kernel_size [1 ];
35
30
const uint height_offset = (column / kernel_size [1 ]) % kernel_size [0 ];
36
- const uint channel_idx = column / kernel_product ;
31
+ const uint channel_idx = column / KERNEL_PRODUCT ;
37
32
38
- const uint out_idx = (batch * channel_count + channel_idx ) * output_size [0 ];
39
- const uint height_idx = (batch * channels_per_column + column ) * ORIG_HEIGHT ;
33
+ const uint out_idx = (batch * NUM_CHANNELS + channel_idx ) * output_size [0 ];
34
+ const uint height_idx = (batch * NUM_ELEMENTS_FOR_BLOCK + column ) * ORIG_HEIGHT ;
40
35
41
36
for (uint column_height_idx = 0 ; column_height_idx < ORIG_HEIGHT ; ++ column_height_idx ) {
42
37
// get_image_dimension_index(column_height_idx, height_offset, 0);
0 commit comments