@@ -163,14 +163,17 @@ int main(int argc, char **argv) {
163
163
}
164
164
printf (" Using kernel %d\n " , kernel_num);
165
165
166
- // set up block sizes
166
+ // first check the correctness of the kernel
167
+ encoder_backward_cpu (dwte, dwpe, dout, inp, B, T, C);
168
+
169
+ // time the kernel at different block sizes
167
170
int block_sizes[] = {32 , 64 , 128 , 256 , 512 , 1024 };
168
171
169
- // first check the correctness of the kernel
170
172
for (int j = 0 ; j < sizeof (block_sizes) / sizeof (int ); j++) {
171
173
int block_size = block_sizes[j];
174
+ cudaCheck (cudaMemset (d_dwte, 0 , V * C * sizeof (float )));
175
+ cudaCheck (cudaMemset (d_dwpe, 0 , T * C * sizeof (float )));
172
176
printf (" Checking block size %d.\n " , block_size);
173
- encoder_backward_cpu (dwte, dwpe, dout, inp, B, T, C);
174
177
encoder_backward (kernel_num, d_dwte, d_dwpe, d_dout, d_inp, B, T, C, block_size);
175
178
validate_result (d_dwte, dwte, " dwte" , V * C, 1e-5f );
176
179
validate_result (d_dwpe, dwpe, " dwpe" , T * C, 1e-5f );
0 commit comments