diff --git a/performance/build/perftest.h b/performance/build/perftest.h
index adae08f61..852e30a53 100644
--- a/performance/build/perftest.h
+++ b/performance/build/perftest.h
@@ -1,7 +1,11 @@
 #include
-#include
-#include
+#include
 #include
+#include
+
+
+//#include
+//#include
 
 #define RECORD_RESULT(name, value, units) { std::cout << " " << std::endl; }
 #define RECORD_TIME() RECORD_RESULT("Time", best_time, "seconds")
@@ -40,6 +44,7 @@
 
 inline void RECORD_PLATFORM_INFO(void)
 {
+#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
     int deviceCount;
     cudaGetDeviceCount(&deviceCount);
     if (deviceCount == 0){
@@ -76,6 +81,7 @@ inline void RECORD_PLATFORM_INFO(void)
     std::cout << " " << std::endl;
     std::cout << " " << std::endl;
     std::cout << "" << std::endl;
+#endif
 }
 
 
@@ -92,8 +98,10 @@ inline void PROCESS_ARGUMENTS(int argc, char **argv)
                 exit(-1);
             }
 
+#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
             int device_index = atoi(argv[i]);
             cudaSetDevice(device_index);
+#endif
         }
     }
 }
diff --git a/performance/build/test_function_template.cxx b/performance/build/test_function_template.cxx
index 0929816de..d86668bfb 100644
--- a/performance/build/test_function_template.cxx
+++ b/performance/build/test_function_template.cxx
@@ -9,24 +9,14 @@ void $FUNCTION(void)
 $INITIALIZE
 /************* END INITIALIZATION SECTION *************/
 
-    cudaEvent_t start, end;
-    cudaEventCreate(&start);
-    cudaEventCreate(&end);
 
-    float warmup_time;
+    double warmup_time;
     {
-        cudaEventRecord(start, 0);
-
+        timer t;
         /************ BEGIN TIMING SECTION ************/
 $TIME
         /************* END TIMING SECTION *************/
-
-        cudaEventRecord(end, 0);
-        cudaEventSynchronize(end);
-
-        float ms_elapsed;
-        cudaEventElapsedTime(&ms_elapsed, start, end);
-        warmup_time = ms_elapsed / float(1000);
+        warmup_time = t.elapsed();
     }
 
     // only verbose
@@ -34,7 +24,7 @@ void $FUNCTION(void)
 
     static const size_t NUM_TRIALS = 5;
     static const size_t MAX_ITERATIONS = 1000;
-    static const float MAX_TEST_TIME = 0.5; //TODO allow to be set by user
+    static const double MAX_TEST_TIME = 0.5; //TODO allow to be set by user
 
     size_t NUM_ITERATIONS;
     if (warmup_time == 0)
@@ -42,10 +32,11 @@ void $FUNCTION(void)
     else
         NUM_ITERATIONS = std::min(MAX_ITERATIONS, std::max( (size_t) 1, (size_t) (MAX_TEST_TIME / warmup_time)));
 
-    float trial_times[NUM_TRIALS];
+    double trial_times[NUM_TRIALS];
 
-    for(size_t trial = 0; trial < NUM_TRIALS; trial++){
-        cudaEventRecord(start, 0);
+    for(size_t trial = 0; trial < NUM_TRIALS; trial++)
+    {
+        timer t;
 
         for(size_t i = 0; i < NUM_ITERATIONS; i++){
             /************ BEGIN TIMING SECTION ************/
@@ -53,12 +44,8 @@ void $FUNCTION(void)
             /************* END TIMING SECTION *************/
         }
 
-        cudaEventRecord(end, 0);
-        cudaEventSynchronize(end);
 
-        float ms_elapsed;
-        cudaEventElapsedTime(&ms_elapsed, start, end);
-        trial_times[trial] = ms_elapsed / (float(1000) * float(NUM_ITERATIONS));
+        trial_times[trial] = t.elapsed() / double(NUM_ITERATIONS);
     }
 
     // only verbose
@@ -66,21 +53,22 @@ void $FUNCTION(void)
     //    std::cout << "trial[" << trial << "] : " << trial_times[trial] << " seconds\n";
     //}
 
-    float best_time = *std::min_element(trial_times, trial_times + NUM_TRIALS);
+    double best_time = *std::min_element(trial_times, trial_times + NUM_TRIALS);
 
     /************ BEGIN FINALIZE SECTION ************/
 $FINALIZE
     /************* END FINALIZE SECTION *************/
 
-    cudaEventDestroy(start);
-    cudaEventDestroy(end);
-
+#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
     cudaError_t error = cudaGetLastError();
     if(error){
         RECORD_TEST_FAILURE(cudaGetErrorString(error));
     } else {
         RECORD_TEST_SUCCESS();
     }
+#else
+    RECORD_TEST_SUCCESS();
+#endif
     } // end try
     catch (std::bad_alloc)
     {
diff --git a/performance/build/timer.h b/performance/build/timer.h
new file mode 100644
index 000000000..7690ff765
--- /dev/null
+++ b/performance/build/timer.h
@@ -0,0 +1,186 @@
+/*
+ *  Copyright 2008-2009 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#pragma once
+
+// A simple timer class.
+//
+// One of three `timer` definitions is selected at compile time: CUDA events
+// when compiled by nvcc, gettimeofday() on Linux, and clock() otherwise.
+// Each starts timing on construction; elapsed() returns the seconds since
+// construction or the most recent restart().
+
+#ifdef __CUDACC__
+
+// use CUDA's high-resolution timers when possible
+#include <cuda_runtime_api.h>
+#include <thrust/system/cuda_error.h>
+#include <thrust/system_error.h>
+#include <string>
+
+// Throws thrust::system_error when a CUDA runtime call reports an error.
+// Declared inline because this header defines the function: without it,
+// including timer.h from two translation units is a multiple definition.
+inline void cuda_safe_call(cudaError_t error, const std::string& message = "")
+{
+  if(error)
+    throw thrust::system_error(error, thrust::cuda_category(), message);
+}
+
+// Times work with a pair of CUDA events recorded on stream 0.
+struct timer
+{
+  cudaEvent_t start;
+  cudaEvent_t end;
+
+  // Creates both events and records the start event.
+  // Throws thrust::system_error if any CUDA call fails.
+  timer(void)
+  {
+    cuda_safe_call(cudaEventCreate(&start));
+
+    // the destructor does not run when a constructor throws, so events
+    // created so far are destroyed by hand before reporting the failure
+    if(cudaError_t error = cudaEventCreate(&end))
+    {
+      cudaEventDestroy(start);
+      cuda_safe_call(error);
+    }
+
+    if(cudaError_t error = cudaEventRecord(start, 0))
+    {
+      cudaEventDestroy(start);
+      cudaEventDestroy(end);
+      cuda_safe_call(error);
+    }
+  }
+
+  // Destroys both events. Failures are ignored: throwing from a destructor
+  // while another exception is unwinding the stack terminates the program.
+  ~timer(void)
+  {
+    cudaEventDestroy(start);
+    cudaEventDestroy(end);
+  }
+
+  // Records the start event again, discarding the previous start point.
+  void restart(void)
+  {
+    cuda_safe_call(cudaEventRecord(start, 0));
+  }
+
+  // Records the end event, waits for it to complete, and returns the time
+  // between the start and end events in seconds.
+  double elapsed(void)
+  {
+    cuda_safe_call(cudaEventRecord(end, 0));
+    cuda_safe_call(cudaEventSynchronize(end));
+
+    float ms_elapsed;
+    cuda_safe_call(cudaEventElapsedTime(&ms_elapsed, start, end));
+    return ms_elapsed / 1e3; // milliseconds -> seconds
+  }
+
+  // Returns a fixed 0.5 microseconds; presumably the nominal resolution.
+  double epsilon(void)
+  {
+    return 0.5e-6;
+  }
+};
+
+#elif defined(__linux__)
+
+#include <sys/time.h>
+
+// Times work with gettimeofday().
+struct timer
+{
+  timeval start;
+  timeval end;
+
+  timer(void)
+  {
+    restart();
+  }
+
+  ~timer(void)
+  {
+  }
+
+  // Resets the start point to the current time.
+  void restart(void)
+  {
+    gettimeofday(&start, NULL);
+  }
+
+  // Returns the seconds elapsed since the start point.
+  double elapsed(void)
+  {
+    gettimeofday(&end, NULL);
+
+    return static_cast<double>(end.tv_sec - start.tv_sec) + 1e-6 * static_cast<double>((int)end.tv_usec - (int)start.tv_usec);
+  }
+
+  // Returns a fixed 0.5 microseconds; presumably the nominal resolution.
+  double epsilon(void)
+  {
+    return 0.5e-6;
+  }
+};
+
+#else
+
+// fallback to clock()
+#include <ctime>
+
+// Times work with clock(), converted to seconds via CLOCKS_PER_SEC.
+struct timer
+{
+  clock_t start;
+  clock_t end;
+
+  timer(void)
+  {
+    restart();
+  }
+
+  ~timer(void)
+  {
+  }
+
+  // Resets the start point to the current clock() value.
+  void restart(void)
+  {
+    start = clock();
+  }
+
+  // Returns the seconds elapsed since the start point.
+  double elapsed(void)
+  {
+    end = clock();
+
+    return static_cast<double>(end - start) / static_cast<double>(CLOCKS_PER_SEC);
+  }
+
+  // Returns the length of one clock() tick in seconds.
+  double epsilon(void)
+  {
+    return 1.0 / static_cast<double>(CLOCKS_PER_SEC);
+  }
+};
+
+#endif
+