Skip to content

Commit

Permalink
enabled performance/ tests to be used without CUDA
Browse files Browse the repository at this point in the history
  • Loading branch information
wnbell committed Feb 17, 2012
1 parent 05f0598 commit 1e189c1
Show file tree
Hide file tree
Showing 3 changed files with 172 additions and 28 deletions.
12 changes: 10 additions & 2 deletions performance/build/perftest.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#include <unittest/unittest.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include <build/timer.h>
#include <string>
#include <algorithm>


//#include <cuda_runtime.h>
//#include <cuda.h>

// Writes one <result .../> XML element to std::cout with the given name,
// value and units as attributes, then ends the line (and flushes) with
// std::endl. The expansion is a braced compound statement.
#define RECORD_RESULT(name, value, units) { std::cout << " <result name=\"" << name << "\" value=\"" << value << "\" units=\"" << units << "\"/>" << std::endl; }
// Records a result named "Time" in "seconds". The value is read from a
// variable called best_time, which must be in scope where the macro is expanded.
#define RECORD_TIME() RECORD_RESULT("Time", best_time, "seconds")
Expand Down Expand Up @@ -40,6 +44,7 @@

inline void RECORD_PLATFORM_INFO(void)
{
#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
int deviceCount;
cudaGetDeviceCount(&deviceCount);
if (deviceCount == 0){
Expand Down Expand Up @@ -76,6 +81,7 @@ inline void RECORD_PLATFORM_INFO(void)
std::cout << " <property name=\"__TIME__\" value=\"" << __TIME__ << "\"/>" << std::endl;
std::cout << " </compilation>" << std::endl;
std::cout << "</platform>" << std::endl;
#endif
}


Expand All @@ -92,8 +98,10 @@ inline void PROCESS_ARGUMENTS(int argc, char **argv)
exit(-1);
}

#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
int device_index = atoi(argv[i]);
cudaSetDevice(device_index);
#endif
}
}
}
Expand Down
40 changes: 14 additions & 26 deletions performance/build/test_function_template.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -9,78 +9,66 @@ void $FUNCTION(void)
$INITIALIZE
/************* END INITIALIZATION SECTION *************/

cudaEvent_t start, end;
cudaEventCreate(&start);
cudaEventCreate(&end);

float warmup_time;
double warmup_time;
{
cudaEventRecord(start, 0);

timer t;
/************ BEGIN TIMING SECTION ************/
$TIME
/************* END TIMING SECTION *************/

cudaEventRecord(end, 0);
cudaEventSynchronize(end);

float ms_elapsed;
cudaEventElapsedTime(&ms_elapsed, start, end);
warmup_time = ms_elapsed / float(1000);
warmup_time = t.elapsed();
}

// only verbose
//std::cout << "warmup_time: " << warmup_time << " seconds" << std::endl;

static const size_t NUM_TRIALS = 5;
static const size_t MAX_ITERATIONS = 1000;
static const float MAX_TEST_TIME = 0.5; //TODO allow to be set by user
static const double MAX_TEST_TIME = 0.5; //TODO allow to be set by user

size_t NUM_ITERATIONS;
if (warmup_time == 0)
NUM_ITERATIONS = MAX_ITERATIONS;
else
NUM_ITERATIONS = std::min(MAX_ITERATIONS, std::max( (size_t) 1, (size_t) (MAX_TEST_TIME / warmup_time)));

float trial_times[NUM_TRIALS];
double trial_times[NUM_TRIALS];

for(size_t trial = 0; trial < NUM_TRIALS; trial++){
cudaEventRecord(start, 0);
for(size_t trial = 0; trial < NUM_TRIALS; trial++)
{
timer t;
for(size_t i = 0; i < NUM_ITERATIONS; i++){

/************ BEGIN TIMING SECTION ************/
$TIME
/************* END TIMING SECTION *************/

}
cudaEventRecord(end, 0);
cudaEventSynchronize(end);

float ms_elapsed;
cudaEventElapsedTime(&ms_elapsed, start, end);
trial_times[trial] = ms_elapsed / (float(1000) * float(NUM_ITERATIONS));
trial_times[trial] = t.elapsed() / double(NUM_ITERATIONS);
}

// only verbose
//for(size_t trial = 0; trial < NUM_TRIALS; trial++){
// std::cout << "trial[" << trial << "] : " << trial_times[trial] << " seconds\n";
//}

float best_time = *std::min_element(trial_times, trial_times + NUM_TRIALS);
double best_time = *std::min_element(trial_times, trial_times + NUM_TRIALS);

/************ BEGIN FINALIZE SECTION ************/
$FINALIZE
/************* END FINALIZE SECTION *************/
cudaEventDestroy(start);
cudaEventDestroy(end);


#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
cudaError_t error = cudaGetLastError();
if(error){
RECORD_TEST_FAILURE(cudaGetErrorString(error));
} else {
RECORD_TEST_SUCCESS();
}
#else
RECORD_TEST_SUCCESS();
#endif

} // end try
catch (std::bad_alloc) {
Expand Down
148 changes: 148 additions & 0 deletions performance/build/timer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/*
* Copyright 2008-2009 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

// A simple timer class

#ifdef __CUDACC__

// use CUDA's high-resolution timers when possible
#include <cuda_runtime_api.h>
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>
#include <string>

void cuda_safe_call(cudaError_t error, const std::string& message = "")
{
if(error)
throw thrust::system_error(error, thrust::cuda_category(), message);
}

struct timer
{
cudaEvent_t start;
cudaEvent_t end;

timer(void)
{
cuda_safe_call(cudaEventCreate(&start));
cuda_safe_call(cudaEventCreate(&end));
restart();
}

~timer(void)
{
cuda_safe_call(cudaEventDestroy(start));
cuda_safe_call(cudaEventDestroy(end));
}

void restart(void)
{
cuda_safe_call(cudaEventRecord(start, 0));
}

double elapsed(void)
{
cuda_safe_call(cudaEventRecord(end, 0));
cuda_safe_call(cudaEventSynchronize(end));

float ms_elapsed;
cuda_safe_call(cudaEventElapsedTime(&ms_elapsed, start, end));
return ms_elapsed / 1e3;
}

double epsilon(void)
{
return 0.5e-6;
}
};

#elif defined(__linux__)

#include <sys/time.h>

// Stopwatch built on gettimeofday().
//
// Timing begins at construction; elapsed() reports the seconds that have
// passed since construction or since the latest restart().
struct timer
{
  timeval start;
  timeval end;

  // Starts timing immediately.
  timer()
  {
    restart();
  }

  // Nothing to release.
  ~timer()
  {
  }

  // Captures the current time of day as the new reference point.
  void restart()
  {
    gettimeofday(&start, NULL);
  }

  // Captures the current time of day into `end` and returns the difference
  // from `start` in seconds.
  double elapsed()
  {
    gettimeofday(&end, NULL);

    // Whole seconds and microseconds are differenced separately; the
    // microsecond part may be negative and is scaled to seconds before adding.
    const double whole_seconds = static_cast<double>(end.tv_sec - start.tv_sec);
    const int    micro_delta   = (int)end.tv_usec - (int)start.tv_usec;

    return whole_seconds + 1e-6 * static_cast<double>(micro_delta);
  }

  // Resolution reported for this timer, in seconds.
  double epsilon()
  {
    return 0.5e-6;
  }
};

#else

// fall back to clock()
#include <ctime>

// Portable stopwatch built on the C library's clock().
//
// Timing begins at construction; elapsed() reports the seconds measured by
// clock() since construction or since the latest restart().
struct timer
{
  std::clock_t start;
  std::clock_t end;

  // Starts timing immediately.
  timer()
  {
    restart();
  }

  // Nothing to release.
  ~timer()
  {
  }

  // Samples clock() as the new reference point.
  void restart()
  {
    start = std::clock();
  }

  // Samples clock() into `end` and converts the tick difference from `start`
  // into seconds.
  double elapsed()
  {
    end = std::clock();

    const double ticks            = static_cast<double>(end - start);
    const double ticks_per_second = static_cast<double>(CLOCKS_PER_SEC);

    return ticks / ticks_per_second;
  }

  // Duration of a single clock() tick, in seconds.
  double epsilon()
  {
    return 1.0 / static_cast<double>(CLOCKS_PER_SEC);
  }
};

#endif

0 comments on commit 1e189c1

Please sign in to comment.