Skip to content

Commit

Permalink
Merge pull request #21 from CNugteren/development
Browse files Browse the repository at this point in the history
Minor bug fixes and improved error reporting
  • Loading branch information
CNugteren committed May 22, 2015
2 parents be163db + b29d0e3 commit 0f21d60
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 75 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@

Version 1.6.1
- Fixed a couple of issues related to exceptions
- Improved reporting of failed runs

Version 1.6.0
- Much cleaner API due to Pimpl idiom: only cltune.h header is now required
- Replaced Khronos' cl.hpp with a custom C++11 version tailored for CLTune
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ cmake_minimum_required(VERSION 2.8)
project("cltune" CXX)
set(cltune_VERSION_MAJOR 1)
set(cltune_VERSION_MINOR 6)
set(cltune_VERSION_PATCH 0)
set(cltune_VERSION_PATCH 1)

# Options
option(SAMPLES "Enable compilation of sample programs" ON)
Expand Down
2 changes: 1 addition & 1 deletion include/cltune.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ class Tuner {

// Functions to add kernel-arguments for input buffers, output buffers, and scalars. Make sure to
// call these in the order in which the arguments appear in the OpenCL kernel.
template <typename T> void AddArgumentInput(std::vector<T> &source);
template <typename T> void AddArgumentInput(const std::vector<T> &source);
template <typename T> void AddArgumentOutput(const std::vector<T> &source);
template <typename T> void AddArgumentScalar(const T argument);

Expand Down
44 changes: 28 additions & 16 deletions include/internal/clpp11.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,25 +46,30 @@
#endif

namespace cltune {

// =================================================================================================

// Base class for any object. The wrapper classes in this header (Event, Platform, Device, and
// ObjectWithState) derive from it to share the error-reporting helpers below.
class Object {
protected:

// Error handling: both overloads unconditionally throw std::runtime_error, so control never
// returns to the caller.
// NOTE(review): the two consecutive string-overload signatures below look like the before/after
// lines of this diff rendering; presumably only the [[noreturn]] one is in the real header.
void Error(const std::string &message) {
[[noreturn]] void Error(const std::string &message) {
throw std::runtime_error("Internal OpenCL error: "+message);
}
// Overload for OpenCL status codes: the numeric code is appended to the message as decimal text.
[[noreturn]] void Error(const cl_int status) {
throw std::runtime_error("Internal OpenCL error with status: "+std::to_string(status));
}
};

// =================================================================================================

// Intermediate base for the wrappers that require memory management of their OpenCL handle.
// It adds no members of its own; it only marks that category of classes.
class ObjectWithState : public Object {};

// =================================================================================================

// C++11 version of cl_event
class Event: public Object {
public:
Expand Down Expand Up @@ -114,12 +119,12 @@ class Platform: public Object {
// Selects the OpenCL platform with index 'platform_id' and stores its handle in platform_.
// Error() is called (and throws) when a clGetPlatformIDs query fails, when no platform is
// available, or when 'platform_id' is not smaller than the number of platforms.
// NOTE(review): this view shows old and new diff lines back to back. In the string forms,
// "status "+status and "invalid platform ID "+platform_id add an integer to a string-literal
// pointer rather than appending digits; the Error(status) / std::to_string lines that follow
// them are the corrected forms.
Platform(const size_t platform_id) {
// First query: only the number of available platforms
auto num_platforms = cl_uint{0};
auto status = clGetPlatformIDs(0, nullptr, &num_platforms);
if (status != CL_SUCCESS) { Error("status "+status); }
if (status != CL_SUCCESS) { Error(status); }
if (num_platforms == 0) { Error("no platforms found"); }
// Second query: the actual platform handles
auto platforms = std::vector<cl_platform_id>(num_platforms);
status = clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
if (status != CL_SUCCESS) { Error("status "+status); }
if (platform_id >= num_platforms) { Error("invalid platform ID "+platform_id); }
if (status != CL_SUCCESS) { Error(status); }
if (platform_id >= num_platforms) { Error("invalid platform ID "+std::to_string(platform_id)); }
platform_ = platforms[platform_id];
}

Expand All @@ -143,12 +148,12 @@ class Device: public Object {
// Selects the device with index 'device_id' among the devices of kind 'type' on 'platform' and
// stores its handle in device_. Error() is called (and throws) when a clGetDeviceIDs query
// fails, when no matching device exists, or when 'device_id' is not smaller than the device count.
// NOTE(review): this view shows old and new diff lines back to back. In the string forms,
// "status "+status and "invalid device ID "+device_id add an integer to a string-literal
// pointer rather than appending digits; the Error(status) / std::to_string lines that follow
// them are the corrected forms.
Device(const Platform &platform, const cl_device_type type, const size_t device_id) {
// First query: only the number of matching devices
auto num_devices = cl_uint{0};
auto status = clGetDeviceIDs(platform(), type, 0, nullptr, &num_devices);
if (status != CL_SUCCESS) { Error("status "+status); }
if (status != CL_SUCCESS) { Error(status); }
if (num_devices == 0) { Error("no devices found"); }
// Second query: the actual device handles
auto devices = std::vector<cl_device_id>(num_devices);
status = clGetDeviceIDs(platform(), type, num_devices, devices.data(), nullptr);
if (status != CL_SUCCESS) { Error("status "+status); }
if (device_id >= num_devices) { Error("invalid device ID "+device_id); }
if (status != CL_SUCCESS) { Error(status); }
if (device_id >= num_devices) { Error("invalid device ID "+std::to_string(device_id)); }
device_ = devices[device_id];
}

Expand Down Expand Up @@ -229,7 +234,7 @@ class Context: public ObjectWithState {
auto status = CL_SUCCESS;
const cl_device_id dev = device();
context_ = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &status);
if (status != CL_SUCCESS) { Error("status "+status); }
if (status != CL_SUCCESS) { Error(status); }
}
~Context() {
clReleaseContext(context_);
Expand Down Expand Up @@ -268,7 +273,7 @@ class Program: public ObjectWithState {
source_ptr_ = source_.data();
auto status = CL_SUCCESS;
program_ = clCreateProgramWithSource(context(), 1, &source_ptr_, &length_, &status);
if (status != CL_SUCCESS) { Error("status "+status); }
if (status != CL_SUCCESS) { Error(status); }
}
~Program() {
clReleaseProgram(program_);
Expand Down Expand Up @@ -329,7 +334,7 @@ class Kernel: public ObjectWithState {
Kernel(const Program &program, const std::string &name) {
auto status = CL_SUCCESS;
kernel_ = clCreateKernel(program(), name.c_str(), &status);
if (status != CL_SUCCESS) { Error("status "+status); }
if (status != CL_SUCCESS) { Error(status); }
}
~Kernel() {
clReleaseKernel(kernel_);
Expand Down Expand Up @@ -381,7 +386,7 @@ class CommandQueue: public ObjectWithState {
CommandQueue(const Context &context, const Device &device) {
auto status = CL_SUCCESS;
queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status);
if (status != CL_SUCCESS) { Error("status "+status); }
if (status != CL_SUCCESS) { Error(status); }
}
~CommandQueue() {
clReleaseCommandQueue(queue_);
Expand Down Expand Up @@ -411,6 +416,13 @@ class CommandQueue: public ObjectWithState {
clGetCommandQueueInfo(queue_, CL_QUEUE_CONTEXT, bytes, &result, nullptr);
return Context(result);
}
// Retrieves the device this command queue is associated with (CL_QUEUE_DEVICE) and wraps the
// handle in a Device object. The status codes of the two queries are not inspected, so the
// handle starts out as nullptr: a failed query then passes a well-defined null handle to the
// Device constructor instead of an indeterminate value.
Device GetDevice() const {
  // First query: size in bytes of the CL_QUEUE_DEVICE value
  auto bytes = size_t{0};
  clGetCommandQueueInfo(queue_, CL_QUEUE_DEVICE, 0, nullptr, &bytes);
  // Second query: the device handle itself
  cl_device_id result = nullptr;
  clGetCommandQueueInfo(queue_, CL_QUEUE_DEVICE, bytes, &result, nullptr);
  return Device(result);
}
cl_int Finish() {
return clFinish(queue_);
}
Expand All @@ -437,7 +449,7 @@ class Buffer: public ObjectWithState {
Buffer(const Context &context, const cl_mem_flags flags, const size_t bytes) {
auto status = CL_SUCCESS;
buffer_ = clCreateBuffer(context(), flags, bytes, nullptr, &status);
if (status != CL_SUCCESS) { Error("status "+status); }
if (status != CL_SUCCESS) { Error(status); }
}
~Buffer() {
clReleaseMemObject(buffer_);
Expand All @@ -464,12 +476,12 @@ class Buffer: public ObjectWithState {
return ReadBuffer(queue, bytes, host.data());
}
// Copies 'bytes' bytes from the host memory at 'host' into buffer_, starting at offset 0, using
// a blocking (CL_TRUE) clEnqueueWriteBuffer on 'queue'. Returns the OpenCL status code.
// NOTE(review): this view shows old and new diff lines back to back; the signature taking
// 'const T*' appears to be the replacement for the one taking 'T*'.
template <typename T>
cl_int WriteBuffer(const CommandQueue &queue, const size_t bytes, T* host) {
cl_int WriteBuffer(const CommandQueue &queue, const size_t bytes, const T* host) {
return clEnqueueWriteBuffer(queue(), buffer_, CL_TRUE, 0, bytes, host, 0, nullptr, nullptr);
}
// Convenience overload for std::vector: forwards the vector's element storage to the pointer
// overload above. 'bytes' is passed through unchanged and is not checked against host.size().
// NOTE(review): &host[0] is undefined behaviour for an empty vector; host.data() is also
// available on a const vector and is valid when the vector is empty.
template <typename T>
cl_int WriteBuffer(const CommandQueue &queue, const size_t bytes, std::vector<T> &host) {
return WriteBuffer(queue, bytes, host.data());
cl_int WriteBuffer(const CommandQueue &queue, const size_t bytes, const std::vector<T> &host) {
return WriteBuffer(queue, bytes, &host[0]);
}

// Accessors to the private data-member
Expand Down
8 changes: 8 additions & 0 deletions include/internal/tuner_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <vector> // std::vector
#include <memory> // std::shared_ptr
#include <complex> // std::complex
#include <stdexcept> // std::runtime_error

namespace cltune {
// =================================================================================================
Expand Down Expand Up @@ -91,6 +92,13 @@ class TunerImpl {
KernelInfo::Configuration configuration;
};

// Exception carrying an OpenCL status code: what() yields the supplied message immediately
// followed by the status printed as a decimal number (no separator is inserted).
class OpenCLException: public std::runtime_error {
 public:
  OpenCLException(const std::string &message, int status):
      std::runtime_error(Compose(message, status)) {}

 private:
  // Builds the text reported by what()
  static std::string Compose(const std::string &prefix, const int code) {
    auto text = std::string{prefix};
    text += std::to_string(code);
    return text;
  }
};

// Initialize either with platform 0 and device 0 or with a custom platform/device
explicit TunerImpl();
explicit TunerImpl(size_t platform_id, size_t device_id);
Expand Down
16 changes: 8 additions & 8 deletions src/cltune.cc
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ void Tuner::SetLocalMemoryUsage(const size_t id, LocalMemoryFunction amount,
// Creates a new buffer of type Memory (containing both host and device data) based on a source
// vector of data. Then, upload it to the device and store the argument in a list.
template <typename T>
void Tuner::AddArgumentInput(std::vector<T> &source) {
void Tuner::AddArgumentInput(const std::vector<T> &source) {
auto device_buffer = Buffer(pimpl->context(), CL_MEM_READ_ONLY, source.size()*sizeof(T));
auto status = device_buffer.WriteBuffer(pimpl->queue(), source.size()*sizeof(T), source);
if (status != CL_SUCCESS) { throw std::runtime_error("Write buffer error: " + status); }
Expand All @@ -171,12 +171,12 @@ void Tuner::AddArgumentInput(std::vector<T> &source) {
}

// Compiles the function for various data-types
template void Tuner::AddArgumentInput<int>(std::vector<int>&);
template void Tuner::AddArgumentInput<size_t>(std::vector<size_t>&);
template void Tuner::AddArgumentInput<float>(std::vector<float>&);
template void Tuner::AddArgumentInput<double>(std::vector<double>&);
template void Tuner::AddArgumentInput<float2>(std::vector<float2>&);
template void Tuner::AddArgumentInput<double2>(std::vector<double2>&);
template void Tuner::AddArgumentInput<int>(const std::vector<int>&);
template void Tuner::AddArgumentInput<size_t>(const std::vector<size_t>&);
template void Tuner::AddArgumentInput<float>(const std::vector<float>&);
template void Tuner::AddArgumentInput<double>(const std::vector<double>&);
template void Tuner::AddArgumentInput<float2>(const std::vector<float2>&);
template void Tuner::AddArgumentInput<double2>(const std::vector<double2>&);

// Similar to the above function, but now marked as output buffer. Output buffers are special in the
// sense that they will be checked in the verification process.
Expand Down Expand Up @@ -288,7 +288,7 @@ double Tuner::PrintToScreen() const {
// Prints all valid results and the one with the lowest execution time
pimpl->PrintHeader("Printing results to stdout");
for (auto &tuning_result: pimpl->tuning_results_) {
if (tuning_result.status) {
if (tuning_result.status && tuning_result.time != std::numeric_limits<double>::max()) {
pimpl->PrintResult(stdout, tuning_result, pimpl->kMessageResult);
}
}
Expand Down
102 changes: 53 additions & 49 deletions src/tuner_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,11 @@ void TunerImpl::Tune() {
if (!tuning_result.status) {
PrintResult(stdout, tuning_result, kMessageWarning);
}
if (tuning_result.time == std::numeric_limits<double>::max()) {
tuning_result.time = 0.0;
PrintResult(stdout, tuning_result, kMessageFailure);
tuning_result.time = std::numeric_limits<double>::max();
}
}

// Prints a log of the searching process. This is disabled per default, but can be enabled
Expand Down Expand Up @@ -220,48 +225,48 @@ TunerImpl::TunerResult TunerImpl::RunKernel(const std::string &source, const Ker
auto processed_source = std::regex_replace(source, string_literal_start, "");
processed_source = std::regex_replace(processed_source, string_literal_end, "");

// Compiles the kernel and prints the compiler errors/warnings
auto status = CL_SUCCESS;
auto program = Program(context_, processed_source);
status = program.Build(device_, "");
if (status == CL_BUILD_PROGRAM_FAILURE) {
auto message = program.GetBuildInfo(device_);
fprintf(stdout, "OpenCL compiler error/warning: %s\n", message.c_str());
throw std::runtime_error("OpenCL compiler error/warning occurred ^^\n");
}
if (status != CL_SUCCESS) { throw std::runtime_error("Program build error: " + status); }

// Sets the output buffer(s) to zero
for (auto &output: arguments_output_) {
switch (output.type) {
case MemType::kInt: ResetMemArgument<int>(output); break;
case MemType::kFloat: ResetMemArgument<float>(output); break;
case MemType::kDouble: ResetMemArgument<double>(output); break;
case MemType::kFloat2: ResetMemArgument<float2>(output); break;
case MemType::kDouble2: ResetMemArgument<double2>(output); break;
default: throw std::runtime_error("Unsupported reference output data-type");
}
}

// Sets the kernel and its arguments
auto tune_kernel = Kernel(program, kernel.name());
if (status != CL_SUCCESS) { throw std::runtime_error("Kernel creation error: " + status); }
for (auto &i: arguments_input_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.index), i.buffer); }
for (auto &i: arguments_output_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.index), i.buffer); }
for (auto &i: arguments_int_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.first), i.second); }
for (auto &i: arguments_size_t_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.first), i.second); }
for (auto &i: arguments_float_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.first), i.second); }
for (auto &i: arguments_double_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.first), i.second); }
for (auto &i: arguments_float2_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.first), i.second); }
for (auto &i: arguments_double2_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.first), i.second); }

// Sets the global and local thread-sizes
auto global = kernel.global();
auto local = kernel.local();

// In case of an exception, skip this run
try {

// Compiles the kernel and prints the compiler errors/warnings
auto status = CL_SUCCESS;
auto program = Program(context_, processed_source);
status = program.Build(device_, "");
if (status == CL_BUILD_PROGRAM_FAILURE) {
auto message = program.GetBuildInfo(device_);
fprintf(stdout, "OpenCL compiler error/warning: %s\n", message.c_str());
throw std::runtime_error("OpenCL compiler error/warning occurred ^^\n");
}
if (status != CL_SUCCESS) { throw OpenCLException("Program build error: ", status); }

// Sets the output buffer(s) to zero
for (auto &output: arguments_output_) {
switch (output.type) {
case MemType::kInt: ResetMemArgument<int>(output); break;
case MemType::kFloat: ResetMemArgument<float>(output); break;
case MemType::kDouble: ResetMemArgument<double>(output); break;
case MemType::kFloat2: ResetMemArgument<float2>(output); break;
case MemType::kDouble2: ResetMemArgument<double2>(output); break;
default: throw std::runtime_error("Unsupported reference output data-type");
}
}

// Sets the kernel and its arguments
auto tune_kernel = Kernel(program, kernel.name());
if (status != CL_SUCCESS) { throw OpenCLException("Kernel creation error: ", status); }
for (auto &i: arguments_input_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.index), i.buffer); }
for (auto &i: arguments_output_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.index), i.buffer); }
for (auto &i: arguments_int_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.first), i.second); }
for (auto &i: arguments_size_t_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.first), i.second); }
for (auto &i: arguments_float_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.first), i.second); }
for (auto &i: arguments_double_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.first), i.second); }
for (auto &i: arguments_float2_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.first), i.second); }
for (auto &i: arguments_double2_) { tune_kernel.SetArgument(static_cast<cl_uint>(i.first), i.second); }

// Sets the global and local thread-sizes
auto global = kernel.global();
auto local = kernel.local();

// Verifies the local memory usage of the kernel
auto local_mem_usage = tune_kernel.LocalMemUsage(device_);
if (!device_.IsLocalMemoryValid(local_mem_usage)) {
Expand All @@ -270,22 +275,19 @@ TunerImpl::TunerResult TunerImpl::RunKernel(const std::string &source, const Ker

// Prepares the kernel
status = queue_.Finish();
if (status != CL_SUCCESS) { throw std::runtime_error("Command queue error: " + status); }
if (status != CL_SUCCESS) { throw OpenCLException("Command queue error: ", status); }

// Runs the kernel (this is the timed part)
fprintf(stdout, "%s Running %s\n", kMessageRun.c_str(), kernel.name().c_str());
auto events = std::vector<Event>(kNumRuns);
for (auto t=0; t<kNumRuns; ++t) {
status = queue_.EnqueueKernel(tune_kernel, global, local, events[t]);
if (status != CL_SUCCESS) { throw std::runtime_error("Kernel launch error: " + status); }
if (status != CL_SUCCESS) { throw OpenCLException("Kernel launch error: ", status); }
status = events[t].Wait();
if (status != CL_SUCCESS) {
fprintf(stdout, "%s Kernel %s failed\n", kMessageFailure.c_str(), kernel.name().c_str());
throw std::runtime_error("Kernel error: " + status);
}
if (status != CL_SUCCESS) { throw OpenCLException("Kernel error: ", status); }
}
status = queue_.Finish();
if (status != CL_SUCCESS) { throw std::runtime_error("Command queue error: " + status); }
if (status != CL_SUCCESS) { throw OpenCLException("Command queue error: ", status); }

// Collects the timing information
auto elapsed_time = std::numeric_limits<double>::max();
Expand All @@ -309,6 +311,8 @@ TunerImpl::TunerResult TunerImpl::RunKernel(const std::string &source, const Ker

// There was an exception, now return an invalid tuner results
catch(std::exception& e) {
fprintf(stdout, "%s Kernel %s failed\n", kMessageFailure.c_str(), kernel.name().c_str());
fprintf(stdout, "%s catched exception: %s\n", kMessageFailure.c_str(), e.what());
TunerResult result = {kernel.name(), std::numeric_limits<double>::max(), 0, false, {}};
return result;
}
Expand All @@ -326,7 +330,7 @@ void TunerImpl::ResetMemArgument(MemArgument &argument) {
// Copy the new array to the OpenCL buffer on the device
auto bytes = sizeof(T)*argument.size;
auto status = argument.buffer.WriteBuffer(queue_, bytes, buffer);
if (status != CL_SUCCESS) { throw std::runtime_error("Write buffer error: " + status); }
if (status != CL_SUCCESS) { throw OpenCLException("Write buffer error: ", status); }
}

// =================================================================================================
Expand All @@ -350,7 +354,7 @@ template <typename T> void TunerImpl::DownloadReference(MemArgument &device_buff
auto host_buffer = new T[device_buffer.size];
auto bytes = sizeof(T)*device_buffer.size;
auto status = device_buffer.buffer.ReadBuffer(queue_, bytes, host_buffer);
if (status != CL_SUCCESS) { throw std::runtime_error("Read buffer error: " + status); }
if (status != CL_SUCCESS) { throw OpenCLException("Read buffer error: ", status); }
reference_outputs_.push_back(host_buffer);
}

Expand Down Expand Up @@ -388,7 +392,7 @@ bool TunerImpl::DownloadAndCompare(MemArgument &device_buffer, const size_t i) {
std::vector<T> host_buffer(device_buffer.size);
auto bytes = sizeof(T)*device_buffer.size;
auto status = device_buffer.buffer.ReadBuffer(queue_, bytes, host_buffer);
if (status != CL_SUCCESS) { throw std::runtime_error("Read buffer error: " + status); }
if (status != CL_SUCCESS) { throw OpenCLException("Read buffer error: ", status); }

// Compares the results (L2 norm)
T* reference_output = (T*)reference_outputs_[i];
Expand Down

0 comments on commit 0f21d60

Please sign in to comment.