diff --git a/CHANGELOG b/CHANGELOG index 26a632a..9b57cae 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,10 @@ +Version 1.3.2 +- Now prints OpenCL version when running on a device +- Added install targets to CMake +- Moved header files around and renamed the main include to "cltune.h" +- Catches OpenCL exceptions and skips those configurations + Version 1.3.1 - Fixed simulated annealing's random number generation - Added new FindOpenCL CMake script diff --git a/CMakeLists.txt b/CMakeLists.txt index edaf736..43ed5b6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,17 +25,28 @@ # CMake project cmake_minimum_required(VERSION 2.8) project("cltune" C CXX) +set(cltune_VERSION_MAJOR 1) +set(cltune_VERSION_MINOR 3) +set(cltune_VERSION_PATCH 2) # Options -option(ENABLE_SAMPLES "Enable compilation of sample programs" ON) -option(ENABLE_TESTS "Enable compilation of the Google tests" OFF) +option(SAMPLES "Enable compilation of sample programs" ON) +option(TESTS "Enable compilation of the Google tests" OFF) + +# ================================================================================================== + +# RPATH settings +set(CMAKE_SKIP_BUILD_RPATH false) # Use, i.e. 
don't skip the full RPATH for the build tree +set(CMAKE_BUILD_WITH_INSTALL_RPATH false) # When building, don't use the install RPATH already +set(CMAKE_INSTALL_RPATH "") # The RPATH to be used when installing +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH false) # Don't add the automatically determined parts # ================================================================================================== # Compiler-version check if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7) - message(FATAL_ERROR "GCC version must be at least 4.7 (for C++11)") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9) + message(FATAL_ERROR "GCC version must be at least 4.9 (for full C++11 compatibility)") endif() endif() @@ -69,13 +80,26 @@ set(TUNER src/searchers/random_search.cc src/searchers/annealing.cc) -# Links the library +# Creates and links the library add_library(cltune SHARED ${TUNER}) target_link_libraries(cltune ${OPENCL_LIBRARIES}) +# Installs the library +install(TARGETS cltune DESTINATION lib) +install(FILES + include/cltune.h + include/cl.hpp + DESTINATION include) +install(FILES + include/cltune/opencl.h + include/cltune/memory.h + include/cltune/string_range.h + include/cltune/kernel_info.h + DESTINATION include/cltune) + # ================================================================================================== -# Optional: Enable compilation of sample programs -if (ENABLE_SAMPLES) +# Optional: Enables compilation of sample programs +if (SAMPLES) # Adds sample programs add_executable(sample_simple samples/simple.cc) @@ -85,10 +109,12 @@ if (ENABLE_SAMPLES) target_link_libraries(sample_gemm cltune ${OPENCL_LIBRARIES} ${OpenMP_LIBRARY}) target_link_libraries(sample_gemm_annealing cltune ${OPENCL_LIBRARIES} ${OpenMP_LIBRARY}) + # Note: these are not installed because they depend on their separate OpenCL kernel files + endif() # 
================================================================================================== -# Optional: Enable compilation of the Google tests -if (ENABLE_TESTS) +# Optional: Enables compilation of the Google tests +if (TESTS) # Enables Google Test tests (source-code is shipped with the project) add_subdirectory(external/gtest-1.7.0) diff --git a/README.md b/README.md index 4a19acc..322c24d 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,11 @@ -CLTune: An automatic OpenCL kernel tuner +CLTune: Automatic OpenCL kernel tuning ================ -CLTune is a C++ library which can be used to automatically tune your OpenCL kernels. How does this -work? The only thing you'll need to provide is a tuneable kernel and a list of allowed parameters -and values. +CLTune is a C++ library which can be used to automatically tune your OpenCL kernels. The only thing you'll need to provide is a tuneable kernel and a list of allowed parameters and values. + +For example, if you would perform loop unrolling or local memory tiling through a pre-processor define, just remove the define from your kernel code, pass the kernel to CLTune and tell it what the name of your parameter(s) are and what values you want to try. CLTune will take care of the rest: it will iterate over all possible permutations, test them, and report the best combination. -For example, if you would perform loop unrolling or local memory tiling through a pre- -processor define, just remove the define from your kernel code, pass the kernel to CLTune and tell -it what the name of your parameter(s) are and what values you want to try. CLTune will take care of -the rest: it will iterate over all possible permutations, test them, and report the best -combination. Compilation ------------- @@ -18,55 +13,54 @@ Compilation CLTune can be compiled as a shared library using CMake. 
The pre-requisites are: * CMake version 2.8 or higher -* A C++11 compiler [_tested with icc, gcc, and clang_] -* An OpenCL library [_tested with the Apple OpenCL framework, the NVIDIA CUDA SDK, and the AMD APP - SDK_] +* A C++11 compiler, for example: + - GCC 4.9.0 or newer + - Clang 3.3 or newer + - ICC 14.0 or newer +* An OpenCL library. CLTune has been tested with: + - Apple OpenCL + - NVIDIA CUDA SDK + - AMD APP SDK -An example of an out-of-source build follows (starting from the root of the cltune folder): +An example of an out-of-source build (starting from the root of the cltune folder): mkdir build cd build cmake .. make + sudo make install + +A custom installation folder can be specified when calling CMake: + + cmake -DCMAKE_INSTALL_PREFIX=/path/to/install/directory .. -You can then link your own programs against the CLTune library. An example for a Linux-system -follows: +You can then link your own programs against the CLTune library. An example for a Linux-system: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/libcltune.so g++ example.cc -o example -L/path/to/libcltune.so -lcltune -lOpenCL + Example of using the tuner ------------- -Before we start using the tuner, we'll have to create one. The constructor takes two arguments: -the first specifying the OpenCL platform number, and the second the device ID on that platform: +Before we start using the tuner, we'll have to create one. The constructor takes two arguments: the first specifying the OpenCL platform number, and the second the device ID on that platform: cltune::Tuner my_tuner(0, 1); // Tuner on device 1 of OpenCL platform 0 -Now that we have a tuner, we can add a tuning kernel. This is done by providing the path to an -OpenCL kernel (first argument), the name of the kernel (second argument), a list of global thread -dimensions (third argument), and a list of local thread or workgroup dimensions (fourth argument). -Here is an example: +Now that we have a tuner, we can add a tuning kernel. 
This is done by providing the path to an OpenCL kernel (first argument), the name of the kernel (second argument), a list of global thread dimensions (third argument), and a list of local thread or workgroup dimensions (fourth argument). Here is an example: auto id = my_tuner.AddKernel("path/to/kernel.opencl", "my_kernel", {1024,512}, {16,8}); -Notice that the AddKernel function returns an integer: it is the ID of the added kernel. We'll need -this ID when we want to add tuning parameters to this kernel. Let's say that our kernel has two -pre-processor parameters named `PARAM_1` and `PARAM_2`: +Notice that the AddKernel function returns an integer: it is the ID of the added kernel. We'll need this ID when we want to add tuning parameters to this kernel. Let's say that our kernel has two pre-processor parameters named `PARAM_1` and `PARAM_2`: my_tuner.AddParameter(id, "PARAM_1", {16, 24}); my_tuner.AddParameter(id, "PARAM_2", {0, 1, 2, 3, 4}); -Now that we've added a kernel and its parameters, we can add another one if we wish. When we're -done, there are a couple of things left to be done. Let's start with adding an reference kernel. -This reference kernel can provide the tuner with the ground-truth and is optional - only when it is -provided will the tuner perform verification checks to ensure correctness. +Now that we've added a kernel and its parameters, we can add another one if we wish. When we're done, there are a couple of things left to be done. Let's start with adding a reference kernel. This reference kernel can provide the tuner with the ground-truth and is optional - only when it is provided will the tuner perform verification checks to ensure correctness. my_tuner.SetReference("path/to/reference.opencl", "my_reference", {8192}, {128}); -The tuner also needs to know which arguments the kernels take. Scalar arguments can be provided -as-is and are passed-by-value, whereas arrays have to be provided as C++ `std::vector`s. 
That's -right, we won't have to create OpenCL buffers, CLTune will handle that for us! Here is an example: +The tuner also needs to know which arguments the kernels take. Scalar arguments can be provided as-is and are passed-by-value, whereas arrays have to be provided as C++ `std::vector`s. That's right, we won't have to create OpenCL buffers, CLTune will handle that for us! Here is an example: auto my_variable = 900; std::vector input_vector(8192); @@ -81,17 +75,17 @@ Now that we've configured the tuner, it is time to start it and ask it to report my_tuner.Tune(); // Starts the tuner my_tuner.PrintToScreen(); // Prints the results + Other examples ------------- -Two examples are included as part of the CLTune distribution. They illustrate some more advanced -features, such as modifying the thread dimensions based on the parameters and adding user-defined -parameter constraints. The examples are compiled when providing `-ENABLE_SAMPLES=ON` to CMake -(default option). The two included examples are: +Examples are included as part of the CLTune distribution. They illustrate some more advanced features, such as modifying the thread dimensions based on the parameters and adding user-defined parameter constraints. The examples are compiled when providing `-DSAMPLES=ON` to CMake (default option). The included examples are: * `simple.cc` providing a basic example of matrix-vector multiplication * `gemm.cc` providing a more advanced and heavily tuned implementation of matrix-matrix multiplication or SGEMM +* `gemm_annealing.cc` demonstrating an alternative search technique: simulated annealing + Development and tests ------------- @@ -104,7 +98,7 @@ licensed under the MIT license by SURFsara, (c) 2014. The contributing authors s * Cedric Nugteren CLTune is packaged with Google Test 1.7.0 and a custom test suite. The tests will be compiled when -providing the `-DENABLE_TESTS=ON` option to CMake. 
Running the tests goes as follows: +providing the `-DTESTS=ON` option to CMake. Running the tests goes as follows: ./unit_tests diff --git a/include/tuner/tuner.h b/include/cltune.h similarity index 95% rename from include/tuner/tuner.h rename to include/cltune.h index 4dd83af..d1a0509 100644 --- a/include/tuner/tuner.h +++ b/include/cltune.h @@ -28,8 +28,8 @@ // // ================================================================================================= -#ifndef CLTUNE_TUNER_TUNER_H_ -#define CLTUNE_TUNER_TUNER_H_ +#ifndef CLTUNE_CLTUNE_H_ +#define CLTUNE_CLTUNE_H_ #include #include @@ -37,14 +37,8 @@ #include #include -// Include other classes -#include "tuner/internal/memory.h" -#include "tuner/internal/opencl.h" -#include "tuner/internal/kernel_info.h" -#include "tuner/internal/string_range.h" -#include "tuner/internal/searchers/full_search.h" -#include "tuner/internal/searchers/random_search.h" -#include "tuner/internal/searchers/annealing.h" +#include "cltune/memory.h" +#include "cltune/kernel_info.h" namespace cltune { // ================================================================================================= @@ -218,5 +212,5 @@ class Tuner { // ================================================================================================= } // namespace cltune -// CLTUNE_TUNER_TUNER_H_ +// CLTUNE_CLTUNE_H_ #endif diff --git a/include/tuner/internal/kernel_info.h b/include/cltune/kernel_info.h similarity index 96% rename from include/tuner/internal/kernel_info.h rename to include/cltune/kernel_info.h index 56697b5..19142d3 100644 --- a/include/tuner/internal/kernel_info.h +++ b/include/cltune/kernel_info.h @@ -28,8 +28,8 @@ // // ================================================================================================= -#ifndef CLBLAS_TUNER_KERNEL_INFO_H_ -#define CLBLAS_TUNER_KERNEL_INFO_H_ +#ifndef CLTUNE_KERNEL_INFO_H_ +#define CLTUNE_KERNEL_INFO_H_ #include #include @@ -37,11 +37,8 @@ #include #include -// The C++ OpenCL wrapper 
#include "cl.hpp" - -// Include other classes and structures -#include "tuner/internal/string_range.h" +#include "cltune/string_range.h" namespace cltune { // ================================================================================================= @@ -162,5 +159,5 @@ class KernelInfo { // ================================================================================================= } // namespace cltune -// CLBLAS_TUNER_KERNEL_INFO_H_ +// CLTUNE_KERNEL_INFO_H_ #endif diff --git a/include/tuner/internal/memory.h b/include/cltune/memory.h similarity index 94% rename from include/tuner/internal/memory.h rename to include/cltune/memory.h index 599dbb1..7d4023c 100644 --- a/include/tuner/internal/memory.h +++ b/include/cltune/memory.h @@ -27,18 +27,15 @@ // // ================================================================================================= -#ifndef CLBLAS_TUNER_MEMORY_H_ -#define CLBLAS_TUNER_MEMORY_H_ +#ifndef CLTUNE_MEMORY_H_ +#define CLTUNE_MEMORY_H_ #include #include #include #include -// The C++ OpenCL wrapper -#include "tuner/internal/opencl.h" - -#include "cl.hpp" +#include "cltune/opencl.h" namespace cltune { // ================================================================================================= @@ -82,5 +79,5 @@ class Memory { // ================================================================================================= } // namespace cltune -// CLBLAS_TUNER_MEMORY_H_ +// CLTUNE_MEMORY_H_ #endif diff --git a/include/tuner/internal/opencl.h b/include/cltune/opencl.h similarity index 95% rename from include/tuner/internal/opencl.h rename to include/cltune/opencl.h index 60d574d..13e3dc4 100644 --- a/include/tuner/internal/opencl.h +++ b/include/cltune/opencl.h @@ -27,14 +27,13 @@ // // ================================================================================================= -#ifndef CLBLAS_TUNER_OPENCL_H_ -#define CLBLAS_TUNER_OPENCL_H_ +#ifndef CLTUNE_OPENCL_H_ +#define CLTUNE_OPENCL_H_ #include #include 
#include -// The C++ OpenCL wrapper #include "cl.hpp" namespace cltune { @@ -43,6 +42,12 @@ namespace cltune { // See comment at top of file for a description of the class class OpenCL { public: + + // Messages printed to stdout (in colours) + static const std::string kMessageFull; + + // Types of devices to consider + const cl_device_type kDeviceType = CL_DEVICE_TYPE_ALL; // Converts an unsigned integer to a string by first casting it to a long long integer. This is // required for older compilers that do not fully implement std::to_string (part of C++11). @@ -58,9 +63,6 @@ class OpenCL { }; }; - // Types of devices to consider - const cl_device_type kDeviceType = CL_DEVICE_TYPE_ALL; - // Initializes the OpenCL platform, device, and creates a context and a queue explicit OpenCL(const size_t platform_id, const size_t device_id); @@ -97,5 +99,5 @@ class OpenCL { // ================================================================================================= } // namespace cltune -// CLBLAS_TUNER_OPENCL_H_ +// CLTUNE_OPENCL_H_ #endif diff --git a/include/tuner/internal/searcher.h b/include/cltune/searcher.h similarity index 95% rename from include/tuner/internal/searcher.h rename to include/cltune/searcher.h index f7baac8..07d6a72 100644 --- a/include/tuner/internal/searcher.h +++ b/include/cltune/searcher.h @@ -28,12 +28,12 @@ // // ================================================================================================= -#ifndef CLBLAS_TUNER_SEARCHER_H_ -#define CLBLAS_TUNER_SEARCHER_H_ +#ifndef CLTUNE_SEARCHER_H_ +#define CLTUNE_SEARCHER_H_ #include -#include "tuner/internal/kernel_info.h" +#include "cltune/kernel_info.h" namespace cltune { // ================================================================================================= @@ -71,5 +71,5 @@ class Searcher { // ================================================================================================= } // namespace cltune -// CLBLAS_TUNER_SEARCHER_H_ +// CLTUNE_SEARCHER_H_ #endif 
diff --git a/include/tuner/internal/searchers/annealing.h b/include/cltune/searchers/annealing.h similarity index 95% rename from include/tuner/internal/searchers/annealing.h rename to include/cltune/searchers/annealing.h index b7a72f2..a68787e 100644 --- a/include/tuner/internal/searchers/annealing.h +++ b/include/cltune/searchers/annealing.h @@ -25,13 +25,13 @@ // // ================================================================================================= -#ifndef CLBLAS_TUNER_SEARCHERS_ANNEALING_H_ -#define CLBLAS_TUNER_SEARCHERS_ANNEALING_H_ +#ifndef CLTUNE_SEARCHERS_ANNEALING_H_ +#define CLTUNE_SEARCHERS_ANNEALING_H_ #include #include -#include "tuner/internal/searcher.h" +#include "cltune/searcher.h" namespace cltune { // ================================================================================================= @@ -91,5 +91,5 @@ class Annealing: public Searcher { // ================================================================================================= } // namespace cltune -// CLBLAS_TUNER_SEARCHERS_ANNEALING_H_ +// CLTUNE_SEARCHERS_ANNEALING_H_ #endif diff --git a/include/tuner/internal/searchers/full_search.h b/include/cltune/searchers/full_search.h similarity index 92% rename from include/tuner/internal/searchers/full_search.h rename to include/cltune/searchers/full_search.h index fcad094..6aa5899 100644 --- a/include/tuner/internal/searchers/full_search.h +++ b/include/cltune/searchers/full_search.h @@ -26,12 +26,12 @@ // // ================================================================================================= -#ifndef CLBLAS_TUNER_SEARCHERS_FULL_SEARCH_H_ -#define CLBLAS_TUNER_SEARCHERS_FULL_SEARCH_H_ +#ifndef CLTUNE_SEARCHERS_FULL_SEARCH_H_ +#define CLTUNE_SEARCHERS_FULL_SEARCH_H_ #include -#include "tuner/internal/searcher.h" +#include "cltune/searcher.h" namespace cltune { // ================================================================================================= @@ -56,5 +56,5 @@ class FullSearch: public 
Searcher { // ================================================================================================= } // namespace cltune -// CLBLAS_TUNER_SEARCHERS_FULL_SEARCH_H_ +// CLTUNE_SEARCHERS_FULL_SEARCH_H_ #endif diff --git a/include/tuner/internal/searchers/random_search.h b/include/cltune/searchers/random_search.h similarity index 92% rename from include/tuner/internal/searchers/random_search.h rename to include/cltune/searchers/random_search.h index 7153bed..c48d3cf 100644 --- a/include/tuner/internal/searchers/random_search.h +++ b/include/cltune/searchers/random_search.h @@ -26,12 +26,12 @@ // // ================================================================================================= -#ifndef CLBLAS_TUNER_SEARCHERS_RANDOM_SEARCH_H_ -#define CLBLAS_TUNER_SEARCHERS_RANDOM_SEARCH_H_ +#ifndef CLTUNE_SEARCHERS_RANDOM_SEARCH_H_ +#define CLTUNE_SEARCHERS_RANDOM_SEARCH_H_ #include -#include "tuner/internal/searcher.h" +#include "cltune/searcher.h" namespace cltune { // ================================================================================================= @@ -59,5 +59,5 @@ class RandomSearch: public Searcher { // ================================================================================================= } // namespace cltune -// CLBLAS_TUNER_SEARCHERS_RANDOM_SEARCH_H_ +// CLTUNE_SEARCHERS_RANDOM_SEARCH_H_ #endif diff --git a/include/tuner/internal/string_range.h b/include/cltune/string_range.h similarity index 95% rename from include/tuner/internal/string_range.h rename to include/cltune/string_range.h index 1f5fcac..9ba0bf6 100644 --- a/include/tuner/internal/string_range.h +++ b/include/cltune/string_range.h @@ -26,8 +26,8 @@ // // ================================================================================================= -#ifndef CLBLAS_TUNER_STRING_RANGE_H_ -#define CLBLAS_TUNER_STRING_RANGE_H_ +#ifndef CLTUNE_STRING_RANGE_H_ +#define CLTUNE_STRING_RANGE_H_ #include #include @@ -38,6 +38,7 @@ namespace cltune { // See comment 
at top of file for a description of the class class StringRange { public: + // Initializes the class with 0, 1, 2, or 3 dimensions. These constructors are not explicit // because they are used by clients in the form of initializer lists when for example calling // cltuner::MulGlobalSize. @@ -58,5 +59,5 @@ class StringRange { // ================================================================================================= } // namespace cltune -// CLBLAS_TUNER_STRING_RANGE_H_ +// CLTUNE_STRING_RANGE_H_ #endif diff --git a/samples/gemm.cc b/samples/gemm.cc index 6ac85db..a18991e 100644 --- a/samples/gemm.cc +++ b/samples/gemm.cc @@ -32,7 +32,7 @@ #include // Includes the OpenCL tuner library -#include "tuner/tuner.h" +#include "cltune.h" // Helper function to determine whether or not 'a' is a multiple of 'b' bool IsMultiple(int a, int b) { diff --git a/samples/gemm_annealing.cc b/samples/gemm_annealing.cc index 7cecf51..f7daac1 100644 --- a/samples/gemm_annealing.cc +++ b/samples/gemm_annealing.cc @@ -34,7 +34,7 @@ #include // Includes the OpenCL tuner library -#include "tuner/tuner.h" +#include "cltune.h" // Helper function to determine whether or not 'a' is a multiple of 'b' bool IsMultiple(int a, int b) { diff --git a/samples/simple.cc b/samples/simple.cc index c5362fb..e76a5a2 100644 --- a/samples/simple.cc +++ b/samples/simple.cc @@ -30,7 +30,7 @@ #include // Includes the OpenCL tuner library -#include "tuner/tuner.h" +#include "cltune.h" // ================================================================================================= diff --git a/src/kernel_info.cc b/src/kernel_info.cc index d1ab732..4c23f76 100644 --- a/src/kernel_info.cc +++ b/src/kernel_info.cc @@ -25,7 +25,7 @@ // // ================================================================================================= -#include "tuner/internal/kernel_info.h" +#include "cltune/kernel_info.h" #include diff --git a/src/memory.cc b/src/memory.cc index ebd61a8..6cbd56d 100644 --- 
a/src/memory.cc +++ b/src/memory.cc @@ -25,7 +25,7 @@ // // ================================================================================================= -#include "tuner/internal/memory.h" +#include "cltune/memory.h" namespace cltune { // ================================================================================================= diff --git a/src/opencl.cc b/src/opencl.cc index ea5f43a..5dfe320 100644 --- a/src/opencl.cc +++ b/src/opencl.cc @@ -25,14 +25,16 @@ // // ================================================================================================= -#include "tuner/internal/opencl.h" - -// Include other classes -#include "tuner/tuner.h" +#include "cltune/opencl.h" namespace cltune { // ================================================================================================= +// Messages printed to stdout (in colours) +const std::string OpenCL::kMessageFull = "\x1b[32m[==========]\x1b[0m"; + +// ================================================================================================= + // Gets a list of all platforms/devices and chooses the selected ones. Initializes OpenCL and also // downloads properties of the device for later use. 
OpenCL::OpenCL(const size_t platform_id, const size_t device_id): @@ -41,7 +43,7 @@ OpenCL::OpenCL(const size_t platform_id, const size_t device_id): // Starting on a new platform/device if (!suppress_output_) { fprintf(stdout, "\n%s Initializing OpenCL on platform %lu device %lu\n", - Tuner::kMessageFull.c_str(), platform_id, device_id); + kMessageFull.c_str(), platform_id, device_id); } // Initializes the OpenCL platform @@ -70,16 +72,18 @@ OpenCL::OpenCL(const size_t platform_id, const size_t device_id): context_ = cl::Context({device_}); queue_ = cl::CommandQueue(context_, device_, CL_QUEUE_PROFILING_ENABLE); - // Gets device properties - device_name_ = device_.getInfo(); - max_local_dims_ = device_.getInfo(); - max_local_threads_ = device_.getInfo(); - max_local_sizes_ = device_.getInfo(); - local_memory_size_ = device_.getInfo(); + // Gets platform and device properties + auto opencl_version = device_.getInfo(); + device_name_ = device_.getInfo(); + max_local_dims_ = device_.getInfo(); + max_local_threads_ = device_.getInfo(); + max_local_sizes_ = device_.getInfo(); + local_memory_size_ = device_.getInfo(); // Prints the device name if (!suppress_output_) { - fprintf(stdout, "%s Device name: '%s'\n", Tuner::kMessageFull.c_str(), device_name_.c_str()); + fprintf(stdout, "%s Device name: '%s' (%s)\n", kMessageFull.c_str(), + device_name_.c_str(), opencl_version.c_str()); } } diff --git a/src/searcher.cc b/src/searcher.cc index 1d3cb00..12b07f1 100644 --- a/src/searcher.cc +++ b/src/searcher.cc @@ -25,7 +25,7 @@ // // ================================================================================================= -#include "tuner/internal/searcher.h" +#include "cltune/searcher.h" #include diff --git a/src/searchers/annealing.cc b/src/searchers/annealing.cc index 3722473..2f9e30e 100644 --- a/src/searchers/annealing.cc +++ b/src/searchers/annealing.cc @@ -25,7 +25,7 @@ // // 
================================================================================================= -#include "tuner/internal/searchers/annealing.h" +#include "cltune/searchers/annealing.h" #include #include diff --git a/src/searchers/full_search.cc b/src/searchers/full_search.cc index c48831d..4eb0ce5 100644 --- a/src/searchers/full_search.cc +++ b/src/searchers/full_search.cc @@ -25,7 +25,7 @@ // // ================================================================================================= -#include "tuner/internal/searchers/full_search.h" +#include "cltune/searchers/full_search.h" namespace cltune { // ================================================================================================= diff --git a/src/searchers/random_search.cc b/src/searchers/random_search.cc index 4202ddc..3f15cd1 100644 --- a/src/searchers/random_search.cc +++ b/src/searchers/random_search.cc @@ -25,7 +25,7 @@ // // ================================================================================================= -#include "tuner/internal/searchers/random_search.h" +#include "cltune/searchers/random_search.h" #include diff --git a/src/string_range.cc b/src/string_range.cc index 22cbcdd..cf91ff1 100644 --- a/src/string_range.cc +++ b/src/string_range.cc @@ -25,7 +25,7 @@ // // ================================================================================================= -#include "tuner/internal/string_range.h" +#include "cltune/string_range.h" #include diff --git a/src/tuner.cc b/src/tuner.cc index aa45aff..19eb757 100644 --- a/src/tuner.cc +++ b/src/tuner.cc @@ -25,7 +25,7 @@ // // ================================================================================================= -#include "tuner/tuner.h" +#include "cltune.h" #include #include @@ -35,6 +35,10 @@ #include #include +#include "cltune/searchers/full_search.h" +#include "cltune/searchers/random_search.h" +#include "cltune/searchers/annealing.h" + namespace cltune { // 
================================================================================================= @@ -470,49 +474,59 @@ Tuner::TunerResult Tuner::RunKernel(const std::string &source, const KernelInfo auto global = kernel.global(); auto local = kernel.local(); - // Verifies the global/local thread-sizes against device properties - auto local_threads = opencl_->VerifyThreadSizes(global, local); - - // Obtains and verifies the local memory usage of the kernel - auto local_memory = static_cast(0); - status = tune_kernel.getWorkGroupInfo(opencl_->device(), CL_KERNEL_LOCAL_MEM_SIZE, &local_memory); - if (status != CL_SUCCESS) { throw OpenCL::Exception("Get kernel information error", status); } - opencl_->VerifyLocalMemory(local_memory); - - // Prepares the kernel - status = opencl_->queue().finish(); - if (status != CL_SUCCESS) { throw OpenCL::Exception("Command queue error", status); } - - // Runs the kernel (this is the timed part) - fprintf(stdout, "%s Running %s\n", kMessageRun.c_str(), kernel.name().c_str()); - std::vector events(kNumRuns); - for (auto t=0; tqueue().enqueueNDRangeKernel(tune_kernel, cl::NullRange, global, local, NULL, &events[t]); - if (status != CL_SUCCESS) { throw OpenCL::Exception("Kernel launch error", status); } - status = events[t].wait(); - if (status != CL_SUCCESS) { - fprintf(stdout, "%s Kernel %s failed\n", kMessageFailure.c_str(), kernel.name().c_str()); - throw OpenCL::Exception("Kernel error", status); + // In case of an exception, skip this run + try { + + // Verifies the global/local thread-sizes against device properties + auto local_threads = opencl_->VerifyThreadSizes(global, local); + + // Obtains and verifies the local memory usage of the kernel + auto local_memory = static_cast(0); + status = tune_kernel.getWorkGroupInfo(opencl_->device(), CL_KERNEL_LOCAL_MEM_SIZE, &local_memory); + if (status != CL_SUCCESS) { throw OpenCL::Exception("Get kernel information error", status); } + opencl_->VerifyLocalMemory(local_memory); + + // 
Prepares the kernel + status = opencl_->queue().finish(); + if (status != CL_SUCCESS) { throw OpenCL::Exception("Command queue error", status); } + + // Runs the kernel (this is the timed part) + fprintf(stdout, "%s Running %s\n", kMessageRun.c_str(), kernel.name().c_str()); + std::vector events(kNumRuns); + for (auto t=0; tqueue().enqueueNDRangeKernel(tune_kernel, cl::NullRange, global, local, NULL, &events[t]); + if (status != CL_SUCCESS) { throw OpenCL::Exception("Kernel launch error", status); } + status = events[t].wait(); + if (status != CL_SUCCESS) { + fprintf(stdout, "%s Kernel %s failed\n", kMessageFailure.c_str(), kernel.name().c_str()); + throw OpenCL::Exception("Kernel error", status); + } } - } - opencl_->queue().finish(); - - // Collects the timing information - auto elapsed_time = std::numeric_limits::max(); - for (auto t=0; t(&status); - auto end_time = events[t].getProfilingInfo(&status); - elapsed_time = std::min(elapsed_time, (end_time - start_time) / (1000.0 * 1000.0)); - } + opencl_->queue().finish(); + + // Collects the timing information + auto elapsed_time = std::numeric_limits::max(); + for (auto t=0; t(&status); + auto end_time = events[t].getProfilingInfo(&status); + elapsed_time = std::min(elapsed_time, (end_time - start_time) / (1000.0 * 1000.0)); + } + + // Prints diagnostic information + fprintf(stdout, "%s Completed %s (%.0lf ms) - %lu out of %lu\n", + kMessageOK.c_str(), kernel.name().c_str(), elapsed_time, + configuration_id+1, num_configurations); - // Prints diagnostic information - fprintf(stdout, "%s Completed %s (%.0lf ms) - %lu out of %lu\n", - kMessageOK.c_str(), kernel.name().c_str(), elapsed_time, - configuration_id+1, num_configurations); + // Computes the result of the tuning + TunerResult result = {kernel.name(), elapsed_time, local_threads, false, {}}; + return result; + } - // Computes the result of the tuning - TunerResult result = {kernel.name(), elapsed_time, local_threads, false, {}}; - return result; + // There 
was an exception, now return an invalid tuner results + catch(std::exception& e) { + TunerResult result = {kernel.name(), std::numeric_limits::max(), 0, false, {}}; + return result; + } } // ================================================================================================= diff --git a/test/kernel_info.cc b/test/kernel_info.cc index 2fde325..4999e7b 100644 --- a/test/kernel_info.cc +++ b/test/kernel_info.cc @@ -25,7 +25,7 @@ // // ================================================================================================= -#include "tuner/internal/kernel_info.h" +#include "cltune/kernel_info.h" #include diff --git a/test/tuner.cc b/test/tuner.cc index 787a052..f2152c5 100644 --- a/test/tuner.cc +++ b/test/tuner.cc @@ -25,7 +25,7 @@ // // ================================================================================================= -#include "tuner/tuner.h" +#include "cltune.h" #include "gtest/gtest.h"