From 5219183297229111e26a7b5023652cba5de13e1e Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 2 Oct 2016 13:38:45 +0200 Subject: [PATCH 1/7] Execution time measurements is no longer based on events but uses CPU timers instead to also include the (varying) kernel launch time overhead and other overheads (if any) --- CHANGELOG | 3 +++ src/tuner_impl.cc | 21 +++++++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 5fcb1a6..3524d74 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,7 @@ +Development version (next release) +- Changed timing measurements to now also include the (varying) kernel launch overhead + Version 2.5.0 - Updated to version 8.0 of the CLCudaAPI header - Made it possible to configure the number of times each kernel is run (to average results) diff --git a/src/tuner_impl.cc b/src/tuner_impl.cc index 58bb171..eb360a8 100644 --- a/src/tuner_impl.cc +++ b/src/tuner_impl.cc @@ -325,25 +325,30 @@ TunerImpl::TunerResult TunerImpl::RunKernel(const std::string &source, const Ker // Prepares the kernel queue_.Finish(); - // Runs the kernel (this is the timed part) + // Multiple runs of the kernel to find the minimum execution time fprintf(stdout, "%s Running %s\n", kMessageRun.c_str(), kernel.name().c_str()); auto events = std::vector(num_runs_); + auto elapsed_time = std::numeric_limits::max(); for (auto t=size_t{0}; t::max(); - for (auto t=size_t{0}; t(cpu_timer).count(); + #ifdef VERBOSE + fprintf(stdout, "%s Completed kernel in %.2lf ms\n", kMessageVerbose.c_str(), cpu_timing); + #endif + elapsed_time = std::min(elapsed_time, cpu_timing); } + queue_.Finish(); // Prints diagnostic information fprintf(stdout, "%s Completed %s (%.1lf ms) - %zu out of %zu\n", From 0ed56a1b0acb6b080aa908aac88b47433548f457 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 2 Oct 2016 13:44:46 +0200 Subject: [PATCH 2/7] It is now possible to set the OpenCL compiler options through an environment variable --- 
CHANGELOG | 1 + README.md | 2 ++ src/tuner_impl.cc | 13 ++++++++++--- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 3524d74..707bb45 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ Development version (next release) - Changed timing measurements to now also include the (varying) kernel launch overhead +- It is now possible to set OpenCL compiler options through the env variable CLTUNE_BUILD_OPTIONS Version 2.5.0 - Updated to version 8.0 of the CLCudaAPI header diff --git a/README.md b/README.md index 0445696..474ad8e 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,8 @@ You can then link your own programs against the CLTune library. An example for a export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/libcltune.so g++ example.cc -o example -L/path/to/libcltune.so -lcltune -lOpenCL +Furthermore, it is possible to optionally set an OS environment variable `CLTUNE_BUILD_OPTIONS` to pass specific build options to the OpenCL compiler at run-time. 
+ Example of using the tuner ------------- diff --git a/src/tuner_impl.cc b/src/tuner_impl.cc index eb360a8..1930c01 100644 --- a/src/tuner_impl.cc +++ b/src/tuner_impl.cc @@ -44,6 +44,7 @@ #include // std::min #include // std::unique_ptr #include // std::tuple +#include // std::getenv namespace cltune { // ================================================================================================= @@ -250,13 +251,19 @@ TunerImpl::TunerResult TunerImpl::RunKernel(const std::string &source, const Ker // In case of an exception, skip this run try { - - // Compiles the kernel and prints the compiler errors/warnings #ifdef VERBOSE fprintf(stdout, "%s Starting compilation\n", kMessageVerbose.c_str()); #endif + + // Sets the build options from an environmental variable (if set) + auto options = std::vector(); + const auto environment_variable = std::getenv("CLTUNE_BUILD_OPTIONS"); + if (environment_variable != nullptr) { + options.push_back(std::string(environment_variable)); + } + + // Compiles the kernel and prints the compiler errors/warnings auto program = Program(context_, source); - auto options = std::vector{}; auto build_status = program.Build(device_, options); if (build_status == BuildStatus::kError) { auto message = program.GetBuildInfo(device_); From 083a5e2380ac73b5a317c9230656dd6ef5668ff1 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Wed, 12 Oct 2016 21:36:20 +0200 Subject: [PATCH 3/7] Added support for compilation under Visual Studio 2013 (MSVC++ 12.0) --- CHANGELOG | 1 + README.md | 2 +- include/internal/kernel_info.h | 19 +++++----- include/internal/msvc.h | 38 ++++++++++++++++++++ include/internal/searchers/annealing.h | 4 +-- include/internal/tuner_impl.h | 3 +- samples/conv/conv.cc | 18 +++++----- samples/conv_simple/conv_simple.cc | 4 +-- samples/gemm/gemm.cc | 16 ++++----- samples/multiple_kernels/multiple_kernels.cc | 4 +-- samples/simple/simple.cc | 2 +- src/searchers/annealing.cc | 7 ++++ src/tuner_impl.cc | 3 ++ 13 files changed, 86 
insertions(+), 35 deletions(-) create mode 100644 include/internal/msvc.h diff --git a/CHANGELOG b/CHANGELOG index 707bb45..c8f5b91 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,7 @@ Development version (next release) - Changed timing measurements to now also include the (varying) kernel launch overhead - It is now possible to set OpenCL compiler options through the env variable CLTUNE_BUILD_OPTIONS +- Added support for compilation under Visual Studio 2013 (MSVC++ 12.0) Version 2.5.0 - Updated to version 8.0 of the CLCudaAPI header diff --git a/README.md b/README.md index 474ad8e..ea27d2d 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ CLTune can be compiled as a shared library using CMake. The pre-requisites are: - Clang 3.3 or newer - AppleClang 5.0 or newer - ICC 14.0 or newer - - MSVC (Visual Studio) 2015 or newer + - MSVC (Visual Studio) 2013 or newer * An OpenCL library. CLTune has been tested with: - Apple OpenCL - NVIDIA CUDA SDK (requires version 7.5 or newer for the CUDA back-end) diff --git a/include/internal/kernel_info.h b/include/internal/kernel_info.h index 90c2fb2..27232f8 100644 --- a/include/internal/kernel_info.h +++ b/include/internal/kernel_info.h @@ -45,6 +45,7 @@ #endif #include "cltune.h" +#include "internal/msvc.h" namespace cltune { // ================================================================================================= @@ -100,7 +101,7 @@ class KernelInfo { }; // Initializes the class with a given name and a string of kernel source-code - explicit KernelInfo(const std::string name, const std::string source, const Device &device); + explicit PUBLIC_API KernelInfo(const std::string name, const std::string source, const Device &device); // Accessors (getters) std::string name() const { return name_; } @@ -117,36 +118,36 @@ class KernelInfo { void set_local_base(IntRange local) { local_base_ = local; local_ = local; } // Prepend to the source-code - void PrependSource(const std::string &extra_source); + void 
PUBLIC_API PrependSource(const std::string &extra_source); // Adds a new parameter with a name and a vector of possible values - void AddParameter(const std::string &name, const std::vector &values); + void PUBLIC_API AddParameter(const std::string &name, const std::vector &values); // Checks wheter a parameter exists, returns "true" if it does exist - bool ParameterExists(const std::string parameter_name); + bool PUBLIC_API ParameterExists(const std::string parameter_name); // Specifies a modifier in the form of a StringRange to the global/local thread-sizes. This // modifier has to contain (per-dimension) the name of a single parameter or an empty string. The // supported modifiers are given by the ThreadSizeModifierType enumeration. - void AddModifier(const StringRange range, const ThreadSizeModifierType type); + void PUBLIC_API AddModifier(const StringRange range, const ThreadSizeModifierType type); // Adds a new constraint to the set of parameters (e.g. must be equal or larger than). The // constraints come in the form of a function object which takes a number of tuning parameters, // given as a vector of strings (parameter names). Their names are later substituted by actual // values. - void AddConstraint(ConstraintFunction valid_if, const std::vector &parameters); + void PUBLIC_API AddConstraint(ConstraintFunction valid_if, const std::vector &parameters); // As above, but for local memory usage - void SetLocalMemoryUsage(LocalMemoryFunction amount, const std::vector &parameters); + void PUBLIC_API SetLocalMemoryUsage(LocalMemoryFunction amount, const std::vector &parameters); // Computes the global/local ranges (in NDRange-form) based on all global/local thread-sizes (in // StringRange-form) and a single permutation (i.e. a configuration) containing a list of all // parameter names and their current values. 
- void ComputeRanges(const Configuration &config); + void PUBLIC_API ComputeRanges(const Configuration &config); // Computes all permutations based on the parameters and their values (the configuration list). // The result is stored as a member variable. - void SetConfigurations(); + void PUBLIC_API SetConfigurations(); private: // Called recursively internally by SetConfigurations diff --git a/include/internal/msvc.h b/include/internal/msvc.h new file mode 100644 index 0000000..bf0fc81 --- /dev/null +++ b/include/internal/msvc.h @@ -0,0 +1,38 @@ + +// ================================================================================================= +// This file is part of the CLTune project, which loosely follows the Google C++ styleguide and uses +// a tab-size of two spaces and a max-width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file provides macros and definitions to make compilation work on Microsoft Visual Studio, +// in particular for versions older than 2015 with limited C++11 support. +// MSVC++ 14.0 _MSC_VER == 1900 (Visual Studio 2015) +// MSVC++ 12.0 _MSC_VER == 1800 (Visual Studio 2013) +// MSVC++ 11.0 _MSC_VER == 1700 (Visual Studio 2012) +// MSVC++ 10.0 _MSC_VER == 1600 (Visual Studio 2010) +// MSVC++ 9.0 _MSC_VER == 1500 (Visual Studio 2008) +// +// ================================================================================================= + +#ifndef CLTUNE_MSVC_H_ +#define CLTUNE_MSVC_H_ + +namespace cltune { +// ================================================================================================= +#ifdef _MSC_VER + +// No support for constexpr prior to 2015. Note that this only works with constants, not with +// constexpr functions (unused in this project). 
+#if _MSC_VER < 1900 +#define constexpr const +#endif + +// _MSC_VER +#endif +// ================================================================================================= +} // namespace cltune + +// CLTUNE_MSVC_H_ +#endif diff --git a/include/internal/searchers/annealing.h b/include/internal/searchers/annealing.h index eb029c3..5924da5 100644 --- a/include/internal/searchers/annealing.h +++ b/include/internal/searchers/annealing.h @@ -42,10 +42,10 @@ class Annealing: public Searcher { // Maximum number of successive visits to already visited states. If this number is exceeded, the // algorithm ends - static constexpr auto kMaxAlreadyVisitedStates = size_t{10}; + static constexpr size_t kMaxAlreadyVisitedStates; // Maximum number of differences to consider this still a neighbour - static constexpr auto kMaxDifferences = size_t{3}; + static constexpr size_t kMaxDifferences; // Takes additionally a fraction of configurations to consider Annealing(const Configurations &configurations, diff --git a/include/internal/tuner_impl.h b/include/internal/tuner_impl.h index b471fd3..dda06b7 100644 --- a/include/internal/tuner_impl.h +++ b/include/internal/tuner_impl.h @@ -40,6 +40,7 @@ #endif #include "internal/kernel_info.h" +#include "internal/msvc.h" // Host data-type for half-precision floating-point (16-bit) #include "internal/half.h" @@ -73,7 +74,7 @@ class TunerImpl { public: // Parameters - static constexpr auto kMaxL2Norm = 1e-4; // This is the threshold for 'correctness' + static constexpr double kMaxL2Norm; // This is the threshold for 'correctness' // Messages printed to stdout (in colours) static const std::string kMessageFull; diff --git a/samples/conv/conv.cc b/samples/conv/conv.cc index a774635..aedb93a 100644 --- a/samples/conv/conv.cc +++ b/samples/conv/conv.cc @@ -45,18 +45,18 @@ bool IsMultiple(size_t a, size_t b) { }; // Constants -constexpr auto kDefaultDevice = size_t{0}; -constexpr auto kDefaultPlatform = size_t{0}; -constexpr auto 
kDefaultSearchMethod = size_t{1}; -constexpr auto kDefaultSearchParameter1 = size_t{4}; +const auto kDefaultDevice = size_t{0}; +const auto kDefaultPlatform = size_t{0}; +const auto kDefaultSearchMethod = size_t{1}; +const auto kDefaultSearchParameter1 = size_t{4}; // Settings (synchronise these with "conv.cc", "conv.opencl" and "conv_reference.opencl") #define HFS (3) // Half filter size #define FS (HFS+HFS+1) // Filter size // Settings (sizes) -constexpr auto kSizeX = size_t{8192}; // Matrix dimension X -constexpr auto kSizeY = size_t{4096}; // Matrix dimension Y +const auto kSizeX = size_t{8192}; // Matrix dimension X +const auto kSizeY = size_t{4096}; // Matrix dimension Y // ================================================================================================= @@ -91,7 +91,7 @@ int main(int argc, char* argv[]) { } // Creates data structures - constexpr auto kExtraSize = size_t{FS*8}; + const auto kExtraSize = size_t{FS*8}; auto mat_a = std::vector((kExtraSize+kSizeX)*(kExtraSize+kSizeY)); auto mat_b = std::vector(kSizeX*kSizeY); auto coeff = std::vector(FS*FS); @@ -230,8 +230,8 @@ int main(int argc, char* argv[]) { tuner.PrintJSON("output.json", {{"sample","convolution"}}); // Also prints the performance of the best-case in terms of GB/s and GFLOPS - constexpr auto kMB = (sizeof(float)*2*kSizeX*kSizeY) * 1.0e-6; - constexpr auto kMFLOPS = ((1+2*FS*FS)*kSizeX*kSizeY) * 1.0e-6; + const auto kMB = (sizeof(float)*2*kSizeX*kSizeY) * 1.0e-6; + const auto kMFLOPS = ((1+2*FS*FS)*kSizeX*kSizeY) * 1.0e-6; if (time_ms != 0.0) { printf("[ -------> ] %.1lf ms or %.1lf GB/s or %1.lf GFLOPS\n", time_ms, kMB/time_ms, kMFLOPS/time_ms); diff --git a/samples/conv_simple/conv_simple.cc b/samples/conv_simple/conv_simple.cc index 4e3f19e..e6980ff 100644 --- a/samples/conv_simple/conv_simple.cc +++ b/samples/conv_simple/conv_simple.cc @@ -37,8 +37,8 @@ int main() { #endif // Input/output sizes - constexpr auto kSizeX = size_t{8192}; // Matrix dimension X - constexpr auto 
kSizeY = size_t{4096}; // Matrix dimension Y + const auto kSizeX = size_t{8192}; // Matrix dimension X + const auto kSizeY = size_t{4096}; // Matrix dimension Y // Creates the input/output matrices and fills them with some example data std::vector mat_a(kSizeX*kSizeY, 2.0f); diff --git a/samples/gemm/gemm.cc b/samples/gemm/gemm.cc index 5a2f111..f2c3c82 100644 --- a/samples/gemm/gemm.cc +++ b/samples/gemm/gemm.cc @@ -45,15 +45,15 @@ bool IsMultiple(size_t a, size_t b) { }; // Constants -constexpr auto kDefaultDevice = size_t{0}; -constexpr auto kDefaultPlatform = size_t{0}; -constexpr auto kDefaultSearchMethod = size_t{1}; -constexpr auto kDefaultSearchParameter1 = size_t{4}; +const auto kDefaultDevice = size_t{0}; +const auto kDefaultPlatform = size_t{0}; +const auto kDefaultSearchMethod = size_t{1}; +const auto kDefaultSearchParameter1 = size_t{4}; // Settings (sizes) -constexpr auto kSizeM = size_t{2048}; -constexpr auto kSizeN = size_t{2048}; -constexpr auto kSizeK = size_t{2048}; +const auto kSizeM = size_t{2048}; +const auto kSizeN = size_t{2048}; +const auto kSizeK = size_t{2048}; // ================================================================================================= @@ -203,7 +203,7 @@ int main(int argc, char* argv[]) { tuner.PrintFormatted(); // Also prints the performance of the best-case in terms of GFLOPS - constexpr auto kMGFLOP = (2*kSizeM*kSizeN*kSizeK) * 1.0e-6; + const auto kMGFLOP = (2*kSizeM*kSizeN*kSizeK) * 1.0e-6; if (time_ms != 0.0) { printf("[ -------> ] %.1lf ms or %.3lf GFLOPS\n", time_ms, kMGFLOP/time_ms); } diff --git a/samples/multiple_kernels/multiple_kernels.cc b/samples/multiple_kernels/multiple_kernels.cc index 6978e34..45730a3 100644 --- a/samples/multiple_kernels/multiple_kernels.cc +++ b/samples/multiple_kernels/multiple_kernels.cc @@ -51,8 +51,8 @@ int main() { #endif // Matrix size - constexpr auto kSizeM = size_t{2048}; - constexpr auto kSizeN = size_t{4096}; + const auto kSizeM = size_t{2048}; + const auto kSizeN 
= size_t{4096}; // Creates data structures std::vector mat_a(kSizeN*kSizeM); // Assumes matrix A is transposed diff --git a/samples/simple/simple.cc b/samples/simple/simple.cc index 0493746..5c957d6 100644 --- a/samples/simple/simple.cc +++ b/samples/simple/simple.cc @@ -28,7 +28,7 @@ int main() { #endif // Vector dimension - constexpr auto kVectorSize = size_t{16*1024*1024}; + const auto kVectorSize = size_t{16*1024*1024}; // Creates the vectors and fills them with some example data std::vector vec_a(kVectorSize, 1.0f); diff --git a/src/searchers/annealing.cc b/src/searchers/annealing.cc index 412cc99..efd9a12 100644 --- a/src/searchers/annealing.cc +++ b/src/searchers/annealing.cc @@ -34,6 +34,13 @@ namespace cltune { // ================================================================================================= +// Maximum number of successive visits to already visited states. If this number is exceeded, the +// algorithm ends +constexpr size_t Annealing::kMaxAlreadyVisitedStates = size_t{10}; + +// Maximum number of differences to consider this still a neighbour +constexpr size_t Annealing::kMaxDifferences = size_t{3}; + // Initializes the simulated annealing searcher by specifying the fraction of the total search space // to consider and the maximum annealing 'temperature'. 
Annealing::Annealing(const Configurations &configurations, diff --git a/src/tuner_impl.cc b/src/tuner_impl.cc index 1930c01..11985dc 100644 --- a/src/tuner_impl.cc +++ b/src/tuner_impl.cc @@ -49,6 +49,9 @@ namespace cltune { // ================================================================================================= +// This is the threshold for 'correctness' +constexpr double TunerImpl::kMaxL2Norm = 1e-4; + // Messages printed to stdout (in colours) const std::string TunerImpl::kMessageFull = "\x1b[32m[==========]\x1b[0m"; const std::string TunerImpl::kMessageHead = "\x1b[32m[----------]\x1b[0m"; From bdbf35340d3e2466e1611991a5a16cfc20362dc1 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Wed, 12 Oct 2016 21:43:01 +0200 Subject: [PATCH 4/7] Fixed a const/constexpr issue caused by the previous commit --- include/internal/searchers/annealing.h | 4 ++-- include/internal/tuner_impl.h | 2 +- src/searchers/annealing.cc | 4 ++-- src/tuner_impl.cc | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/internal/searchers/annealing.h b/include/internal/searchers/annealing.h index 5924da5..36f2808 100644 --- a/include/internal/searchers/annealing.h +++ b/include/internal/searchers/annealing.h @@ -42,10 +42,10 @@ class Annealing: public Searcher { // Maximum number of successive visits to already visited states. 
If this number is exceeded, the // algorithm ends - static constexpr size_t kMaxAlreadyVisitedStates; + static const size_t kMaxAlreadyVisitedStates; // Maximum number of differences to consider this still a neighbour - static constexpr size_t kMaxDifferences; + static const size_t kMaxDifferences; // Takes additionally a fraction of configurations to consider Annealing(const Configurations &configurations, diff --git a/include/internal/tuner_impl.h b/include/internal/tuner_impl.h index dda06b7..8b703a7 100644 --- a/include/internal/tuner_impl.h +++ b/include/internal/tuner_impl.h @@ -74,7 +74,7 @@ class TunerImpl { public: // Parameters - static constexpr double kMaxL2Norm; // This is the threshold for 'correctness' + static const double kMaxL2Norm; // This is the threshold for 'correctness' // Messages printed to stdout (in colours) static const std::string kMessageFull; diff --git a/src/searchers/annealing.cc b/src/searchers/annealing.cc index efd9a12..cdbcad6 100644 --- a/src/searchers/annealing.cc +++ b/src/searchers/annealing.cc @@ -36,10 +36,10 @@ namespace cltune { // Maximum number of successive visits to already visited states. If this number is exceeded, the // algorithm ends -constexpr size_t Annealing::kMaxAlreadyVisitedStates = size_t{10}; +const size_t Annealing::kMaxAlreadyVisitedStates = size_t{10}; // Maximum number of differences to consider this still a neighbour -constexpr size_t Annealing::kMaxDifferences = size_t{3}; +const size_t Annealing::kMaxDifferences = size_t{3}; // Initializes the simulated annealing searcher by specifying the fraction of the total search space // to consider and the maximum annealing 'temperature'. 
diff --git a/src/tuner_impl.cc b/src/tuner_impl.cc index 11985dc..37df3f7 100644 --- a/src/tuner_impl.cc +++ b/src/tuner_impl.cc @@ -50,7 +50,7 @@ namespace cltune { // ================================================================================================= // This is the threshold for 'correctness' -constexpr double TunerImpl::kMaxL2Norm = 1e-4; +const double TunerImpl::kMaxL2Norm = 1e-4; // Messages printed to stdout (in colours) const std::string TunerImpl::kMessageFull = "\x1b[32m[==========]\x1b[0m"; From 73ed6c31121ba997be4791c9fcba4bd96158e5c3 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 22 Oct 2016 16:42:10 +0200 Subject: [PATCH 5/7] Added an option to compile a static library --- CHANGELOG | 1 + CMakeLists.txt | 25 ++++++++++++++++++++++--- include/cltune.h | 8 ++++++-- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c8f5b91..2f460c6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,6 +3,7 @@ Development version (next release) - Changed timing measurements to now also include the (varying) kernel launch overhead - It is now possible to set OpenCL compiler options through the env variable CLTUNE_BUILD_OPTIONS - Added support for compilation under Visual Studio 2013 (MSVC++ 12.0) +- Added an option to build a static version of the library Version 2.5.0 - Updated to version 8.0 of the CLCudaAPI header diff --git a/CMakeLists.txt b/CMakeLists.txt index 14a106a..82cc2c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ # # ================================================================================================== -cmake_minimum_required(VERSION 2.8.10) +cmake_minimum_required(VERSION 2.8.11) # Overrides for MSVC static runtime set(CMAKE_USER_MAKE_RULES_OVERRIDE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/c_flag_overrides.cmake) @@ -35,7 +35,8 @@ set(cltune_VERSION_MAJOR 2) set(cltune_VERSION_MINOR 5) set(cltune_VERSION_PATCH 0) -# Options +# Options and their default values 
+option(BUILD_SHARED_LIBS "Build a shared (ON) or static library (OFF)" ON) option(SAMPLES "Enable compilation of sample programs" ON) option(TESTS "Enable compilation of the Google tests" OFF) @@ -85,6 +86,13 @@ elseif(MSVC) endif() endif() +# DLL Settings +if(MSVC) + if(BUILD_SHARED_LIBS) + add_definitions(" /DCLTUNE_DLL") + endif() +endif(MSVC) + # C++ compiler settings if(MSVC) set(FLAGS "/Ox") @@ -143,9 +151,20 @@ set(TUNER src/ml_models/neural_network.cc) # Creates and links the library -add_library(cltune SHARED ${TUNER}) +if(BUILD_SHARED_LIBS) + add_library(cltune SHARED ${TUNER}) +else(BUILD_SHARED_LIBS) + add_library(cltune STATIC ${TUNER}) +endif() target_link_libraries(cltune ${FRAMEWORK_LIBRARIES}) +# Sets the proper __declspec(dllexport) keyword for Visual Studio when the library is built +if(MSVC) + if(BUILD_SHARED_LIBS) + target_compile_definitions(cltune PRIVATE COMPILING_DLL=1) # requires at least CMake 2.8.11 + endif() +endif() + # Installs the library install(TARGETS cltune DESTINATION lib) install(FILES include/cltune.h DESTINATION include) diff --git a/include/cltune.h b/include/cltune.h index 9837556..8af3dff 100644 --- a/include/cltune.h +++ b/include/cltune.h @@ -37,8 +37,12 @@ // Exports library functions under Windows when building a DLL. 
See also: // https://msdn.microsoft.com/en-us/library/a90k134d.aspx -#ifdef _WIN32 - #define PUBLIC_API __declspec(dllexport) +#if defined(_WIN32) && defined(CLTUNE_DLL) + #if defined(COMPILING_DLL) + #define PUBLIC_API __declspec(dllexport) + #else + #define PUBLIC_API __declspec(dllimport) + #endif #else #define PUBLIC_API #endif From a8c68713ca9c024559545de8b50bf7654710bd31 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 23 Oct 2016 15:29:27 +0200 Subject: [PATCH 6/7] Added support for pkg-config installation on Linux --- CMakeLists.txt | 8 ++++++++ cltune.pc.in | 10 ++++++++++ 2 files changed, 18 insertions(+) create mode 100644 cltune.pc.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 82cc2c8..a54c4d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -169,6 +169,14 @@ endif() install(TARGETS cltune DESTINATION lib) install(FILES include/cltune.h DESTINATION include) +# Install pkg-config file on Linux +if(UNIX) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cltune.pc.in" + "${CMAKE_CURRENT_BINARY_DIR}/cltune.pc" @ONLY IMMEDIATE) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/cltune.pc + DESTINATION lib/pkgconfig) +endif() + # ================================================================================================== # Optional: Enables compilation of sample programs diff --git a/cltune.pc.in b/cltune.pc.in new file mode 100644 index 0000000..41dd746 --- /dev/null +++ b/cltune.pc.in @@ -0,0 +1,10 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} +includedir=${prefix}/include +libdir=${exec_prefix}/lib + +Name: CLTune +Description: CLTune: An automatic OpenCL & CUDA kernel tuner +Version: @cltune_VERSION_MAJOR@.@cltune_VERSION_MINOR@.@cltune_VERSION_PATCH@ +Libs: -L${libdir} -lcltune +Cflags: -I${includedir} From dc1cb0b00e97432c7792e9b409d07574f6a18eb2 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 23 Oct 2016 15:29:58 +0200 Subject: [PATCH 7/7] Updated to version 2.6.0 --- CHANGELOG | 2 +- CMakeLists.txt | 2 +- 2 files 
changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 2f460c6..19c7709 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,5 @@ -Development version (next release) +Version 2.6.0 - Changed timing measurements to now also include the (varying) kernel launch overhead - It is now possible to set OpenCL compiler options through the env variable CLTUNE_BUILD_OPTIONS - Added support for compilation under Visual Studio 2013 (MSVC++ 12.0) diff --git a/CMakeLists.txt b/CMakeLists.txt index a54c4d5..a3c3882 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ set(CMAKE_USER_MAKE_RULES_OVERRIDE_CXX ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cxx_fla # CMake project details project("cltune" CXX) set(cltune_VERSION_MAJOR 2) -set(cltune_VERSION_MINOR 5) +set(cltune_VERSION_MINOR 6) set(cltune_VERSION_PATCH 0) # Options and their default values