Skip to content

Commit

Permalink
Merge pull request #24 from CNugteren/reduced_requirements
Browse files Browse the repository at this point in the history
Reduced requirements and warning fixes
  • Loading branch information
CNugteren committed May 28, 2015
2 parents 9edbebc + 12a7f4f commit 039d9ea
Show file tree
Hide file tree
Showing 19 changed files with 95 additions and 66 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@

Version 1.6.4
- Reduced the requirements from GCC 4.8.0 to 4.7.0
- Fixed various warnings on Clang

Version 1.6.3
- Reduced the requirements from GCC 4.9.0 to 4.8.0
- Minor updates to the CMake file
Expand Down
13 changes: 9 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ cmake_minimum_required(VERSION 2.8.10)
project("cltune" CXX)
set(cltune_VERSION_MAJOR 1)
set(cltune_VERSION_MINOR 6)
set(cltune_VERSION_PATCH 3)
set(cltune_VERSION_PATCH 4)

# Options
option(SAMPLES "Enable compilation of sample programs" ON)
Expand All @@ -45,8 +45,8 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH false) # Don't add the automatically deter

# Compiler-version check (requires at least CMake 2.8.10)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
message(FATAL_ERROR "GCC version must be at least 4.8")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7)
message(FATAL_ERROR "GCC version must be at least 4.7")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.3)
Expand Down Expand Up @@ -75,6 +75,9 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
set(FLAGS "${FLAGS} -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-padded")
set(FLAGS "${FLAGS} -Wno-missing-prototypes -Wno-float-equal -Wno-weak-vtables")
set(FLAGS "${FLAGS} -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes")
set(FLAGS "${FLAGS} -Wno-missing-noreturn -Wno-covered-switch-default")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")

Expand Down Expand Up @@ -129,6 +132,9 @@ endif()
# Optional: Enables compilation of the Google tests
if (TESTS)

# The tests use specific flags to reduce the number of warnings from GTest.
set(CMAKE_CXX_FLAGS "-O3 -std=c++11")

# Enables Google Test tests (source-code is shipped with the project)
add_subdirectory(external/gtest-1.7.0)
enable_testing()
Expand All @@ -140,6 +146,5 @@ if (TESTS)

# Adds the tests
add_test(name unit_tests command unit_tests)

endif()
# ==================================================================================================
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ CLTune can be compiled as a shared library using CMake. The pre-requisites are:

* CMake version 2.8.10 or higher
* A C++11 compiler, for example:
- GCC 4.8.0 or newer
- GCC 4.7.0 or newer
- Clang 3.3 or newer
- AppleClang 5.0 or newer
- ICC 14.0 or newer
Expand Down
6 changes: 3 additions & 3 deletions include/internal/clpp11.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ namespace cltune {
class Object {
protected:

// Error handling
[[noreturn]] void Error(const std::string &message) const {
// Error handling (NOTE: these functions always throw and thus never return, but they are not
// marked with the [[noreturn]] attribute)
void Error(const std::string &message) const {
throw std::runtime_error("Internal OpenCL error: "+message);
}
[[noreturn]] void Error(const cl_int status) const {
void Error(const cl_int status) const {
throw std::runtime_error("Internal OpenCL error with status: "+std::to_string(status));
}
};
Expand Down
2 changes: 1 addition & 1 deletion include/internal/kernel_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class KernelInfo {
// Exception of the KernelInfo class
class Exception : public std::runtime_error {
public:
Exception(const std::string &message): std::runtime_error(message) { };
Exception(const std::string &message): std::runtime_error(message) { }
};

// Initializes the class with a given name and a string of OpenCL source-code
Expand Down
5 changes: 3 additions & 2 deletions include/internal/searcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class Searcher {

// Base constructor
Searcher(const Configurations &configurations);
virtual ~Searcher() { }

// Pushes feedback (in the form of execution time) from the tuner to the search algorithm
virtual void PushExecutionTime(const double execution_time);
Expand All @@ -63,10 +64,10 @@ class Searcher {
protected:

// Pseudo-random seed based on the time
unsigned long RandomSeed() const {
unsigned int RandomSeed() const {
// std::random_device rd;
// return rd();
return std::chrono::system_clock::now().time_since_epoch().count();
return static_cast<unsigned int>(std::chrono::system_clock::now().time_since_epoch().count());
}

// Protected member variables accessible by derived classes
Expand Down
5 changes: 3 additions & 2 deletions include/internal/searchers/annealing.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,15 @@ class Annealing: public Searcher {

// Maximum number of successive visits to already visited states. If this number is exceeded, the
// algorithm ends
static constexpr auto kMaxAlreadyVisitedStates = 10;
static constexpr auto kMaxAlreadyVisitedStates = size_t{10};

// Maximum number of differences to consider this still a neighbour
static constexpr auto kMaxDifferences = 3;
static constexpr auto kMaxDifferences = size_t{3};

// Takes additionally a fraction of configurations to consider
Annealing(const Configurations &configurations,
const double fraction, const double max_temperature);
~Annealing() {}

// Retrieves the next configuration to test
virtual KernelInfo::Configuration GetConfiguration() override;
Expand Down
1 change: 1 addition & 0 deletions include/internal/searchers/full_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ namespace cltune {
class FullSearch: public Searcher {
public:
FullSearch(const Configurations &configurations);
~FullSearch() {}

// Retrieves the next configuration to test
virtual KernelInfo::Configuration GetConfiguration() override;
Expand Down
7 changes: 4 additions & 3 deletions include/internal/searchers/pso.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class PSO: public Searcher {
PSO(const Configurations &configurations, const Parameters &parameters,
const double fraction, const size_t swarm_size, const double influence_global,
const double influence_local, const double influence_random);
~PSO() { }

// Retrieves the next configuration to test
virtual KernelInfo::Configuration GetConfiguration() override;
Expand All @@ -77,9 +78,9 @@ class PSO: public Searcher {

// Percentages of influence on the whole swarm's best (global), the particle's best (local), and
// the random values. The remainder fraction is the chance of staying in the current position.
float influence_global_;
float influence_local_;
float influence_random_;
double influence_global_;
double influence_local_;
double influence_random_;

// Locations of the particles in the swarm
size_t particle_index_;
Expand Down
1 change: 1 addition & 0 deletions include/internal/searchers/random_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class RandomSearch: public Searcher {

// Takes additionally a fraction of configurations to try (1.0 == full search)
RandomSearch(const Configurations &configurations, const double fraction);
~RandomSearch() {}

// Retrieves the next configuration to test
virtual KernelInfo::Configuration GetConfiguration() override;
Expand Down
2 changes: 1 addition & 1 deletion include/internal/tuner_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class TunerImpl {

// Parameters
static constexpr auto kMaxL2Norm = 1e-4; // This is the threshold for 'correctness'
static constexpr auto kNumRuns = 1; // This is used for more-accurate execution time measurement
static constexpr auto kNumRuns = size_t{1}; // This is used for more-accurate execution time measurement

// Messages printed to stdout (in colours)
static const std::string kMessageFull;
Expand Down
38 changes: 22 additions & 16 deletions samples/conv/conv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#include <iostream>
#include <sstream>
#include <vector>
#include <chrono>
#include <random>
#include <cmath>
#include <numeric>

Expand All @@ -52,8 +54,8 @@ constexpr auto kDefaultSearchParameter1 = 4;
#define FS (HFS+HFS+1) // Filter size

// Settings (sizes)
constexpr auto kSizeX = 8192; // Matrix dimension X
constexpr auto kSizeY = 4096; // Matrix dimension Y
constexpr auto kSizeX = size_t{8192}; // Matrix dimension X
constexpr auto kSizeY = size_t{4096}; // Matrix dimension Y

// =================================================================================================

Expand Down Expand Up @@ -81,19 +83,23 @@ int main(int argc, char* argv[]) {
auto mat_b = std::vector<float>(kSizeX*kSizeY);
auto coeff = std::vector<float>(FS*FS);

// Populates data structures
srand(time(nullptr));
for (auto &item: mat_a) { item = (float)rand() / (float)RAND_MAX; }
// Create a random number generator
const auto random_seed = std::chrono::system_clock::now().time_since_epoch().count();
std::default_random_engine generator(static_cast<unsigned int>(random_seed));
std::uniform_real_distribution<float> distribution(-2.0f, 2.0f);

// Populates input data structures
for (auto &item: mat_a) { item = distribution(generator); }
for (auto &item: mat_b) { item = 0.0; }

// Creates the filter coefficients (gaussian blur)
auto sigma = 1.0f;
auto mean = FS/2.0f;
auto sum = 0.0f;
for (auto x=0; x<FS; ++x) {
for (auto y=0; y<FS; ++y) {
auto exponent = -0.5 * (pow((x-mean)/sigma, 2.0) + pow((y-mean)/sigma,2.0));
coeff[y*FS + x] = exp(exponent) / (2 * M_PI * sigma * sigma);
for (auto x=size_t{0}; x<FS; ++x) {
for (auto y=size_t{0}; y<FS; ++y) {
auto exponent = -0.5f * (pow((x-mean)/sigma, 2.0f) + pow((y-mean)/sigma, 2.0f));
coeff[y*FS + x] = static_cast<float>(exp(exponent) / (2.0f * M_PI * sigma * sigma));
sum += coeff[y*FS + x];
}
}
Expand All @@ -102,7 +108,7 @@ int main(int argc, char* argv[]) {
// ===============================================================================================

// Initializes the tuner (platform 0, device 'device_id')
cltune::Tuner tuner(0, device_id);
cltune::Tuner tuner(0, static_cast<size_t>(device_id));

// Sets one of the following search methods:
// 0) Random search
Expand All @@ -111,8 +117,8 @@ int main(int argc, char* argv[]) {
// 3) Full search
auto fraction = 1/32.0f;
if (method == 0) { tuner.UseRandomSearch(fraction); }
else if (method == 1) { tuner.UseAnnealing(fraction, search_param_1); }
else if (method == 2) { tuner.UsePSO(fraction, search_param_1, 0.4, 0.0, 0.4); }
else if (method == 1) { tuner.UseAnnealing(fraction, static_cast<size_t>(search_param_1)); }
else if (method == 2) { tuner.UsePSO(fraction, static_cast<size_t>(search_param_1), 0.4, 0.0, 0.4); }
else { tuner.UseFullSearch(); }

// Outputs the search process to a file
Expand Down Expand Up @@ -187,8 +193,8 @@ int main(int argc, char* argv[]) {

// Sets the function's arguments. Note that all kernels have to accept (but not necessarily use)
// all input arguments.
tuner.AddArgumentScalar(kSizeX);
tuner.AddArgumentScalar(kSizeY);
tuner.AddArgumentScalar(static_cast<int>(kSizeX));
tuner.AddArgumentScalar(static_cast<int>(kSizeY));
tuner.AddArgumentInput(mat_a);
tuner.AddArgumentInput(coeff);
tuner.AddArgumentOutput(mat_b);
Expand All @@ -201,8 +207,8 @@ int main(int argc, char* argv[]) {
tuner.PrintToFile("output.csv");

// Also prints the performance of the best-case in terms of GB/s and GFLOPS
constexpr auto kMB = (sizeof(float)*2*(long)kSizeX*(long)kSizeY) / (1.0e6);
constexpr auto kMFLOPS = ((1+2*FS*FS)*(long)kSizeX*(long)kSizeY) / (1.0e6);
constexpr auto kMB = (sizeof(float)*2*kSizeX*kSizeY) * 1.0e-6;
constexpr auto kMFLOPS = ((1+2*FS*FS)*kSizeX*kSizeY) * 1.0e-6;
if (time_ms != 0.0) {
printf("[ -------> ] %.1lf ms or %.1lf GB/s or %1.lf GFLOPS\n",
time_ms, kMB/time_ms, kMFLOPS/time_ms);
Expand Down
34 changes: 20 additions & 14 deletions samples/gemm/gemm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#include <iostream>
#include <sstream>
#include <vector>
#include <chrono>
#include <random>

// Includes the OpenCL tuner library
#include "cltune.h"
Expand All @@ -48,9 +50,9 @@ constexpr auto kDefaultSearchMethod = 1;
constexpr auto kDefaultSearchParameter1 = 4;

// Settings (sizes)
constexpr auto kSizeM = 2048;
constexpr auto kSizeN = 2048;
constexpr auto kSizeK = 2048;
constexpr auto kSizeM = size_t{2048};
constexpr auto kSizeN = size_t{2048};
constexpr auto kSizeK = size_t{2048};

// =================================================================================================

Expand Down Expand Up @@ -78,14 +80,18 @@ int main(int argc, char* argv[]) {
auto mat_b = std::vector<float>(kSizeN*kSizeK);
auto mat_c = std::vector<float>(kSizeM*kSizeN);

// Create a random number generator
const auto random_seed = std::chrono::system_clock::now().time_since_epoch().count();
std::default_random_engine generator(static_cast<unsigned int>(random_seed));
std::uniform_real_distribution<float> distribution(-2.0f, 2.0f);

// Populates input data structures
srand(time(nullptr));
for (auto &item: mat_a) { item = (float)rand() / (float)RAND_MAX; }
for (auto &item: mat_b) { item = (float)rand() / (float)RAND_MAX; }
for (auto &item: mat_a) { item = distribution(generator); }
for (auto &item: mat_b) { item = distribution(generator); }
for (auto &item: mat_c) { item = 0.0; }

// Initializes the tuner (platform 0, device 'device_id')
cltune::Tuner tuner(0, device_id);
cltune::Tuner tuner(0, static_cast<size_t>(device_id));

// Sets one of the following search methods:
// 0) Random search
Expand All @@ -94,8 +100,8 @@ int main(int argc, char* argv[]) {
// 3) Full search
auto fraction = 1/2048.0f;
if (method == 0) { tuner.UseRandomSearch(fraction); }
else if (method == 1) { tuner.UseAnnealing(fraction, search_param_1); }
else if (method == 2) { tuner.UsePSO(fraction, search_param_1, 0.4, 0.0, 0.4); }
else if (method == 1) { tuner.UseAnnealing(fraction, static_cast<size_t>(search_param_1)); }
else if (method == 2) { tuner.UsePSO(fraction, static_cast<size_t>(search_param_1), 0.4, 0.0, 0.4); }
else { tuner.UseFullSearch(); }

// Outputs the search process to a file
Expand Down Expand Up @@ -168,9 +174,9 @@ int main(int argc, char* argv[]) {

// Sets the function's arguments. Note that all kernels have to accept (but not necessarily use)
// all input arguments.
tuner.AddArgumentScalar(kSizeM);
tuner.AddArgumentScalar(kSizeN);
tuner.AddArgumentScalar(kSizeK);
tuner.AddArgumentScalar(static_cast<int>(kSizeM));
tuner.AddArgumentScalar(static_cast<int>(kSizeN));
tuner.AddArgumentScalar(static_cast<int>(kSizeK));
tuner.AddArgumentInput(mat_a);
tuner.AddArgumentInput(mat_b);
tuner.AddArgumentOutput(mat_c);
Expand All @@ -184,9 +190,9 @@ int main(int argc, char* argv[]) {
tuner.PrintFormatted();

// Also prints the performance of the best-case in terms of GFLOPS
constexpr auto kGFLOP = (2*(long)kSizeM*(long)kSizeN*(long)kSizeK) / (1000.0*1000.0*1000.0);
constexpr auto kMGFLOP = (2*kSizeM*kSizeN*kSizeK) * 1.0e-6;
if (time_ms != 0.0) {
printf("[ -------> ] %.1lf ms or %.3lf GFLOPS\n", time_ms, 1000*kGFLOP/time_ms);
printf("[ -------> ] %.1lf ms or %.3lf GFLOPS\n", time_ms, kMGFLOP/time_ms);
}

// End of the tuner example
Expand Down
2 changes: 1 addition & 1 deletion samples/simple/simple.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ int main() {

// Create a random number generator
const auto random_seed = std::chrono::system_clock::now().time_since_epoch().count();
std::default_random_engine generator(random_seed);
std::default_random_engine generator(static_cast<unsigned int>(random_seed));
std::uniform_real_distribution<float> distribution(-2.0f, 2.0f);

// Populates input data structures
Expand Down
2 changes: 1 addition & 1 deletion src/kernel_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ KernelInfo::KernelInfo(const std::string name, const std::string source, const D
parameters_(),
configurations_(),
constraints_(),
local_memory_(LocalMemory{[] (std::vector<size_t> v) { return 0UL; }, std::vector<std::string>(0)}),
local_memory_(LocalMemory{[] (std::vector<size_t> v) { return size_t{0}; }, std::vector<std::string>(0)}),
device_(device),
global_base_(), local_base_(),
global_(), local_(),
Expand Down
Loading

0 comments on commit 039d9ea

Please sign in to comment.