Skip to content

Commit

Permalink
Merge pull request #42 from CNugteren/development
Browse files Browse the repository at this point in the history
Update to version 2.4.0
  • Loading branch information
CNugteren authored Jun 29, 2016
2 parents f1b0900 + a001605 commit 86dbb2e
Show file tree
Hide file tree
Showing 12 changed files with 209 additions and 45 deletions.
65 changes: 65 additions & 0 deletions .appveyor.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
environment:
global:
CLTUNE_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\cltune"
OPENCL_REGISTRY: "https://www.khronos.org/registry/cl"
OPENCL_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\opencl"

platform:
- x64
- x86

configuration:
- Release

init:
- cmake --version
- C:\"Program Files (x86)"\"Microsoft Visual Studio 14.0"\VC\vcvarsall.bat %PLATFORM%

# Creates an OpenCL library to link against. Taken from clMathLibraries/clBLAS
install:
- ps: mkdir $env:OPENCL_ROOT
- ps: pushd $env:OPENCL_ROOT
- ps: $opencl_registry = $env:OPENCL_REGISTRY
# This downloads the source to the Khronos ICD library
- git clone --depth 1 https://github.com/KhronosGroup/OpenCL-ICD-Loader.git
- ps: mv ./OpenCL-ICD-Loader/* .
# This downloads all the opencl header files
# The cmake build files expect a directory called inc
- ps: mkdir inc/CL
- git clone --depth 1 https://github.com/KhronosGroup/OpenCL-Headers.git inc/CL
- ps: wget $opencl_registry/api/2.1/cl.hpp -OutFile inc/CL/cl.hpp
# - ps: dir; if( $lastexitcode -eq 0 ){ dir include/CL } else { Write-Output boom }
# Create the static import lib in a directory called lib, so FindOpenCL() will find it
- ps: mkdir lib
- ps: pushd lib
- cmake -G "NMake Makefiles" ..
- nmake
- ps: popd
# Switch to OpenCL 1.2 headers
- ps: pushd inc/CL
- git fetch origin opencl12:opencl12
- git checkout opencl12
- ps: popd
# Rename the inc directory to include, so FindOpenCL() will find it
- ps: ren inc include
- ps: popd

before_build:
- ps: mkdir $env:CLTUNE_ROOT
- ps: pushd $env:CLTUNE_ROOT
- ps: mkdir install_dir
- cmake -G "NMake Makefiles" -DCMAKE_INSTALL_PREFIX=install_dir -DCMAKE_BUILD_TYPE=%CONFIGURATION% -DSAMPLES=ON %APPVEYOR_BUILD_FOLDER%

build_script:
- nmake
- nmake install

after_build:
- ps: pushd $env:CLTUNE_ROOT
- 7z a CLTune-Windows.zip .\install_dir\*
- ps: mv CLTune-Windows.zip $env:APPVEYOR_BUILD_FOLDER

artifacts:
- path: '*.zip'
name: release
type: zip
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
build/
build
.*
8 changes: 6 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ language: cpp
sudo: required
dist: trusty

os:
- linux
- osx

compiler:
- gcc
- clang
Expand All @@ -16,7 +20,7 @@ addons:

env:
global:
- CLTUNE_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release
- CLTUNE_ROOT=${TRAVIS_BUILD_DIR}/bin/cltune
- OPENCL_REGISTRY=https://www.khronos.org/registry/cl
- OPENCL_ROOT=${TRAVIS_BUILD_DIR}/bin/opencl

Expand Down Expand Up @@ -55,7 +59,7 @@ install:
before_script:
- mkdir -p ${CLTUNE_ROOT}
- pushd ${CLTUNE_ROOT}
- cmake -DSAMPLES=ON -DTESTS=ON -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}
- cmake -DOPENCL_ROOT=${OPENCL_ROOT} -DSAMPLES=ON -DTESTS=ON ${TRAVIS_BUILD_DIR}

script:
- make
Expand Down
7 changes: 7 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@

Version 2.4.0
- Made it possible to run the unit-tests independently of the provided OpenCL kernel samples
- Added an option to compile in verbose mode for additional diagnostic messages (-DVERBOSE=ON)
- Now using version 6.0 of the CLCudaAPI header
- Fixed the RPATH settings on OSX
- Added Appveyor continuous integration and increased coverage of the Travis builds

Version 2.3.1
- Fixed a bug where an output buffer could not be used as input at the same time
- Fixed computing the validation error for half-precision fp16 data-types
Expand Down
16 changes: 10 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ set(CMAKE_USER_MAKE_RULES_OVERRIDE_CXX ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cxx_fla
# CMake project details
project("cltune" CXX)
set(cltune_VERSION_MAJOR 2)
set(cltune_VERSION_MINOR 3)
set(cltune_VERSION_PATCH 1)
set(cltune_VERSION_MINOR 4)
set(cltune_VERSION_PATCH 0)

# Options
option(SAMPLES "Enable compilation of sample programs" ON)
Expand All @@ -48,13 +48,17 @@ else()
message("-- Building with CUDA")
endif()

# Compile in verbose mode with additional diagnostic messages
option(VERBOSE "Compile in verbose mode for additional diagnostic messages" OFF)
if(VERBOSE)
message("-- Building in verbose mode")
add_definitions(-DVERBOSE)
endif()

# ==================================================================================================

# RPATH settings
set(CMAKE_SKIP_BUILD_RPATH false) # Use, i.e. don't skip the full RPATH for the build tree
set(CMAKE_BUILD_WITH_INSTALL_RPATH false) # When building, don't use the install RPATH already
set(CMAKE_INSTALL_RPATH "") # The RPATH to be used when installing
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH false) # Don't add the automatically determined parts
set(CMAKE_MACOSX_RPATH 1)

# ==================================================================================================

Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
CLTune: Automatic OpenCL kernel tuning
================

[![Build Status](https://travis-ci.org/CNugteren/CLTune.svg?branch=master)](https://travis-ci.org/CNugteren/CLTune)
| | master | development |
|-----|-----|-----|
| Linux/OS X | [![Build Status](https://travis-ci.org/CNugteren/CLTune.svg?branch=master)](https://travis-ci.org/CNugteren/CLTune/branches) | [![Build Status](https://travis-ci.org/CNugteren/CLTune.svg?branch=development)](https://travis-ci.org/CNugteren/CLTune/branches) |
| Windows | [![Build Status](https://ci.appveyor.com/api/projects/status/github/cnugteren/cltune?branch=master&svg=true)](https://ci.appveyor.com/project/CNugteren/cltune) | [![Build Status](https://ci.appveyor.com/api/projects/status/github/cnugteren/cltune?branch=development&svg=true)](https://ci.appveyor.com/project/CNugteren/cltune) |

CLTune is a C++ library which can be used to automatically tune your OpenCL and CUDA kernels. The only thing you'll need to provide is a tuneable kernel and a list of allowed parameters and values.

Expand Down
51 changes: 40 additions & 11 deletions include/internal/clpp11.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
// Portability here means that a similar header exists for CUDA with the same classes and
// interfaces. In other words, moving from the OpenCL API to the CUDA API becomes a one-line change.
//
// This is version 4.0 of CLCudaAPI <https://github.com/CNugteren/CLCudaAPI>.
// This is version 6.0 of CLCudaAPI <https://github.com/CNugteren/CLCudaAPI>.
//
// =================================================================================================
//
Expand Down Expand Up @@ -214,6 +214,14 @@ class Device {
return true;
}

// Query for a specific type of device or brand
bool IsCPU() const { return Type() == "CPU"; }
bool IsGPU() const { return Type() == "GPU"; }
bool IsAMD() const { return Vendor() == "AMD" || Vendor() == "Advanced Micro Devices, Inc."; }
bool IsNVIDIA() const { return Vendor() == "NVIDIA" || Vendor() == "NVIDIA Corporation"; }
bool IsIntel() const { return Vendor() == "Intel" || Vendor() == "GenuineIntel"; }
bool IsARM() const { return Vendor() == "ARM"; }

// Accessor to the private data-member
const cl_device_id& operator()() const { return device_; }
private:
Expand Down Expand Up @@ -276,10 +284,14 @@ class Context {

// Accessor to the private data-member
const cl_context& operator()() const { return *context_; }
cl_context* pointer() const { return &(*context_); }
private:
std::shared_ptr<cl_context> context_;
};

// Pointer to an OpenCL context
using ContextPointer = cl_context*;

// =================================================================================================

// Enumeration of build statuses of the run-time compilation process
Expand All @@ -290,7 +302,7 @@ class Program {
public:
// Note that there is no constructor based on the regular OpenCL data-type because of extra state

// Regular constructor with memory management
// Source-based constructor with memory management
explicit Program(const Context &context, std::string source):
program_(new cl_program, [](cl_program* p) { CheckError(clReleaseProgram(*p)); delete p; }),
length_(source.length()),
Expand All @@ -301,6 +313,22 @@ class Program {
CheckError(status);
}

// Binary-based constructor with memory management
explicit Program(const Device &device, const Context &context, const std::string& binary):
program_(new cl_program, [](cl_program* p) { CheckError(clReleaseProgram(*p)); delete p; }),
length_(binary.length()),
source_(binary),
source_ptr_(&source_[0]) {
auto status1 = CL_SUCCESS;
auto status2 = CL_SUCCESS;
const cl_device_id dev = device();
*program_ = clCreateProgramWithBinary(context(), 1, &dev, &length_,
reinterpret_cast<const unsigned char**>(&source_ptr_),
&status1, &status2);
CheckError(status1);
CheckError(status2);
}

// Compiles the device program and returns whether or not there were any warnings/errors
BuildStatus Build(const Device &device, std::vector<std::string> &options) {
auto options_string = std::accumulate(options.begin(), options.end(), std::string{" "});
Expand Down Expand Up @@ -329,7 +357,7 @@ class Program {
return result;
}

// Retrieves an intermediate representation of the compiled program
// Retrieves a binary or an intermediate representation of the compiled program
std::string GetIR() const {
auto bytes = size_t{0};
CheckError(clGetProgramInfo(*program_, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &bytes, nullptr));
Expand All @@ -345,7 +373,7 @@ class Program {
private:
std::shared_ptr<cl_program> program_;
size_t length_;
std::string source_;
std::string source_; // Note: the source can also be a binary or IR
const char* source_ptr_;
};

Expand Down Expand Up @@ -627,15 +655,15 @@ class Kernel {

// Launches a kernel onto the specified queue
void Launch(const Queue &queue, const std::vector<size_t> &global,
const std::vector<size_t> &local, Event &event) {
const std::vector<size_t> &local, EventPointer event) {
CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
nullptr, global.data(), local.data(),
0, nullptr, &(event())));
0, nullptr, event));
}

// As above, but with an event waiting list
void Launch(const Queue &queue, const std::vector<size_t> &global,
const std::vector<size_t> &local, Event &event,
const std::vector<size_t> &local, EventPointer event,
std::vector<Event>& waitForEvents) {
if (waitForEvents.size() == 0) { return Launch(queue, global, local, event); }

Expand All @@ -648,15 +676,16 @@ class Kernel {
// Launches the kernel while waiting for other events
CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
nullptr, global.data(), local.data(),
waitForEventsPlain.size(), waitForEventsPlain.data(),
&(event())));
static_cast<cl_uint>(waitForEventsPlain.size()),
waitForEventsPlain.data(),
event));
}

// As above, but with the default local workgroup size
void Launch(const Queue &queue, const std::vector<size_t> &global, Event &event) {
void Launch(const Queue &queue, const std::vector<size_t> &global, EventPointer event) {
CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
nullptr, global.data(), nullptr,
0, nullptr, &(event())));
0, nullptr, event));
}

// Accessor to the private data-member
Expand Down
34 changes: 26 additions & 8 deletions include/internal/cupp11.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
// Portability here means that a similar header exists for OpenCL with the same classes and
// interfaces. In other words, moving from the CUDA API to the OpenCL API becomes a one-line change.
//
// This is version 4.0 of CLCudaAPI <https://github.com/CNugteren/CLCudaAPI>.
// This is version 6.0 of CLCudaAPI <https://github.com/CNugteren/CLCudaAPI>.
//
// =================================================================================================
//
Expand Down Expand Up @@ -106,13 +106,14 @@ class Event {
// Accessors to the private data-members
const CUevent& start() const { return *start_; }
const CUevent& end() const { return *end_; }
Event* pointer() { return this; }
private:
std::shared_ptr<CUevent> start_;
std::shared_ptr<CUevent> end_;
};

// Pointer to a CUDA event
using EventPointer = CUevent*;
using EventPointer = Event*;

// =================================================================================================

Expand Down Expand Up @@ -159,6 +160,11 @@ class Device {
CheckError(cuDriverGetVersion(&result));
return "CUDA driver "+std::to_string(result);
}
size_t VersionNumber() const {
auto result = 0;
CheckError(cuDriverGetVersion(&result));
return static_cast<size_t>(result);
}
std::string Vendor() const { return "NVIDIA Corporation"; }
std::string Name() const {
auto result = std::string{};
Expand Down Expand Up @@ -206,6 +212,14 @@ class Device {
return true;
}

// Query for a specific type of device or brand
bool IsCPU() const { return false; }
bool IsGPU() const { return true; }
bool IsAMD() const { return false; }
bool IsNVIDIA() const { return true; }
bool IsIntel() const { return false; }
bool IsARM() const { return false; }

// Accessor to the private data-member
const CUdevice& operator()() const { return device_; }
private:
Expand Down Expand Up @@ -239,10 +253,14 @@ class Context {

// Accessor to the private data-member
const CUcontext& operator()() const { return *context_; }
CUcontext* pointer() const { return &(*context_); }
private:
std::shared_ptr<CUcontext> context_;
};

// Pointer to a CUDA context
using ContextPointer = CUcontext*;

// =================================================================================================

// Enumeration of build statuses of the run-time compilation process
Expand All @@ -253,7 +271,7 @@ class Program {
public:
// Note that there is no constructor based on the regular CUDA data-type because of extra state

// Regular constructor with memory management
// Source-based constructor with memory management
explicit Program(const Context &, std::string source):
program_(new nvrtcProgram, [](nvrtcProgram* p) { CheckError(nvrtcDestroyProgram(p));
delete p; }),
Expand Down Expand Up @@ -555,7 +573,7 @@ class Kernel {

// Launches a kernel onto the specified queue
void Launch(const Queue &queue, const std::vector<size_t> &global,
const std::vector<size_t> &local, Event &event) {
const std::vector<size_t> &local, EventPointer event) {

// Creates the grid (number of threadblocks) and sets the block sizes (threads per block)
auto grid = std::vector<size_t>{1, 1, 1};
Expand All @@ -571,24 +589,24 @@ class Kernel {
}

// Launches the kernel, its execution time is recorded by events
CheckError(cuEventRecord(event.start(), queue()));
CheckError(cuEventRecord(event->start(), queue()));
CheckError(cuLaunchKernel(kernel_, grid[0], grid[1], grid[2], block[0], block[1], block[2],
0, queue(), pointers.data(), nullptr));
CheckError(cuEventRecord(event.end(), queue()));
CheckError(cuEventRecord(event->end(), queue()));
}

// As above, but with an event waiting list
// TODO: Implement this function
void Launch(const Queue &queue, const std::vector<size_t> &global,
const std::vector<size_t> &local, Event &event,
const std::vector<size_t> &local, EventPointer event,
std::vector<Event>& waitForEvents) {
if (waitForEvents.size() == 0) { return Launch(queue, global, local, event); }
Error("launching with an event waiting list is not implemented for the CUDA back-end");
}

// As above, but with the default local workgroup size
// TODO: Implement this function
void Launch(const Queue &, const std::vector<size_t> &, Event &) {
void Launch(const Queue &, const std::vector<size_t> &, EventPointer) {
Error("launching with a default workgroup size is not implemented for the CUDA back-end");
}

Expand Down
Loading

0 comments on commit 86dbb2e

Please sign in to comment.