Skip to content

Commit

Permalink
arm_compute v18.08
Browse files Browse the repository at this point in the history
  • Loading branch information
Jenkins authored and mdigiorgio committed Aug 30, 2018
1 parent e2542c9 commit 52ba29e
Show file tree
Hide file tree
Showing 7,385 changed files with 238,553 additions and 206,814 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2017 ARM Software
Copyright (c) 2017-2018 ARM Software

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@

:warning: **Deprecation notice: QS8 and QS16 data types will be removed in the next release** (As far as we know nobody uses these data types, if you do or think they are useful please open an Issue or send us an email):warning:

Please report issues here: https://github.com/ARM-software/ComputeLibrary/issues
**Make sure you are using the latest version of the library before opening an issue. Thanks**

News:

- We're hiring: Senior Machine Learning C++ Software Engineer in Cambridge (UK)
- We're hiring: Staff Machine Learning C++ Software Engineer in Cambridge (UK)
- Required skills:
- Proficient in C++11.
- Preferred skills:
Expand All @@ -16,7 +14,7 @@ News:
- Experience programming in assembly language.

Interested? Contact us: [email protected]
- Come talk to us: [Gian Marco will be presenting his work at the EVS](https://www.embedded-vision.com/summit/even-faster-cnns-exploring-new-class-winograd-algorithms)
- [Gian Marco's talk on optimizing CNNs with Winograd algorithms at the EVS](https://www.embedded-vision.com/platinum-members/arm/embedded-vision-training/videos/pages/may-2018-embedded-vision-summit-iodice)

Related projects:

Expand All @@ -27,6 +25,7 @@ Related projects:

Documentation available here:

- [v18.08](https://arm-software.github.io/ComputeLibrary/v18.08/)
- [v18.05](https://arm-software.github.io/ComputeLibrary/v18.05/)
- [v18.03](https://arm-software.github.io/ComputeLibrary/v18.03/)
- [v18.02](https://arm-software.github.io/ComputeLibrary/v18.02/)
Expand All @@ -41,6 +40,8 @@ Documentation available here:

Binaries available here:

- [v18.08-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.08/arm_compute-v18.08-bin-linux.tar.gz)
- [v18.08-android](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.08/arm_compute-v18.08-bin-android.tar.gz)
- [v18.05-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.05/arm_compute-v18.05-bin-linux.tar.gz)
- [v18.05-android](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.05/arm_compute-v18.05-bin-android.tar.gz)
- [v18.03-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.03/arm_compute-v18.03-bin-linux.tar.gz)
Expand Down
13 changes: 8 additions & 5 deletions SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ import os.path
import re
import subprocess

VERSION = "v18.05"
SONAME_VERSION="11.0.0"
VERSION = "v18.08"
SONAME_VERSION="12.0.0"

Import('env')
Import('vars')
Expand All @@ -43,7 +43,7 @@ def build_library(name, sources, static=False, libs=[]):
library_prefix = obj[0].path[:-(1 + len(SONAME_VERSION))]
real_lib = "%s.%s" % (library_prefix, SONAME_VERSION)

for f in Glob("#%s*" % library_prefix):
for f in Glob("#%s.*" % library_prefix):
if str(f) != real_lib:
symlinks.append("%s/%s" % (directory,str(f)))

Expand Down Expand Up @@ -118,15 +118,16 @@ def create_version_file(target, source, env):
except (OSError, subprocess.CalledProcessError):
git_hash="unknown"

version_filename = "%s/arm_compute_version.embed" % Dir("src/core").path
build_info = "\"arm_compute_version=%s Build options: %s Git hash=%s\"" % (VERSION, vars.args, git_hash.strip())
with open(target[0].get_path(), "w") as fd:
fd.write(build_info)

arm_compute_env = env.Clone()
version_file = arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file)
arm_compute_env.AlwaysBuild(version_file)

# Generate embed files
generate_embed = [ arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file) ]
generate_embed = [ version_file ]
if env['opencl'] and env['embed_kernels']:
cl_files = Glob('src/core/CL/cl_kernels/*.cl')
cl_files += Glob('src/core/CL/cl_kernels/*.h')
Expand Down Expand Up @@ -190,6 +191,7 @@ if env['opencl']:
if env['neon']:
core_files += Glob('src/core/NEON/*.cpp')
core_files += Glob('src/core/NEON/kernels/*.cpp')
core_files += Glob('src/core/NEON/kernels/assembly/*.cpp')

core_files += Glob('src/core/NEON/kernels/arm_gemm/*.cpp')

Expand All @@ -209,6 +211,7 @@ if env['neon']:

runtime_files += Glob('src/runtime/NEON/*.cpp')
runtime_files += Glob('src/runtime/NEON/functions/*.cpp')
runtime_files += Glob('src/runtime/NEON/functions/assembly/*.cpp')

if env['gles_compute']:
if env['os'] != 'android':
Expand Down
4 changes: 2 additions & 2 deletions SConstruct
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ env.Append(CXXFLAGS = ['-Wno-deprecated-declarations','-Wall','-DARCH_ARM',
'-Wextra','-Wno-unused-parameter','-pedantic','-Wdisabled-optimization','-Wformat=2',
'-Winit-self','-Wstrict-overflow=2','-Wswitch-default',
'-fpermissive','-std=gnu++11','-Wno-vla','-Woverloaded-virtual',
'-Wctor-dtor-privacy','-Wsign-promo','-Weffc++','-Wno-format-nonliteral','-Wno-overlength-strings','-Wno-strict-overflow','-Wno-implicit-fallthrough'])
'-Wctor-dtor-privacy','-Wsign-promo','-Weffc++','-Wno-format-nonliteral','-Wno-overlength-strings','-Wno-strict-overflow'])

env.Append(CPPDEFINES = ['_GLIBCXX_USE_NANOSLEEP'])

Expand All @@ -104,7 +104,7 @@ if env['os'] == 'android' and ( 'clang++' not in cpp_compiler or 'clang' not in
if 'clang++' in cpp_compiler:
env.Append(CXXFLAGS = ['-Wno-format-nonliteral','-Wno-deprecated-increment-bool','-Wno-vla-extension','-Wno-mismatched-tags'])
else:
env.Append(CXXFLAGS = ['-Wlogical-op','-Wnoexcept','-Wstrict-null-sentinel'])
env.Append(CXXFLAGS = ['-Wlogical-op','-Wnoexcept','-Wstrict-null-sentinel','-Wno-implicit-fallthrough'])

if env['cppthreads']:
env.Append(CPPDEFINES = [('ARM_COMPUTE_CPP_SCHEDULER', 1)])
Expand Down
27 changes: 26 additions & 1 deletion arm_compute/core/CL/CLHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ std::string get_underlying_cl_type_from_data_type(const DataType &dt);
*
* @return the GPU target
*/
GPUTarget get_target_from_device(cl::Device &device);
GPUTarget get_target_from_device(const cl::Device &device);

/** Helper function to get the highest OpenCL version supported
*
Expand Down Expand Up @@ -102,5 +102,30 @@ bool fp16_supported(const cl::Device &device);
* @return True if the extension is supported
*/
bool arm_non_uniform_workgroup_supported(const cl::Device &device);
/** Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported
*
* @param[in] device A CL device
*
* @return True if the extension is supported
*/
bool dot8_supported(const cl::Device &device);

/** Helper function to check whether the cl_arm_integer_dot_product_accumulate_int8 extension is supported
*
* @param[in] device A CL device
*
* @return True if the extension is supported
*/
bool dot8_acc_supported(const cl::Device &device);

/** This function checks if the Winograd configuration (defined through the output tile, kernel size and the data layout) is supported on OpenCL
*
* @param[in] output_tile Output tile for the Winograd filtering algorithm
* @param[in] kernel_size Kernel size for the Winograd filtering algorithm
* @param[in] data_layout Data layout of the input tensor
*
* @return True if the configuration is supported
*/
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout);
}
#endif /* __ARM_COMPUTE_CLHELPERS_H__ */
26 changes: 22 additions & 4 deletions arm_compute/core/CL/CLKernelLibrary.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,11 +208,11 @@ class CLKernelLibrary
static CLKernelLibrary &get();
/** Initialises the kernel library.
*
* @param[in] kernel_path (Optional) Path of the directory from which kernel sources are loaded.
* @param[in] context (Optional) CL context used to create programs.
* @param[in] device (Optional) CL device for which the programs are created.
* @param[in] kernel_path Path of the directory from which kernel sources are loaded.
* @param[in] context CL context used to create programs.
* @param[in] device CL device for which the programs are created.
*/
void init(std::string kernel_path = ".", cl::Context context = cl::Context::getDefault(), cl::Device device = cl::Device::getDefault())
void init(std::string kernel_path, cl::Context context, cl::Device device)
{
_kernel_path = std::move(kernel_path);
_context = std::move(context);
Expand Down Expand Up @@ -277,6 +277,12 @@ class CLKernelLibrary
return _context;
}

/** Gets the CL device for which the programs are created. */
cl::Device &get_device()
{
return _device;
}

/** Sets the CL device for which the programs are created.
*
* @param[in] device A CL device.
Expand Down Expand Up @@ -329,6 +335,18 @@ class CLKernelLibrary
*/
void add_built_program(const std::string &built_program_name, cl::Program program);

/** Returns true if FP16 is supported by the CL device
*
* @return true if the CL device supports FP16
*/
bool fp16_supported() const;

/** Returns true if int64_base_atomics extension is supported by the CL device
*
* @return true if the CL device supports int64_base_atomics extension
*/
bool int64_base_atomics_supported() const;

private:
/** Load program and its dependencies.
*
Expand Down
2 changes: 2 additions & 0 deletions arm_compute/core/CL/CLKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h"
#include "arm_compute/core/CL/kernels/CLArithmeticDivisionKernel.h"
#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h"
#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
Expand Down Expand Up @@ -61,6 +62,7 @@
#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLFloorKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
Expand Down
61 changes: 61 additions & 0 deletions arm_compute/core/CL/CLValidate.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright (c) 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __ARM_COMPUTE_CL_VALIDATE_H__
#define __ARM_COMPUTE_CL_VALIDATE_H__

#include "arm_compute/core/Validate.h"

namespace arm_compute
{
/** Forward the result of error_on_unsupported_fp16() to ARM_COMPUTE_ERROR_THROW_ON.
 *
 * The check receives the call site (function, file, line), the given tensor and the
 * FP16 capability reported by CLKernelLibrary::get().fp16_supported().
 * NOTE(review): presumably this raises only when @p tensor is F16 and the device lacks
 * FP16 support - confirm against error_on_unsupported_fp16() in Validate.h.
 *
 * @param[in] tensor Tensor (or tensor info) to check.
 */
#define ARM_COMPUTE_ERROR_ON_F16_UNSUPPORTED(tensor) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported()))

/** Same check as ARM_COMPUTE_ERROR_ON_F16_UNSUPPORTED, but the resulting status is
 * forwarded to ARM_COMPUTE_RETURN_ON_ERROR instead of ARM_COMPUTE_ERROR_THROW_ON.
 *
 * @param[in] tensor Tensor (or tensor info) to check.
 */
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor) \
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported()))

/** Return an error if int64_base_atomics extension is not supported by the device.
*
* @param[in] function Function in which the error occurred.
* @param[in] file Name of the file where the error occurred.
* @param[in] line Line on which the error occurred.
*
* @return Status
*/
inline arm_compute::Status error_on_unsupported_int64_base_atomics(const char *function, const char *file, const int line)
{
// The capability is queried from the CLKernelLibrary instance returned by get(),
// i.e. for whichever device that library is currently using.
if(!CLKernelLibrary::get().int64_base_atomics_supported())
{
// Report UNSUPPORTED_EXTENSION_USE, tagged with the caller-supplied location.
return ARM_COMPUTE_CREATE_ERROR_LOC(arm_compute::ErrorCode::UNSUPPORTED_EXTENSION_USE, function, file, line, "Atomic functions are not supported");
}
// Extension available: hand back a default-constructed Status.
return arm_compute::Status{};
}

#define ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__));

#define ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__));

} // namespace arm_compute
#endif /* __ARM_COMPUTE_CL_VALIDATE_H__ */
18 changes: 16 additions & 2 deletions arm_compute/core/CL/ICLKernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,23 @@ class ICLKernel : public IKernel
{
return 2 + 2 * dimension_size;
}
using IKernel::configure; //Prevent children from calling IKernel::configure() directly
protected:
/** Configure the kernel's window and local workgroup size hint.
*
* @param[in] window The maximum window which will be returned by window()
* @param[in] lws_hint (Optional) Local-Workgroup-Size to use.
*/
void configure_internal(const Window &window, cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange())
{
_lws_hint = lws_hint;
IKernel::configure(window);
}

public:
/** Constructor */
ICLKernel()
: _kernel(nullptr), _lws_hint(CLKernelLibrary::get().default_ndrange()), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0)
: _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _lws_hint()
{
}
/** Returns a reference to the OpenCL kernel of this object.
Expand Down Expand Up @@ -196,6 +208,7 @@ class ICLKernel : public IKernel
*/
void set_lws_hint(const cl::NDRange &lws_hint)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure()
_lws_hint = lws_hint;
}

Expand Down Expand Up @@ -282,10 +295,11 @@ class ICLKernel : public IKernel

protected:
cl::Kernel _kernel; /**< OpenCL kernel to run */
cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
GPUTarget _target; /**< The targeted GPU */
std::string _config_id; /**< Configuration ID */
size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */
private:
cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
};

/** Add the kernel to the command queue with the given window.
Expand Down
7 changes: 6 additions & 1 deletion arm_compute/core/CL/OpenCL.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,14 @@
#ifndef ARM_COMPUTE_NO_EXCEPTIONS
#define CL_HPP_ENABLE_EXCEPTIONS
#endif // ARM_COMPUTE_NO_EXCEPTIONS
#define CL_HPP_CL_1_2_DEFAULT_BUILD
#define CL_TARGET_OPENCL_VERSION 200
#define CL_HPP_TARGET_OPENCL_VERSION 110
#define CL_HPP_MINIMUM_OPENCL_VERSION 110
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Weffc++"
#pragma GCC diagnostic ignored "-Wignored-qualifiers"
#include <CL/cl2.hpp>
#pragma GCC diagnostic pop

namespace cl
{
Expand Down Expand Up @@ -78,6 +82,7 @@ class CLSymbols final
#define DECLARE_FUNCTION_PTR(func_name) \
std::function<decltype(func_name)> func_name##_ptr = nullptr

DECLARE_FUNCTION_PTR(clCreateContext);
DECLARE_FUNCTION_PTR(clCreateContextFromType);
DECLARE_FUNCTION_PTR(clCreateCommandQueue);
DECLARE_FUNCTION_PTR(clGetContextInfo);
Expand Down
4 changes: 2 additions & 2 deletions arm_compute/core/CL/kernels/CLActivationLayerKernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ class CLActivationLayerKernel : public ICLKernel
* @note If the output tensor is a nullptr, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
* of the activation function. Data types supported: QS8/QASYMM8/QS16/F16/F32.
* of the activation function. Data types supported: QASYMM8/F16/F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] act_info Activation layer information.
*/
void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
* of the activation function. Data types supported: QS8/QASYMM8/QS16/F16/F32.
* of the activation function. Data types supported: QASYMM8/F16/F32.
* @param[in] output Destination tensor info. Data type supported: same as @p input
* @param[in] act_info Activation layer information.
*
Expand Down
12 changes: 6 additions & 6 deletions arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,17 @@ class CLArithmeticAdditionKernel : public ICLKernel
~CLArithmeticAdditionKernel() = default;
/** Initialise the kernel's inputs, output and conversion policy.
*
* @param[in] input1 First tensor input. Data types supported: U8/QS8/QS16/S16/F16/F32.
* @param[in] input2 Second tensor input. Data types supported: U8/QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F16/F32.
* @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16/F32.
* @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/S16/F16/F32.
* @param[in] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
* @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
* @param[in] policy Policy to use to handle overflow.
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticAdditionKernel
*
* @param[in] input1 First tensor input info. Data types supported: U8/QS8/QS16/S16/F16/F32.
* @param[in] input2 Second tensor input info. Data types supported: U8/QS8 (only if @p input1 is QS8), QS16 (only if @p input1 is QS16), S16/F16/F32.
* @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QS8 (only if both inputs are QS8), QS16 (only if both inputs are QS16), S16/F16/F32.
* @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/F16/F32.
* @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
* @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
* @param[in] policy Policy to use to handle overflow.
*
* @return a status
Expand Down
Loading

0 comments on commit 52ba29e

Please sign in to comment.