diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000..01a1f0eadc --- /dev/null +++ b/.clang-format @@ -0,0 +1,48 @@ +--- +Language: Cpp +AccessModifierOffset: '0' +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: 'true' +AlignConsecutiveDeclarations: 'true' +AlignEscapedNewlinesLeft: 'true' +AlignTrailingComments: 'true' +AllowShortBlocksOnASingleLine: 'false' +AllowShortCaseLabelsOnASingleLine: 'false' +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: 'false' +AllowShortLoopsOnASingleLine: 'false' +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: 'true' +AlwaysBreakTemplateDeclarations: 'true' +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeBraces: Allman +BreakBeforeTernaryOperators: 'false' +BreakConstructorInitializersBeforeComma: 'false' +#BreakStringLiterals: 'true' +ConstructorInitializerAllOnOneLineOrOnePerLine: 'true' +Cpp11BracedListStyle: 'false' +DerivePointerAlignment: 'false' +IndentCaseLabels: 'true' +IndentWidth: '4' +IndentWrappedFunctionNames: 'false' +KeepEmptyLinesAtTheStartOfBlocks: 'false' +MaxEmptyLinesToKeep: '1' +NamespaceIndentation: None +PointerAlignment: Right +SortIncludes: 'true' +SpaceAfterCStyleCast: 'false' +SpaceBeforeAssignmentOperators: 'true' +SpaceBeforeParens: Never +SpaceInEmptyParentheses: 'false' +SpacesInAngles: 'false' +SpacesInCStyleCastParentheses: 'false' +SpacesInParentheses: 'false' +SpacesInSquareBrackets: 'false' +Standard: Cpp11 +TabWidth: '4' +UseTab: Never +ReflowComments: 'false' +ContinuationIndentWidth: '4' +ColumnLimit: 0 +--- diff --git a/README.md b/README.md new file mode 100644 index 0000000000..389cb02e6d --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ + +Please report issues here: https://github.com/ARM-software/ComputeLibrary/issues + +Documentation available here: [v17.03.1](https://arm-software.github.io/ComputeLibrary/v17.03.1/) + +Support: developer@arm.com + diff 
--git a/SConstruct b/SConstruct new file mode 100644 index 0000000000..7a1caa32b8 --- /dev/null +++ b/SConstruct @@ -0,0 +1,23 @@ +# Copyright (c) 2016, 2017 ARM Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +SConscript('sconscript', variant_dir='build', duplicate=0) diff --git a/arm_compute/core/AccessWindowAutoPadding.h b/arm_compute/core/AccessWindowAutoPadding.h new file mode 100644 index 0000000000..cf6555296b --- /dev/null +++ b/arm_compute/core/AccessWindowAutoPadding.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H__
+#define __ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H__
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class Window;
+class TensorInfo;
+
+/** Dummy access window.
+ *
+ * This implementation always uses the auto padding of the tensor info and
+ * never updates the window. The valid region is always set to cover the entire
+ * tensor.
+ *
+ * @note This access window is only used during the migration to the new
+ *       padding system. It will be removed once all kernels have been ported.
+ *
+ * */
+class AccessWindowAutoPadding : public IAccessWindow
+{
+public:
+    /** Constructor (takes the tensor info to operate on; not a default constructor).
+     *
+     * @param[in,out] info Tensor info of the accessed kernel.
+     */
+    AccessWindowAutoPadding(TensorInfo *info);
+    AccessWindowAutoPadding(const AccessWindowAutoPadding &) = delete;            // not copy-constructible
+    AccessWindowAutoPadding &operator=(const AccessWindowAutoPadding &) = delete; // not copy-assignable
+    AccessWindowAutoPadding(AccessWindowAutoPadding &&) = default;                // movable
+    AccessWindowAutoPadding &operator=(AccessWindowAutoPadding &&) = default;     // move-assignable
+    ~AccessWindowAutoPadding() = default;
+
+    void set_valid_region(); // NOTE(review): presumably marks the whole tensor as valid, per the class description - confirm in the implementation
+
+    // Inherited methods overridden:
+    bool update_window_if_needed(Window &window) const override;
+    bool update_padding_if_needed(const Window &window) const override;
+    void set_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) override;
+
+private:
+    TensorInfo *_info; // non-owning by declaration: raw pointer, destructor is defaulted
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H__*/
diff --git a/arm_compute/core/AccessWindowStatic.h b/arm_compute/core/AccessWindowStatic.h
new file mode 100644
index 0000000000..3898eb2199
--- /dev/null
+++ b/arm_compute/core/AccessWindowStatic.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IACCESS_WINDOW_STATIC_H__
+#define __ARM_COMPUTE_IACCESS_WINDOW_STATIC_H__
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+#include <array> // NOTE(review): header name was stripped in extraction; <array> restored from upstream - confirm
+
+namespace arm_compute
+{
+class Window;
+class TensorInfo;
+
+/** Implementation of a static rectangular access pattern.
+ *
+ * In this implementation the access offsets and sizes are not relative to the
+ * current element. Instead they are considered to be absolute coordinates
+ * within the accessed tensor's shape.
+ *
+ * */
+class AccessWindowStatic : public IAccessWindow
+{
+public:
+    /** Constructor for a static access pattern.
+     *
+     * @param[in,out] info    Tensor info of the accessed kernel.
+     * @param[in]     start_x Start of the access in X direction.
+     * @param[in]     start_y Start of the access in Y direction.
+     * @param[in]     end_x   End of the access in X direction.
+     * @param[in]     end_y   End of the access in Y direction.
+     */
+    AccessWindowStatic(TensorInfo *info, int start_x, int start_y, int end_x, int end_y);
+
+    AccessWindowStatic(const AccessWindowStatic &) = delete;
+    AccessWindowStatic &operator=(const AccessWindowStatic &) = delete;
+    AccessWindowStatic(AccessWindowStatic &&) = default;
+    AccessWindowStatic &operator=(AccessWindowStatic &&) = default;
+    ~AccessWindowStatic() = default;
+
+    // Inherited methods overridden:
+    bool update_window_if_needed(Window &window) const override;
+    bool update_padding_if_needed(const Window &window) const override;
+    void set_valid_region(const Window &window, ValidRegion input_valid_region); // overload without border arguments; not marked override
+    void set_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) override;
+
+    TensorInfo *_info; // NOTE(review): the members below are public and presumably mirror the constructor arguments - confirm in the implementation
+    int         _start_x;
+    int         _start_y;
+    int         _end_x;
+    int         _end_y;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_IACCESS_WINDOW_STATIC_H__*/
diff --git a/arm_compute/core/AccessWindowTranspose.h b/arm_compute/core/AccessWindowTranspose.h
new file mode 100644
index 0000000000..d3803aad54
--- /dev/null
+++ b/arm_compute/core/AccessWindowTranspose.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H__
+#define __ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H__
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class Window;
+class TensorInfo;
+
+/** Implementation of an XY-transpose access pattern, built on top of AccessWindowRectangle. */
+class AccessWindowTranspose : public AccessWindowRectangle
+{
+public:
+    using AccessWindowRectangle::AccessWindowRectangle; // inherit the base-class constructors
+    bool update_window_if_needed(Window &window) const override;
+    bool update_padding_if_needed(const Window &window) const override;
+    using AccessWindowRectangle::set_valid_region; // keep the base-class overloads visible next to the override below
+    void set_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) override;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H__*/
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
new file mode 100644
index 0000000000..230685cb7e
--- /dev/null
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHELPERS_H__
+#define __ARM_COMPUTE_CLHELPERS_H__
+
+#include <string> // header name was stripped in extraction; <string> is required by the std::string return type below
+
+namespace arm_compute
+{
+enum class DataType; // forward declaration only; the enumerators are defined elsewhere
+
+/** Max vector width of an OpenCL vector */
+static constexpr const unsigned int max_cl_vector_width = 16;
+
+/** Translates a tensor data type to the appropriate OpenCL type.
+ *
+ * @param[in] dt @ref DataType to be translated to OpenCL type.
+ *
+ * @return The string specifying the OpenCL type to be used.
+ */
+std::string get_cl_type_from_data_type(const DataType &dt);
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLHELPERS_H__ */
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
new file mode 100644
index 0000000000..c29610c252
--- /dev/null
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLKERNELLIBRARY_H__
+#define __ARM_COMPUTE_CLKERNELLIBRARY_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+
+#include <map> // NOTE(review): every <...> in this header (include names, template arguments) was stripped in extraction and has been restored - confirm against upstream
+#include <set>
+#include <string>
+#include <utility>
+
+namespace arm_compute
+{
+/** Program class */
+class Program
+{
+public:
+    /** Default constructor. */
+    Program();
+    /** Construct program from source file.
+     *
+     * @param[in] context CL context used to create the program.
+     * @param[in] name    Program name.
+     * @param[in] source  Program source.
+     */
+    Program(cl::Context context, std::string name, std::string source);
+    /** Construct program from binary file.
+     *
+     * @param[in] context CL context used to create the program.
+     * @param[in] device  CL device for which the programs are created.
+     * @param[in] name    Program name.
+     * @param[in] binary  Program binary.
+     */
+    Program(cl::Context context, cl::Device device, std::string name, std::vector<unsigned char> binary);
+    /** Default Copy Constructor. */
+    Program(const Program &) = default;
+    /** Default Move Constructor. */
+    Program(Program &&) = default;
+    /** Default copy assignment operator. */
+    Program &operator=(const Program &) = default;
+    /** Default move assignment operator. */
+    Program &operator=(Program &&) = default;
+    /** Returns program name.
+     *
+     * @return Program's name.
+     */
+    std::string name() const
+    {
+        return _name;
+    }
+    /** User-defined conversion to the underlying CL program.
+     *
+     * @return The CL program object.
+     */
+    explicit operator cl::Program() const;
+
+    static bool build(const cl::Program &program, const std::string &build_options = ""); // NOTE(review): undocumented; presumably builds @p program and reports success - confirm
+    /** Build the underlying CL program.
+     *
+     * @param[in] build_options Options used to build the CL program.
+     *
+     * @return The built CL program object (returned by value).
+     */
+    cl::Program build(const std::string &build_options = "") const;
+
+private:
+    cl::Context                _context;   /**< Underlying CL context. */
+    cl::Device                 _device;    /**< CL device for which the programs are created. */
+    bool                       _is_binary; /**< Create program from binary? */
+    std::string                _name;      /**< Program name. */
+    std::string                _source;    /**< Source code for the program. */
+    std::vector<unsigned char> _binary;    /**< Binary from which to create the program. */
+};
+
+/** Kernel class */
+class Kernel
+{
+public:
+    /** Default Constructor. */
+    Kernel();
+    /** Default Copy Constructor. */
+    Kernel(const Kernel &) = default;
+    /** Default Move Constructor. */
+    Kernel(Kernel &&) = default;
+    /** Default copy assignment operator. */
+    Kernel &operator=(const Kernel &) = default;
+    /** Default move assignment operator. */
+    Kernel &operator=(Kernel &&) = default;
+    /** Constructor.
+     *
+     * @param[in] name    Kernel name.
+     * @param[in] program Built program.
+     */
+    Kernel(std::string name, const cl::Program &program);
+    /** Returns kernel name.
+     *
+     * @return Kernel's name.
+     */
+    std::string name() const
+    {
+        return _name;
+    }
+    /** Returns OpenCL kernel.
+     *
+     * @return OpenCL Kernel.
+     */
+    explicit operator cl::Kernel() const
+    {
+        return _kernel;
+    }
+
+private:
+    std::string _name;   /**< Kernel name */
+    cl::Kernel  _kernel; /**< OpenCL Kernel */
+};
+
+/** CLKernelLibrary class */
+class CLKernelLibrary
+{
+    using StringSet = std::set<std::string>;
+
+private:
+    /** Default Constructor. */
+    CLKernelLibrary();
+
+public:
+    /** Prevent instances of this class from being copied. */
+    CLKernelLibrary(const CLKernelLibrary &) = delete;
+    /** Prevent instances of this class from being copied. */
+    const CLKernelLibrary &operator=(const CLKernelLibrary &) = delete;
+    /** Access the KernelLibrary singleton.
+     * @return The KernelLibrary instance.
+     */
+    static CLKernelLibrary &get();
+    /** Initialises the kernel library.
+     *
+     * @param[in] kernel_path (Optional) Path of the directory from which kernel sources are loaded.
+     * @param[in] context     (Optional) CL context used to create programs.
+     * @param[in] device      (Optional) CL device for which the programs are created.
+     */
+    void init(std::string kernel_path = ".", cl::Context context = cl::Context::getDefault(), cl::Device device = cl::Device::getDefault())
+    {
+        _kernel_path = std::move(kernel_path);
+        _context     = std::move(context);
+        _device      = std::move(device);
+    }
+    /** Sets the path that the kernels reside in.
+     *
+     * @param[in] kernel_path Path of the kernel.
+     */
+    void set_kernel_path(const std::string &kernel_path)
+    {
+        _kernel_path = kernel_path;
+    };
+    /** Sets the CL context used to create programs.
+     *
+     * @note Setting the context also resets the device to the
+     *       first one available in the new context.
+     *
+     * @param[in] context A CL context.
+     */
+    void set_context(cl::Context context)
+    {
+        _context = std::move(context);
+
+        const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>();
+
+        if(cl_devices.empty())
+        {
+            _device = cl::Device(); // no device in the new context: fall back to a default-constructed device
+        }
+        else
+        {
+            _device = cl_devices[0];
+        }
+    };
+    /** Sets the CL device for which the programs are created.
+     *
+     * @param[in] device A CL device.
+     */
+    void set_device(cl::Device device)
+    {
+        _device = std::move(device);
+    };
+    /** Creates a kernel from the kernel library.
+     *
+     * @param[in] kernel_name       Kernel name.
+     * @param[in] build_options_set Kernel build options as a set.
+     *
+     * @return The created kernel.
+     */
+    Kernel create_kernel(const std::string &kernel_name, const StringSet &build_options_set = {}) const;
+    /** Serializes and saves programs to a binary.
+     *
+     */
+    void save_binary();
+    /** Load serialized binary with all the programs.
+     *
+     */
+    void load_binary();
+
+private:
+    /** Load program and its dependencies.
+     *
+     * @param[in] program_name Name of the program to load.
+     */
+    const Program &load_program(const std::string &program_name) const;
+    /** Concatenates contents of a set into a single string.
+     *
+     * @param[in] s Input set to concatenate.
+     *
+     * @return Concatenated string.
+     */
+    std::string stringify_set(const StringSet &s) const;
+
+    cl::Context _context;     /**< Underlying CL context. */
+    cl::Device  _device;      /**< Underlying CL device. */
+    std::string _kernel_path; /**< Path to the kernels folder. */
+    mutable std::map<std::string, const Program> _programs_map;           /**< Map with all already loaded program data. */
+    mutable std::map<std::string, cl::Program>   _built_programs_map;     /**< Map with all already built program data. */
+    static const std::map<std::string, std::string> _kernel_program_map;  /**< Map that associates kernel names with programs. */
+    static const std::map<std::string, std::string> _program_source_map;  /**< Contains sources for all programs.
+                                                                               Used for compile-time kernel inclusion. */
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_H__ */
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
new file mode 100644
index 0000000000..7da1bb5bf0
--- /dev/null
+++ b/arm_compute/core/CL/CLKernels.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_CLKERNELS_H__ +#define __ARM_COMPUTE_CLKERNELS_H__ + +/* Header regrouping all the CL kernels */ + +#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h" +#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h" +#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" +#include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h" +#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h" +#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h" +#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h" +#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h" +#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h" +#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h" +#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h" +#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h" +#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h" +#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" +#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h" +#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" +#include "arm_compute/core/CL/kernels/CLConvolutionLayerWeightsReshapeKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h" +#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h" +#include "arm_compute/core/CL/kernels/CLDilateKernel.h" +#include "arm_compute/core/CL/kernels/CLErodeKernel.h" +#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" +#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h" +#include 
"arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" +#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h" +#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" +#include "arm_compute/core/CL/kernels/CLHistogramKernel.h" +#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" +#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h" +#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h" +#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" +#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" +#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h" +#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h" +#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h" +#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" +#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h" +#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" +#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h" +#include "arm_compute/core/CL/kernels/CLRemapKernel.h" +#include "arm_compute/core/CL/kernels/CLScaleKernel.h" +#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h" +#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h" +#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h" +#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h" +#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" +#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h" +#include "arm_compute/core/CL/kernels/CLThresholdKernel.h" +#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" +#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h" +#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h" + +#endif /* __ARM_COMPUTE_CLKERNELS_H__ */ diff --git a/arm_compute/core/CL/ICLArray.h b/arm_compute/core/CL/ICLArray.h new file mode 100644 index 0000000000..1b676ed5a3 --- /dev/null +++ b/arm_compute/core/CL/ICLArray.h @@ -0,0 +1,118 @@ +/* + * Copyright 
(c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLARRAY_H__
+#define __ARM_COMPUTE_ICLARRAY_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/ITensor.h"
+
+namespace arm_compute
+{
+/** Interface for OpenCL Array */
+template <class T> // NOTE(review): every <...> in this header was stripped in extraction and has been restored - confirm against upstream
+class ICLArray : public IArray<T>
+{
+public:
+    /** Constructor: forwards max_num_values to IArray and starts with no mapping (_mapping is nullptr). */
+    explicit ICLArray(size_t max_num_values)
+        : IArray<T>(max_num_values), _mapping(nullptr)
+    {
+    }
+
+    ICLArray(const ICLArray &) = delete;
+    ICLArray &operator=(const ICLArray &) = delete;
+    virtual ~ICLArray()                   = default;
+    /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the array's data.
+     *
+     * @return A reference to an OpenCL buffer containing the array's data.
+     */
+    virtual const cl::Buffer &cl_buffer() const = 0;
+    /** Enqueue a map operation of the allocated buffer on the given queue.
+     *
+     * @param[in,out] q        The CL command queue to use for the mapping operation.
+     * @param[in]     blocking If true, then the mapping will be ready to use by the time
+     *                         this method returns, else it is the caller's responsibility
+     *                         to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     *
+     * @note Nothing is returned: the mapped address is stored internally and exposed through buffer().
+     */
+    void map(cl::CommandQueue &q, bool blocking = true)
+    {
+        _mapping = do_map(q, blocking);
+    }
+    /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     * @param[in,out] q The CL command queue to use for the mapping operation.
+     */
+    void unmap(cl::CommandQueue &q)
+    {
+        do_unmap(q, _mapping);
+        _mapping = nullptr; // buffer() returns nullptr again until the next map()
+    }
+
+    // Inherited methods overridden:
+    T *buffer() const override
+    {
+        return reinterpret_cast<T *>(_mapping);
+    }
+
+protected:
+    /** Method to be implemented by the child class to map the OpenCL buffer
+     *
+     * @param[in,out] q        The CL command queue to use for the mapping operation.
+     * @param[in]     blocking If true, then the mapping will be ready to use by the time
+     *                         this method returns, else it is the caller's responsibility
+     *                         to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     */
+    virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0;
+    /** Method to be implemented by the child class to unmap the OpenCL buffer
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     * @param[in,out] q       The CL command queue to use for the mapping operation.
+     * @param[in]     mapping Pointer to the buffer to be unmapped.
+     */
+    virtual void do_unmap(cl::CommandQueue &q, uint8_t *mapping) = 0;
+
+private:
+    uint8_t *_mapping; // address stored by map(); nullptr when not mapped
+};
+
+using ICLKeyPointArray        = ICLArray<KeyPoint>; // NOTE(review): element types of these aliases were lost in extraction; restored from upstream/alias names - confirm
+using ICLCoordinates2DArray   = ICLArray<Coordinates2D>;
+using ICLDetectionWindowArray = ICLArray<DetectionWindow>;
+using ICLSize2DArray          = ICLArray<Size2D>;
+using ICLUInt8Array           = ICLArray<cl_uchar>;
+using ICLUInt16Array          = ICLArray<cl_ushort>;
+using ICLUInt32Array          = ICLArray<cl_uint>;
+using ICLInt16Array           = ICLArray<cl_short>;
+using ICLInt32Array           = ICLArray<cl_int>;
+using ICLFloatArray           = ICLArray<cl_float>;
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_ICLARRAY_H__*/
diff --git a/arm_compute/core/CL/ICLDistribution1D.h b/arm_compute/core/CL/ICLDistribution1D.h
new file mode 100644
index 0000000000..8fbbbbf548
--- /dev/null
+++ b/arm_compute/core/CL/ICLDistribution1D.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLDISTRIBUTION1D_H__ +#define __ARM_COMPUTE_ICLDISTRIBUTION1D_H__ + +#include "arm_compute/core/IDistribution1D.h" + +#include <cstddef> +#include <cstdint> + +namespace cl +{ +class Buffer; +class CommandQueue; +} + +namespace arm_compute +{ +/** ICLDistribution1D interface class */ +class ICLDistribution1D : public IDistribution1D +{ +public: + /** Constructor: Creates a 1D CLDistribution of a consecutive interval [offset, offset + range - 1] + * defined by a start offset and valid range, divided equally into num_bins parts. + * + * @param[in] num_bins The number of bins the distribution is divided in. + * @param[in] offset The start of the values to use. + * @param[in] range The total number of the consecutive values of the distribution interval. + */ + ICLDistribution1D(size_t num_bins, int32_t offset, uint32_t range); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + ICLDistribution1D(const ICLDistribution1D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + ICLDistribution1D &operator=(const ICLDistribution1D &) = delete; + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+ */ + void map(cl::CommandQueue &q, bool blocking = true); + /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + */ + void unmap(cl::CommandQueue &q); + /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the distribution's data. + * + * @return A reference to an OpenCL buffer containing the distribution's data. + */ + virtual cl::Buffer &cl_buffer() = 0; + // Inherited methods overridden: + uint32_t *buffer() const override; + +protected: + /** Method to be implemented by the child class to map the OpenCL buffer + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + virtual uint32_t *do_map(cl::CommandQueue &q, bool blocking) = 0; + /** Method to be implemented by the child class to unmap the OpenCL buffer + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + */ + virtual void do_unmap(cl::CommandQueue &q) = 0; + +protected: + uint32_t *_mapping; /**< The distribution data. 
*/ +}; +} +#endif /* __ARM_COMPUTE_ICLDISTRIBUTION1D_H__ */ diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h new file mode 100644 index 0000000000..f2cbb2b219 --- /dev/null +++ b/arm_compute/core/CL/ICLKernel.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLKERNEL_H__ +#define __ARM_COMPUTE_ICLKERNEL_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/IKernel.h" + +namespace arm_compute +{ +class ICLTensor; +class Window; + +/** Common interface for all the OpenCL kernels */ +class ICLKernel : public IKernel +{ +public: + /** Constructor */ + ICLKernel(); + /** Returns a reference to the OpenCL kernel of this object. + * + * @return A reference to the OpenCL kernel of this object. 
+ */ + cl::Kernel &kernel(); + /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Returns the number of arguments enqueued per 1D tensor object. + * + * @return The number of arguments enqueued per 1D tensor object. + */ + unsigned int num_arguments_per_1D_tensor() const; + /** Returns the number of arguments enqueued per 2D tensor object. + * + * @return The number of arguments enqueued per 2D tensor object. + */ + unsigned int num_arguments_per_2D_tensor() const; + /** Returns the number of arguments enqueued per 3D tensor object.
+ * + * @return The number of arguments enqueued per 3D tensor object. + */ + unsigned int num_arguments_per_3D_tensor() const; + /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue. + * + * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns. + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel. + */ + virtual void run(const Window &window, cl::CommandQueue &queue) = 0; + /** Add the passed parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the arguments. Will be incremented by the number of kernel arguments set. + * @param[in] value Value to set as an argument of the object's kernel. + */ + template <typename T> + void add_argument(unsigned int &idx, T value) + { + _kernel.setArg(idx++, value); + } + +private: + /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + template <unsigned int dimension_size> + void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Returns the number of arguments enqueued per tensor object. + * + * @return The number of arguments enqueued per tensor object. + */ + template <unsigned int dimension_size> + unsigned int num_arguments_per_tensor() const; + +protected: + cl::Kernel _kernel; /**< OpenCL kernel to run */ + cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */ +}; + +/** Add the kernel to the command queue with the given window.
+ * + * @note Depending on the size of the window, this might translate into several jobs being enqueued. + * + * @note If kernel.kernel() is empty then the function will return without adding anything to the queue. + * + * @param[in,out] queue OpenCL command queue. + * @param[in] kernel Kernel to enqueue + * @param[in] window Window the kernel has to process. + * @param[in] lws_hint Local workgroup size requested, by default (128,1) + * + * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. + */ +void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = cl::Range_128_1); +} +#endif /*__ARM_COMPUTE_ICLKERNEL_H__ */ diff --git a/arm_compute/core/CL/ICLLut.h b/arm_compute/core/CL/ICLLut.h new file mode 100644 index 0000000000..2016ebb5c3 --- /dev/null +++ b/arm_compute/core/CL/ICLLut.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLLUT_H__ +#define __ARM_COMPUTE_ICLLUT_H__ + +#include "arm_compute/core/ILut.h" + +#include <cstdint> + +namespace cl +{ +class Buffer; +class CommandQueue; +} + +namespace arm_compute +{ +/** Interface for OpenCL LUT */ +class ICLLut : public ILut +{ +public: + ICLLut(); + ICLLut(const ICLLut &) = delete; + ICLLut &operator=(const ICLLut &) = delete; + + /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the lut's data. + * + * @return A reference to an OpenCL buffer containing the lut's data. + */ + virtual const cl::Buffer &cl_buffer() const = 0; + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + void map(cl::CommandQueue &q, bool blocking = true); + /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation.
+ */ + void unmap(cl::CommandQueue &q); + + // Inherited methods overridden: + uint8_t *buffer() const override; + +protected: + /** Method to be implemented by the child class to map the OpenCL buffer + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0; + /** Method to be implemented by the child class to unmap the OpenCL buffer + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + */ + virtual void do_unmap(cl::CommandQueue &q) = 0; + +private: + uint8_t *_mapping; +}; +} +#endif /*__ARM_COMPUTE_ICLLUT_H__ */ diff --git a/arm_compute/core/CL/ICLMultiImage.h b/arm_compute/core/CL/ICLMultiImage.h new file mode 100644 index 0000000000..e8705b1824 --- /dev/null +++ b/arm_compute/core/CL/ICLMultiImage.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLMULTIIMAGE_H__ +#define __ARM_COMPUTE_ICLMULTIIMAGE_H__ + +#include "arm_compute/core/IMultiImage.h" + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for OpenCL multi-planar images */ +class ICLMultiImage : public IMultiImage +{ +public: + /** Return a pointer to the requested OpenCL plane of the image. + * + * @param[in] index The index of the wanted plane. + * + * @return A pointer to the requested OpenCL plane + */ + virtual ICLImage *cl_plane(unsigned int index) = 0; + /** Return a constant pointer to the requested OpenCL plane of the image. + * + * @param[in] index The index of the wanted plane.
+ * + * @return A constant pointer to the requested OpenCL plane + */ + virtual const ICLImage *cl_plane(unsigned int index) const = 0; + + // Inherited methods overridden: + IImage *plane(unsigned int index) override; + const IImage *plane(unsigned int index) const override; +}; +} +#endif /*__ARM_COMPUTE_ICLMULTIIMAGE_H__ */ diff --git a/arm_compute/core/CL/ICLSimple2DKernel.h b/arm_compute/core/CL/ICLSimple2DKernel.h new file mode 100644 index 0000000000..a1366fb211 --- /dev/null +++ b/arm_compute/core/CL/ICLSimple2DKernel.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLSIMPLE2DKERNEL_H__ +#define __ARM_COMPUTE_ICLSIMPLE2DKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimpleKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output.
This interface can be used when the work-item processes a 2D tile */ +class ICLSimple2DKernel : public ICLSimpleKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} +#endif /*__ARM_COMPUTE_ICLSIMPLE2DKERNEL_H__ */ diff --git a/arm_compute/core/CL/ICLSimple3DKernel.h b/arm_compute/core/CL/ICLSimple3DKernel.h new file mode 100644 index 0000000000..5e981027de --- /dev/null +++ b/arm_compute/core/CL/ICLSimple3DKernel.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLSIMPLE3DKERNEL_H__ +#define __ARM_COMPUTE_ICLSIMPLE3DKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. 
+ * Both input tensor and output tensor must have at least 3 dimensions. + */ +class ICLSimple3DKernel : public ICLSimple2DKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} +#endif /*__ARM_COMPUTE_ICLSIMPLE3DKERNEL_H__ */ diff --git a/arm_compute/core/CL/ICLSimpleKernel.h b/arm_compute/core/CL/ICLSimpleKernel.h new file mode 100644 index 0000000000..986c86fcb8 --- /dev/null +++ b/arm_compute/core/CL/ICLSimpleKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_ICLSIMPLEKERNEL_H__ +#define __ARM_COMPUTE_ICLSIMPLEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Helpers.h" + +namespace arm_compute +{ +/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output */ +class ICLSimpleKernel : public ICLKernel +{ +public: + /** Constructor. */ + ICLSimpleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + ICLSimpleKernel(const ICLSimpleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + ICLSimpleKernel &operator=(const ICLSimpleKernel &) = delete; + /** Allow instances of this class to be moved. */ + ICLSimpleKernel(ICLSimpleKernel &&) = default; + /** Allow instances of this class to be moved. */ + ICLSimpleKernel &operator=(ICLSimpleKernel &&) = default; + /** Default destructor */ + ~ICLSimpleKernel() = default; + + /** Configure the kernel + * + * @param[in] input Source tensor. + * @param[out] output Destination tensor. + * @param[in] processed_elements Number of processed elements per iteration. + * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. + * @param[in] border_size (Optional) Size of the border. + */ + void configure(const ICLTensor *input, ICLTensor *output, unsigned int processed_elements, bool border_undefined = false, const BorderSize &border_size = BorderSize()); + +protected: + const ICLTensor *_input; + ICLTensor *_output; +}; +} + +#endif /*__ARM_COMPUTE_ICLSIMPLEKERNEL_H__ */ diff --git a/arm_compute/core/CL/ICLTensor.h b/arm_compute/core/CL/ICLTensor.h new file mode 100644 index 0000000000..301f66667f --- /dev/null +++ b/arm_compute/core/CL/ICLTensor.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLTENSOR_H__ +#define __ARM_COMPUTE_ICLTENSOR_H__ + +#include "arm_compute/core/ITensor.h" + +#include <cstdint> + +namespace cl +{ +class Buffer; +class CommandQueue; +} + +namespace arm_compute +{ +/** Interface for OpenCL tensor */ +class ICLTensor : public ITensor +{ +public: + ICLTensor(); + ICLTensor(const ICLTensor &) = delete; + ICLTensor &operator=(const ICLTensor &) = delete; + + /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the image's data. + * + * @return A reference to an OpenCL buffer containing the image's data. + */ + virtual const cl::Buffer &cl_buffer() const = 0; + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation.
+ * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + * + * @note This method returns void; the mapped memory is presumably accessed through buffer() afterwards (implementation not visible in this header). + */ + void map(cl::CommandQueue &q, bool blocking = true); + /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + */ + void unmap(cl::CommandQueue &q); + /** Clear the contents of the tensor synchronously. + * + * @param[in,out] q The CL command queue to use for the clear operation. + */ + void clear(cl::CommandQueue &q); + + // Inherited methods overridden: + uint8_t *buffer() const override; + +protected: + /** Method to be implemented by the child class to map the OpenCL buffer + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0; + /** Method to be implemented by the child class to unmap the OpenCL buffer + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation.
+ */ + virtual void do_unmap(cl::CommandQueue &q) = 0; + +private: + uint8_t *_mapping; +}; + +using ICLImage = ICLTensor; +} +#endif /*__ARM_COMPUTE_ICLTENSOR_H__ */ diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h new file mode 100644 index 0000000000..9d39e5dfa3 --- /dev/null +++ b/arm_compute/core/CL/OpenCL.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_OPENCL_H__ +#define __ARM_COMPUTE_OPENCL_H__ + +/* Configure the Khronos C++ wrapper: exceptions enabled, OpenCL C 1.2 as the default program build standard, OpenCL 1.1 (110) as both the target and the minimum API version: */ +#define CL_HPP_ENABLE_EXCEPTIONS +#define CL_HPP_CL_1_2_DEFAULT_BUILD +#define CL_HPP_TARGET_OPENCL_VERSION 110 +#define CL_HPP_MINIMUM_OPENCL_VERSION 110 +#include <CL/cl2.hpp> + +namespace cl +{ +static const NDRange Range_128_1 = NDRange(128, 1); /**< Two-dimensional range of 128 x 1 */ +} +#endif /* __ARM_COMPUTE_OPENCL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h new file mode 100644 index 0000000000..e8bd6aac7f --- /dev/null +++ b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H__ +#define __ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the absolute difference kernel. + * + * Absolute difference is computed by: + * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] + */ +class CLAbsoluteDifferenceKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLAbsoluteDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete; + /** Allow instances of this class to be moved. */ + CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default; + /** Allow instances of this class to be moved. */ + CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default; + /** Default destructor */ + ~CLAbsoluteDifferenceKernel() = default; + + /** Set the inputs and output images. + * + * @param[in] input1 Source tensor. Data types supported: U8/S16. + * @param[in] input2 Source tensor. Data types supported: U8/S16. + * @param[out] output Destination tensor. Data types supported: U8/S16. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1. */ + const ICLTensor *_input2; /**< Source tensor 2. */ + ICLTensor *_output; /**< Destination tensor. 
*/ +}; +} +#endif /* __ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLAccumulateKernel.h b/arm_compute/core/CL/kernels/CLAccumulateKernel.h new file mode 100644 index 0000000000..5c8ffdb404 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLAccumulateKernel.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLACCUMULATEKERNEL_H__ +#define __ARM_COMPUTE_CLACCUMULATEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the accumulate kernel. + * + * Accumulation is computed by: + * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] + */ +class CLAccumulateKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation tensors. + * + * @param[in] input Source tensor. 
Data types supported: U8. + * @param[out] accum Destination tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, ICLTensor *accum); +}; + +/** Interface for the accumulate weighted kernel. + * + * Weighted accumulation is computed: + * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] + * + * Where @f$ 0 \le \alpha \le 1 @f$ + * Conceptually, the rounding for this is defined as: + * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] +*/ +class CLAccumulateWeightedKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation images, and the scale value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. + * @param[in,out] accum Accumulated tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, float alpha, ICLTensor *accum); +}; + +/** Interface for the accumulate squared kernel. + * + * The accumulation of squares is computed: + * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] + * + * Where @f$ 0 \le shift \le 15 @f$ +*/ +class CLAccumulateSquaredKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. + * @param[in,out] accum Accumulated tensor. Data types supported: S16. 
+ */ + void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum); +}; +} +#endif /*__ARM_COMPUTE_CLACCUMULATEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h new file mode 100644 index 0000000000..887d31f852 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the activation layer kernel. */ +class CLActivationLayerKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and output tensor. + * + * @param[in] input Source tensor. 
Data types supported: F16, F32, U16, S16. + * @param[out] output Destination tensor. Data type should match the input data type. + * @param[in] act_info Activation layer information. + */ + void configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); +}; +} +#endif /*__ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h b/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h new file mode 100644 index 0000000000..7d736cdf44 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLARITHMETICADDITIONKERNEL_H__ +#define __ARM_COMPUTE_CLARITHMETICADDITIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the arithmetic addition kernel + * + * Arithmetic addition is computed by: + * @f[ output(x,y) = input1(x,y) + input2(x,y) @f] + */ +class CLArithmeticAdditionKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLArithmeticAdditionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticAdditionKernel(const CLArithmeticAdditionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticAdditionKernel &operator=(const CLArithmeticAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + CLArithmeticAdditionKernel(CLArithmeticAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + CLArithmeticAdditionKernel &operator=(CLArithmeticAdditionKernel &&) = default; + /** Default destructor */ + ~CLArithmeticAdditionKernel() = default; + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] policy Policy to use to handle overflow. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLARITHMETICADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h b/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h new file mode 100644 index 0000000000..afecf6ed7d --- /dev/null +++ b/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTIONKERNEL_H__ +#define __ARM_COMPUTE_CLARITHMETICSUBTRACTIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the arithmetic subtraction kernel + * + * Arithmetic subtraction is computed by: + * @f[ output(x,y) = input1(x,y) - input2(x,y) @f] + */ +class CLArithmeticSubtractionKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLArithmeticSubtractionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticSubtractionKernel(const CLArithmeticSubtractionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticSubtractionKernel &operator=(const CLArithmeticSubtractionKernel &) = delete; + /** Allow instances of this class to be moved */ + CLArithmeticSubtractionKernel(CLArithmeticSubtractionKernel &&) = default; + /** Allow instances of this class to be moved */ + CLArithmeticSubtractionKernel &operator=(CLArithmeticSubtractionKernel &&) = default; + /** Default destructor */ + ~CLArithmeticSubtractionKernel() = default; + + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] policy Policy to use to handle overflow. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h new file mode 100644 index 0000000000..624c422abc --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLBITWISEANDKERNEL_H__ +#define __ARM_COMPUTE_CLBITWISEANDKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise AND operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] + */ +class CLBitwiseAndKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLBitwiseAndKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseAndKernel(const CLBitwiseAndKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseAndKernel &operator=(const CLBitwiseAndKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseAndKernel(CLBitwiseAndKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseAndKernel &operator=(CLBitwiseAndKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEANDKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h new file mode 100644 index 0000000000..c9026022e1 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISENOTKERNEL_H__ +#define __ARM_COMPUTE_CLBITWISENOTKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise NOT operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = \lnot input(x,y) @f] + */ +class CLBitwiseNotKernel : public ICLSimple2DKernel +{ +public: + /** Set the inputs and output images. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISENOTKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h new file mode 100644 index 0000000000..fe8710fbc1 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEORKERNEL_H__ +#define __ARM_COMPUTE_CLBITWISEORKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise OR operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] + */ +class CLBitwiseOrKernel : public ICLKernel +{ +public: + /** Default constructor. 
*/ + CLBitwiseOrKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseOrKernel(const CLBitwiseOrKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseOrKernel &operator=(const CLBitwiseOrKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseOrKernel(CLBitwiseOrKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseOrKernel &operator=(CLBitwiseOrKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEORKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h new file mode 100644 index 0000000000..f4e0b4df60 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEXORKERNEL_H__ +#define __ARM_COMPUTE_CLBITWISEXORKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise XOR operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] + */ +class CLBitwiseXorKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLBitwiseXorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseXorKernel(const CLBitwiseXorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + CLBitwiseXorKernel &operator=(const CLBitwiseXorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseXorKernel(CLBitwiseXorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseXorKernel &operator=(CLBitwiseXorKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEXORKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h new file mode 100644 index 0000000000..0960f7487a --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBOX3X3KERNEL_H__ +#define __ARM_COMPUTE_CLBOX3X3KERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the box 3x3 filter kernel. + * + */ +class CLBox3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8. + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLBOX3X3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h new file mode 100644 index 0000000000..ea27d264c2 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCANNYEDGEKERNEL_H__ +#define __ARM_COMPUTE_CLCANNYEDGEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform Gradient computation. + */ +class CLGradientKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGradientKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLGradientKernel(const CLGradientKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLGradientKernel &operator=(const CLGradientKernel &) = delete; + /** Initialise the kernel's sources, destinations and border mode. + * + * @note gx, gy and mag must all be the same size (either 16 or 32). + * + * @param[in] gx Source tensor - Gx component. 
Data types supported: S16/S32. + * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. + * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. + * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. + * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. + * @param[in] num_pixel_to_skip_prev Number of pixels to skip of previous stage if border_mode = UNDEFINED + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type, int32_t num_pixel_to_skip_prev, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_gx; /**< Source tensor - Gx component */ + const ICLTensor *_gy; /**< Source tensor - Gy component */ + ICLTensor *_magnitude; /**< Destination tensor - Magnitude */ + ICLTensor *_phase; /**< Destination tensor - Quantized phase */ + unsigned int _pixels_to_skip; /**< Pixels to skip around the border. */ +}; + +/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge. + * + * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input + * to characterize points as possible edges. 
+ * + * @note Hysteresis is computed in @ref CLEdgeTraceKernel + */ +class CLEdgeNonMaxSuppressionKernel : public ICLKernel +{ +public: + /** Constructor */ + CLEdgeNonMaxSuppressionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeNonMaxSuppressionKernel(const CLEdgeNonMaxSuppressionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete; + /** Initialise the kernel's sources, destination and border mode. + * + * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. + * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. + * @param[out] output Destination tensor + * @param[in] lower_thr Lower threshold. + * @param[in] num_pixel_to_skip_prev Number of pixels to skip of previous stage if border_mode = UNDEFINED + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, int32_t num_pixel_to_skip_prev, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */ + const ICLTensor *_phase; /**< Source tensor - Quantized phase. */ + ICLTensor *_output; /**< Destination tensor. */ + unsigned int _pixels_to_skip; /**< Pixels to skip around the border. */ +}; + +/** OpenCL kernel to perform Edge tracing. 
+ */ +class CLEdgeTraceKernel : public ICLKernel +{ +public: + /** Constructor */ + CLEdgeTraceKernel(); + /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */ + CLEdgeTraceKernel(const CLEdgeTraceKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */ + CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete; + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] upper_thr Upper threshold used for the hysteresis. + * @param[in] lower_thr Lower threshold used for the hysteresis. + * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. + * Expected to be initialized to 0 before each run. + * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32. + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8. + * Expected to be initialized to 0 before each run. + * @param[in] num_pixel_to_skip_prev Number of pixels to skip of previous stage if border_mode = UNDEFINED. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, + ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter, + int32_t num_pixel_to_skip_prev, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Source tensor. */ + ICLTensor *_output; /**< Destination tensor. */ + int32_t _lower_thr; /**< Lower threshold used for the hysteresis. */ + int32_t _upper_thr; /**< Upper threshold used for the hysteresis. */ + ICLTensor *_visited; /**< Marks visited elements */ + ICLTensor *_recorded; /**< Marks recorded elements */ + ICLTensor *_l1_stack; /**< L1 hysteresis stack */ + ICLTensor *_l1_stack_counter; /**< L1 hysteresis stack counter */ + unsigned int _pixels_to_skip; /**< Pixels to skip */ +}; +} +#endif /* __ARM_COMPUTE_CLCANNYEDGEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h new file mode 100644 index 0000000000..3e718a2f1a --- /dev/null +++ b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H__ +#define __ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include <array> +#include <cstdint> + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the channel combine kernel */ +class CLChannelCombineKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLChannelCombineKernel(); + /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */ + CLChannelCombineKernel(const CLChannelCombineKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */ + CLChannelCombineKernel &operator=(const CLChannelCombineKernel &) = delete; + /** Allow instances of this class to be move-constructed */ + CLChannelCombineKernel(CLChannelCombineKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + CLChannelCombineKernel &operator=(CLChannelCombineKernel &&) = default; + /** Default destructor */ + ~CLChannelCombineKernel() = default; + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. + * @param[out] output The single planar output tensor. 
+ */ + void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[out] output The multi planar output tensor. + */ + void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + std::array<const ICLTensor *, 4> _planes; /**< Input planes. NOTE(review): element type and extent reconstructed from configure() (up to four const ICLTensor planes) - confirm */ + ICLTensor *_output; + ICLMultiImage *_output_multi; + std::array<uint32_t, 3> _x_subsampling; /**< NOTE(review): element type and extent reconstructed - confirm against the implementation */ + std::array<uint32_t, 3> _y_subsampling; /**< NOTE(review): element type and extent reconstructed - confirm against the implementation */ +}; +} +#endif /* __ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h new file mode 100644 index 0000000000..3e9e699a50 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H__ +#define __ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the channel extract kernel */ +class CLChannelExtractKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLChannelExtractKernel(); + /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */ + CLChannelExtractKernel(const CLChannelExtractKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */ + CLChannelExtractKernel &operator=(const CLChannelExtractKernel &) = delete; + /** Allow instances of this class to be move-constructed */ + CLChannelExtractKernel(CLChannelExtractKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + CLChannelExtractKernel &operator=(CLChannelExtractKernel &&) = default; + /** Default destructor */ + ~CLChannelExtractKernel() = default; + /** Set the input and output of the kernel (single tensor input). + * + * @param[in] input Source tensor. + * @param[in] channel Channel to extract. + * @param[out] output Destination tensor. Must be of U8 format. + */ + void configure(const ICLTensor *input, Channel channel, ICLTensor *output); + /** Set the input and output of the kernel (multi-planar input). + * + * @param[in] input Multi-planar source image. 
+ * @param[in] channel Channel to extract. + * @param[out] output Single-planar 2D destination image. Must be of U8 format. + */ + void configure(const ICLMultiImage *input, Channel channel, ICLImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + uint32_t _num_elems_processed_per_iteration; + uint32_t _subsampling; +}; +} +#endif /* __ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h new file mode 100644 index 0000000000..9d445e3004 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLCol2ImKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLCOL2IMKERNEL_H__ +#define __ARM_COMPUTE_CLCOL2IMKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the col2im reshaping kernel. + * + * Rearranges each matrix column into image blocks. It's the inverse operation of @ref CLIm2ColKernel. + * + * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: + * + * @f[ + * \left( \begin{array}{ccccccccc} + * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccc} + * a0 & a1 & a2 \\ + * a3 & a4 & a5 \\ + * a6 & a7 & a8 \\ + * \end{array} \right) + * @f] + */ +class CLCol2ImKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLCol2ImKernel(); + /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */ + CLCol2ImKernel(const CLCol2ImKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */ + CLCol2ImKernel &operator=(const CLCol2ImKernel &) = delete; + /** Allow instances of this class to be move-constructed */ + CLCol2ImKernel(CLCol2ImKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + CLCol2ImKernel &operator=(CLCol2ImKernel &&) = default; + /** Default destructor */ + ~CLCol2ImKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Data types supported: F16, F32. + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input. + * @param[in] convolved_dims Output convolved dimensions. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + std::pair<unsigned int, unsigned int> _convolved_dims; /**< NOTE(review): element types reconstructed - confirm against the implementation */ +}; +} + +#endif /*__ARM_COMPUTE_CLCOL2IMKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLColorConvertKernel.h b/arm_compute/core/CL/kernels/CLColorConvertKernel.h new file mode 100644 index 0000000000..a88e2dcdf3 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLColorConvertKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLCOLORCONVERTKERNEL_H__ +#define __ARM_COMPUTE_CLCOLORCONVERTKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the color convert kernel. + * + */ +class CLColorConvertKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLColorConvertKernel(); + /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */ + CLColorConvertKernel(const CLColorConvertKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */ + CLColorConvertKernel &operator=(const CLColorConvertKernel &) = delete; + /** Allow instances of this class to be move-constructed */ + CLColorConvertKernel(CLColorConvertKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + CLColorConvertKernel &operator=(CLColorConvertKernel &&) = default; + /** Default destructor. */ + ~CLColorConvertKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input Source tensor. + * @param[out] output Destination tensor. + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output of the kernel. + * + * @param[in] input Multi-planar source image. + * @param[out] output Single-planar destination image. + */ + void configure(const ICLMultiImage *input, ICLImage *output); + /** Set the input and output of the kernel. + * + * @param[in] input Single-planar source image. + * @param[out] output Multi-planar destination image. + */ + void configure(const ICLImage *input, ICLMultiImage *output); + /** Set the input and output of the kernel. + * + * @param[in] input Multi-planar source image. + * @param[out] output Multi-planar destination image. + */ + void configure(const ICLMultiImage *input, ICLMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + 
+private: + const ICLTensor *_input; /**< Pointer to single planar tensor input */ + ICLTensor *_output; /**< Pointer to single planar tensor output */ + const ICLMultiImage *_multi_input; /**< Pointer to multi-planar input */ + ICLMultiImage *_multi_output; /**< Pointer to multi-planar output */ +}; +} + +#endif /* __ARM_COMPUTE_CLCOLORCONVERTKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLConvolutionKernel.h b/arm_compute/core/CL/kernels/CLConvolutionKernel.h new file mode 100644 index 0000000000..9c0908405a --- /dev/null +++ b/arm_compute/core/CL/kernels/CLConvolutionKernel.h @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__ +#define __ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/****************************************************************************************\ + * Square Convolution * +\****************************************************************************************/ + +/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). + * The client can supply a convolution matrix \f$ C_{m,n} \f$. + * @f{eqnarray}{ + * k_0 &=& \frac{m}{2} \\ + * l_0 &=& \frac{n}{2} \\ + * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} + * @f} + * + * @note The above equation for this function is similar to the default OpenCV Filter2D function, + * which actually computes a correlation and not a convolution. + * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. + */ +template +class CLConvolutionKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; + +/** Interface for the kernel which applies a 3x3 convolution to a tensor. 
*/ +using CLConvolution3x3Kernel = CLConvolutionKernel<3>; +/** Interface for the kernel which applies a 5x5 convolution to a tensor. */ +using CLConvolution5x5Kernel = CLConvolutionKernel<5>; +/** Interface for the kernel which applies a 7x7 convolution to a tensor. */ +using CLConvolution7x7Kernel = CLConvolutionKernel<7>; +/** Interface for the kernel which applies a 9x9 convolution to a tensor. */ +using CLConvolution9x9Kernel = CLConvolutionKernel<9>; + +/****************************************************************************************\ + * Separable Square Convolution * +\****************************************************************************************/ + +/** Kernel for the Horizontal pass of a Separable Convolution. Currently support 5x5, 7x7, 9x9 */ +template +class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel +{ +public: + /** Default Constructor */ + CLSeparableConvolutionHorKernel(); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; + +private: + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor. */ +using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>; +/** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor. 
*/ +using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>; +/** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */ +using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>; + +/** Kernel for the Vertical pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */ +template +class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: S16. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; + +/** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */ +using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>; +/** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */ +using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>; +/** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor. 
*/ +using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>; + +/****************************************************************************************\ + * Rectangle Convolution * +\****************************************************************************************/ + +/** Kernel for running a convolution with a rectangular matrix. + * + * @note Supports combinations of 3, 5, 7 and 9. + */ +class CLConvolutionRectangleKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLConvolutionRectangleKernel(); + /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */ + CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */ + CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete; + /** Allow instances of this class to be move-constructed */ + CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] width Width of convolution matrix (Number of columns). + * @param[in] height Height of convolution matrix (Number of rows). + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + BorderSize _border_size; + const ICLTensor *_input; + ICLTensor *_output; +}; +} +#endif /*__ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLConvolutionLayerWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLConvolutionLayerWeightsReshapeKernel.h new file mode 100644 index 0000000000..0ad0c0db32 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLConvolutionLayerWeightsReshapeKernel.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLCONVOLUTIONLAYERWEIGHTSRESHAPEKERNEL_H__ +#define __ARM_COMPUTE_CLCONVOLUTIONLAYERWEIGHTSRESHAPEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface for the weights reshape kernel used by convolution and fully connected layers. + * + * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. + * In combination with the @ref CLIm2ColKernel can transform a convolution into a matrix multiplication. + * + * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: + * @f[ + * \left( \begin{array}{ccc} + * a000 & a001 & a002 \\ + * a010 & a011 & a012 \\ + * a020 & a021 & a022 \\ + * \end{array} \right) + * \left( \begin{array}{ccc} + * a100 & a101 & a102 \\ + * a110 & a111 & a112 \\ + * a120 & a121 & a122 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{cccccccccccccccccc} + * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ + * \end{array} \right) + * @f] + */ +class CLConvolutionLayerWeightsReshapeKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLConvolutionLayerWeightsReshapeKernel(); + /** Prevent instances of this class from being copy-constructed (As this class contains pointers) */ + CLConvolutionLayerWeightsReshapeKernel(const CLConvolutionLayerWeightsReshapeKernel &) = delete; + /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */ + CLConvolutionLayerWeightsReshapeKernel &operator=(const CLConvolutionLayerWeightsReshapeKernel &) = delete; + /** Allow instances of this class to be move-constructed */ + CLConvolutionLayerWeightsReshapeKernel(CLConvolutionLayerWeightsReshapeKernel &&) = default; + /** Allow instances of this class to be move-assigned */ + CLConvolutionLayerWeightsReshapeKernel &operator=(CLConvolutionLayerWeightsReshapeKernel &&) = default; + /** Default destructor */ + 
~CLConvolutionLayerWeightsReshapeKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data types supported: F16, F32. + * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM]. Data types supported: Same as @p input. + * @param[out] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input. + */ + void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_biases; + ICLTensor *_output; +}; +} +#endif /*__ARM_COMPUTE_CLCONVOLUTIONLAYERWEIGHTSRESHAPEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDepthConvertKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertKernel.h new file mode 100644 index 0000000000..2c3b1b8b69 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDepthConvertKernel.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__ +#define __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "arm_compute/core/Types.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the depth conversion kernel. + * Converts a tensor between the data types listed in configure(). + */ +class CLDepthConvertKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and output of the kernel. + * + * Valid conversions Input -> Output : + * + * - U8 -> U16, S16, U32, S32 + * - U16 -> U8, U32, S32 + * - S16 -> U8, U32, S32 + * - U32 -> U8, U16, S16 + * - S32 -> U8, U16, S16 + * + * @param[in] input The input tensor to convert. Data types supported: U8, U16, S16, U32 or S32. + * @param[out] output The output tensor. Data types supported: U8, U16, S16, U32 or S32. + * @param[in] policy Conversion policy. + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + */ + void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); +}; +} + +#endif /*__ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDerivativeKernel.h b/arm_compute/core/CL/kernels/CLDerivativeKernel.h new file mode 100644 index 0000000000..17552aefbe --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDerivativeKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDERIVATIVEKERNEL_H__ +#define __ARM_COMPUTE_CLDERIVATIVEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the derivative kernel. */ +class CLDerivativeKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDerivativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLDerivativeKernel(const CLDerivativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDerivativeKernel(CLDerivativeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDerivativeKernel &operator=(CLDerivativeKernel &&) = default; + /** Default destructor */ + ~CLDerivativeKernel() = default; + /** Initialise the kernel's sources, destination and border + * + * @note At least one of output_x or output_y must be set + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< Output tensor - Derivative along the X direction */ + ICLTensor *_output_y; /**< Output tensor - Derivative along the Y direction */ + bool _run_derivative_x; /**< Do we need to run Derivative X ? */ + bool _run_derivative_y; /**< Do we need to run Derivative Y ? */ +}; +} +#endif /*__ARM_COMPUTE_CLDERIVATIVEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDilateKernel.h b/arm_compute/core/CL/kernels/CLDilateKernel.h new file mode 100644 index 0000000000..a5d3beb02f --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDilateKernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDILATEKERNEL_H__ +#define __ARM_COMPUTE_CLDILATEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the dilate kernel. + * + */ +class CLDilateKernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLDILATEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLErodeKernel.h b/arm_compute/core/CL/kernels/CLErodeKernel.h new file mode 100644 index 0000000000..a43c925be6 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLErodeKernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLERODEKERNEL_H__ +#define __ARM_COMPUTE_CLERODEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the erode kernel. + * + */ +class CLErodeKernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input and output. 
+ * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLERODEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLFastCornersKernel.h b/arm_compute/core/CL/kernels/CLFastCornersKernel.h new file mode 100644 index 0000000000..9817b78ae0 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLFastCornersKernel.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLFASTCORNERSKERNEL_H__ +#define __ARM_COMPUTE_CLFASTCORNERSKERNEL_H__ + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +#include + +namespace cl +{ +class Buffer; +} + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** CL kernel to perform fast corners */ +class CLFastCornersKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFastCornersKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFastCornersKernel(const CLFastCornersKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFastCornersKernel &operator=(const CLFastCornersKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFastCornersKernel(CLFastCornersKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFastCornersKernel &operator=(CLFastCornersKernel &&) = default; + /** Default destructor */ + ~CLFastCornersKernel() = default; + + /** Initialise the kernel. + * + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Output image. Data types supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise. + * @param[in] border_mode Strategy to use for borders. 
+ */ + void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); + + // Inherited methods overridden + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLImage *_input; + ICLImage *_output; +}; + +/** CL kernel to copy keypoints information to ICLKeyPointArray and counts the number of key points */ +class CLCopyToArrayKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLCopyToArrayKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCopyToArrayKernel(const CLCopyToArrayKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCopyToArrayKernel &operator=(const CLCopyToArrayKernel &) = delete; + /** Allow instances of this class to be moved */ + CLCopyToArrayKernel(CLCopyToArrayKernel &&) = default; + /** Allow instances of this class to be moved */ + CLCopyToArrayKernel &operator=(CLCopyToArrayKernel &&) = default; + /** Default destructor */ + ~CLCopyToArrayKernel() = default; + + /** Initialise the kernel. + * + * @param[in] input Source image. Data types supported: U8. + * @param[in] update_number Flag to indicate whether we need to update the number of corners + * @param[out] corners Array of keypoints to store the results. + * @param[out] num_buffers Number of keypoints to store the results. 
+ */ + void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; /**< source image */ + ICLKeyPointArray *_corners; /**< destination array */ + cl::Buffer *_num_buffer; /**< CL memory to record number of key points in the array */ +}; +} +#endif /* __ARM_COMPUTE_CLFASTCORNERSKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h new file mode 100644 index 0000000000..797f86dae8 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLFILLBORDERKERNEL_H__ +#define __ARM_COMPUTE_CLFILLBORDERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for filling the border of a kernel */ +class CLFillBorderKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFillBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFillBorderKernel(const CLFillBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFillBorderKernel &operator=(const CLFillBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFillBorderKernel(CLFillBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFillBorderKernel &operator=(CLFillBorderKernel &&) = default; + /** Default destructor */ + ~CLFillBorderKernel() = default; + + /** Initialise the kernel's input, output and border mode. + * + * @param[in,out] tensor Tensor to process Data types supported: U8, S16, S32, F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + + /** Function to set the constant value on fill border kernel depending on type. + * + * @param[in] idx Index of the kernel argument to set. + * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + template + void set_constant_border(unsigned int idx, const PixelValue &constant_border_value); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + bool is_parallelisable() const override; + +private: + ICLTensor *_tensor; +}; +} +#endif /*__ARM_COMPUTE_CLFILLBORDERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h new file mode 100644 index 0000000000..498f9cf6ee --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLGEMMINTERLEAVE4X4KERNEL_H__ +#define __ARM_COMPUTE_CLGEMMINTERLEAVE4X4KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel which interleaves the elements of a matrix A in chunk of 4x4 + * + * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\ + * \end{array} \right) + * @f] + * + * After this operation, the output matrix will have the following shape: [ height * 4, width / 4 ] + */ +class CLGEMMInterleave4x4Kernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMInterleave4x4Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMInterleave4x4Kernel(const CLGEMMInterleave4x4Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMInterleave4x4Kernel &operator=(const CLGEMMInterleave4x4Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMInterleave4x4Kernel(CLGEMMInterleave4x4Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMInterleave4x4Kernel &operator=(CLGEMMInterleave4x4Kernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/F16/F32 + * @param[out] output Output tensor. 
Data type supported: same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_CLGEMMINTERLEAVE4X4KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h new file mode 100644 index 0000000000..f84d0638da --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to compute low precision matrix multiplication kernel + * + * This kernel performs the following computation: + * -# Convert a values from uint8 to int32 and add a_offset to each of them. + * -# Convert b values from uint8 to int32 and add b_offset to each of them. + * -# Compute the int32 matrix product of the resulting a * b. + * -# Add output_offset to each entry of the result. + * -# Multiply each entry of the result by output_mult_int and round to the nearest integer + * -# Clamp the resulting int32 values to the [0..255] range and cast to uint8. + */ +class CLGEMMLowpMatrixMultiplyKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMLowpMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyKernel(const CLGEMMLowpMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyKernel &operator=(const CLGEMMLowpMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyKernel(CLGEMMLowpMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyKernel &operator=(CLGEMMLowpMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input and output. + * + * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel. + * These two kernels change the layout of the original matrices to be more cache-friendly. + * + * @param[in] input0 Input tensor containing the interleaved Matrix A. 
Data types supported: U8 + * @param[in] input1 Input tensor containing the transposed Matrix B. Data types supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication, Data types supported: same as @p input0 + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_offset Offset to be added to each element of the output matrix + * @param[in] output_mult_int Multiplier applied to each element of the output matrix + * @param[in] shift Number of bits to shift right the result. + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; +}; +} +#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H__*/ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h new file mode 100644 index 0000000000..ea1db9f831 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H__ +#define __ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface to add a bias to each row of the input tensor + * + */ +class CLGEMMMatrixAccumulateBiasesKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMMatrixAccumulateBiasesKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixAccumulateBiasesKernel(const CLGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixAccumulateBiasesKernel &operator=(const CLGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixAccumulateBiasesKernel(CLGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Set the accumulate buffer and the biases of the kernel. + * + * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32 + * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p accum + */ + void configure(ICLTensor *accum, const ICLTensor *biases); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_accum; + const ICLTensor *_biases; +}; +} + +#endif /*__ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h new file mode 100644 index 0000000000..c808039567 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGEMMMATRIXADDITIONKERNEL_H__ +#define __ARM_COMPUTE_CLGEMMMATRIXADDITIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform the in-place matrix addition between 2 matrices, taking into account that the second matrix might be weighted by a scalar value beta. + * The matrices must have the same dimensions + * + * @note This kernel is computed if and only if beta != 0.0. 
+ */ +class CLGEMMMatrixAdditionKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMMatrixAdditionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixAdditionKernel(const CLGEMMMatrixAdditionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixAdditionKernel &operator=(const CLGEMMMatrixAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixAdditionKernel(CLGEMMMatrixAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixAdditionKernel &operator=(CLGEMMMatrixAdditionKernel &&) = default; + /** Initialise the kernel's input, output and beta value + * + * @note The input and output tensors must have the same dimensions + * + * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32 + * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref CLGEMMMatrixMultiplyKernel. Data type supported: same as @p input + * @param[in] beta Weight of matrix C + */ + void configure(const ICLTensor *input, ICLTensor *output, float beta); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} + +#endif /* __ARM_COMPUTE_CLGEMMMATRIXADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h new file mode 100644 index 0000000000..07ea3c12ac --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply two input matrices "A" and "B" or to multiply a vector "A" by a matrix "B". All elements of the output matrix/vector will be multiplied by alpha + * + * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel + * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. 
The implementation also assumes that both tensors have not been reshaped + * + * @attention The second input tensor must have at least 2 dimensions (matrix) + * + */ +class CLGEMMMatrixMultiplyKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyKernel(const CLGEMMMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyKernel &operator=(const CLGEMMMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyKernel(CLGEMMMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input, output and alpha + * + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. + * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. 
Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h new file mode 100644 index 0000000000..f70a0ae253 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLGEMMTRANSPOSE1XWKERNEL_H__ +#define __ARM_COMPUTE_CLGEMMTRANSPOSE1XWKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel which transposes the elements of a matrix in chunks of 1x4 if the input data type is F32 or in chunks of 1x8 if the input data type is F16. + * + * Following an example of how the transposition1xW works when the input data type is F32 + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{cccccccccccccccc} + * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + * + * Following an example of how the transposition1xW works when the input data type is F16 + * + * @f[ + * \left( \begin{array}{cccccccc} + * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\ + * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\ + * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\ + * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{cccccccccccccccccccccccccccccccc} + * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\ + * \end{array} \right) + * @f] + * + * @note If the input data type is F32, the output matrix will have the following shape: [ height * 4, width / 4 ] + * @note If the input data type is F16, the output matrix will have the following shape: [ height * 8, width / 8 ] + * @note If the input data type is U8, the output matrix will have the following shape: [ height * 16, width / 16 ] + * + */ +class CLGEMMTranspose1xWKernel : public ICLSimple2DKernel +{ +public: + /** Initialise
the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/F16/F32 + * @param[out] output Output tensor. Data type supported: same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLGEMMTRANSPOSE1XWKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h new file mode 100644 index 0000000000..028a10b421 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H__ +#define __ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the Gaussian 3x3 filter kernel. 
+ * + */ +class CLGaussian3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h new file mode 100644 index 0000000000..1484c06311 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H__ +#define __ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H__ + +#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run the horizontal pass of 5x5 Gaussian filter on a tensor. */ +class CLGaussian5x5HorKernel : public CLSeparableConvolution5x5HorKernel +{ +public: + /** Initialise the kernel's source, destination and border. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + +private: + //Make the configure method of the parent class private + using CLSeparableConvolution5x5HorKernel::configure; +}; + +/** Interface for the kernel to run the vertical pass of 5x5 Gaussian filter on a tensor. */ +class CLGaussian5x5VertKernel : public CLSeparableConvolution5x5VertKernel +{ +public: + /** Initialise the kernel's source, destination and border. + * + * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + +private: + //Make the configure method of the parent class private + using CLSeparableConvolution5x5VertKernel::configure; +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h new file mode 100644 index 0000000000..5bb8051add --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__ +#define __ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */ +class CLGaussianPyramidHorKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGaussianPyramidHorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidHorKernel(const CLGaussianPyramidHorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidHorKernel &operator=(const CLGaussianPyramidHorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGaussianPyramidHorKernel(CLGaussianPyramidHorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGaussianPyramidHorKernel &operator=(CLGaussianPyramidHorKernel &&) = default; + /** Default destructor */ + ~CLGaussianPyramidHorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; + +/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */ +class CLGaussianPyramidVertKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGaussianPyramidVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidVertKernel(const CLGaussianPyramidVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidVertKernel &operator=(const CLGaussianPyramidVertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGaussianPyramidVertKernel(CLGaussianPyramidVertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGaussianPyramidVertKernel &operator=(CLGaussianPyramidVertKernel &&) = default; + /** Default destructor */ + ~CLGaussianPyramidVertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U16. + * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h new file mode 100644 index 0000000000..d8057df8d1 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLHARRISCORNERSKERNEL_H__ +#define __ARM_COMPUTE_CLHARRISCORNERSKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the harris score kernel. + * + * @note The implementation supports 3, 5, and 7 for the block_size. + */ +class CLHarrisScoreKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHarrisScoreKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHarrisScoreKernel(const CLHarrisScoreKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHarrisScoreKernel &operator=(const CLHarrisScoreKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHarrisScoreKernel(CLHarrisScoreKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHarrisScoreKernel &operator=(CLHarrisScoreKernel &&) = default; + /** Default destructor */ + ~CLHarrisScoreKernel() = default; + + /** Setup the kernel parameters + * + * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2) + * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1) + * @param[out] output Destination image (harris score). Data types supported F32 + * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7 + * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) + * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output, + int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, + bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +protected: + const ICLImage *_input1; /**< Source image - Gx component */ + const ICLImage *_input2; /**< Source image - Gy component */ + ICLImage *_output; /**< Destination image - Harris score */ + float _sensitivity; /**< Sensitivity value */ + float _strength_thresh; /**< Threshold value */ + float _norm_factor; /**< Normalization factor */ + BorderSize _border_size; /**< Border size */ +}; +} +#endif /* __ARM_COMPUTE_CLHARRISCORNERSKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLHistogramKernel.h b/arm_compute/core/CL/kernels/CLHistogramKernel.h new file mode 100644 index 0000000000..b65e62d9a2 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLHistogramKernel.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHISTOGRAMKERNEL_H__ +#define __ARM_COMPUTE_CLHISTOGRAMKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLDistribution1D; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface to run the histogram kernel. This kernel processes the part of the image whose width is a multiple of 16. + * If the image width is not a multiple of 16, remaining pixels have to be processed with the @ref CLHistogramBorderKernel + */ +class CLHistogramKernel : public ICLKernel +{ +public: + /** Constructor */ + CLHistogramKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramKernel(const CLHistogramKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramKernel &operator=(const CLHistogramKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHistogramKernel(CLHistogramKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHistogramKernel &operator=(CLHistogramKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution.
+ */ + void configure(const ICLImage *input, ICLDistribution1D *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; + ICLDistribution1D *_output; +}; + +/** Interface to run the histogram kernel to handle the leftover part of the image + * + */ +class CLHistogramBorderKernel : public ICLKernel +{ +public: + /** Constructor */ + CLHistogramBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramBorderKernel(const CLHistogramBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramBorderKernel &operator=(const CLHistogramBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHistogramBorderKernel(CLHistogramBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHistogramBorderKernel &operator=(CLHistogramBorderKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution. + */ + void configure(const ICLImage *input, ICLDistribution1D *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; + ICLDistribution1D *_output; +}; +} + +#endif /* __ARM_COMPUTE_CLHISTOGRAMKERNEL_H__*/ diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h new file mode 100644 index 0000000000..d2224b53e1 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLIM2COLKERNEL_H__ +#define __ARM_COMPUTE_CLIM2COLKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the im2col reshape kernel. + * + * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. + * It is used to transform a convolution to a plain matrix multiplication. 
+ * + * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * = + * \left( \begin{array}{ccccccccc} + * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ + * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ + * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ + * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + */ +class CLIm2ColKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLIm2ColKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLIm2ColKernel(const CLIm2ColKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLIm2ColKernel &operator=(const CLIm2ColKernel &) = delete; + /** Allow instances of this class to be moved */ + CLIm2ColKernel(CLIm2ColKernel &&) = default; + /** Allow instances of this class to be moved */ + CLIm2ColKernel &operator=(CLIm2ColKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16, F32 + * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. Data types supported: Same as @p input + * @param[in] convolved_dims The convolved output dimensions. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, std::pair convolved_dims, const PadStrideInfo &conv_info, bool has_bias); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and kernel's low 3 dimensions are same as input) + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel. + */ + void run_reduced(const Window &window, cl::CommandQueue &queue); + /** run the generic convolution layer input reshape kernel + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel. + */ + void run_generic(const Window &window, cl::CommandQueue &queue); + + /** Common signature for the kernel to run */ + using Im2ColFunction = void (CLIm2ColKernel::*)(const Window &, cl::CommandQueue &); + +private: + const ICLTensor *_input; + ICLTensor *_output; + std::pair _convolved_dims; + PadStrideInfo _conv_info; + int _kernel_size; + unsigned int _num_elems_processed_per_iteration; + Im2ColFunction _run_func; +}; +} + +#endif /*__ARM_COMPUTE_CLIM2COLKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h new file mode 100644 index 0000000000..0f53c2d2a8 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H__ +#define __ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface to run the horizontal pass of the integral image kernel. */ +class CLIntegralImageHorKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output Destination tensor, Data types supported: U32. + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; + +/** Interface to run the vertical pass of the integral image kernel. 
*/ +class CLIntegralImageVertKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLIntegralImageVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLIntegralImageVertKernel &operator=(CLIntegralImageVertKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in,out] in_out The input/output tensor. Data types supported: U32 + */ + void configure(ICLTensor *in_out); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_in_out; +}; +} +#endif /*__ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h new file mode 100644 index 0000000000..ef21694d57 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLLKTRACKERKERNEL_H__ +#define __ARM_COMPUTE_CLLKTRACKERKERNEL_H__ + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +#include <cstddef> +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Internal keypoint structure for Lucas-Kanade Optical Flow */ +struct CLLKInternalKeypoint +{ + float x{ 0.f }; /**< x coordinate of the keypoint */ + float y{ 0.f }; /**< y coordinate of the keypoint */ + float tracking_status{ 0.f }; /**< the tracking status of the keypoint */ + float dummy{ 0.f }; /**< Dummy field, to make sure the data structure is 128-bit aligned, so that the GPU can use vload4 */ +}; + +/** Structure for storing Spatial Gradient Matrix and the minimum eigenvalue for each keypoint */ +struct CLCoefficientTable +{ + float A11; /**< iA11 * FLT_SCALE */ + float A12; /**< iA12 * FLT_SCALE */ + float A22; /**< iA22 * FLT_SCALE */ + float min_eig; /**< Minimum eigenvalue */ +}; + +/** Structure for storing ival, ixval and iyval for each point inside the window */ +struct CLOldValue +{ + int16_t ival; /**< ival extracted from the old image */ + int16_t ixval; /**< ixval extracted from the scharr Gx image */ + int16_t iyval; /**< iyval extracted from the scharr Gy image */ + int16_t dummy; /**< Dummy field, padding the data structure to 64 bits (four int16_t), so that the GPU can use vload4 */ +}; + +using ICLLKInternalKeypointArray = ICLArray<CLLKInternalKeypoint>; +using ICLCoefficientTableArray = ICLArray<CLCoefficientTable>; +using ICLOldValArray = ICLArray<CLOldValue>; + +/** Interface to run the initialization step of LKTracker */ +class CLLKTrackerInitKernel : public ICLKernel +{ +public: + /** Initialise the kernel input and output + * + * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points + * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points + * 
@param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] level The pyramid level + * @param[in] num_levels The number of pyramid levels + * @param[in] pyramid_scale Scale factor used for generating the pyramid + */ + void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; + +/** Interface to run the finalize step of LKTracker, where it truncates the coordinates stored in new_points array */ +class CLLKTrackerFinalizeKernel : public ICLKernel +{ +public: + /** Initialise the kernel input and output + * + * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points + * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points + */ + void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; + +/** Interface to run the first stage of LKTracker, where A11, A12, A22, min_eig, ival, ixval and iyval are computed */ +class CLLKTrackerStage0Kernel : public ICLKernel +{ +public: + /** Initialise the kernel input and output + * + * @param[in] old_input Pointer to the input old tensor. Data types supported: U8 + * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16 + * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. 
Data types supported: S16 + * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points + * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[out] old_ival Pointer to the array holding internal values + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + * @param[in] border_offset The offset used to define the boundary of the tracked pixels in different border modes + */ + void configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + size_t window_dimension, size_t level, int32_t border_offset); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; + +/** Interface to run the second stage of LKTracker, where the motion vectors of the given points are computed */ +class CLLKTrackerStage1Kernel : public ICLKernel +{ +public: + /** Initialise the kernel input and output + * + * @param[in] new_input Pointer to the input new tensor. Data types supported: U8 + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points + * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[in] old_ival Pointer to the array holding internal values + * @param[in] termination The criteria to terminate the search of each keypoint. 
+ * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + * @param[in] border_offset The offset used to define the boundary of the tracked pixels in different border modes + */ + void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level, int32_t border_offset); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} +#endif /*__ARM_COMPUTE_CLLKTRACKERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h new file mode 100644 index 0000000000..a8e1dcb361 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H__ +#define __ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to compute magnitude and phase. + * + */ +class CLMagnitudePhaseKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLMagnitudePhaseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMagnitudePhaseKernel &operator=(CLMagnitudePhaseKernel &&) = default; + /** Initialise the kernel's inputs and outputs. + * + * @note At least one of magnitude or phase must be set. + * + * @param[in] gx The input gradient X tensor. Data types supported: S16. + * @param[in] gy The input gradient Y tensor. Data types supported: S16. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16. + * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. 
+ * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. + * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. + */ + void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, + MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_gx; /**< Input gradient X. */ + const ICLTensor *_gy; /**< Input gradient Y. */ + ICLTensor *_magnitude; /**< Output - Magnitude. */ + ICLTensor *_phase; /**< Output - Phase. */ + bool _run_mag; /**< Calculate magnitude ? */ + bool _run_phase; /**< Calculate phase ? */ +}; +} + +#endif /* __ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h new file mode 100644 index 0000000000..9f30f76e1b --- /dev/null +++ b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMEANSTDDEVKERNEL_H__ +#define __ARM_COMPUTE_CLMEANSTDDEVKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace cl +{ +class Buffer; +} + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */ +class CLMeanStdDevKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLMeanStdDevKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDevKernel(const CLMeanStdDevKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDevKernel &operator=(const CLMeanStdDevKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMeanStdDevKernel(CLMeanStdDevKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMeanStdDevKernel &operator=(CLMeanStdDevKernel &&) = default; + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input image. Data types supported: U8. + * @param[out] mean Output average pixel value of the input image. + * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong). + * @param[out] stddev (Optional) Output standard deviation of pixel values. + * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). 
+ */ + void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; + float *_mean; + float *_stddev; + cl::Buffer *_global_sum; + cl::Buffer *_global_sum_squared; +}; +} +#endif /* __ARM_COMPUTE_CLMEANSTDDEVKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h new file mode 100644 index 0000000000..5af364b6c6 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLMEDIAN3X3KERNEL_H__ +#define __ARM_COMPUTE_CLMEDIAN3X3KERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the median 3x3 filter kernel. + * + */ +class CLMedian3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLMEDIAN3X3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h new file mode 100644 index 0000000000..6a31f3cf18 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H__ +#define __ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/ICLKernel.h" + +#include <array> + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the kernel to perform min max search on an image. + */ +class CLMinMaxKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLMinMaxKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxKernel(const CLMinMaxKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxKernel &operator=(const CLMinMaxKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMinMaxKernel(CLMinMaxKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMinMaxKernel &operator=(CLMinMaxKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input Image. Data types supported: U8 or S16. + * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32. + */ + void configure(const ICLImage *input, cl::Buffer *min_max); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Input image. */ + cl::Buffer *_min_max; /**< Minimum/maximum value. */ + std::array<int, 2> _data_type_max_min; /**< Maximum and minimum data type value respectively. */ +}; + +/** Interface for the kernel to find min max locations of an image. 
+ */ +class CLMinMaxLocationKernel : public ICLKernel +{ +public: + /** Constructor */ + CLMinMaxLocationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLocationKernel(const CLMinMaxLocationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLocationKernel &operator=(const CLMinMaxLocationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMinMaxLocationKernel(CLMinMaxLocationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMinMaxLocationKernel &operator=(CLMinMaxLocationKernel &&) = default; + /** Initialise the kernel's input and outputs. + * + * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. + * + * @param[in] input Input image. Data types supported: U8 or S16. + * @param[in] min_max Buffer of 2 elements which contains the min value at position 0 and the max value at position 1. Data type supported: S32 + * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32 + * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. + * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. + */ + void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, + ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; /**< Input image. */ + cl::Buffer *_min_max_count; /**< Minimum/maximum value occurrences. 
*/ +}; +} +#endif /*__ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h new file mode 100644 index 0000000000..0c59063bbc --- /dev/null +++ b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H__ +#define __ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to apply a non-linear filter */ +class CLNonLinearFilterKernel : public ICLSimple2DKernel +{ +public: + /** Default constructor */ + CLNonLinearFilterKernel(); + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data types supported: U8 + * @param[out] output Destination tensor. Data types supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, + unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, + bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; + +private: + BorderSize _border_size; /**< Border size */ +}; +} +#endif /*__ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h new file mode 100644 index 0000000000..1719bbbb47 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H__ +#define __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface to perform Non-Maxima suppression over a 3x3 window using OpenCL + * + * @note Used by @ref CLFastCorners and @ref CLHarrisCorners + */ +class CLNonMaximaSuppression3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor) + * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /* __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h new file mode 100644 index 0000000000..ca9034b162 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the normalization layer kernel. 
+ */ +class CLNormalizationLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizationLayerKernel(const CLNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizationLayerKernel &operator=(const CLNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + CLNormalizationLayerKernel(CLNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + CLNormalizationLayerKernel &operator=(CLNormalizationLayerKernel &&) = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F16, F32. + * @param[in] squared_input Source tensor in which each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM]. + * Data types should match the input type. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types should match the input type. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
+ */ + void configure(const ICLTensor *input, const ICLTensor *squared_input, ICLTensor *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + const ICLTensor *_squared_input; + ICLTensor *_output; + BorderSize _border_size; +}; +} +#endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h new file mode 100644 index 0000000000..dd96aaeb2e --- /dev/null +++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__ +#define __ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the pixelwise multiplication kernel. + * + */ +class CLPixelWiseMultiplicationKernel : public ICLKernel +{ +public: + /** Default constructor.*/ + CLPixelWiseMultiplicationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLPixelWiseMultiplicationKernel(const CLPixelWiseMultiplicationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLPixelWiseMultiplicationKernel &operator=(const CLPixelWiseMultiplicationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPixelWiseMultiplicationKernel(CLPixelWiseMultiplicationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default; + /** Initialise the kernel's inputs, output, scale and overflow/rounding policies. + * + * @param[in] input1 An input tensor. Data types supported: U8, S16, F16, F32. + * @param[in] input2 An input tensor. Data types supported: U8, S16, F16, F32. + * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< First input tensor, presumably set by configure() */ + const ICLTensor *_input2; /**< Second input tensor, presumably set by configure() */ + ICLTensor *_output; /**< Output tensor, presumably set by configure() */ +}; +} + +#endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h new file mode 100644 index 0000000000..546a40b15e --- /dev/null +++ b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the pooling layer kernel */ +class CLPoolingLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPoolingLayerKernel(const CLPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPoolingLayerKernel &operator=(const CLPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPoolingLayerKernel(CLPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPoolingLayerKernel &operator=(CLPoolingLayerKernel &&) = default; + /** Default destructor */ + ~CLPoolingLayerKernel() = default; + + /** Set the input and output tensors and the pooling parameters. + * + * @param[in] input Source tensor. Data types supported: F16, F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor, presumably set by configure() */ + ICLTensor *_output; /**< Output tensor, presumably set by configure() */ + PoolingLayerInfo _pool_info; /**< Pooling parameters, presumably a copy of the pool_info passed to configure() */ + BorderSize _border_size; /**< Border size, presumably what border_size() reports */ +}; +} +#endif /*__ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLRemapKernel.h b/arm_compute/core/CL/kernels/CLRemapKernel.h new file mode 100644 index 0000000000..7cebf2e817 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLRemapKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLREMAPKERNEL_H__ +#define __ARM_COMPUTE_CLREMAPKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a remap on a tensor */ +class CLRemapKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLRemapKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLRemapKernel(const CLRemapKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + CLRemapKernel &operator=(const CLRemapKernel &) = delete; + /** Allow instances of this class to be moved */ + CLRemapKernel(CLRemapKernel &&) = default; + /** Allow instances of this class to be moved */ + CLRemapKernel &operator=(CLRemapKernel &&) = default; + /** Initialize the kernel's input, coordinate maps, output, interpolation policy and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] map_x Map for X coordinates. Data types supported: F32. + * @param[in] map_y Map for Y coordinates. Data types supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] policy The interpolation type. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor, presumably set by configure() */ + ICLTensor *_output; /**< Output tensor, presumably set by configure() */ + const ICLTensor *_map_x; /**< X coordinate map, presumably set by configure() */ + const ICLTensor *_map_y; /**< Y coordinate map, presumably set by configure() */ +}; +} +#endif /*__ARM_COMPUTE_CLREMAPKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLScaleKernel.h b/arm_compute/core/CL/kernels/CLScaleKernel.h new file mode 100644 index 0000000000..e74a7cb82a --- /dev/null +++ b/arm_compute/core/CL/kernels/CLScaleKernel.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSCALEKERNEL_H__ +#define __ARM_COMPUTE_CLSCALEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the scale kernel. */ +class CLScaleKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, output and interpolation policy + * + * @note dx, dy and offsets have the same dimensions (width and height) as the output tensor. NOTE(review): none of these are parameters of this method; confirm whether this note still applies. + * + * @param[in] input Source tensor. Data types supported: U8, S16. + * @param[out] output Destination tensor. Data types supported: U8, S16 (Must be the same as the input tensor). + * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. 
+ * @param[in] policy Interpolation type to use + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} + +#endif /*__ARM_COMPUTE_CLSCALEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h new file mode 100644 index 0000000000..fe245cc351 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLSCHARR3X3KERNEL_H__ +#define __ARM_COMPUTE_CLSCHARR3X3KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. + * + * @f[ + * \mathbf{G}_x=\begin{vmatrix} + * -3 & 0 & +3\\ + * -10& 0 & +10\\ + * -3 & 0 & +3 + * \end{vmatrix} + * @f] + * @f[ + * \mathbf{G}_y=\begin{vmatrix} + * -3 & -10 & -3\\ + * 0 & 0 & 0\\ + * +3 & +10 & +3 + * \end{vmatrix} + * @f] + */ +class CLScharr3x3Kernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLScharr3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLScharr3x3Kernel &operator=(CLScharr3x3Kernel &&) = default; + /** Initialise the kernel's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + bool _run_scharr_x; /**< Do we need to run Scharr X ? */ + bool _run_scharr_y; /**< Do we need to run Scharr Y ? */ + const ICLTensor *_input; /**< Input image */ + ICLTensor *_output_x; /**< Output image for scharr X */ + ICLTensor *_output_y; /**< Output image for scharr Y */ +}; +} +#endif /*__ARM_COMPUTE_CLSCHARR3X3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h new file mode 100644 index 0000000000..9edeb6ceff --- /dev/null +++ b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOBEL3X3KERNEL_H__ +#define __ARM_COMPUTE_CLSOBEL3X3KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 Sobel filter on a tensor. */ +class CLSobel3x3Kernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel3x3Kernel &operator=(CLSobel3x3Kernel &&) = default; + /** Default destructor */ + ~CLSobel3x3Kernel() = default; + /** Initialise the kernel's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< Output tensor for Sobel X */ + ICLTensor *_output_y; /**< Output tensor for Sobel Y */ + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ +}; +} +#endif /*__ARM_COMPUTE_CLSOBEL3X3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h new file mode 100644 index 0000000000..e90f8f587e --- /dev/null +++ b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOBEL5X5KERNEL_H__ +#define __ARM_COMPUTE_CLSOBEL5X5KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. */ +class CLSobel5x5HorKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel5x5HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel5x5HorKernel &operator=(CLSobel5x5HorKernel &&) = default; + /** Default destructor */ + ~CLSobel5x5HorKernel() = default; + + /** Initialise the kernel's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< X output of horizontal pass */ + ICLTensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. */ +class CLSobel5x5VertKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel5x5VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel5x5VertKernel &operator=(CLSobel5x5VertKernel &&) = default; + /** Default destructor */ + ~CLSobel5x5VertKernel() = default; + + /** Initialise the kernel's sources, destinations and border mode. + * + * @note At least one of output_x or output_y must be set, together with its corresponding input. + * + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data types supported: S16. 
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ + const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ + ICLTensor *_output_x; /**< X output of sobel */ + ICLTensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} +#endif /*__ARM_COMPUTE_CLSOBEL5X5KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h new file mode 100644 index 0000000000..e5ef8444ee --- /dev/null +++ b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOBEL7X7KERNEL_H__ +#define __ARM_COMPUTE_CLSOBEL7X7KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. */ +class CLSobel7x7HorKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel7x7HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel7x7HorKernel &operator=(CLSobel7x7HorKernel &&) = default; + /** Default destructor */ + ~CLSobel7x7HorKernel() = default; + + /** Initialise the kernel's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data types supported: S32. 
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< X output of horizontal pass */ + ICLTensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. */ +class CLSobel7x7VertKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel7x7VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel7x7VertKernel &operator=(CLSobel7x7VertKernel &&) = default; + /** Default destructor */ + ~CLSobel7x7VertKernel() = default; + + /** Initialise the kernel's sources, destinations and border mode. + * + * @note At least one of output_x or output_y must be set, together with its corresponding input. + * + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32. 
+ * @param[out] output_x (Optional) Destination tensor for the X gradient. Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data types supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ + const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ + ICLTensor *_output_x; /**< X output of sobel */ + ICLTensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} +#endif /*__ARM_COMPUTE_CLSOBEL7X7KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h new file mode 100644 index 0000000000..0806974ad6 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for identifying the max value of 1D Logits */ +class CLLogits1DMaxKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16, F32. Number of channels must be 1. + * @param[out] output Destination tensor. Matching input type and channel number. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output); +}; + +/** Interface for shifting the logits values around the max value and exponentiating the result */ +class CLLogits1DShiftExpSumKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLogits1DShiftExpSumKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogits1DShiftExpSumKernel(const CLLogits1DShiftExpSumKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogits1DShiftExpSumKernel &operator=(const CLLogits1DShiftExpSumKernel &) = delete; + /** Allow instances of this class to be moved */ + CLLogits1DShiftExpSumKernel(CLLogits1DShiftExpSumKernel &&) = default; + /** Allow instances of this class to be moved */ + CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default; + /** Set the input, max, output and sum tensors. + * + * @param[in] input Source tensor. Data types supported: F16, F32. Number of channels must be 1. + * @param[in] max Max values tensor. Matching input type and channel number. + * @param[out] output Destination tensor. Matching input type and channel number. + * @param[out] sum Sum of 1D logits tensor. Matching input type and channel number. + */ + void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Input tensor, presumably set by configure() */ + const ICLTensor *_max; /**< Max values tensor, presumably set by configure() */ + ICLTensor *_output; /**< Output tensor, presumably set by configure() */ + ICLTensor *_sum; /**< Sum tensor, presumably set by configure() */ +}; + +/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. 
*/ +class CLLogits1DNormKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLogits1DNormKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete; + /** Allow instances of this class to be moved */ + CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default; + /** Allow instances of this class to be moved */ + CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default; + /** Set the input, sum and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16, F32. Number of channels must be 1. + * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Matching input type and channel number. + * @param[out] output Destination tensor. Matching input type and channel number. + */ + void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Input tensor, presumably set by configure() */ + const ICLTensor *_sum; /**< Sum tensor, presumably set by configure() */ + ICLTensor *_output; /**< Output tensor, presumably set by configure() */ +}; +} +#endif /*__ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLTableLookupKernel.h b/arm_compute/core/CL/kernels/CLTableLookupKernel.h new file mode 100644 index 0000000000..477f58dc38 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLTableLookupKernel.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTABLELOOKUPKERNEL_H__ +#define __ARM_COMPUTE_CLTABLELOOKUPKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; +class ICLLut; + +/** Interface for the kernel to perform table lookup calculations. */ +class CLTableLookupKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, lut and output. + * + * @param[in] input An input tensor. Data types supported: U8, S16. + * @param[in] lut The input LUT. Data types supported: U8, S16. + * @param[out] output The output tensor. Data types supported: U8, S16. 
+ */ + void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLTABLELOOKUPKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLThresholdKernel.h b/arm_compute/core/CL/kernels/CLThresholdKernel.h new file mode 100644 index 0000000000..d7a6ae2cdb --- /dev/null +++ b/arm_compute/core/CL/kernels/CLThresholdKernel.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTHRESHOLDKERNEL_H__ +#define __ARM_COMPUTE_CLTHRESHOLDKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the thresholding kernel. + * + */ +class CLThresholdKernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input, output and threshold parameters. 
+ * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. + * @param[in] false_value value to set when the condition is not respected. + * @param[in] true_value value to set when the condition is respected. + * @param[in] type Thresholding type. Either RANGE or BINARY. + * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. + */ + void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, + uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); +}; +} +#endif /*__ARM_COMPUTE_CLTHRESHOLDKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLTransposeKernel.h b/arm_compute/core/CL/kernels/CLTransposeKernel.h new file mode 100644 index 0000000000..9ad183f8f1 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLTransposeKernel.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTRANSPOSEKERNEL_H__ +#define __ARM_COMPUTE_CLTRANSPOSEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel which transposes the elements of a matrix. + * + * [width, height, batch] -> [height, width, batch] + * + */ +class CLTransposeKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLTRANSPOSEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h new file mode 100644 index 0000000000..05d6d0a8f7 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLWARPAFFINEKERNEL_H__ +#define __ARM_COMPUTE_CLWARPAFFINEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the warp affine kernel.*/ +class CLWarpAffineKernel : public ICLSimple2DKernel +{ +public: + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] matrix The affine matrix. Must be 2x3 of type float. + * @param[in] policy The interpolation type. + */ + void configure(const ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLWARPAFFINEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h new file mode 100644 index 0000000000..5c5013c599 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H__ +#define __ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; +/** Interface for the warp perspective kernel.*/ +class CLWarpPerspectiveKernel : public ICLSimple2DKernel +{ +public: + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] matrix The perspective matrix. Must be 3x3 of type float. + * @param[in] policy The interpolation type. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} + +#endif /*__ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H__ */ diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h new file mode 100644 index 0000000000..99ae68f2e5 --- /dev/null +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_ICPPKERNEL_H__ +#define __ARM_COMPUTE_ICPPKERNEL_H__ + +#include "arm_compute/core/IKernel.h" + +namespace arm_compute +{ +class Window; + +/** Common interface for all kernels implemented in C++ */ +class ICPPKernel : public IKernel +{ +public: + /** Default destructor */ + virtual ~ICPPKernel() = default; + + /** Execute the kernel on the passed window + * + * @warning If is_parallelisable() returns false then the passed window must be equal to window() + * + * @note The window has to be a region within the window returned by the window() method + * + * @note The width of the window has to be a multiple of num_elems_processed_per_iteration(). + * + * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) + */ + virtual void run(const Window &window) = 0; +}; +} +#endif /*__ARM_COMPUTE_ICPPKERNEL_H__ */ diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h new file mode 100644 index 0000000000..3c33c4d371 --- /dev/null +++ b/arm_compute/core/CPP/ICPPSimpleKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICPPSIMPLEKERNEL_H__ +#define __ARM_COMPUTE_ICPPSIMPLEKERNEL_H__ + +#include "arm_compute/core/CPP/ICPPKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for simple C++ kernels having 1 tensor input and 1 tensor output */ +class ICPPSimpleKernel : public ICPPKernel +{ +public: + /** Constructor */ + ICPPSimpleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICPPSimpleKernel(const ICPPSimpleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICPPSimpleKernel &operator=(const ICPPSimpleKernel &) = delete; + /** Allow instances of this class to be moved */ + ICPPSimpleKernel(ICPPSimpleKernel &&) = default; + /** Allow instances of this class to be moved */ + ICPPSimpleKernel &operator=(ICPPSimpleKernel &&) = default; + /** Default destructor */ + ~ICPPSimpleKernel() = default; + +protected: + /** Configure the kernel + * + * @param[in] input Source tensor. + * @param[out] output Destination tensor. + * @param[in] processed_elements Number of processed elements per iteration. + * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. + * @param[in] border_size (Optional) Size of the border. 
+ */ + void configure(const ITensor *input, ITensor *output, unsigned int processed_elements, bool border_undefined = false, const BorderSize &border_size = BorderSize()); + +protected: + const ITensor *_input; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_ICPPSIMPLEKERNEL_H__ */ diff --git a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h new file mode 100644 index 0000000000..0866d4ee57 --- /dev/null +++ b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H__ +#define __ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/INEKernel.h" + +#include +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** CPP kernel to perform corner candidates + */ +class CPPCornerCandidatesKernel : public INEKernel +{ +public: + /** Default constructor */ + CPPCornerCandidatesKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPCornerCandidatesKernel(const CPPCornerCandidatesKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPCornerCandidatesKernel &operator=(const CPPCornerCandidatesKernel &) = delete; + /** Allow instances of this class to be moved */ + CPPCornerCandidatesKernel(CPPCornerCandidatesKernel &&) = default; + /** Allow instances of this class to be moved */ + CPPCornerCandidatesKernel &operator=(CPPCornerCandidatesKernel &&) = default; + /** Default destructor */ + ~CPPCornerCandidatesKernel() = default; + + /** Setup the kernel parameters + * + * @param[in] input Source image (harris score). 
Format supported F32 + * @param[out] output Destination array of InternalKeypoint + * @param[out] num_corner_candidates Number of corner candidates + */ + void configure(const IImage *input, InternalKeypoint *output, int32_t *num_corner_candidates); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + int32_t *_num_corner_candidates; /**< Number of corner candidates */ + std::mutex _corner_candidates_mutex; /**< Mutex to prevent race conditions */ + const IImage *_input; /**< Source image - Harris score */ + InternalKeypoint *_output; /**< Array of InternalKeypoint */ +}; +} //namespace arm_compute +#endif /* __ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H__ */ diff --git a/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h b/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h new file mode 100644 index 0000000000..dab0192f07 --- /dev/null +++ b/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H__ +#define __ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/INEKernel.h" + +#include +#include + +namespace arm_compute +{ +/** CPP kernel to perform sorting and euclidean distance */ +class CPPSortEuclideanDistanceKernel : public INEKernel +{ +public: + /** Default constructor */ + CPPSortEuclideanDistanceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPSortEuclideanDistanceKernel(const CPPSortEuclideanDistanceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPSortEuclideanDistanceKernel &operator=(const CPPSortEuclideanDistanceKernel &) = delete; + /** Allow instances of this class to be moved */ + CPPSortEuclideanDistanceKernel(CPPSortEuclideanDistanceKernel &&) = default; + /** Allow instances of this class to be moved */ + CPPSortEuclideanDistanceKernel &operator=(CPPSortEuclideanDistanceKernel &&) = default; + /** Initialise the kernel's source, destination and border mode. + * + * @param[in,out] in_out Input internal keypoints. Marked as out as the kernel writes 0 in the strength member. + * @param[out] output Output keypoints. 
+ * @param[in] num_corner_candidates Pointer to the number of corner candidates in the input array + * @param[in] min_distance Radial Euclidean distance to use + */ + void configure(InternalKeypoint *in_out, IKeyPointArray *output, const int32_t *num_corner_candidates, float min_distance); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + const int32_t *_num_corner_candidates; /**< Number of corner candidates */ + float _min_distance; /**< Radial Euclidean distance */ + InternalKeypoint *_in_out; /**< Source array of InternalKeypoint */ + IKeyPointArray *_output; /**< Destination array of NEKeyPointArray */ +}; + +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H__ */ diff --git a/arm_compute/core/Coordinates.h b/arm_compute/core/Coordinates.h new file mode 100644 index 0000000000..c93626568f --- /dev/null +++ b/arm_compute/core/Coordinates.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_COORDINATES_H__ +#define __ARM_COMPUTE_COORDINATES_H__ + +#include "arm_compute/core/Dimensions.h" +#include "arm_compute/core/Error.h" + +#include +#include +#include + +namespace arm_compute +{ +/** Coordinates of an item */ +class Coordinates : public Dimensions +{ +public: +#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ + /** Constructor to initialize the coordinates. + * + * @param[in] coords Values to initialize the dimensions. + */ + template + constexpr Coordinates(Ts... coords) + : Dimensions{ coords... } + { + } +#endif + /** Allow instances of this class to be copy constructed */ + constexpr Coordinates(const Coordinates &) = default; + /** Allow instances of this class to be copied */ + Coordinates &operator=(const Coordinates &) = default; + /** Allow instances of this class to be move constructed */ + constexpr Coordinates(Coordinates &&) = default; + /** Allow instances of this class to be moved */ + Coordinates &operator=(Coordinates &&) = default; + /** Default destructor */ + ~Coordinates() = default; +}; +} +#endif /*__ARM_COMPUTE_COORDINATES_H__*/ diff --git a/arm_compute/core/Dimensions.h b/arm_compute/core/Dimensions.h new file mode 100644 index 0000000000..87050d2215 --- /dev/null +++ b/arm_compute/core/Dimensions.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_DIMENSIONS_H__ +#define __ARM_COMPUTE_DIMENSIONS_H__ + +#include "arm_compute/core/Error.h" + +#include +#include +#include +#include + +namespace arm_compute +{ +/* Constant value used to indicate maximum dimensions of a Window, TensorShape and Coordinates */ +constexpr size_t MAX_DIMS = 6; + +/** Dimensions with dimensionality */ +template +class Dimensions +{ +public: + /** Maximum number of dimensions the tensor can have */ + static constexpr size_t num_max_dimensions = MAX_DIMS; + +#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ + /** Constructor to initialize the dimensions from a list of values. + * + * @param[in] dims Values to initialize the dimensions. + */ + template + Dimensions(Ts... dims) + : _id{ { dims... 
} }, _num_dimensions{ sizeof...(dims) } + { + } +#endif + /** Allow instances of this class to be copy constructed */ + Dimensions(const Dimensions &) = default; + /** Allow instances of this class to be copied */ + Dimensions &operator=(const Dimensions &) = default; + /** Allow instances of this class to be move constructed */ + Dimensions(Dimensions &&) = default; + /** Allow instances of this class to be moved */ + Dimensions &operator=(Dimensions &&) = default; + /** Pure virtual destructor */ + virtual ~Dimensions() = 0; + /** Accessor to set the value of one of the dimensions. The number of dimensions is raised to dimension + 1 if it was smaller. + * + * @param[in] dimension Dimension for which the value is set. + * @param[in] value Value to be set for the dimension. + */ + void set(size_t dimension, T value) + { + ARM_COMPUTE_ERROR_ON(dimension >= num_max_dimensions); + _id[dimension] = value; + _num_dimensions = std::max(_num_dimensions, dimension + 1); + } + /** Alias to access the size of the first dimension */ + T x() const + { + return _id[0]; + } + /** Alias to access the size of the second dimension */ + T y() const + { + return _id[1]; + } + /** Alias to access the size of the third dimension */ + T z() const + { + return _id[2]; + } + /** Generic accessor to get the size of any dimension + * + * @note Precondition: dimension < Dimensions::num_max_dimensions + * + * @param[in] dimension Dimension of the wanted size + * + * @return The size of the requested dimension. 
+ */ + T operator[](size_t dimension) const + { + ARM_COMPUTE_ERROR_ON(dimension >= num_max_dimensions); + return _id[dimension]; + } + /** Returns the effective dimensionality of the tensor */ + inline unsigned int num_dimensions() const + { + return _num_dimensions; + } + + /** Set number of dimensions (stored as given, no check against num_max_dimensions) */ + inline void set_num_dimensions(size_t num_dimensions) + { + _num_dimensions = num_dimensions; + } + +protected: + std::array _id; + size_t _num_dimensions{ 0 }; +}; + +template +inline Dimensions::~Dimensions() +{ +} +} +#endif /*__ARM_COMPUTE_DIMENSIONS_H__*/ diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h new file mode 100644 index 0000000000..a5895015ce --- /dev/null +++ b/arm_compute/core/Error.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_ERROR_H__ +#define __ARM_COMPUTE_ERROR_H__ + +/** Print the given message then throw an std::runtime_error. + * + * @param[in] ... Message to display before aborting. + */ +#define ARM_COMPUTE_ERROR(...) ::arm_compute::error(__func__, __FILE__, __LINE__, __VA_ARGS__) // NOLINT + +/** Print the given message then throw an std::runtime_error. + * + * @param[in] func Function in which the error occurred. + * @param[in] file File in which the error occurred. + * @param[in] line Line in which the error occurred. + * @param[in] ... Message to display before aborting. + */ +#define ARM_COMPUTE_ERROR_LOC(func, file, line, ...) ::arm_compute::error(func, file, line, __VA_ARGS__) // NOLINT + +/** To avoid unused variables warnings + * + * This is useful if for example a variable is only used + * in debug builds and generates a warning in release builds. + * + * @param[in] var Variable which is unused + */ +#define ARM_COMPUTE_UNUSED(var) (void)(var) + +#ifdef ARM_COMPUTE_ASSERTS_ENABLED +/** If the condition is true, the given message is printed and an exception is thrown + * + * @param[in] cond Condition to evaluate. + * @param[in] ... Message to print if cond is true. + */ +#define ARM_COMPUTE_ERROR_ON_MSG(cond, ...) \ + do \ + { \ + if(cond) \ + { \ + ARM_COMPUTE_ERROR(__VA_ARGS__); \ + } \ + } while(0) + +/** If the condition is true, the given message is printed and an exception is thrown + * + * @param[in] cond Condition to evaluate. + * @param[in] func Function in which the error occurred. + * @param[in] file File in which the error occurred. + * @param[in] line Line in which the error occurred. + * @param[in] ... Message to print if cond is true. + */ +#define ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, ...) 
\ + do \ + { \ + if(cond) \ + { \ + ARM_COMPUTE_ERROR_LOC(func, file, line, __VA_ARGS__); \ + } \ + } while(0) + +/** If the condition is true, an std::logic_error carrying the given message is thrown, otherwise value is returned + * + * @param[in] cond Condition to evaluate. + * @param[in] val Value to be returned. + * @param[in] msg Message of the exception thrown if cond is true. + */ +#define ARM_COMPUTE_CONST_ON_ERROR(cond, val, msg) (cond) ? throw std::logic_error(msg) : val; +#else /* ARM_COMPUTE_ASSERTS_ENABLED */ +#define ARM_COMPUTE_ERROR_ON_MSG(cond, ...) +#define ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, ...) +#define ARM_COMPUTE_CONST_ON_ERROR(cond, val, msg) val +#endif /* ARM_COMPUTE_ASSERTS_ENABLED */ + +/** If the condition is true then an error message is printed and an exception thrown + * + * @param[in] cond Condition to evaluate + */ +#define ARM_COMPUTE_ERROR_ON(cond) \ + ARM_COMPUTE_ERROR_ON_MSG(cond, #cond) + +/** If the condition is true then an error message is printed and an exception thrown + * + * @param[in] cond Condition to evaluate + * @param[in] func Function in which the error occurred. + * @param[in] file File in which the error occurred. + * @param[in] line Line in which the error occurred. + */ +#define ARM_COMPUTE_ERROR_ON_LOC(cond, func, file, line) \ + ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, #cond) + +namespace arm_compute +{ +/** Print an error message then throw an std::runtime_error + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] msg Message to display before aborting. + * @param[in] ... Variable number of arguments of the message. 
+ */ +[[noreturn]] void error(const char *function, const char *file, const int line, const char *msg, ...); +} + +#endif /* __ARM_COMPUTE_ERROR_H__ */ diff --git a/arm_compute/core/HOGInfo.h b/arm_compute/core/HOGInfo.h new file mode 100644 index 0000000000..654629306d --- /dev/null +++ b/arm_compute/core/HOGInfo.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_HOGINFO_H__ +#define __ARM_COMPUTE_HOGINFO_H__ + +#include "arm_compute/core/Size2D.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** Store the HOG's metadata */ +class HOGInfo +{ +public: + /** Default constructor */ + HOGInfo(); + /** Default destructor */ + virtual ~HOGInfo() = default; + /** Allow instances of this class to be copy constructed */ + HOGInfo(const HOGInfo &) = default; + /** Allow instances of this class to be copied */ + HOGInfo &operator=(const HOGInfo &) = default; + /** Allow instances of this class to be move constructed */ + HOGInfo(HOGInfo &&) = default; + /** Allow instances of this class to be moved */ + HOGInfo &operator=(HOGInfo &&) = default; + /** Constructor + * + * @param[in] cell_size Cell size in pixels + * @param[in] block_size Block size in pixels. Must be a multiple of cell_size. + * @param[in] detection_window_size Detection window size in pixels. Must be a multiple of block_size and block_stride. + * @param[in] block_stride Distance in pixels between 2 consecutive blocks along the x and y direction. Must be a multiple of cell size + * @param[in] num_bins Number of histogram bins for each cell + * @param[in] normalization_type (Optional) Normalization type to use for each block + * @param[in] l2_hyst_threshold (Optional) Threshold used for L2HYS_NORM normalization method + * @param[in] phase_type (Optional) Type of @ref PhaseType + */ + HOGInfo(const Size2D &cell_size, const Size2D &block_size, const Size2D &detection_window_size, const Size2D &block_stride, size_t num_bins, + HOGNormType normalization_type = HOGNormType::L2HYS_NORM, float l2_hyst_threshold = 0.2f, PhaseType phase_type = PhaseType::UNSIGNED); + /** Initialize the metadata structure with the given parameters + * + * @param[in] cell_size Cell size in pixels + * @param[in] block_size Block size in pixels. Must be a multiple of cell_size. 
+ * @param[in] detection_window_size Detection window size in pixels. Must be a multiple of block_size and block_stride. + * @param[in] block_stride Distance in pixels between 2 consecutive blocks along the x and y direction. Must be a multiple of cell size + * @param[in] num_bins Number of histogram bins for each cell + * @param[in] normalization_type (Optional) Normalization type to use for each block + * @param[in] l2_hyst_threshold (Optional) Threshold used for L2HYS_NORM normalization method + * @param[in] phase_type (Optional) Type of @ref PhaseType + */ + void init(const Size2D &cell_size, const Size2D &block_size, const Size2D &detection_window_size, const Size2D &block_stride, size_t num_bins, + HOGNormType normalization_type = HOGNormType::L2HYS_NORM, float l2_hyst_threshold = 0.2f, PhaseType phase_type = PhaseType::UNSIGNED); + /** The cell size in pixels + * + * @return The cell size in pixels + */ + const Size2D &cell_size() const; + /** The block size in pixels + * + * @return The block size in pixels + */ + const Size2D &block_size() const; + /** The detection window size in pixels + * + * @return The detection window size in pixels + */ + const Size2D &detection_window_size() const; + /** The block stride in pixels. 
The block stride is the distance between 2 consecutive blocks + * + * @return The block stride in pixels + */ + const Size2D &block_stride() const; + /** The number of histogram bins for each cell + * + * @return The number of histogram bins for each cell + */ + size_t num_bins() const; + /** The normalization type + * + * @return The normalization type + */ + HOGNormType normalization_type() const; + /** Threshold used for L2HYS_NORM normalization type + * + * @return Threshold used for L2HYS_NORM normalization type + */ + float l2_hyst_threshold() const; + /** The type of @ref PhaseType + * + * @return The type of @ref PhaseType + */ + PhaseType phase_type() const; + /** The size of HOG descriptor + * + * @return The size of HOG descriptor + */ + size_t descriptor_size() const; + /** Calculates the number of cells for each block + * + * @return The Size2D data object which stores the number of cells along the x and y directions + */ + Size2D num_cells_per_block() const; + /** Calculates the number of blocks for the given image size + * + * @param[in] image_size The input image size data object + * + * @return The Size2D data object which stores the number of blocks along the x and y directions + */ + Size2D num_blocks_per_image(const Size2D &image_size) const; + +private: + Size2D _cell_size; + Size2D _block_size; + Size2D _detection_window_size; + Size2D _block_stride; + size_t _num_bins; + HOGNormType _normalization_type; + float _l2_hyst_threshold; + PhaseType _phase_type; + size_t _descriptor_size; +}; +} +#endif /*__ARM_COMPUTE_HOGINFO_H__ */ diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h new file mode 100644 index 0000000000..44cf30c762 --- /dev/null +++ b/arm_compute/core/Helpers.h @@ -0,0 +1,406 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_HELPERS_H__ +#define __ARM_COMPUTE_HELPERS_H__ + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/Steps.h" +#include "arm_compute/core/Strides.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Window.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace arm_compute +{ +class IKernel; +class ITensor; +class TensorInfo; + +namespace cpp14 +{ +#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ +template +struct _Unique_if +{ + typedef std::unique_ptr _Single_object; +}; + +template +struct _Unique_if +{ + typedef std::unique_ptr _Unknown_bound; +}; + +template +struct _Unique_if +{ + typedef void _Known_bound; +}; + +template +typename _Unique_if::_Single_object +make_unique(Args &&... args) +{ + return std::unique_ptr(new T(std::forward(args)...)); +} + +template +typename _Unique_if::_Unknown_bound +make_unique(size_t n) +{ + typedef typename std::remove_extent::type U; + return std::unique_ptr(new U[n]()); +} + +template +typename _Unique_if::_Known_bound +make_unique(Args &&...) = delete; +#endif /* DOXYGEN_SKIP_THIS */ +} +} + +namespace +{ +/** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. + * + * @param[in] pixel_ptr Pointer to the top-left pixel value. 
Format: Single channel U8 + * @param[in] stride Stride to access the bottom-left and bottom-right pixel values + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +inline uint8_t delta_bilinear_c1u8(const uint8_t *pixel_ptr, size_t stride, float dx, float dy); + +/** Return the pixel at (x,y) using bilinear interpolation. The image must be single channel U8 + * + * @warning Only works if the iterator was created with an IImage + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] stride Stride in bytes of the image + * + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using bilinear interpolation. + */ +inline uint8_t pixel_bilinear_c1u8(const uint8_t *first_pixel_ptr, size_t stride, float x, float y); + +/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel U8 + * + * @warning Only works if the iterator was created with an IImage + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] stride Stride in bytes of the image + * @param[in] width Width of the image + * @param[in] height Height of the image + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using bilinear interpolation. + */ +inline uint8_t pixel_bilinear_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y); + +/** Return the pixel at (x,y) using area interpolation by clamping when out of borders. 
The image must be single channel U8 + * + * @note The interpolation area depends on the width and height ratio of the input and output images + * @note Currently the average of the contributing pixels is calculated + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] stride Stride in bytes of the image + * @param[in] width Width of the image + * @param[in] height Height of the image + * @param[in] wr Width ratio among the input image width and output image width. + * @param[in] hr Height ratio among the input image height and output image height. + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using area interpolation. + */ +inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y); + +/** Clamps a value between a lower and an upper threshold. + * + * @param[in] n Value to clamp. + * @param[in] lower Lower threshold. + * @param[in] upper Upper threshold. + * + * @return Clamped value. + */ +template +inline T clamp(const T &n, const T &lower, const T &upper) +{ + return std::max(lower, std::min(n, upper)); +} + +/** Base case of for_each. Does nothing. */ +template +inline void for_each(F &&) +{ +} + +/** Call the function for each of the arguments + * + * @param[in] func Function to be called + * @param[in] arg Argument passed to the function + * @param[in] args Remaining arguments + */ +template +inline void for_each(F &&func, T &&arg, Ts &&... args) +{ + func(arg); + for_each(func, args...); +} + +/** Base case of foldl. Return value. */ +template +inline T foldl(F &&, T &&value) +{ + return value; +} + +/** Fold left. 
+ * + * @param[in] func Function to be called + * @param[in] initial Initial value + * @param[in] value Argument passed to the function + * @param[in] values Remaining arguments + */ +template +inline I foldl(F &&func, I &&initial, T &&value, Ts &&... values) +{ + return foldl(func, func(initial, value), values...); +} +} + +namespace arm_compute +{ +/** Iterator updated by @ref execute_window_loop for each window element */ +class Iterator +{ +public: + /** Default constructor to create an empty iterator */ + constexpr Iterator(); + /** Create a container iterator for the metadata and allocation contained in the ITensor + * + * @param[in] tensor The tensor to associate to the iterator. + * @param[in] window The window which will be used to iterate over the tensor. + */ + Iterator(const ITensor *tensor, const Window &window); + + /** Increment the iterator along the specified dimension by the step value associated with that dimension. + * + * @warning It is the caller's responsibility to call increment(dimension+1) when reaching the end of a dimension, the iterator will not check for overflow. + * + * @note When incrementing a dimension 'n' the coordinates of all the dimensions in the range (0,n-1) are reset. For example if you iterate over a 2D image, every time you change row (dimension 1), the iterator for the width (dimension 0) is reset to its start. + * + * @param[in] dimension Dimension to increment + */ + void increment(size_t dimension); + + /** Return the offset in bytes from the first element to the current position of the iterator + * + * @return The current position of the iterator in bytes relative to the first element. + */ + constexpr int offset() const; + + /** Return a pointer to the current pixel. + * + * @warning Only works if the iterator was created with an ITensor. + * + * @return equivalent to buffer() + offset() + */ + constexpr uint8_t *ptr() const; + + /** Move the iterator back to the beginning of the specified dimension. 
+ * + * @param[in] dimension Dimension to reset + */ + void reset(size_t dimension); + +private: + uint8_t *_ptr; + + class Dimension + { + public: + constexpr Dimension() + : _dim_start(0), _stride(0) + { + } + + int _dim_start; + int _stride; + }; + + std::array _dims; +}; + +/** Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element. + * It passes the coordinates of the current element to the lambda_function for each iteration + * + * @param[in] w Window to iterate through. + * @param[in] lambda_function The function of type void(function)( const Coordinates & id ) to call at each iteration. + * Where id represents the absolute coordinates of the item to process. + * @param[in,out] iterators Tensor iterators which will be updated by this function before calling lambda_function. + */ +template +inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators); + +/** Update window and padding size for each of the access patterns. + * + * First the window size is reduced based on all access patterns that are not + * allowed to modify the padding of the underlying tensor. Then the padding of + * the remaining tensors is increased to match the window. + * + * @param[in] win Window that is used by the kernel. + * @param[in] patterns Access patterns used to calculate the final window and padding. + * + * @return True if the window has been changed. Changes to the padding do not + * influence the returned value. + */ +template +bool update_window_and_padding(Window &win, Ts &&... 
patterns) +{ + bool window_changed = false; + + for_each([&](const IAccessWindow & w) + { + window_changed |= w.update_window_if_needed(win); + }, + patterns...); + + bool padding_changed = false; + + for_each([&](const IAccessWindow & w) + { + padding_changed |= w.update_padding_if_needed(win); + }, + patterns...); + + return window_changed; +} + +/** Calculate the maximum window for a given tensor shape and border setting + * + * @param[in] info Tensor info object defining the shape of the object for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window(const TensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); + +/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting + * + * @param[in] info Tensor info object defining the shape of the object for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size. The border region will be excluded from the window. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window_horizontal(const TensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); + +/** Intersect multiple valid regions. + * + * @param[in] regions Valid regions. + * + * @return Intersection of all regions. + */ +template +ValidRegion intersect_valid_regions(Ts &&... 
regions) +{ + auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion + { + ValidRegion region; + + for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d) + { + region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d])); + } + + for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d) + { + region.shape.set(d, std::min(r1.shape[d], r2.shape[d])); + } + + return region; + }; + + return foldl(intersect, std::forward(regions)...); +} + +/** Create a strides object based on the provided strides and the tensor dimensions. + * + * @param[in] info Tensor info object providing the shape of the tensor for unspecified strides. + * @param[in] stride_x Stride to be used in X dimension (in bytes). + * @param[in] fixed_strides Strides to be used in higher dimensions starting at Y (in bytes). + * + * @return Strides object based on the specified strides. Missing strides are + * calculated based on the tensor shape and the strides of lower dimensions. + */ +template +inline Strides compute_strides(const TensorInfo &info, T stride_x, Ts &&... fixed_strides) +{ + const TensorShape &shape = info.tensor_shape(); + + // Create strides object + Strides strides(stride_x, fixed_strides...); + + for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i) + { + strides.set(i, shape[i - 1] * strides[i - 1]); + } + + return strides; +} + +/** Create a strides object based on the tensor dimensions. + * + * @param[in] info Tensor info object used to compute the strides. + * + * @return Strides object based on element size and tensor shape. 
+ */ +template +inline Strides compute_strides(const TensorInfo &info) +{ + return compute_strides(info, info.element_size()); +} +} + +#include "arm_compute/core/Helpers.inl" +#endif /*__ARM_COMPUTE_HELPERS_H__ */ diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl new file mode 100644 index 0000000000..4aa7acf75d --- /dev/null +++ b/arm_compute/core/Helpers.inl @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Validate.h" + +#include +#include + +namespace +{ +inline uint8_t delta_bilinear_c1u8(const uint8_t *pixel_ptr, size_t stride, float dx, float dy) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const float a00 = *pixel_ptr; + const float a01 = *(pixel_ptr + 1); + const float a10 = *(pixel_ptr + stride); + const float a11 = *(pixel_ptr + stride + 1); + + const float w1 = dx1 * dy1; + const float w2 = dx * dy1; + const float w3 = dx1 * dy; + const float w4 = dx * dy; + + return a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; +} + +inline uint8_t pixel_bilinear_c1u8(const uint8_t *first_pixel_ptr, size_t stride, float x, float y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + const int32_t xi = x; + const int32_t yi = y; + + const float dx = x - xi; + const float dy = y - yi; + + return delta_bilinear_c1u8(first_pixel_ptr + xi + yi * stride, stride, dx, dy); +} + +inline uint8_t pixel_bilinear_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + x = std::max(-1.f, std::min(x, static_cast(width))); + y = std::max(-1.f, std::min(y, static_cast(height))); + + const float xi = std::floor(x); + const float yi = std::floor(y); + + const float dx = x - xi; + const float dy = y - yi; + + return delta_bilinear_c1u8(first_pixel_ptr + static_cast(xi) + static_cast(yi) * stride, stride, dx, dy); +} + +inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + // Calculate sampling position + float in_x = (x + 0.5f) * wr - 0.5f; + float in_y = (y + 0.5f) * hr - 0.5f; + + // Get bounding box offsets + int x_from = std::floor(x * wr - 0.5f - in_x); + int y_from = std::floor(y * hr - 
0.5f - in_y); + int x_to = std::ceil((x + 1) * wr - 0.5f - in_x); + int y_to = std::ceil((y + 1) * hr - 0.5f - in_y); + + // Clamp position to borders + in_x = std::max(-1.f, std::min(in_x, static_cast(width))); + in_y = std::max(-1.f, std::min(in_y, static_cast(height))); + + // Clamp bounding box offsets to borders + x_from = ((in_x + x_from) < -1) ? -1 : x_from; + y_from = ((in_y + y_from) < -1) ? -1 : y_from; + x_to = ((in_x + x_to) > width) ? (width - in_x) : x_to; + y_to = ((in_y + y_to) > height) ? (height - in_y) : y_to; + + // Get pixel index + const int xi = std::floor(in_x); + const int yi = std::floor(in_y); + + // Bounding box elements in each dimension + const int x_elements = (x_to - x_from + 1); + const int y_elements = (y_to - y_from + 1); + ARM_COMPUTE_ERROR_ON(x_elements == 0 || y_elements == 0); + + // Sum pixels in area + int sum = 0; + for(int j = yi + y_from, je = yi + y_to; j <= je; ++j) + { + const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from; + sum = std::accumulate(ptr, ptr + x_elements, sum); + } + + // Return average + return sum / (x_elements * y_elements); +} +} + +#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ +namespace arm_compute +{ +template +struct IncrementIterators +{ + template + static void unroll(T &&it, Ts &&... iterators) + { + it.increment(dimension); + IncrementIterators::unroll(std::forward(iterators)...); + } + + template + static void unroll(T &&it) + { + it.increment(dimension); + // End of recursion + } +}; + +template +struct ForEachDimension +{ + template + static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... 
iterators) + { + const auto &d = w[dim - 1]; + + for(auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators < dim - 1 >::unroll(iterators...)) + { + id.set(dim - 1, v); + ForEachDimension < dim - 1 >::unroll(w, id, lambda_function, iterators...); + } + } +}; + +template <> +struct ForEachDimension<0> +{ + template + static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators) + { + lambda_function(id); + } +}; + +template +inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators) +{ + w.validate(); + + Coordinates id; + ForEachDimension::unroll(w, id, std::forward(lambda_function), std::forward(iterators)...); +} + +inline constexpr Iterator::Iterator() + : _ptr(nullptr), _dims() +{ +} + +inline Iterator::Iterator(const ITensor *tensor, const Window &win) + : Iterator() +{ + ARM_COMPUTE_ERROR_ON(tensor == nullptr); + const TensorInfo *info = tensor->info(); + ARM_COMPUTE_ERROR_ON(info == nullptr); + const Strides &strides = info->strides_in_bytes(); + + _ptr = tensor->buffer() + info->offset_first_element_in_bytes(); + + //Initialize the stride for each dimension and calculate the position of the first element of the iteration: + for(unsigned int n = 0; n < info->num_dimensions(); ++n) + { + _dims[n]._stride = win[n].step() * strides[n]; + std::get<0>(_dims)._dim_start += strides[n] * win[n].start(); + } + + //Copy the starting point to all the dimensions: + for(unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n) + { + _dims[n]._dim_start = std::get<0>(_dims)._dim_start; + } + + ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(win, info->num_dimensions()); +} + +inline void Iterator::increment(const size_t dimension) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); + + _dims[dimension]._dim_start += _dims[dimension]._stride; + + for(unsigned int n = 0; n < dimension; ++n) + { + _dims[n]._dim_start = _dims[dimension]._dim_start; + } +} + +inline constexpr int 
Iterator::offset() const +{ + return _dims.at(0)._dim_start; +} + +inline constexpr uint8_t *Iterator::ptr() const +{ + return _ptr + _dims.at(0)._dim_start; +} + +inline void Iterator::reset(const size_t dimension) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions - 1); + + _dims[dimension]._dim_start = _dims[dimension + 1]._dim_start; + + for(unsigned int n = 0; n < dimension; ++n) + { + _dims[n]._dim_start = _dims[dimension]._dim_start; + } +} +} +#endif /* DOXYGEN_SKIP_THIS */ diff --git a/arm_compute/core/IAccessWindow.h b/arm_compute/core/IAccessWindow.h new file mode 100644 index 0000000000..8d5c455a6e --- /dev/null +++ b/arm_compute/core/IAccessWindow.h @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_IACCESS_WINDOW_H__ +#define __ARM_COMPUTE_IACCESS_WINDOW_H__ + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class Window; +class TensorInfo; + +/** Decrease @p required in steps of @p step until it's less than or equal to @p available. + * + * @param[in] required Number of required bytes. + * @param[in] available Number of available bytes. + * @param[in] step Step size used to decrease required bytes. + * + * @return Largest value less than or equal to @p available obtained by decreasing @p required in steps of @p step + * + **/ +inline int adjust_down(int required, int available, int step) +{ + ARM_COMPUTE_ERROR_ON(step <= 0); + + return required - step * ((required - available + step - 1) / step); +} + +/** Increase @p required in steps of @p step until it's greater than or equal to @p available. + * + * @param[in] required Number of required bytes. + * @param[in] available Number of available bytes. + * @param[in] step Step size used to increase required bytes. + * + * @return Smallest value greater than or equal to @p available obtained by increasing @p required in steps of @p step + * + **/ +inline int adjust_up(int required, int available, int step) +{ + ARM_COMPUTE_ERROR_ON(step <= 0); + + return required + step * ((available - required + step - 1) / step); +} + +/** Interface describing methods to update access window and padding based on kernel parameters. */ +class IAccessWindow +{ +public: + virtual ~IAccessWindow() = default; + /** Shrink the window if padding is not large enough. + * + * @param[in] window Window used by the kernel. + * + * @return True if the window has been changed. + */ + virtual bool update_window_if_needed(Window &window) const = 0; + /** Increase the padding to be large enough for the window. + * + * @param[in] window Window used by the kernel. + * + * @return True if the padding has been changed. 
+ */ + virtual bool update_padding_if_needed(const Window &window) const = 0; + /** Set the valid region based on access pattern, valid region of the inputs and border mode. + * + * @param[in] window Execution window of the kernel. + * @param[in] input_valid_region Combined valid region of all inputs. + * @param[in] border_undefined Undefined borders are excluded from the valid region. + * @param[in] border_size Size of the border around the XY-plane of the tensor. + */ + virtual void set_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) = 0; +}; + +/** Implementation of a rectangular access pattern. */ +class AccessWindowRectangle : public IAccessWindow +{ +public: + /** Constructor for a rectangular access pattern. + * + * @note Width and height have to be non-negative. + * + * @param[in,out] info Tensor info of the accessed kernel. + * @param[in] x Offset of the access in X direction. + * @param[in] y Offset of the access in Y direction. + * @param[in] width Number of elements that are accessed in X direction. + * @param[in] height Number of elements that are accessed in Y direction. + */ + AccessWindowRectangle(TensorInfo *info, int x, int y, int width, int height) + : AccessWindowRectangle(info, x, y, width, height, 1.f, 1.f) + { + } + + /** Constructor for a rectangular access pattern. + * + * @note Width, height and scale have to be non-negative. + * + * @param[in,out] info Tensor info of the accessed kernel. + * @param[in] x Offset of the access in X direction. + * @param[in] y Offset of the access in Y direction. + * @param[in] width Number of elements that are accessed in X direction. + * @param[in] height Number of elements that are accessed in Y direction. 
+ * @param[in] scale_x Ratio along the X direction between the window used by the execute_window_loop and the rectangular access pattern defined + * @param[in] scale_y Ratio along the Y direction between the window used by the execute_window_loop and the rectangular access pattern defined + */ + AccessWindowRectangle(TensorInfo *info, int x, int y, int width, int height, float scale_x, float scale_y) + : _info(info), _x(x), _y(y), _width(width), _height(height), _scale_x(scale_x), _scale_y(scale_y) + { + ARM_COMPUTE_ERROR_ON(width < 0); + ARM_COMPUTE_ERROR_ON(height < 0); + ARM_COMPUTE_ERROR_ON(scale_x < 0); + ARM_COMPUTE_ERROR_ON(scale_y < 0); + } + + AccessWindowRectangle(const AccessWindowRectangle &) = delete; + AccessWindowRectangle &operator=(const AccessWindowRectangle &) = delete; + AccessWindowRectangle(AccessWindowRectangle &&) = default; + AccessWindowRectangle &operator=(AccessWindowRectangle &&) = default; + ~AccessWindowRectangle() = default; + + /** Set the valid region based on access pattern and valid region of the inputs. + * + * @note This method assumes that there is no border. + * @note This method assumes that all elements written by the kernel are valid. + * + * @param[in] window Execution window of the kernel. + * @param[in] input_valid_region Combined valid region of all inputs. + */ + void set_valid_region(const Window &window, ValidRegion input_valid_region); + + // Inherited methods overridden: + + /** @note This method assumes that all elements written by the kernel are valid. */ + void set_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) override; + + bool update_window_if_needed(Window &window) const override; + bool update_padding_if_needed(const Window &window) const override; + +protected: + TensorInfo *_info; + int _x; + int _y; + int _width; + int _height; + float _scale_x; + float _scale_y; +}; + +/** Implementation of a column access pattern. 
*/ +class AccessWindowVertical : public AccessWindowRectangle +{ +public: + /** Constructor for a column access pattern. + * + * @note Height has to be non-negative. + * + * @param[in,out] info Tensor info of the accessed kernel. + * @param[in] y Offset of the access in Y direction. + * @param[in] height Number of elements that are accessed in Y direction. + * @param[in] scale_y Ratio along the Y direction between the window used by the execute_window_loop and the rectangular access pattern defined + */ + AccessWindowVertical(TensorInfo *info, int y, int height, float scale_y = 1.f) + : AccessWindowRectangle(info, 0, y, 1, height, 1.f, scale_y) + { + ARM_COMPUTE_ERROR_ON(height < 0); + ARM_COMPUTE_ERROR_ON(scale_y < 0); + } +}; + +/** Implementation of a row access pattern. */ +class AccessWindowHorizontal : public AccessWindowRectangle +{ +public: + /** Constructor for a row access pattern. + * + * @note Width has to be non-negative. + * + * @param[in,out] info Tensor info of the accessed kernel. + * @param[in] x Offset of the access in X direction. + * @param[in] width Number of elements that are accessed in X direction. + * @param[in] scale_x Ratio along the X direction between the window used by the execute_window_loop and the rectangular access pattern defined + */ + AccessWindowHorizontal(TensorInfo *info, int x, int width, float scale_x = 1.f) + : AccessWindowRectangle(info, x, 0, width, 1, scale_x, 1.f) + { + ARM_COMPUTE_ERROR_ON(width < 0); + ARM_COMPUTE_ERROR_ON(scale_x < 0); + } +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_IACCESS_WINDOW_H__*/ diff --git a/arm_compute/core/IArray.h b/arm_compute/core/IArray.h new file mode 100644 index 0000000000..2ed56100cf --- /dev/null +++ b/arm_compute/core/IArray.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_IARRAY_H__ +#define __ARM_COMPUTE_IARRAY_H__ + +#include "arm_compute/core/Error.h" +#include +#include + +namespace arm_compute +{ +class KeyPoint; +class Coordinates2D; +class DetectionWindow; +class Size2D; + +/** Array of type T */ +template +class IArray +{ +public: + /** Default constructor */ + IArray() + : _num_values(0), _max_size(0) {}; + /** Constructor: initializes an array which can contain up to max_num_values values + * + * @param[in] max_num_values Maximum number of values the array will be able to store + */ + IArray(size_t max_num_values) + : _num_values(0), _max_size(max_num_values) + { + } + /** Maximum number of values which can be stored in this array + * + * @return Maximum number of values + */ + size_t max_num_values() const + { + return _max_size; + } + /** Default virtual destructor */ + virtual ~IArray() = default; + /** Number of values currently stored in the array + * + * @return Number of values currently stored in the array or max_num_values + 1 if the array is overflowed. + */ + size_t num_values() const + { + return _num_values; + } + /** Append the passed argument to the end of the array if there is room. + * + * @param[in] val Value to add to the array. + * + * @return True if the point was successfully added to the array. False if the array is full and the point couldn't be added. + */ + bool push_back(const T &val) + { + ARM_COMPUTE_ERROR_ON(0 == _max_size); + if(_num_values >= max_num_values()) + { + _num_values = max_num_values() + 1; + return false; + } + at(_num_values) = val; + _num_values++; + return true; + } + /** Clear all the points from the array. */ + void clear() + { + _num_values = 0; + } + /** Did we lose some values because the array is too small? + * + * @return True if we tried to add a value using push_back() but there wasn't any room left to store it. + * False if all the values were successfully added to the array. 
+ */ + bool overflow() const + { + return _num_values > max_num_values(); + } + /** Pointer to the first element of the array + * + * Other elements of the array can be accessed using buffer()[idx] for 0 <= idx < num_values(). + * + * @return A pointer to the first element of the array + */ + virtual T *buffer() const = 0; + /** Reference to the element of the array located at the given index + * + * @param[in] index Index of the element + * + * @return A reference to the element of the array located at the given index. + */ + virtual T &at(size_t index) const + { + ARM_COMPUTE_ERROR_ON(buffer() == nullptr); + ARM_COMPUTE_ERROR_ON(index >= max_num_values()); + return buffer()[index]; + } + /** Resizes the array to contain "num" elements. If "num" is smaller than the maximum array size, the content is reduced to its first "num" elements, + * "num" elements can't be bigger than the maximum number of values which can be stored in this array. + * + * @param[in] num The new array size in number of elements + */ + void resize(size_t num) + { + ARM_COMPUTE_ERROR_ON(num > max_num_values()); + _num_values = num; + }; + +private: + size_t _num_values; + size_t _max_size; +}; +using IKeyPointArray = IArray; +using ICoordinates2DArray = IArray; +using IDetectionWindowArray = IArray; +using ISize2DArray = IArray; +using IUInt8Array = IArray; +using IUInt16Array = IArray; +using IUInt32Array = IArray; +using IInt16Array = IArray; +using IInt32Array = IArray; +using IFloatArray = IArray; +} +#endif /* __ARM_COMPUTE_IARRAY_H__ */ diff --git a/arm_compute/core/IDistribution.h b/arm_compute/core/IDistribution.h new file mode 100644 index 0000000000..b57543a3bf --- /dev/null +++ b/arm_compute/core/IDistribution.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IDISTRIBUTION_H__ +#define __ARM_COMPUTE_IDISTRIBUTION_H__ + +#include +#include + +namespace arm_compute +{ +/** Interface for distribution objects */ +class IDistribution +{ +public: + /** Default virtual destructor */ + virtual ~IDistribution() = default; + /** Returns the dimensions of the distribution. + * + * @note This is fixed to 1-dimensional distribution for now. + * @return Dimensions of the distribution. + */ + virtual size_t dimensions() const = 0; + /** Returns the total size in bytes of the distribution. + * + * @return Total size of the distribution in bytes. + */ + virtual size_t size() const = 0; + /** Returns a pointer to the start of the distribution. + * Other elements of the array can be accessed using buffer()[idx] for 0 <= idx < num_bins() + * + * @return Pointer to the start of the distribution. 
+ */ + virtual uint32_t *buffer() const = 0; + /** Clears the distribution by setting every element to zero. */ + void clear() const; +}; +} +#endif /* __ARM_COMPUTE_IDISTRIBUTION_H__ */ diff --git a/arm_compute/core/IDistribution1D.h b/arm_compute/core/IDistribution1D.h new file mode 100644 index 0000000000..ca8bfc0a7d --- /dev/null +++ b/arm_compute/core/IDistribution1D.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_IDISTRIBUTION1D_H__ +#define __ARM_COMPUTE_IDISTRIBUTION1D_H__ + +#include "arm_compute/core/IDistribution.h" + +#include +#include + +namespace arm_compute +{ +/** 1D Distribution interface */ +class IDistribution1D : public IDistribution +{ +public: + /** Constructor: Creates a 1D Distribution of a consecutive interval [offset, offset + range - 1] + * defined by a start offset and valid range, divided equally into num_bins parts. + * + * @param[in] num_bins The number of bins the distribution is divided in. + * @param[in] offset The start of the values to use. + * @param[in] range The total number of the consecutive values of the distribution interval. + */ + IDistribution1D(size_t num_bins, int32_t offset, uint32_t range); + /** Returns the number of bins that the distribution has. + * + * @return Number of bins of the distribution. + */ + size_t num_bins() const; + /** Returns the offset of the distribution. + * + * @return Offset of the distribution. + */ + int32_t offset() const; + /** Returns the range of the distribution. + * + * @return Range of the distribution. + */ + uint32_t range() const; + /** Returns the window of the distribution, which is the range divided by the number of bins. + * + * @note If range is not divided by the number of bins then it is invalid. + * + * @return Window of the distribution. + */ + uint32_t window() const; + /** Sets the range of the distribution. + * + * @param[in] range New range of the distribution to be set. + */ + void set_range(uint32_t range); + + // Inherited methods overridden: + size_t size() const override; + size_t dimensions() const override; + +private: + size_t _num_bins; /**< Number of bins. */ + int32_t _offset; /**< Offset, which indicate the start of the usable values. 
*/ + uint32_t _range; /**< The total number of consecutive values of the distribution interval */ +}; +} +#endif /* __ARM_COMPUTE_IDISTRIBUTION1D_H__ */ diff --git a/arm_compute/core/IHOG.h b/arm_compute/core/IHOG.h new file mode 100644 index 0000000000..8bf713ae82 --- /dev/null +++ b/arm_compute/core/IHOG.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IHOG_H__ +#define __ARM_COMPUTE_IHOG_H__ + +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class HOGInfo; +/** Interface for HOG data-object */ +class IHOG +{ +public: + /** Interface to be implemented by the child class to return the HOG's metadata + * + * @return A pointer to the HOG's metadata. 
+ */ + virtual const HOGInfo *info() const = 0; + /** Default virtual destructor */ + virtual ~IHOG() = default; + /** Pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor + * + * @note Other elements of the array can be accessed using descriptor()[idx] for idx=[0, descriptor_size() - 1] + * + * @return A pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor + */ + virtual float *descriptor() const = 0; +}; +} +#endif /* __ARM_COMPUTE_IHOG_H__ */ diff --git a/arm_compute/core/IKernel.h b/arm_compute/core/IKernel.h new file mode 100644 index 0000000000..4f3812b6da --- /dev/null +++ b/arm_compute/core/IKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_IKERNEL_H__ +#define __ARM_COMPUTE_IKERNEL_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Window.h" + +namespace arm_compute +{ +/** Common information for all the kernels */ +class IKernel +{ +public: + /** Constructor */ + IKernel(); + /** Destructor */ + virtual ~IKernel() = default; + /** Indicates whether or not the kernel is parallelisable + * + * If the kernel is parallelisable then the window returned by window() can be split into sub-windows + * which can then be run in parallel. + * + * If the kernel is not parallelisable then only the window returned by window() can be passed to run() + * + * @return True if the kernel is parallelisable + */ + virtual bool is_parallelisable() const; + /** The size of the border for that kernel + * + * @return The width in number of elements of the border. + */ + virtual BorderSize border_size() const; + /** The maximum window the kernel can be executed on + * + * @return The maximum window the kernel can be executed on. + */ + const Window &window() const; + +protected: + /** Configure the kernel's window + * + * @param[in] window The maximum window which will be returned by window() + */ + void configure(const Window &window); + +private: + Window _window; +}; +} +#endif /*__ARM_COMPUTE_IKERNEL_H__ */ diff --git a/arm_compute/core/ILut.h b/arm_compute/core/ILut.h new file mode 100644 index 0000000000..5223aea67a --- /dev/null +++ b/arm_compute/core/ILut.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ILUT_H__ +#define __ARM_COMPUTE_ILUT_H__ + +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** Lookup Table object interface. */ +class ILut +{ +public: + /** Default virtual destructor */ + virtual ~ILut() = default; + /** Returns the total number of elements in the LUT. + * + * @return Total number of elements. + */ + virtual size_t num_elements() const = 0; + /** Indicates the offset that needs to be applied to the raw index before performing a lookup in the LUT. + * + * @return The normalization offset. + */ + virtual uint32_t index_offset() const = 0; + /** Returns the total size in bytes of the LUT. + * + * @return Total size of the LUT in bytes. + */ + virtual size_t size_in_bytes() const = 0; + /** Returns the type of the LUT. + * + * @return The type of the LUT. 
+ */ + virtual DataType type() const = 0; + /** Returns a pointer to the start of the LUT. + * Other elements of the LUT can be accessed using buffer()[idx] for 0 <= idx < num_elements(). + * + * @return Pointer to the start of the lut. + */ + virtual uint8_t *buffer() const = 0; + /** Clears the LUT by setting every element to zero. */ + virtual void clear() = 0; +}; +} +#endif /* __ARM_COMPUTE_ILUT_H__ */ diff --git a/arm_compute/core/IMultiHOG.h b/arm_compute/core/IMultiHOG.h new file mode 100644 index 0000000000..e91da75398 --- /dev/null +++ b/arm_compute/core/IMultiHOG.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_IMULTIHOG_H__ +#define __ARM_COMPUTE_IMULTIHOG_H__ + +#include "arm_compute/core/IHOG.h" + +#include + +namespace arm_compute +{ +/** Interface for storing multiple HOG data-objects */ +class IMultiHOG +{ +public: + /** Default destructor */ + virtual ~IMultiHOG() = default; + /** The number of HOG models stored + * + * @return The number of HOG models stored + */ + virtual size_t num_models() const = 0; + /** Return a pointer to the requested HOG model + * + * @param[in] index The index of the wanted HOG model. + * + * @return A pointer pointed to the HOG model + */ + virtual IHOG *model(size_t index) = 0; + /** Return a const pointer to the requested HOG model + * + * @param[in] index The index of the wanted HOG model. + * + * @return A const pointer pointed to the HOG model + */ + virtual const IHOG *model(size_t index) const = 0; +}; +} + +#endif /* __ARM_COMPUTE_IMULTIHOG_H__ */ diff --git a/arm_compute/core/IMultiImage.h b/arm_compute/core/IMultiImage.h new file mode 100644 index 0000000000..6ed3c785ca --- /dev/null +++ b/arm_compute/core/IMultiImage.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IMULTIIMAGE_H__ +#define __ARM_COMPUTE_IMULTIIMAGE_H__ + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; +class MultiImageInfo; + +/** Interface for multi-planar images */ +class IMultiImage +{ +public: + /** Destructor */ + virtual ~IMultiImage() = default; + /** Interface to be implemented by the child class to return the multi-planar image's metadata + * + * @return A pointer to the image's metadata. + */ + virtual const MultiImageInfo *info() const = 0; + /** Return a pointer to the requested plane of the image. + * + * @param[in] index The index of the wanted plane. + * + * @return A pointer to the plane + */ + virtual IImage *plane(unsigned int index) = 0; + /** Return a constant pointer to the requested plane of the image. + * + * @param[in] index The index of the wanted plane. + * + * @return A constant pointer to the plane + */ + virtual const IImage *plane(unsigned int index) const = 0; +}; +} +#endif /*__ARM_COMPUTE_IMULTIIMAGE_H__ */ diff --git a/arm_compute/core/IPyramid.h b/arm_compute/core/IPyramid.h new file mode 100644 index 0000000000..e5d7011cf9 --- /dev/null +++ b/arm_compute/core/IPyramid.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IPYRAMID_H__ +#define __ARM_COMPUTE_IPYRAMID_H__ + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/PyramidInfo.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** Interface for pyramid data-object */ +class IPyramid +{ +public: + /** Default virtual destructor */ + virtual ~IPyramid() = default; + /** Interface to be implemented by the child class to return the Pyramid's metadata + * + * @return A pointer to the Pyramid's metadata. + */ + virtual const PyramidInfo *info() const = 0; + /** Retrieves a level of the pyramid as an ITensor pointer + * + * @param[in] index The index of the level, such that index is less than levels. 
+ * + * @return An ITensor pointer + */ + virtual ITensor *get_pyramid_level(size_t index) const = 0; +}; +} + +#endif /* __ARM_COMPUTE_IPYRAMID_H__ */ diff --git a/arm_compute/core/ITensor.h b/arm_compute/core/ITensor.h new file mode 100644 index 0000000000..ef4ea7bbc3 --- /dev/null +++ b/arm_compute/core/ITensor.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ITENSOR_H__ +#define __ARM_COMPUTE_ITENSOR_H__ + +#include "arm_compute/core/TensorInfo.h" + +#include + +namespace arm_compute +{ +class Coordinates; + +/** Interface for NEON tensor */ +class ITensor +{ +public: + /** Interface to be implemented by the child class to return the tensor's metadata + * + * @return A pointer to the tensor's metadata. 
+ */ + virtual TensorInfo *info() const = 0; + /** Interface to be implemented by the child class to return the tensor's metadata + * + * @return A pointer to the tensor's metadata. + */ + virtual TensorInfo *info() = 0; + /** Default virtual destructor */ + virtual ~ITensor() = default; + /** Interface to be implemented by the child class to return a pointer to CPU memory + * + * @return A CPU pointer to the beginning of the image's allocation. + */ + virtual uint8_t *buffer() const = 0; + + /** Return a pointer to the element at the passed coordinates + * + * @param[in] id Coordinates of the element + * + * @return Pointer to the requested element + */ + inline uint8_t *ptr_to_element(const Coordinates &id) const + { + return buffer() + info()->offset_element_in_bytes(id); + } + + /** Copy the content of another tensor. + * + * @note The number of dimensions of the source tensor must be less or equal to those of the destination tensor. + * + * @note All dimensions of the destination tensor must be greater or equal to the source tensor ones. + * + * @note num_channels() and element_size() of both tensors must match. + * + * @param[in] src Source tensor to copy from. + */ + void copy_from(const ITensor &src); +}; + +using IImage = ITensor; +} +#endif /*__ARM_COMPUTE_ITENSOR_H__ */ diff --git a/arm_compute/core/MultiImageInfo.h b/arm_compute/core/MultiImageInfo.h new file mode 100644 index 0000000000..6d76953845 --- /dev/null +++ b/arm_compute/core/MultiImageInfo.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_MULTIIMAGEINFO_H__ +#define __ARM_COMPUTE_MULTIIMAGEINFO_H__ + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +/** Store the multi-planar image's metadata */ +class MultiImageInfo +{ +public: + /** Constructor */ + MultiImageInfo(); + /** Initialize the metadata structure with the given parameters + * + * @param[in] width Width of the image (in number of pixels) + * @param[in] height Height of the image (in number of pixels) + * @param[in] format Colour format of the image. 
+ */ + void init(unsigned int width, unsigned int height, Format format); + /** Colour format of the image + * + * @return Colour format of the image + */ + Format format() const; + /** Width in pixels + * + * @return The width in pixels + */ + unsigned int width() const; + /** Height in pixels + * + * @return The height in pixels + */ + unsigned int height() const; + +protected: + unsigned int _width; + unsigned int _height; + Format _format; +}; +} +#endif /*__ARM_COMPUTE_MULTIIMAGEINFO_H__ */ diff --git a/arm_compute/core/NEON/INEKernel.h b/arm_compute/core/NEON/INEKernel.h new file mode 100644 index 0000000000..3ac8164a51 --- /dev/null +++ b/arm_compute/core/NEON/INEKernel.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_INEKERNEL_H__ +#define __ARM_COMPUTE_INEKERNEL_H__ + +#include "arm_compute/core/CPP/ICPPKernel.h" + +namespace arm_compute +{ +using INEKernel = ICPPKernel; +} +#endif /*__ARM_COMPUTE_INEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/INESimpleKernel.h b/arm_compute/core/NEON/INESimpleKernel.h new file mode 100644 index 0000000000..ca25532ef1 --- /dev/null +++ b/arm_compute/core/NEON/INESimpleKernel.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_INESIMPLEKERNEL_H__ +#define __ARM_COMPUTE_INESIMPLEKERNEL_H__ + +#include "arm_compute/core/CPP/ICPPSimpleKernel.h" + +namespace arm_compute +{ +using INESimpleKernel = ICPPSimpleKernel; +} +#endif /*__ARM_COMPUTE_INESIMPLEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/NEColorConvertHelper.inl b/arm_compute/core/NEON/NEColorConvertHelper.inl new file mode 100644 index 0000000000..9be7c8a658 --- /dev/null +++ b/arm_compute/core/NEON/NEColorConvertHelper.inl @@ -0,0 +1,888 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IMultiImage.h" +#include "arm_compute/core/Utils.h" + +#include + +namespace +{ +constexpr float red_coef_bt709 = 1.5748F; +constexpr float green_coef_bt709 = -0.1873f; +constexpr float green_coef2_bt709 = -0.4681f; +constexpr float blue_coef_bt709 = 1.8556f; + +constexpr float rgb2yuv_bt709_kr = 0.2126f; +constexpr float rgb2yuv_bt709_kb = 0.0722f; +// K_g = 1 - K_r - K_b +constexpr float rgb2yuv_bt709_kg = 0.7152f; +// C_u = 1 / (2 * (1 - K_b)) +constexpr float rgb2yuv_bt709_cu = 0.5389f; +// C_v = 1 / (2 * (1 - K_r)) +constexpr float rgb2yuv_bt709_cv = 0.6350f; + +inline void convert_uint8x16_to_float32x4x4(const uint8x16_t &in, float32x4x4_t &out) +{ + const auto tmp1 = vmovl_u8(vget_low_u8(in)); + out.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1))); + out.val[1] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1))); + const auto tmp2 = vmovl_u8(vget_high_u8(in)); + out.val[2] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2))); + out.val[3] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2))); +} + +inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out) +{ + out.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[0])), + vqmovn_u32(vcvtq_u32_f32(in2.val[0])))); + out.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[1])), + vqmovn_u32(vcvtq_u32_f32(in2.val[1])))); + out.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[2])), + vqmovn_u32(vcvtq_u32_f32(in2.val[2])))); +} + +inline void convert_float32x4x4_to_unit8x16(const float32x4x4_t &in, uint8x16_t &out) +{ + const auto low = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[0])), + vqmovn_u32(vcvtq_u32_f32(in.val[1]))); + const auto high = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[2])), + vqmovn_u32(vcvtq_u32_f32(in.val[3]))); + out = vcombine_u8(vqmovn_u16(low), vqmovn_u16(high)); +} + +inline void 
rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec, + float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec) +{ + /* + Y'= 0.2126*R' + 0.7152*G' + 0.0722*B' + U'=-0.1146*R' - 0.3854*G' + 0.5000*B' + V'= 0.5000*R' - 0.4542*G' - 0.0458*B' + */ + const auto c128 = vdupq_n_f32(128.f); + + // Y = R * K_r + G * (1 - K_r - K_b) + B * K_b + yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr); + yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg); + yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb); + + // U = (B - Y) / (2 * (1 - K_b)) + uvec = vsubq_f32(bvec, yvec); + uvec = vmlaq_n_f32(c128, uvec, rgb2yuv_bt709_cu); + + // V = (R - Y) / (2 * (1 - K_r)) + vvec = vsubq_f32(rvec, yvec); + vvec = vmlaq_n_f32(c128, vvec, rgb2yuv_bt709_cv); +} + +inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val, + float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha) +{ + float32x4x3_t rgb1, rgb2; + + // Compute: cb - 128 and cr - 128; + const auto c128 = vdupq_n_f32(128.f); + uvec_val = vsubq_f32(uvec_val, c128); + vvec_val = vsubq_f32(vvec_val, c128); + + // Compute: + // r = 0.0000f*f_u + 1.5748f*f_v; + // g = -0.1873f*f_u - 0.4681f*f_v; + // b = 1.8556f*f_u + 0.0000f*f_v; + const auto red = vmulq_n_f32(vvec_val, red_coef_bt709); + const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709); + const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709), + vmulq_n_f32(vvec_val, green_coef2_bt709)); + + // Compute the final r,g,b values using y1 for the first texel and y2 for the second one. 
+ // the result is stored in two float32x4x3_t which then are converted to one uint8x8x3_t + // and written back to memory using vst3 instruction + + rgb1.val[0] = vaddq_f32(yvec_val, red); + rgb1.val[1] = vaddq_f32(yvec_val, green); + rgb1.val[2] = vaddq_f32(yvec_val, blue); + + rgb2.val[0] = vaddq_f32(yyvec_val, red); + rgb2.val[1] = vaddq_f32(yyvec_val, green); + rgb2.val[2] = vaddq_f32(yyvec_val, blue); + + uint8x8x3_t u8_rgb; + convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb); + + if(!alpha) + { + vst3_lane_u8(&output_ptr[0], u8_rgb, 0); + vst3_lane_u8(&output_ptr[3], u8_rgb, 4); + vst3_lane_u8(&output_ptr[6], u8_rgb, 1); + vst3_lane_u8(&output_ptr[9], u8_rgb, 5); + vst3_lane_u8(&output_ptr[12], u8_rgb, 2); + vst3_lane_u8(&output_ptr[15], u8_rgb, 6); + vst3_lane_u8(&output_ptr[18], u8_rgb, 3); + vst3_lane_u8(&output_ptr[21], u8_rgb, 7); + } + else + { + uint8x8x4_t u8_rgba; + u8_rgba.val[0] = u8_rgb.val[0]; + u8_rgba.val[1] = u8_rgb.val[1]; + u8_rgba.val[2] = u8_rgb.val[2]; + u8_rgba.val[3] = vdup_n_u8(255); + vst4_lane_u8(&output_ptr[0], u8_rgba, 0); + vst4_lane_u8(&output_ptr[4], u8_rgba, 4); + vst4_lane_u8(&output_ptr[8], u8_rgba, 1); + vst4_lane_u8(&output_ptr[12], u8_rgba, 5); + vst4_lane_u8(&output_ptr[16], u8_rgba, 2); + vst4_lane_u8(&output_ptr[20], u8_rgba, 6); + vst4_lane_u8(&output_ptr[24], u8_rgba, 3); + vst4_lane_u8(&output_ptr[28], u8_rgba, 7); + } +} + +inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha) +{ + uint8x16x3_t rgb; + + if(alpha) + { + const auto tmp = vld4q_u8(ptr); + rgb.val[0] = tmp.val[0]; + rgb.val[1] = tmp.val[1]; + rgb.val[2] = tmp.val[2]; + } + else + { + rgb = vld3q_u8(ptr); + } + + return rgb; +} + +inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom) +{ + // Convert the uint8x16_t to float32x4x4_t + float32x4x4_t frvec_top, fgvec_top, fbvec_top; + convert_uint8x16_to_float32x4x4(vec_top.val[0], frvec_top); + convert_uint8x16_to_float32x4x4(vec_top.val[1], 
fgvec_top); + convert_uint8x16_to_float32x4x4(vec_top.val[2], fbvec_top); + + float32x4x4_t frvec_bottom, fgvec_bottom, fbvec_bottom; + convert_uint8x16_to_float32x4x4(vec_bottom.val[0], frvec_bottom); + convert_uint8x16_to_float32x4x4(vec_bottom.val[1], fgvec_bottom); + convert_uint8x16_to_float32x4x4(vec_bottom.val[2], fbvec_bottom); + + float32x4x4_t fyvec_top, fuvec_top, fvvec_top; + float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom; + + for(auto i = 0; i < 4; ++i) + { + rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i], + fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]); + rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i], + fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]); + } + + convert_float32x4x4_to_unit8x16(fyvec_top, vec_top.val[0]); + convert_float32x4x4_to_unit8x16(fuvec_top, vec_top.val[1]); + convert_float32x4x4_to_unit8x16(fvvec_top, vec_top.val[2]); + convert_float32x4x4_to_unit8x16(fyvec_bottom, vec_bottom.val[0]); + convert_float32x4x4_to_unit8x16(fuvec_bottom, vec_bottom.val[1]); + convert_float32x4x4_to_unit8x16(fvvec_bottom, vec_bottom.val[2]); +} + +inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top, + const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom, + unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom, + unsigned char *const __restrict out_uv) +{ + uint8x16x3_t vec_top, vec_bottom; + vec_top.val[0] = rvec_top; + vec_top.val[1] = gvec_top; + vec_top.val[2] = bvec_top; + vec_bottom.val[0] = rvec_bottom; + vec_bottom.val[1] = gvec_bottom; + vec_bottom.val[2] = bvec_bottom; + + rgb_to_yuv_conversion(vec_top, vec_bottom); + + vst1q_u8(out_y_top, vec_top.val[0]); + vst1q_u8(out_y_bottom, vec_bottom.val[0]); + + const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]); + const auto vvec = vuzpq_u8(vec_top.val[2], 
vec_bottom.val[2]); + const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]); + const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]); + + uint8x8x2_t uvvec; + uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp)); + uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp)); + + vst2_u8(out_uv, uvvec); +} + +inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top, + const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom, + unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom, + unsigned char *const __restrict out_u, + unsigned char *const __restrict out_v) +{ + uint8x16x3_t vec_top, vec_bottom; + vec_top.val[0] = rvec_top; + vec_top.val[1] = gvec_top; + vec_top.val[2] = bvec_top; + vec_bottom.val[0] = rvec_bottom; + vec_bottom.val[1] = gvec_bottom; + vec_bottom.val[2] = bvec_bottom; + + rgb_to_yuv_conversion(vec_top, vec_bottom); + + vst1q_u8(out_y_top, vec_top.val[0]); + vst1q_u8(out_y_bottom, vec_bottom.val[0]); + + const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]); + const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]); + const auto uvvec = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]), + vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1])); + + vst1_u8(out_u, vget_low_u8(uvvec)); + vst1_u8(out_v, vget_high_u8(uvvec)); +} + +inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, const uint8x16_t &bvec, + unsigned char *const __restrict out_y, + unsigned char *const __restrict out_u, + unsigned char *const __restrict out_v) +{ + // Convert the uint8x16_t to float32x4x4_t + float32x4x4_t frvec, fgvec, fbvec; + convert_uint8x16_to_float32x4x4(rvec, frvec); + convert_uint8x16_to_float32x4x4(gvec, fgvec); + convert_uint8x16_to_float32x4x4(bvec, fbvec); + + float32x4x4_t fyvec, fuvec, fvvec; + for(auto i = 0; i < 4; ++i) + { + 
rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i], + fyvec.val[i], fuvec.val[i], fvvec.val[i]); + } + + uint8x16_t yvec, uvec, vvec; + convert_float32x4x4_to_unit8x16(fyvec, yvec); + convert_float32x4x4_to_unit8x16(fuvec, uvec); + convert_float32x4x4_to_unit8x16(fvvec, vvec); + + vst1q_u8(out_y, yvec); + vst1q_u8(out_u, uvec); + vst1q_u8(out_v, vvec); +} +} + +namespace arm_compute +{ +void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + Iterator in(input_ptr, win); + Iterator out(output_ptr, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta1 = vld3q_u8(in.ptr()); + uint8x16x4_t ta2; + ta2.val[0] = ta1.val[0]; + ta2.val[1] = ta1.val[1]; + ta2.val[2] = ta1.val[2]; + ta2.val[3] = vdupq_n_u8(255); + vst4q_u8(out.ptr(), ta2); + }, + in, out); +} + +void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + Iterator in(input_ptr, win); + Iterator out(output_ptr, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta1 = vld4q_u8(in.ptr()); + uint8x16x3_t ta2; + ta2.val[0] = ta1.val[0]; + ta2.val[1] = ta1.val[1]; + ta2.val[2] = ta1.val[2]; + vst3q_u8(out.ptr(), ta2); + }, + in, out); +} + +template +void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto element_size = alpha ? 32 : 24; + constexpr auto shift = yuyv ? 
0 : 1; + + Iterator in(input_ptr, win); + Iterator out(output_ptr, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + float32x4x4_t uvec, yvec, vvec, yyvec; + const auto ta = vld4q_u8(in.ptr()); + //ta.val[0] = Y0 Y2 Y4 Y6 ... + //ta.val[1] = U0 U2 U4 U6 ... + //ta.val[2] = Y1 Y3 Y5 Y7 ... + //ta.val[3] = V0 V2 V4 V6 ... + + // Convert the uint8x16x4_t to float32x4x4_t + convert_uint8x16_to_float32x4x4(ta.val[0 + shift], yvec); + convert_uint8x16_to_float32x4x4(ta.val[1 - shift], uvec); + convert_uint8x16_to_float32x4x4(ta.val[2 + shift], yyvec); + convert_uint8x16_to_float32x4x4(ta.val[3 - shift], vvec); + + yuyv_to_rgb_calculation(yvec.val[0], uvec.val[0], yyvec.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); + yuyv_to_rgb_calculation(yvec.val[1], uvec.val[1], yyvec.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); + yuyv_to_rgb_calculation(yvec.val[2], uvec.val[2], yyvec.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); + yuyv_to_rgb_calculation(yvec.val[3], uvec.val[3], yyvec.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); + }, + in, out); +} + +template +void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto element_size = alpha ? 32 : 24; + const auto out_stride = output_ptr->info()->strides_in_bytes().y(); + constexpr auto shift = uv ? 
0 : 1; + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in_y(input_ptr->plane(0), win); + Iterator in_uv(input_ptr->plane(1), win_uv); + Iterator out(output_ptr, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_y_top = vld2q_u8(in_y.ptr()); + const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); + const auto ta_uv = vld2q_u8(in_uv.ptr()); + //ta_y.val[0] = Y0 Y2 Y4 Y6 ... + //ta_y.val[1] = Y1 Y3 Y5 Y7 ... + //ta_uv.val[0] = U0 U2 U4 U6 ... + //ta_uv.val[1] = V0 V2 V4 V6 ... + + // Convert the uint8x16x4_t to float32x4x4_t + float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec; + convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top); + convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top); + convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom); + convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom); + convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift], uvec); + convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift], vvec); + + yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); + + yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha); + 
yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha); + }, + in_y, in_uv, out); +} + +template +void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto element_size = alpha ? 32 : 24; + const auto out_stride = output_ptr->info()->strides_in_bytes().y(); + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in_y(input_ptr->plane(0), win); + Iterator in_u(input_ptr->plane(1), win_uv); + Iterator in_v(input_ptr->plane(2), win_uv); + Iterator out(output_ptr, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_y_top = vld2q_u8(in_y.ptr()); + const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); + const auto ta_u = vld1q_u8(in_u.ptr()); + const auto ta_v = vld1q_u8(in_v.ptr()); + //ta_y.val[0] = Y0 Y2 Y4 Y6 ... + //ta_y.val[1] = Y1 Y3 Y5 Y7 ... + //ta_u.val[0] = U0 U2 U4 U6 ... + //ta_v.val[0] = V0 V2 V4 V6 ... 
+ + // Convert the uint8x16x4_t to float32x4x4_t + float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec; + convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top); + convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top); + convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom); + convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom); + convert_uint8x16_to_float32x4x4(ta_u, uvec); + convert_uint8x16_to_float32x4x4(ta_v, vvec); + + yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); + + yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha); + }, + in_y, in_u, in_v, out); +} + +template +void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto shift = yuyv ? 
0 : 1; + + // NV12's UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in(input_ptr, win); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_uv(output_ptr->plane(1), win_uv); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_top = vld4q_u8(in.ptr()); + const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y()); + //ta.val[0] = Y0 Y2 Y4 Y6 ... + //ta.val[1] = U0 U2 U4 U6 ... + //ta.val[2] = Y1 Y3 Y5 Y7 ... + //ta.val[3] = V0 V2 V4 V6 ... + + uint8x16x2_t yvec; + yvec.val[0] = ta_top.val[0 + shift]; + yvec.val[1] = ta_top.val[2 + shift]; + vst2q_u8(out_y.ptr(), yvec); + + uint8x16x2_t yyvec; + yyvec.val[0] = ta_bottom.val[0 + shift]; + yyvec.val[1] = ta_bottom.val[2 + shift]; + vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec); + + uint8x16x2_t uvvec; + uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]); + uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]); + vst2q_u8(out_uv.ptr(), uvvec); + }, + in, out_y, out_uv); +} + +void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in_y(input_ptr->plane(0), win); + Iterator 
in_u(input_ptr->plane(1), win_uv); + Iterator in_v(input_ptr->plane(2), win_uv); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_uv(output_ptr->plane(1), win_uv); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_y_top = vld2q_u8(in_y.ptr()); + const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); + uint8x16x2_t ta_uv; + ta_uv.val[0] = vld1q_u8(in_u.ptr()); + ta_uv.val[1] = vld1q_u8(in_v.ptr()); + //ta_y.val[0] = Y0 Y2 Y4 Y6 ... + //ta_y.val[1] = Y1 Y3 Y5 Y7 ... + //ta_uv.val[0] = U0 U2 U4 U6 ... + //ta_uv.val[1] = V0 V2 V4 V6 ... + + vst2q_u8(out_y.ptr(), ta_y_top); + vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); + vst2q_u8(out_uv.ptr(), ta_uv); + }, + in_y, in_u, in_v, out_y, out_uv); +} + +template +void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto shift = uv ? 
0 : 1; + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in_y(input_ptr->plane(0), win); + Iterator in_uv(input_ptr->plane(1), win_uv); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_u(output_ptr->plane(1), win_uv); + Iterator out_v(output_ptr->plane(2), win_uv); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_y_top = vld2q_u8(in_y.ptr()); + const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); + const auto ta_uv = vld2q_u8(in_uv.ptr()); + //ta_y.val[0] = Y0 Y2 Y4 Y6 ... + //ta_y.val[1] = Y1 Y3 Y5 Y7 ... + //ta_uv.val[0] = U0 U2 U4 U6 ... + //ta_uv.val[1] = V0 V2 V4 V6 ... + + vst2q_u8(out_y.ptr(), ta_y_top); + vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); + vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]); + vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]); + }, + in_y, in_uv, out_y, out_u, out_v); +} + +template +void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto shift = yuyv ? 
0 : 1; + + // Destination's UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in(input_ptr, win); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_u(output_ptr->plane(1), win_uv); + Iterator out_v(output_ptr->plane(2), win_uv); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_top = vld4q_u8(in.ptr()); + const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y()); + //ta.val[0] = Y0 Y2 Y4 Y6 ... + //ta.val[1] = U0 U2 U4 U6 ... + //ta.val[2] = Y1 Y3 Y5 Y7 ... + //ta.val[3] = V0 V2 V4 V7 ... + + uint8x16x2_t yvec; + yvec.val[0] = ta_top.val[0 + shift]; + yvec.val[1] = ta_top.val[2 + shift]; + vst2q_u8(out_y.ptr(), yvec); + + uint8x16x2_t yyvec; + yyvec.val[0] = ta_bottom.val[0 + shift]; + yyvec.val[1] = ta_bottom.val[2 + shift]; + vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec); + + uint8x16_t uvec; + uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]); + vst1q_u8(out_u.ptr(), uvec); + + uint8x16_t vvec; + vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]); + vst1q_u8(out_v.ptr(), vvec); + }, + in, out_y, out_u, out_v); +} + +template +void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto shift = uv ? 
0 : 1; + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in_y(input_ptr->plane(0), win); + Iterator in_uv(input_ptr->plane(1), win_uv); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_u(output_ptr->plane(1), win); + Iterator out_v(output_ptr->plane(2), win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_y_top = vld2q_u8(in_y.ptr()); + const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); + const auto ta_uv = vld2q_u8(in_uv.ptr()); + //ta_y.val[0] = Y0 Y2 Y4 Y6 ... + //ta_y.val[1] = Y1 Y3 Y5 Y7 ... + //ta_uv.val[0] = U0 U2 U4 U6 ... + //ta_uv.val[1] = V0 V2 V4 V6 ... + + vst2q_u8(out_y.ptr(), ta_y_top); + vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); + + uint8x16x2_t uvec; + uvec.val[0] = ta_uv.val[0 + shift]; + uvec.val[1] = ta_uv.val[0 + shift]; + vst2q_u8(out_u.ptr(), uvec); + vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec); + + uint8x16x2_t vvec; + vvec.val[0] = ta_uv.val[1 - shift]; + vvec.val[1] = ta_uv.val[1 - shift]; + vst2q_u8(out_v.ptr(), vvec); + vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec); + }, + in_y, in_uv, out_y, out_u, out_v); +} + +void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, 
win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in_y(input_ptr->plane(0), win); + Iterator in_u(input_ptr->plane(1), win_uv); + Iterator in_v(input_ptr->plane(2), win_uv); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_u(output_ptr->plane(1), win); + Iterator out_v(output_ptr->plane(2), win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_y_top = vld2q_u8(in_y.ptr()); + const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); + const auto ta_u = vld1q_u8(in_u.ptr()); + const auto ta_v = vld1q_u8(in_v.ptr()); + //ta_y.val[0] = Y0 Y2 Y4 Y6 ... + //ta_y.val[1] = Y1 Y3 Y5 Y7 ... + //ta_u = U0 U2 U4 U6 ... + //ta_v = V0 V2 V4 V6 ... + + vst2q_u8(out_y.ptr(), ta_y_top); + vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); + + uint8x16x2_t uvec; + uvec.val[0] = ta_u; + uvec.val[1] = ta_u; + vst2q_u8(out_u.ptr(), uvec); + vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec); + + uint8x16x2_t vvec; + vvec.val[0] = ta_v; + vvec.val[1] = ta_v; + vst2q_u8(out_v.ptr(), vvec); + vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec); + }, + in_y, in_u, in_v, out_y, out_u, out_v); +} + +template +void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + 
Iterator in(input_ptr, win); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_uv(output_ptr->plane(1), win_uv); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_rgb_top = load_rgb(in.ptr(), alpha); + const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha); + //ta_rgb.val[0] = R0 R1 R2 R3 ... + //ta_rgb.val[1] = G0 G1 G2 G3 ... + //ta_rgb.val[2] = B0 B1 B2 B3 ... + + store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2], + ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2], + out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), + out_uv.ptr()); + }, + in, out_y, out_uv); +} + +template +void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in(input_ptr, win); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_u(output_ptr->plane(1), win_uv); + Iterator out_v(output_ptr->plane(2), win_uv); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_rgb_top = load_rgb(in.ptr(), alpha); + const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha); + //ta_rgb.val[0] = R0 R1 R2 R3 ... + //ta_rgb.val[1] = G0 G1 G2 G3 ... + //ta_rgb.val[2] = B0 B1 B2 B3 ... 
+ + store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2], + ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2], + out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), + out_u.ptr(), out_v.ptr()); + }, + in, out_y, out_u, out_v); +} + +template +void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + Iterator in(input_ptr, win); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_u(output_ptr->plane(1), win); + Iterator out_v(output_ptr->plane(2), win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_rgb = load_rgb(in.ptr(), alpha); + //ta_rgb.val[0] = R0 R1 R2 R3 ... + //ta_rgb.val[1] = G0 G1 G2 G3 ... + //ta_rgb.val[2] = B0 B1 B2 B3 ... + + store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2], + out_y.ptr(), out_u.ptr(), out_v.ptr()); + }, + in, out_y, out_u, out_v); +} +} diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h new file mode 100644 index 0000000000..2b5596477a --- /dev/null +++ b/arm_compute/core/NEON/NEKernels.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEKERNELS_H__ +#define __ARM_COMPUTE_NEKERNELS_H__ + +/* Header regrouping all the NEON kernels */ +#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" +#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h" +#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" +#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h" +#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" +#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h" +#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h" +#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" +#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h" +#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" +#include "arm_compute/core/NEON/kernels/NEConvolutionLayerWeightsReshapeKernel.h" +#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" +#include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h" +#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" +#include "arm_compute/core/NEON/kernels/NEDilateKernel.h" +#include "arm_compute/core/NEON/kernels/NEErodeKernel.h" +#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include 
"arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" +#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" +#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" +#include "arm_compute/core/NEON/kernels/NEHOGNonMaximaSuppressionKernel.h" +#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" +#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" +#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h" +#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" +#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" +#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h" +#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" +#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NERemapKernel.h" +#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" +#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h" +#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h" +#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" +#include 
"arm_compute/core/NEON/kernels/NEThresholdKernel.h" +#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" +#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" + +#endif /* __ARM_COMPUTE_NEKERNELS_H__ */ diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h new file mode 100644 index 0000000000..bf27f116e6 --- /dev/null +++ b/arm_compute/core/NEON/NEMath.h @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEMATH_H__ +#define __ARM_COMPUTE_NEMATH_H__ + +#include + +namespace arm_compute +{ +/* Exponent polynomial coefficients */ +const std::array exp_tab = +{ + { + vdupq_n_f32(1.f), + vdupq_n_f32(0.0416598916054f), + vdupq_n_f32(0.500000596046f), + vdupq_n_f32(0.0014122662833f), + vdupq_n_f32(1.00000011921f), + vdupq_n_f32(0.00833693705499f), + vdupq_n_f32(0.166665703058f), + vdupq_n_f32(0.000195780929062f), + } +}; + +/* Logarithm polynomial coefficients */ +const std::array log_tab = +{ + { + vdupq_n_f32(-2.29561495781f), + vdupq_n_f32(-2.47071170807f), + vdupq_n_f32(-5.68692588806f), + vdupq_n_f32(-0.165253549814f), + vdupq_n_f32(5.17591238022f), + vdupq_n_f32(0.844007015228f), + vdupq_n_f32(4.58445882797f), + vdupq_n_f32(0.0141278216615f), + } +}; + +/** Calculate inverse square root. + * + * @param x Input value. + * + * @return The calculated inverse square root. + */ +inline float32x4_t vinvsqrt_f32(float32x4_t x) +{ + float32x4_t sqrt_reciprocal = vrsqrteq_f32(x); + sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); + sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); + + return sqrt_reciprocal; +} + +/** Calculate reciprocal. + * + * @param x Input value. + * + * @return The calculated reciprocal. + */ +inline float32x4_t vinv_f32(const float32x4_t &x) +{ + float32x4_t recip = vrecpeq_f32(x); + recip = vmulq_f32(vrecpsq_f32(x, recip), recip); + recip = vmulq_f32(vrecpsq_f32(x, recip), recip); + return recip; +} + +/** Perform a 7th degree polynomial approximation using Estrin's method. + * + * @param x Input vector value in F32 format. + * @param coeffs Polynomial coefficients table. + * + * @return The calculated approximation. 
+ */ +inline float32x4_t vtaylor_poly_f32(const float32x4_t &x, const std::array &coeffs) +{ + float32x4_t A = vmlaq_f32(coeffs[0], coeffs[4], x); + float32x4_t B = vmlaq_f32(coeffs[2], coeffs[6], x); + float32x4_t C = vmlaq_f32(coeffs[1], coeffs[5], x); + float32x4_t D = vmlaq_f32(coeffs[3], coeffs[7], x); + float32x4_t x2 = vmulq_f32(x, x); + float32x4_t x4 = vmulq_f32(x2, x2); + float32x4_t res = vmlaq_f32(vmlaq_f32(A, B, x2), vmlaq_f32(C, D, x2), x4); + return res; +} + +/** Calculate exponential + * + * @param x Input vector value in F32 format. + * + * @return The calculated exponent. + */ +inline float32x4_t vexp_f32(const float32x4_t &x) +{ + static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2) + static const float32x4_t CONST_INV_LN2 = vdupq_n_f32(1.4426950408f); // 1/ln(2) + + // Perform range reduction [-log(2),log(2)] + int32x4_t m = vcvtq_s32_f32(vmulq_f32(x, CONST_INV_LN2)); + float32x4_t val = vmlsq_f32(x, vcvtq_f32_s32(m), CONST_LN2); + + // Polynomial Approximation + float32x4_t poly = vtaylor_poly_f32(val, exp_tab); + + // Reconstruct + poly = vreinterpretq_f32_s32(vaddq_s32(vreinterpretq_s32_f32(poly), vshlq_n_s32(m, 23))); + + return poly; +} + +/** Calculate logarithm + * + * @param x Input vector value in F32 format. + * + * @return The calculated logarithm. + */ +inline float32x4_t vlog_f32(const float32x4_t &x) +{ + static const int32x4_t CONST_127 = vdupq_n_s32(127); // 127 + static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2) + + // Extract exponent + int32x4_t m = vsubq_s32(vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_f32(x), 23)), CONST_127); + float32x4_t val = vreinterpretq_f32_s32(vsubq_s32(vreinterpretq_s32_f32(x), vshlq_n_s32(m, 23))); + + // Polynomial Approximation + float32x4_t poly = vtaylor_poly_f32(val, log_tab); + + // Reconstruct + poly = vmlaq_f32(poly, vcvtq_f32_s32(m), CONST_LN2); + + return poly; +} + +/** Calculate hyperbolic tangent. 
+ * + * tanh(x) = (e^2x - 1)/(e^2x + 1) + * + * @param val Input vector value in F32 format. + * + * @return The calculated Hyperbolic Tangent. + */ +inline float32x4_t vtanh_f32(const float32x4_t &val) +{ + static const float32x4_t CONST_1 = vdupq_n_f32(1.f); // 1.f + static const float32x4_t CONST_2 = vdupq_n_f32(2.f); // 2.f + + float32x4_t exp2x = vexp_f32(vmulq_f32(CONST_2, val)); + float32x4_t num = vsubq_f32(exp2x, CONST_1); + float32x4_t den = vaddq_f32(exp2x, CONST_1); + float32x4_t tanh = vmulq_f32(num, vinv_f32(den)); + return tanh; +} + +/** Calculate n power of a number. + * + * pow(x,n) = e^(n*log(x)) + * + * @param val Input vector value in F32 format. + * @param n Powers to raise the input to. + * + * @return The calculated power. + */ +inline float32x4_t vpowq_f32(const float32x4_t &val, const float32x4_t &n) +{ + return vexp_f32(vmulq_f32(n, vlog_f32(val))); +} +} + +#endif /* __ARM_COMPUTE_NEMATH_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h new file mode 100644 index 0000000000..39f92e3f68 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H__ +#define __ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the absolute difference kernel + * + * Absolute difference is computed by: + * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] + */ +class NEAbsoluteDifferenceKernel : public INEKernel +{ +public: + /** Default constructor */ + NEAbsoluteDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifferenceKernel(const NEAbsoluteDifferenceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifferenceKernel &operator=(const NEAbsoluteDifferenceKernel &) = delete; + /** Allow instances of this class to be moved */ + NEAbsoluteDifferenceKernel(NEAbsoluteDifferenceKernel &&) = default; + /** Allow instances of this class to be moved */ + NEAbsoluteDifferenceKernel &operator=(NEAbsoluteDifferenceKernel &&) = default; + /** Default destructor */ + ~NEAbsoluteDifferenceKernel() = default; + + /** Set the inputs and output tensors + * + * @param[in] input1 Source tensor. Data types supported: U8/S16 + * @param[in] input2 Source tensor. 
Data types supported: U8/S16 + * @param[out] output Destination tensor, Data types supported: U8/S16 + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised absolute difference functions + * + * @param[in] input1 An input tensor. Data types supported: U8, S16. + * @param[in] input2 An input tensor. Data types supported: U8, S16. + * @param[out] output The output tensor, Data types supported: U8 (Only if both inputs are U8), S16. + * @param[in] window Region on which to execute the kernel. + */ + using AbsDiffFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + + /** Absolute difference function to use for the particular tensor formats passed to configure() */ + AbsDiffFunction *_func; + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; +} +#endif /* __ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h new file mode 100644 index 0000000000..df6d7b8891 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEACCUMULATEKERNEL_H__ +#define __ARM_COMPUTE_NEACCUMULATEKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Interface for the accumulate kernel + * + * Accumulation is computed by: + * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] + */ +class NEAccumulateKernel : public INESimpleKernel +{ +public: + /** Set the input and accumulation tensors + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] accum Destination tensor. Data type supported: S16. + */ + void configure(const ITensor *input, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window) override; +}; + +/** Interface for the accumulate weighted kernel + * + * Weighted accumulation is computed: + * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] + * + * Where @f$ 0 \le \alpha \le 1 @f$ + * Conceptually, the rounding for this is defined as: + * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] +*/ +class NEAccumulateWeightedKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEAccumulateWeightedKernel(); + /** Set the input and accumulation tensors, and the scale value + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] alpha Scalar value in the range [0.0f, 1.0f] + * @param[in,out] accum Accumulated tensor. Data type supported: U8. 
+ */ + void configure(const ITensor *input, float alpha, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window) override; + +protected: + float _alpha; +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** Interface for the accumulate weighted kernel using F16 */ +class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window) override; +}; +#else +using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel; +#endif + +/** Interface for the accumulate squared kernel + * + * The accumulation of squares is computed: + * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] + * + * Where @f$ 0 \le shift \le 15 @f$ +*/ +class NEAccumulateSquaredKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEAccumulateSquaredKernel(); + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] shift Shift value in the range of [0, 15] + * @param[in,out] accum Accumulated tensor. Data type supported: S16. + */ + void configure(const ITensor *input, uint32_t shift, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + uint32_t _shift; +}; +} +#endif /*__ARM_COMPUTE_NEACCUMULATEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h new file mode 100644 index 0000000000..ba93c59194 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the activation layer kernel. 
*/ +class NEActivationLayerKernel : public INESimpleKernel +{ +public: + /** Constructor */ + NEActivationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEActivationLayerKernel(const NEActivationLayerKernel &) = delete; + /** Default move constructor */ + NEActivationLayerKernel(NEActivationLayerKernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete; + /** Default move assignment operator */ + NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default; + /** Set the input and output tensors and the activation to apply. + * + * @param[in] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] activation_info Activation layer information. + */ + void configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using ActivationFunction = ActivationLayerInfo::ActivationFunction; + /** Common signature for all the specialised @ref NEActivationLayerKernel functions + * + * @param[in] window Region on which to execute the kernel. + */ + using ActivationFunctionExecutorPtr = void (NEActivationLayerKernel::*)(const Window &window); + /** Function to apply an activation function on a tensor.
+ * NOTE(review): the declaration below reads as a bare `template` with no parameter list; it looks truncated - confirm against the definition. + * @param[in] window Region on which to execute the kernel. + */ + template + void activation(const Window &window); + +private: + ActivationFunctionExecutorPtr _func; + ActivationLayerInfo _act_info; +}; +} +#endif /*__ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h new file mode 100644 index 0000000000..46d22927f6 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H__ +#define __ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform addition between two tensors */ +class NEArithmeticAdditionKernel : public INEKernel +{ +public: + /** Default constructor */ + NEArithmeticAdditionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAdditionKernel(const NEArithmeticAdditionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAdditionKernel &operator=(const NEArithmeticAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEArithmeticAdditionKernel(NEArithmeticAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEArithmeticAdditionKernel &operator=(NEArithmeticAdditionKernel &&) = default; + /** Default destructor */ + ~NEArithmeticAdditionKernel() = default; + + /** Initialise the kernel's inputs, output and overflow policy. + * + * @param[in] input1 An input tensor. Data types supported: U8, S16, F32. + * @param[in] input2 An input tensor. Data types supported: U8, S16, F32 (only if @p input1 is F32). + * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F32 (only if both inputs are F32). + * @param[in] policy Overflow policy. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised add functions + * + * @param[in] input1 An input tensor. Data types supported: U8, S16, F32. + * @param[in] input2 An input tensor. Data types supported: U8, S16, F32 (only if @p input1 is F32).
+ * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F32 (only if both inputs are F32). + * @param[in] window Region on which to execute the kernel. + */ + using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + /** Add function to use for the particular tensor types passed to configure() */ + AddFunction *_func; + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h new file mode 100644 index 0000000000..a1dcb73d7f --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H__ +#define __ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform subtraction between two tensors */ +class NEArithmeticSubtractionKernel : public INEKernel +{ +public: + /** Default constructor */ + NEArithmeticSubtractionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticSubtractionKernel(const NEArithmeticSubtractionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticSubtractionKernel &operator=(const NEArithmeticSubtractionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEArithmeticSubtractionKernel(NEArithmeticSubtractionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEArithmeticSubtractionKernel &operator=(NEArithmeticSubtractionKernel &&) = default; + /** Default destructor */ + ~NEArithmeticSubtractionKernel() = default; + + /** Initialise the kernel's inputs, output and overflow policy. + * + * @param[in] input1 An input tensor. Data types supported: U8, S16, F32. + * @param[in] input2 An input tensor. Data types supported: U8, S16, F32 (only if @p input1 is F32). + * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F32 (only if both inputs are F32). + * @param[in] policy Overflow policy.
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised sub functions + * + * @param[in] input1 An input tensor. Data types supported: U8, S16, F32. + * @param[in] input2 An input tensor. Data types supported: U8, S16, F32 (only if @p input1 is F32). + * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F32 (only if both inputs are F32). + * @param[in] window Region on which to execute the kernel. + */ + using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + /** Sub function to use for the particular tensor types passed to configure() */ + SubFunction *_func; + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; +} +#endif /* __ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h new file mode 100644 index 0000000000..b931445419 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISEANDKERNEL_H__ +#define __ARM_COMPUTE_NEBITWISEANDKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise AND between XY-planes of two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] + */ +class NEBitwiseAndKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBitwiseAndKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAndKernel(const NEBitwiseAndKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAndKernel &operator=(const NEBitwiseAndKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseAndKernel(NEBitwiseAndKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseAndKernel &operator=(NEBitwiseAndKernel &&) = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input1 An input tensor. Data type supported: U8. + * @param[in] input2 An input tensor. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8.
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEANDKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h new file mode 100644 index 0000000000..e34eb0f5ae --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEBITWISENOTKERNEL_H__ +#define __ARM_COMPUTE_NEBITWISENOTKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform a bitwise NOT operation on a tensor + * + * Result is computed by: + * @f[ output(x,y) = \lnot input(x,y) @f] + */ +class NEBitwiseNotKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBitwiseNotKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseNotKernel(const NEBitwiseNotKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseNotKernel &operator=(const NEBitwiseNotKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseNotKernel(NEBitwiseNotKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseNotKernel &operator=(NEBitwiseNotKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data type supported: U8. + * @param[out] output The output tensor. Data type supported: U8. + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NEBITWISENOTKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h new file mode 100644 index 0000000000..d2bae2660c --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEBITWISEORKERNEL_H__ +#define __ARM_COMPUTE_NEBITWISEORKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise inclusive OR between two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] + */ +class NEBitwiseOrKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBitwiseOrKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseOrKernel(const NEBitwiseOrKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseOrKernel &operator=(const NEBitwiseOrKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseOrKernel(NEBitwiseOrKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseOrKernel &operator=(NEBitwiseOrKernel &&) = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input1 An input tensor. Data type supported: U8. + * @param[in] input2 An input tensor. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEORKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h new file mode 100644 index 0000000000..9dea36e7e3 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEBITWISEXORKERNEL_H__ +#define __ARM_COMPUTE_NEBITWISEXORKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise exclusive OR (XOR) between two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] + */ +class NEBitwiseXorKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBitwiseXorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseXorKernel(const NEBitwiseXorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseXorKernel &operator=(const NEBitwiseXorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseXorKernel(NEBitwiseXorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseXorKernel &operator=(NEBitwiseXorKernel &&) = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input1 An input tensor. Data type supported: U8. + * @param[in] input2 An input tensor. Data type supported: U8. + * @param[out] output The output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEXORKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h new file mode 100644 index 0000000000..6b7bebbf17 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBOX3x3KERNEL_H__ +#define __ARM_COMPUTE_NEBOX3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a Box 3x3 filter */ +class NEBox3x3Kernel : public INESimpleKernel +{ +public: + /** Set the source, destination and border mode of the kernel. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** NEON kernel to perform a Box 3x3 filter using F16 SIMD + */ +class NEBox3x3FP16Kernel : public NEBox3x3Kernel +{ +public: + // Inherited methods overridden: + void run(const Window &window) override; +}; +#else /* ARM_COMPUTE_ENABLE_FP16 */ +using NEBox3x3FP16Kernel = NEBox3x3Kernel; +#endif /* ARM_COMPUTE_ENABLE_FP16 */ +} +#endif /*__ARM_COMPUTE_NEBOX3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h new file mode 100644 index 0000000000..b86085f439 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NECANNYEDGEKERNEL_H__ +#define __ARM_COMPUTE_NECANNYEDGEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Computes magnitude and quantised phase from input gradients. */ +class NEGradientKernel : public INEKernel +{ +public: + /** Default constructor */ + NEGradientKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGradientKernel(const NEGradientKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGradientKernel &operator=(const NEGradientKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGradientKernel(NEGradientKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGradientKernel &operator=(NEGradientKernel &&) = default; + /** Default destructor */ + virtual ~NEGradientKernel() = default; + + /** Initialise the kernel's sources, destinations and normalization type. + * + * @note gx, gy and magnitude must all have the same element size (either 16 or 32 bits) + * + * @param[in] gx Source tensor - Gx component. Data type supported: S16/S32. + * @param[in] gy Source tensor - Gy component. Data type supported: same as @p gx. + * @param[out] magnitude Destination tensor - Magnitude. Data type supported: U16 (if the data type of @p gx is S16) / U32 (if the data type of @p gx is S32). + * @param[out] phase Destination tensor - Quantized phase. Data type supported: U8. + * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm. + */ + virtual void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type); + + // Inherited methods overridden: + void run(const Window &window) override; + +protected: + /** Common signature for all the specialised gradient functions + * + * @param[in] gx_ptr Pointer to the first input tensor.
+ * @param[in] gy_ptr Pointer to the second input tensor. + * @param[out] magnitude_ptr Pointer to the first output tensor. + * @param[out] phase_ptr Pointer to the second output tensor. + */ + using GradientFunction = void(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr); + + GradientFunction *_func; /**< Gradient function to use for the particular tensor types passed to configure() */ + const ITensor *_gx; /**< Source tensor - Gx component */ + const ITensor *_gy; /**< Source tensor - Gy component */ + ITensor *_magnitude; /**< Destination tensor - Magnitude */ + ITensor *_phase; /**< Destination tensor - Quantized phase */ +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** NEON kernel to perform Gradient computation + */ +class NEGradientFP16Kernel : public NEGradientKernel +{ +public: + // Inherited methods overridden: + void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type) override; +}; +#else /* ARM_COMPUTE_ENABLE_FP16 */ +using NEGradientFP16Kernel = NEGradientKernel; +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + +/** NEON kernel to perform Non-Maxima suppression for Canny Edge. + * + * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input + * to characterize points as possible edges. Thus, at the end, each point will be set to EDGE, NO_EDGE or MAYBE.
+ * + * @note Hysteresis is computed in @ref NEEdgeTraceKernel + */ +class NEEdgeNonMaxSuppressionKernel : public INEKernel +{ +public: + /** Default constructor */ + NEEdgeNonMaxSuppressionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeNonMaxSuppressionKernel(const NEEdgeNonMaxSuppressionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeNonMaxSuppressionKernel &operator=(const NEEdgeNonMaxSuppressionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEEdgeNonMaxSuppressionKernel(NEEdgeNonMaxSuppressionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEEdgeNonMaxSuppressionKernel &operator=(NEEdgeNonMaxSuppressionKernel &&) = default; + /** Default destructor */ + ~NEEdgeNonMaxSuppressionKernel() = default; + + /** Initialise the kernel's sources, destination and border mode. + * + * @param[in] magnitude Source tensor - Magnitude. Data type supported: U16/U32. + * @param[in] phase Source tensor - Quantized phase. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. It will be filled with 0 for "no edge", 127 for "maybe", 255 for "edge" + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, int32_t upper_thr, int32_t lower_thr, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Common signature for all the specialised non-maxima suppression functions + * + * @param[in] magnitude_ptr Pointer to the first input tensor. + * @param[in] phase_ptr Pointer to the second input tensor.
+ * @param[out] output_ptr Pointer to the output tensor + * @param[in] stride_mag Stride of the magnitude tensor + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + */ + using EdgeNonMaxSupprFunction = void(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t upper_thr, + const int32_t lower_thr); + + EdgeNonMaxSupprFunction *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ + const ITensor *_magnitude; /**< Source tensor - Magnitude */ + const ITensor *_phase; /**< Source tensor - Quantized phase */ + ITensor *_output; /**< Destination tensor */ + int32_t _lower_thr; /**< Lower threshold used for the hysteresis */ + int32_t _upper_thr; /**< Upper threshold used for the hysteresis */ +}; + +/** NEON kernel to perform Edge tracing */ +class NEEdgeTraceKernel : public INEKernel +{ +public: + /** Default constructor */ + NEEdgeTraceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeTraceKernel(const NEEdgeTraceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeTraceKernel &operator=(const NEEdgeTraceKernel &) = delete; + /** Allow instances of this class to be moved */ + NEEdgeTraceKernel(NEEdgeTraceKernel &&) = default; + /** Allow instances of this class to be moved */ + NEEdgeTraceKernel &operator=(NEEdgeTraceKernel &&) = default; + /** Default destructor */ + ~NEEdgeTraceKernel() = default; + + /** Initialise the kernel's source and destination. + * + * @param[in,out] input Source tensor. Data type supported: U8. Must contain 0 for "no edge", 127 for "maybe", 255 for "edge" + * @param[in,out] output Destination tensor. Data type supported: U8. Must be initialized to 0 (No edge).
+ */ + void configure(ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + bool is_parallelisable() const override; + +private: + ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NECANNYEDGEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h new file mode 100644 index 0000000000..8b669a4d28 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H__ +#define __ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +#include +#include + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Interface for the channel combine kernel */ +class NEChannelCombineKernel : public INEKernel +{ +public: + /** Default constructor */ + NEChannelCombineKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelCombineKernel(const NEChannelCombineKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelCombineKernel &operator=(const NEChannelCombineKernel &) = delete; + /** Allow instances of this class to be moved */ + NEChannelCombineKernel(NEChannelCombineKernel &&) = default; + /** Allow instances of this class to be moved */ + NEChannelCombineKernel &operator=(NEChannelCombineKernel &&) = default; + /** Default destructor */ + ~NEChannelCombineKernel() = default; + + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 + * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8 + * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + */ + void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. 
Data type supported: U8 + * @param[out] output The multi planar output tensor. Formats supported: NV12/NV21/IYUV/YUV444 + */ + void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + /** Combine 3 planes to form a three channel single plane tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_3C(const Window &win); + /** Combine 4 planes to form a four channel single plane tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_4C(const Window &win); + /** Combine 3 planes to form a single plane YUV tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + template + void combine_YUV_1p(const Window &win); + /** Combine 3 planes to form a two plane YUV tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_YUV_2p(const Window &win); + /** Combine 3 planes to form a three plane YUV tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_YUV_3p(const Window &win); + /** Copies a full plane to the output tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void copy_plane(const Window &win, uint32_t plane_id); + /** Common signature for all the specialised ChannelCombine functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using ChannelCombineFunction = void (NEChannelCombineKernel::*)(const Window &window); + /** ChannelCombine function to use for the particular tensor types passed to configure() */ + ChannelCombineFunction _func; + std::array _planes; + ITensor *_output; + IMultiImage *_output_multi; + std::array _x_subsampling; + std::array _y_subsampling; + unsigned int _num_elems_processed_per_iteration; + bool _is_parallelizable; +}; +} +#endif /* __ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h new file mode 100644 index 0000000000..0715e1f8cb --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H__ +#define __ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Interface for the channel extract kernel */ +class NEChannelExtractKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEChannelExtractKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelExtractKernel(const NEChannelExtractKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelExtractKernel &operator=(const NEChannelExtractKernel &) = delete; + /** Allow instances of this class to be moved */ + NEChannelExtractKernel(NEChannelExtractKernel &&) = default; + /** Allow instances of this class to be moved */ + NEChannelExtractKernel &operator=(NEChannelExtractKernel &&) = default; + /** Default destructor */ + ~NEChannelExtractKernel() = default; + + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 + * @param[in] channel Channel to extract. + * @param[out] output Destination tensor. Format supported: u8 + */ + void configure(const ITensor *input, Channel channel, ITensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 + * @param[in] channel Channel to extract. + * @param[out] output Single-planar destination image. Format supported: U8 + */ + void configure(const IMultiImage *input, Channel channel, IImage *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Extract one channel from a two channel planar tensor. + * + * @param[in] win Region on which to execute the kernel. 
+ */ + void extract_1C_from_2C_img(const Window &win); + /** Extract one channel from a three channel planar tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_1C_from_3C_img(const Window &win); + /** Extract one channel from a four channel planar tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_1C_from_4C_img(const Window &win); + /** Extract U/V channel from a single planar YUYV/UYVY tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_YUYV_uv(const Window &win); + /** Copies a full plane to the output tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void copy_plane(const Window &win); + /** Common signature for all the specialised ChannelExtract functions + * + * @param[in] window Region on which to execute the kernel. + */ + using ChannelExtractFunction = void (NEChannelExtractKernel::*)(const Window &window); + /** ChannelExtract function to use for the particular tensor types passed to configure() */ + ChannelExtractFunction _func; + unsigned int _lut_index; +}; +} +#endif /* __ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NECol2ImKernel.h b/arm_compute/core/NEON/kernels/NECol2ImKernel.h new file mode 100644 index 0000000000..b808dc10af --- /dev/null +++ b/arm_compute/core/NEON/kernels/NECol2ImKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECOL2IMKERNEL_H__ +#define __ARM_COMPUTE_NECOL2IMKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform col2im reshaping. + * + * Rearranges each matrix column into image blocks. It's the inverse operation of @ref NEIm2ColKernel. 
+ * + * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: + * + * @f[ + * \left( \begin{array}{ccccccccc} + * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccc} + * a0 & a1 & a2 \\ + * a3 & a4 & a5 \\ + * a6 & a7 & a8 \\ + * \end{array} \right) + * @f] + */ +class NECol2ImKernel : public INEKernel +{ +public: + /** Default constructor */ + NECol2ImKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECol2ImKernel(const NECol2ImKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECol2ImKernel &operator=(const NECol2ImKernel &) = delete; + /** Allow instances of this class to be moved */ + NECol2ImKernel(NECol2ImKernel &&) = default; + /** Allow instances of this class to be moved */ + NECol2ImKernel &operator=(NECol2ImKernel &&) = default; + /** Default destructor */ + ~NECol2ImKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Data types supported: F32 + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input + * @param[in] convolved_dims Output convolved dimensions. 
+ */ + void configure(const ITensor *input, ITensor *output, std::pair convolved_dims); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input; + ITensor *_output; + std::pair _convolved_dims; +}; +} + +#endif /*__ARM_COMPUTE_NECOL2IMKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h new file mode 100644 index 0000000000..3913be38ec --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_COLORCONVERTKERNEL_H__ +#define __ARM_COMPUTE_COLORCONVERTKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Interface for the color convert kernel */ +class NEColorConvertKernel : public INEKernel +{ +public: + /** Default constructor */ + NEColorConvertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEColorConvertKernel(const NEColorConvertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEColorConvertKernel &operator=(const NEColorConvertKernel &) = delete; + /** Allow instances of this class to be moved */ + NEColorConvertKernel(NEColorConvertKernel &&) = default; + /** Allow instances of this class to be moved */ + NEColorConvertKernel &operator=(NEColorConvertKernel &&) = default; + /** Default destructor */ + ~NEColorConvertKernel() = default; + + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 + * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), + * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/) + */ + void configure(const ITensor *input, ITensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 + */ + void configure(const IMultiImage *input, IImage *output); + /** Set the input and output of the kernel + * + * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + * @param[out] output Multi-planar destination image. 
Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888) + */ + void configure(const IImage *input, IMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) + */ + void configure(const IMultiImage *input, IMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using ColorConvertFunction = void(const void *__restrict input_ptr, void *__restrict output_ptr, const Window &win); + const void *_input; + void *_output; + unsigned int _num_elems_processed_per_iteration; + ColorConvertFunction *_func; +}; +} +#endif /*__ARM_COMPUTE_NECOLORCONVERTKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEConvolutionKernel.h b/arm_compute/core/NEON/kernels/NEConvolutionKernel.h new file mode 100644 index 0000000000..588a228a5d --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEConvolutionKernel.h @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECONVOLUTIONKERNEL_H__ +#define __ARM_COMPUTE_NECONVOLUTIONKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/NEON/INESimpleKernel.h" + +#include +#include +#include + +namespace arm_compute +{ +class ITensor; + +/****************************************************************************************\ + * Square Convolution * +\****************************************************************************************/ + +/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). + * The client can supply a convolution matrix \f$ C_{m,n} \f$. + * @f{eqnarray}{ + * k_0 &=& \frac{m}{2} \\ + * l_0 &=& \frac{n}{2} \\ + * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} + * @f} + * + * @note The above equation for this function is similar to the default OpenCV Filter2D function, + * which actually computes a correlation and not a convolution. + * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. + */ +template +class NEConvolutionKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEConvolutionKernel(); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data types supported: U8, S16. 
+ * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + template + void convolution(const Window &win); + +protected: + uint32_t _scale; /**< scale of the convolution */ + std::array _convolution; /**< convolution matrix */ +}; + +/** Interface for the kernel which applied a 3x3 convolution to a tensor.*/ +using NEConvolution3x3Kernel = NEConvolutionKernel<3>; +/** Interface for the kernel which applied a 5x5 convolution to a tensor.*/ +using NEConvolution5x5Kernel = NEConvolutionKernel<5>; +/** Interface for the kernel which applied a 7x7 convolution to a tensor.*/ +using NEConvolution7x7Kernel = NEConvolutionKernel<7>; +///** Interface for the kernel which applied a 9x9 convolution to a tensor.*/ +using NEConvolution9x9Kernel = NEConvolutionKernel<9>; + +/****************************************************************************************\ + * Separable Square Convolution * +\****************************************************************************************/ + +/** Kernel for the Horizontal pass of a Separable Convolution */ +template +class NESeparableConvolutionHorKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NESeparableConvolutionHorKernel(); + + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data types supported: U16, S16, S32. 
+ * @param[in] conv_row Convolution matrix to apply to the input tensor. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Apply the object's convolution to the given window of the input tensor. + * + * @param[in] window Window to apply the convolution on. + */ + template + void convolve(const Window &window); + + std::array _conv_row; /**< Convolution coefficients */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel which applied a 5x1 horizontal convolution to a tensor.*/ +using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>; +/** Interface for the kernel which applied a 7x1 horizontal convolution to a tensor.*/ +using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>; +/** Interface for the kernel which applied a 9x1 horizontal convolution to a tensor.*/ +using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>; + +/** Kernel for the Vertical pass of a Separable Convolution */ +template +class NESeparableConvolutionVertKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NESeparableConvolutionVertKernel(); + + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U16, S16, S32. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv_col Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Apply the object's convolution to the given window of the input tensor. + * This function is used if the intermediate values have been stored as U16. + * + * @param[in] win Window to apply the convolution on. + */ + template + void convolution_u16(const Window &win); + /** Apply the object's convolution to the given window of the input tensor. + * This function is used if the intermediate values have been stored as S16. + * + * @param[in] win Window to apply the convolution on. + */ + template + void convolution_s16(const Window &win); + /** Apply the object's convolution to the given window of the input tensor. + * This function is used if the intermediate values have been stored as S32. + * + * @param[in] win Window to apply the convolution on. + */ + template + void convolution_s32(const Window &win); + + std::array _conv_col; /**< Convolution coefficients */ + uint32_t _scale; /**< Convolution's scale */ +}; + +/** Interface for the kernel which applied a 1x5 vertical convolution to a tensor.*/ +using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>; +/** Interface for the kernel which applied a 1x7 vertical convolution to a tensor.*/ +using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>; +/** Interface for the kernel which applied a 1x9 vertical convolution to a tensor.*/ +using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>; + +/****************************************************************************************\ + * Rectangle Convolution * +\****************************************************************************************/ + +/** Kernel for the running convolution on a rectangle matrix. 
+ * + * @note Supports combinations of 3,5,7 and 9. + */ +class NEConvolutionRectangleKernel : public INEKernel +{ +public: + /** Default constructor */ + NEConvolutionRectangleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &) = delete; + /** Allow instances of this class to be moved */ + NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default; + /** Allow instances of this class to be moved */ + NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] width Width of convolution matrix (Number of columns) + * @param[in] height Height of convolution matrix (Number of rows) + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + unsigned int get_index(uint32_t val); + /** Apply the object's convolution to the given window of the input tensor. + * + * @param[in] win Window to apply the convolution on. 
+ */ + template + void convolution(const Window &win); + +protected: + const ITensor *_input; /**< Input tensor */ + ITensor *_output; /**< Output tensor */ + uint32_t _scale; /**< Scale of the convolution */ + std::vector _convolution; /**< Convolution matrix */ + BorderSize _border_size; /**< Calculated border width */ + uint32_t _func_idx; /**< Index used to specify convolution function to be used */ + const static unsigned int _nr_supported_sizes + { + 4 + }; /**< Number of supported permutations */ +}; +} +#endif /*__ARM_COMPUTE_NECONVOLUTIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEConvolutionLayerWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEConvolutionLayerWeightsReshapeKernel.h new file mode 100644 index 0000000000..6057b2f4ec --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEConvolutionLayerWeightsReshapeKernel.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECONVOLUTIONLAYERWEIGHTSRESHAPEKERNEL_H__ +#define __ARM_COMPUTE_NECONVOLUTIONLAYERWEIGHTSRESHAPEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform reshaping on the weights used by the convolution layer. + * + * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. + * In combination with the @ref NEIm2ColKernel, this can transform a convolution into a matrix multiplication. + * + * For example, assuming a 3D weight kernel of 3x3 dimensions and a depth of 2, we have: + * @f[ + * \left( \begin{array}{ccc} + * a000 & a001 & a002 \\ + * a010 & a011 & a012 \\ + * a020 & a021 & a022 \\ + * \end{array} \right) + * \left( \begin{array}{ccc} + * a100 & a101 & a102 \\ + * a110 & a111 & a112 \\ + * a120 & a121 & a122 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{cccccccccccccccccc} + * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ + * \end{array} \right) + * @f] + */ +class NEConvolutionLayerWeightsReshapeKernel : public INEKernel +{ +public: + /** Default constructor */ + NEConvolutionLayerWeightsReshapeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayerWeightsReshapeKernel(const NEConvolutionLayerWeightsReshapeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayerWeightsReshapeKernel &operator=(const NEConvolutionLayerWeightsReshapeKernel &) = delete; + /** Allow instances of this class to be moved */ +
NEConvolutionLayerWeightsReshapeKernel(NEConvolutionLayerWeightsReshapeKernel &&) = default; + /** Allow instances of this class to be moved */ + NEConvolutionLayerWeightsReshapeKernel &operator=(NEConvolutionLayerWeightsReshapeKernel &&) = default; + /** Default destructor */ + ~NEConvolutionLayerWeightsReshapeKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data types supported: F32 + * @param[in] bias The shared bias tensor to append. Biases are a 1D tensor with dimensions [OFM]. Data types supported: Same as @p input + * @param[out] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input + */ + void configure(const ITensor *input, const ITensor *bias, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; + bool _has_bias; +}; +} + +#endif /*__ARM_COMPUTE_NECONVOLUTIONLAYERWEIGHTSRESHAPEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h b/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h new file mode 100644 index 0000000000..67b8c6052d --- /dev/null +++ b/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H__ +#define __ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +#include + +namespace arm_compute +{ +class IDistribution1D; +class ILut; +class ITensor; +using IImage = ITensor; + +/** Interface for the cumulative distribution (cumulative summation) calculation kernel. + * + * This kernel calculates the cumulative sum of a given distribution (meaning that each output element + * is the sum of all its previous elements including itself) and creates a lookup table with the normalized + * pixel intensities which is used to improve the contrast of the image.
+ */ +class NECumulativeDistributionKernel : public INEKernel +{ +public: + /** Default constructor */ + NECumulativeDistributionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECumulativeDistributionKernel(const NECumulativeDistributionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECumulativeDistributionKernel &operator=(const NECumulativeDistributionKernel &) = delete; + /** Allow instances of this class to be moved */ + NECumulativeDistributionKernel(NECumulativeDistributionKernel &&) = default; + /** Allow instances of this class to be moved */ + NECumulativeDistributionKernel &operator=(NECumulativeDistributionKernel &&) = default; + /** Set the input and output distribution. + * + * @param[in] input Input image. Data type supported: U8 + * @param[in] distribution Unnormalized 256-bin distribution of the input image. + * @param[out] cumulative_sum Cumulative distribution (Summed histogram). Should be same size as @p distribution. + * @param[out] output Equalization lookup table. Should consist of 256 entries of U8 elements. + */ + void configure(const IImage *input, const IDistribution1D *distribution, IDistribution1D *cumulative_sum, ILut *output); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + const IImage *_input; /**< Input image. */ + const IDistribution1D *_distribution; /**< Input histogram of the input image. */ + IDistribution1D *_cumulative_sum; /**< The cumulative distribution. */ + ILut *_output; /**< Output with the equalization lookup table. */ +private: + static const uint32_t _histogram_size = 256; /**< Default histogram size of 256.
*/ +}; +} + +#endif /*__ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h new file mode 100644 index 0000000000..e92e09bfdf --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__ +#define __ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Depth conversion kernel */ +class NEDepthConvertKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEDepthConvertKernel(); + /** Set the input and output of the kernel + * + * Valid conversions Input -> Output : + * + * - U8 -> U16, S16, U32, S32 + * - U16 -> U8, U32, S32 + * - S16 -> U8, U32, S32 + * - U32 -> U8, U16, S16 + * - S32 -> U8, U16, S16 + * + * + * @param[in] input The input tensor to convert. Data types supported: U8, U16, S16, U32 or S32. + * @param[out] output The output tensor. Data types supported: U8, U16, S16, U32 or S32. + * @param[in] policy Conversion policy. + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + */ + void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + ConvertPolicy _policy; + uint32_t _shift; +}; +} +#endif /*__ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDerivativeKernel.h b/arm_compute/core/NEON/kernels/NEDerivativeKernel.h new file mode 100644 index 0000000000..abb8a894c0 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEDerivativeKernel.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDERIVATIVEKERNEL_H__ +#define __ARM_COMPUTE_NEDERIVATIVEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run the derivative along the X/Y directions on a tensor.
+ * + */ +class NEDerivativeKernel : public INEKernel +{ +public: + /** Default constructor */ + NEDerivativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivativeKernel(const NEDerivativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivativeKernel &operator=(const NEDerivativeKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDerivativeKernel(NEDerivativeKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDerivativeKernel &operator=(NEDerivativeKernel &&) = default; + /** Initialise the kernel's sources, destination and border + * + * @note At least one of output_x or output_y must be set + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Function to perform derivative along the X direction on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void derivative_x(const Window &window); + /** Function to perform derivative along the Y direction on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void derivative_y(const Window &window); + /** Function to perform derivative along the X and Y directions on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void derivative_xy(const Window &window); + /** Common signature for all the specialised derivative functions + * + * @param[in] window Region on which to execute the kernel. + */ + using DerivativeFunction = void (NEDerivativeKernel::*)(const Window &window); + /** Derivative function to use for the particular tensor types passed to configure() */ + DerivativeFunction _func; + +private: + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< Output tensor - Derivative along the X direction */ + ITensor *_output_y; /**< Output tensor - Derivative along the Y direction */ +}; +} +#endif /* __ARM_COMPUTE_NEDERIVATIVEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDilateKernel.h b/arm_compute/core/NEON/kernels/NEDilateKernel.h new file mode 100644 index 0000000000..05f148a1fd --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEDilateKernel.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDILATEKERNEL_H__ +#define __ARM_COMPUTE_NEDILATEKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform boolean image dilation */ +class NEDilateKernel : public INESimpleKernel +{ +public: + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_NEDILATEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEErodeKernel.h b/arm_compute/core/NEON/kernels/NEErodeKernel.h new file mode 100644 index 0000000000..86dc217cc0 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEErodeKernel.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEERODEKERNEL_H__ +#define __ARM_COMPUTE_NEERODEKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform boolean image erosion */ +class NEErodeKernel : public INESimpleKernel +{ +public: + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_NEERODEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEFastCornersKernel.h b/arm_compute/core/NEON/kernels/NEFastCornersKernel.h new file mode 100644 index 0000000000..d9bd6acde9 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEFastCornersKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEFASTCORNERSKERNEL_H__ +#define __ARM_COMPUTE_NEFASTCORNERSKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** NEON kernel to perform fast corners */ +class NEFastCornersKernel : public INEKernel +{ +public: + /** Constructor */ + NEFastCornersKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCornersKernel(const NEFastCornersKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCornersKernel &operator=(const NEFastCornersKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFastCornersKernel(NEFastCornersKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFastCornersKernel &operator=(NEFastCornersKernel &&) = default; + /** Initialise the kernel. + * + * @param[in] input Source image. Data type supported: U8. + * @param[out] output Output image. Data type supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const IImage *_input; /**< source image */ + IImage *_output; /**< intermediate results */ + uint8_t _threshold; /**< threshold on difference between intensity */ + bool _non_max_suppression; /**< true if non-maxima suppression is applied in the next stage */ +}; +} +#endif /*__ARM_COMPUTE_NEFASTCORNERSKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEFillArrayKernel.h b/arm_compute/core/NEON/kernels/NEFillArrayKernel.h new file mode 100644 index 0000000000..8e0846ea88 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEFillArrayKernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEFILLARRAYKERNEL_H__ +#define __ARM_COMPUTE_NEFILLARRAYKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** This kernel adds all texels greater than or equal to the threshold value to the keypoint array. */ +class NEFillArrayKernel : public INEKernel +{ +public: + /** Default constructor */ + NEFillArrayKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillArrayKernel(const NEFillArrayKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillArrayKernel &operator=(const NEFillArrayKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFillArrayKernel(NEFillArrayKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFillArrayKernel &operator=(NEFillArrayKernel &&) = default; + /** Default destructor */ + ~NEFillArrayKernel() = default; + + /** Initialise the kernel. + * + * @param[in] input Source image. Data type supported: U8. + * @param[in] threshold Texels greater than the threshold will be added to the array. + * @param[out] output Arrays of keypoints to store the results. + */ + void configure(const IImage *input, uint8_t threshold, IKeyPointArray *output); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + const IImage *_input; + IKeyPointArray *_output; + uint8_t _threshold; +}; +} +#endif /*__ARM_COMPUTE_NEFILLARRAYKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h new file mode 100644 index 0000000000..0829cc7d8a --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEFILLBORDERKERNEL_H__ +#define __ARM_COMPUTE_NEFILLBORDERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to fill borders */ +class NEFillBorderKernel : public INEKernel +{ +public: + /** Default constructor */ + NEFillBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillBorderKernel(const NEFillBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillBorderKernel &operator=(const NEFillBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFillBorderKernel(NEFillBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFillBorderKernel &operator=(NEFillBorderKernel &&) = default; + /** Default destructor */ + ~NEFillBorderKernel() = default; + + /** Initialise the function. + * + * @note This kernel fills the borders within the XY-planes. + * + * @param[in,out] tensor Tensor to process. Data types supported: U8, S16, S32, F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use when filling the border. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ * + */ + void configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + template + void fill_replicate_single_channel(const Window &window); + template + void fill_constant_value_single_channel(const Window &window); + + ITensor *_tensor; + BorderSize _border_size; + BorderMode _mode; + PixelValue _constant_border_value; +}; +} +#endif /*__ARM_COMPUTE_NEFILLBORDERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h new file mode 100644 index 0000000000..1c8ef32ef7 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H__ +#define __ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to fill the interior borders */ +class NEFillInnerBorderKernel : public INEKernel +{ +public: + /** Default constructor */ + NEFillInnerBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillInnerBorderKernel(const NEFillInnerBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillInnerBorderKernel &operator=(const NEFillInnerBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFillInnerBorderKernel(NEFillInnerBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFillInnerBorderKernel &operator=(NEFillInnerBorderKernel &&) = default; + /** Default destructor */ + ~NEFillInnerBorderKernel() = default; + + /** Initialise the function. + * + * @note This kernel fills the borders within the XY-planes. + * + * @param[in,out] input Tensor to process. Data types supported: U8, S16, S32, F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] constant_border_value (Optional) Constant value to use for the borders.
+ * + */ + void configure(ITensor *input, BorderSize border_size, const PixelValue &constant_border_value = PixelValue()); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + template + void fill_value_single_channel(const Window &window); + + ITensor *_tensor; + BorderSize _border_size; + PixelValue _constant_border_value; +}; +} +#endif /*__ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h new file mode 100644 index 0000000000..ab5787c88a --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H__ +#define __ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to interleave the elements of a matrix + * + * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{cccccccccccccccc} + * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\ + * \end{array} \right) + * @f] + * + * After this operation, the output matrix will have the following shape: [ height * 4, width / 4 ] + */ +class NEGEMMInterleave4x4Kernel : public INESimpleKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor (Matrix A). Data types supported: F32, F16. + * @param[out] output Output tensor (Matrix A interleaved). Data type supported: same as @p input. + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; +}; +} +#endif /*__ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H__*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h new file mode 100644 index 0000000000..ba4dcc3373 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to multiply matrices + * + * @note @ref NEGEMMLowpMatrixMultiplyKernel low precision matrix product kernel + * This kernel performs the following computation: + * + * -# Convert a values from uint8 to int32 and add a_offset to each of them. + * -# Convert b values from uint8 to int32 and add b_offset to each of them. + * -# Compute the int32 matrix product of the resulting a * b. + * -# Add output_offset to each entry of the result. + * -# Multiply each entry of the result and round to the nearest integer + * -# Clamp the resulting int32 values to the [0..255] range and cast to uint8. 
+ * + */ +class NEGEMMLowpMatrixMultiplyKernel : public INEKernel +{ +public: + /** Constructor */ + NEGEMMLowpMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpMatrixMultiplyKernel(const NEGEMMLowpMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpMatrixMultiplyKernel &operator=(const NEGEMMLowpMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixMultiplyKernel(NEGEMMLowpMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixMultiplyKernel &operator=(NEGEMMLowpMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input and output. + * + * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel. These two + * kernels change the layout of the original matrices to be more cache-friendly. + * + * @param[in] input0 Input tensor containing the interleaved Matrix A. Data type supported: U8 + * @param[in] input1 Input tensor containing the transposed Matrix B. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_offset Offset to be added to each element of the output matrix + * @param[in] output_mult_int Value to be multiplied to each entry of the result. + * @param[in] shift Number of bits to shift right the result. 
+ */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift); + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input0; + const ITensor *_input1; + ITensor *_output; + int32_t _a_offset; + int32_t _b_offset; + int32_t _output_offset; + int32_t _output_mult_int; + int32_t _shift; +}; +} +#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H__*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h new file mode 100644 index 0000000000..7d6806d338 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H__ +#define __ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; +/** NEON kernel to add a bias to each row of the input tensor */ +class NEGEMMMatrixAccumulateBiasesKernel : public INEKernel +{ +public: + /** Default constructor */ + NEGEMMMatrixAccumulateBiasesKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMMatrixAccumulateBiasesKernel(const NEGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMMatrixAccumulateBiasesKernel &operator=(const NEGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMMatrixAccumulateBiasesKernel(NEGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMMatrixAccumulateBiasesKernel &operator=(NEGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Default destructor */ + ~NEGEMMMatrixAccumulateBiasesKernel() = default; + /** Set the accumulate buffer and the biases of the kernel. + * + * @param[in, out] accum The accumulate tensor to convert. Data type supported: F32 + * @param[in] biases The shared biases tensor to append. It must be 1D Tensor. 
Data type supported: Same as @p accum + */ + void configure(ITensor *accum, const ITensor *biases); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + ITensor *_accum; + const ITensor *_biases; +}; +} +#endif /*__ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h new file mode 100644 index 0000000000..d1eccec3c0 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H__ +#define __ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta: + * + * @note [ MTX_OUT = MTX_0 + beta * MTX_1 ] with MTX_0 and MTX_1 of the same size + * + * @note This stage is used to finalize the GEMM result and it is computed if and only if beta != 0.0. In case this kernel is used for finalizing GEMM result, we have: + * - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref NEGEMMMatrixMultiplyKernel + * - MTX_1 = C + */ +class NEGEMMMatrixAdditionKernel : public INESimpleKernel +{ +public: + /** Constructor */ + NEGEMMMatrixAdditionKernel(); + /** Prevent instances of this class from being copied */ + NEGEMMMatrixAdditionKernel(const NEGEMMMatrixAdditionKernel &) = delete; + /** Prevent instances of this class from being copied */ + NEGEMMMatrixAdditionKernel &operator=(const NEGEMMMatrixAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMMatrixAdditionKernel(NEGEMMMatrixAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMMatrixAdditionKernel &operator=(NEGEMMMatrixAdditionKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @note The input and output tensor must have the same dimensions + * + * @param[in] input Input tensor (Matrix C). Data types supported: F32, F16. + * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. 
+ * @param[in] beta Weight of matrix C + */ + void configure(const ITensor *input, ITensor *output, const float beta); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + float _beta; +}; +} +#endif /* __ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h new file mode 100644 index 0000000000..f45fb0f825 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to multiply two input matrices "A" and "B". All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication + * + * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel + * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped + * + */ +class NEGEMMMatrixMultiplyKernel : public INEKernel +{ +public: + /** Constructor */ + NEGEMMMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMMatrixMultiplyKernel(const NEGEMMMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMMatrixMultiplyKernel &operator=(const NEGEMMMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMMatrixMultiplyKernel(NEGEMMMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMMatrixMultiplyKernel &operator=(NEGEMMMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel + * These two kernels change the layout of the original matrices to be more cache-friendly. 
+ * + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F32, F16. + * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. + * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] alpha Weight of the matrix product + */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input0; + const ITensor *_input1; + ITensor *_output; + float _alpha; +}; +} +#endif /*__ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H__*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h new file mode 100644 index 0000000000..416b55f27c --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H__ +#define __ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel which transposes the elements of a matrix in chunks of 1x4 if the input data type is F32 or in chunks of 1x8 if the input data type is F16. + * + * Following an example of how the transposition1xW works when the input data type is F32 + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + * + * Following an example of how the transposition1xW works when the input data type is F16 + * + * @f[ + * \left( \begin{array}{cccccccc} + * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\ + * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\ + * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\ + * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc} + * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 
& a37\\ + * \end{array} \right) + * @f] + * + * @note If the input data type is F32, the output matrix will have the following shape: [ height * 4, width / 4 ] + * @note If the input data type is F16, the output matrix will have the following shape: [ height * 8, width / 8 ] + * + */ +class NEGEMMTranspose1xWKernel : public INESimpleKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: F32, F16. + * @param[out] output Output tensor. Data type supported: same as @p input. + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; +}; +} +#endif /*__ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h new file mode 100644 index 0000000000..763fab88f6 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H__ +#define __ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a Gaussian 3x3 filter */ +class NEGaussian3x3Kernel : public INESimpleKernel +{ +public: + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: S16 + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h new file mode 100644 index 0000000000..86b28907da --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H__ +#define __ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a Gaussian 5x5 filter (horizontal pass) */ +class NEGaussian5x5HorKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEGaussian5x5HorKernel(); + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + BorderSize _border_size; +}; + +/** NEON kernel to perform a Gaussian 5x5 filter (vertical pass) */ +class NEGaussian5x5VertKernel : public INESimpleKernel +{ +public: + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: S16. + * @param[out] output Destination tensor, Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h b/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h new file mode 100644 index 0000000000..decbb2024a --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H__ +#define __ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a GaussianPyramid (horizontal pass) */ +class NEGaussianPyramidHorKernel : public INEKernel +{ +public: + /** Default constructor */ + NEGaussianPyramidHorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &&) = default; + /** Default destructor */ + ~NEGaussianPyramidHorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; + ITensor *_output; +}; +/** NEON kernel to perform a GaussianPyramid (vertical pass) */ +class NEGaussianPyramidVertKernel : public INEKernel +{ +public: + /** Default constructor */ + NEGaussianPyramidVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &&) = default; + /** Default destructor */ + ~NEGaussianPyramidVertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: S16. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h new file mode 100644 index 0000000000..24fa03263b --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H__ +#define __ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H__ + +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Size2D.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform HOG Orientation Binning */ +class NEHOGOrientationBinningKernel : public INEKernel +{ +public: + /** Default constructor */ + NEHOGOrientationBinningKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGOrientationBinningKernel(const NEHOGOrientationBinningKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGOrientationBinningKernel &operator=(const NEHOGOrientationBinningKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHOGOrientationBinningKernel(NEHOGOrientationBinningKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHOGOrientationBinningKernel &operator=(NEHOGOrientationBinningKernel &&) = default; + /** Default destructor */ + ~NEHOGOrientationBinningKernel() = default; + + /** Initialise the kernel's inputs, output and HOG's metadata + * + * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. + * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 + * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. 
Number of channels supported: equal to the number of histogram bins per cell + * @param[in] hog_info HOG's metadata + */ + void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised orientation binning functions + * + * @param[in] mag_row_ptr Pointer to the first row of the cell in the magnitude tensor + * @param[in] phase_row_ptr Pointer to the first row of the cell in the phase tensor + * @param[out] output_ptr Pointer to the output cell of hog space tensor + * @param[in] mag_stride Stride of the magnitude tensor + * @param[in] phase_stride Stride of the phase tensor + * @param[in] cell_width Width of the cell + * @param[in] cell_height Height of the cell + * @param[in] num_bins Number of bins for each cell + * @param[in] phase_scale Scale factor to apply to the phase in order to calculate the histogram index + */ + using OrientBinFunc = void(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, size_t mag_stride, size_t phase_stride, size_t cell_width, + size_t cell_height, size_t num_bins, float phase_scale); + /** Orientation binning function to use for the particular cell width passed to configure() */ + OrientBinFunc *_func; + const ITensor *_input_magnitude; + const ITensor *_input_phase; + ITensor *_output; + size_t _cell_width; + size_t _cell_height; + size_t _num_bins; + float _phase_scale; +}; + +/** NEON kernel to perform HOG block normalization */ +class NEHOGBlockNormalizationKernel : public INEKernel +{ +public: + /** Default constructor */ + NEHOGBlockNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGBlockNormalizationKernel(const NEHOGBlockNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied
(As this class contains pointers) */ + NEHOGBlockNormalizationKernel &operator=(const NEHOGBlockNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHOGBlockNormalizationKernel(NEHOGBlockNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHOGBlockNormalizationKernel &operator=(NEHOGBlockNormalizationKernel &&) = default; + /** Default destructor */ + ~NEHOGBlockNormalizationKernel() = default; + + /** Initialise the kernel's input, output and HOG's metadata + * + * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog_info HOG's metadata + */ + void configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised block normalization functions + * + * @param[in] input_row_ptr Pointer to the first row of the block in the input hog space tensor + * @param[out] output_ptr Pointer to the output block of the hog normalized space + * @param[in] input_stride Stride of the input hog space tensor + * @param[in] num_cells_per_block_height Number of cells per block along the Y direction + * @param[in] num_bins_block_x Number of bins per block along the X direction + * @param[in] num_bins_block Number of total bins per block + * @param[in] l2_hyst_threshold Threshold to use for l2 hysteresis normalization + */ + using BlockNormFunc = void(const float *input_row_ptr, float *output_ptr, size_t input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block, + float l2_hyst_threshold); + /** Block normalization function 
to use for the particular normalization type passed to configure() */ + BlockNormFunc *_func; + const ITensor *_input; + ITensor *_output; + Size2D _num_cells_per_block; + Size2D _num_cells_per_block_stride; + size_t _num_bins; + float _l2_hyst_threshold; +}; +} +#endif /* __ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h new file mode 100644 index 0000000000..bda213b9a3 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEHOGDETECTORKERNEL_H__ +#define __ARM_COMPUTE_NEHOGDETECTORKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/NEON/INEKernel.h" + +#include <mutex> + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform HOG detector kernel using linear SVM */ +class NEHOGDetectorKernel : public INEKernel +{ +public: + /** Default constructor */ + NEHOGDetectorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDetectorKernel(const NEHOGDetectorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDetectorKernel &operator=(const NEHOGDetectorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHOGDetectorKernel(NEHOGDetectorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHOGDetectorKernel &operator=(NEHOGDetectorKernel &&) = default; + /** Default destructor */ + ~NEHOGDetectorKernel() = default; + + /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect + * + * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref NEHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog HOG data object used by @ref NEHOGOrientationBinningKernel and @ref NEHOGBlockNormalizationKernel + * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+ * It must be a multiple of the hog->info()->block_stride() + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to + */ + void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input; + IDetectionWindowArray *_detection_windows; + const float *_hog_descriptor; + float _bias; + float _threshold; + uint16_t _idx_class; + size_t _num_bins_per_descriptor_x; + size_t _num_blocks_per_descriptor_y; + size_t _block_stride_width; + size_t _block_stride_height; + size_t _detection_window_width; + size_t _detection_window_height; + std::mutex _mutex; +}; +} + +#endif /* __ARM_COMPUTE_NEHOGDETECTORKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEHOGNonMaximaSuppressionKernel.h b/arm_compute/core/NEON/kernels/NEHOGNonMaximaSuppressionKernel.h new file mode 100644 index 0000000000..c602f06b8e --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEHOGNonMaximaSuppressionKernel.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEHOGNONMAXIMASUPPRESSIONKERNEL_H__ +#define __ARM_COMPUTE_NEHOGNONMAXIMASUPPRESSIONKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +/** NEON kernel to perform in-place computation of euclidean distance based non-maxima suppression for HOG + * + * @note This kernel is meant to be used alongside HOG and performs a non-maxima suppression on a + * HOG detection window. 
+ */ +class NEHOGNonMaximaSuppressionKernel : public INEKernel +{ +public: + /** Default constructor */ + NEHOGNonMaximaSuppressionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGNonMaximaSuppressionKernel(const NEHOGNonMaximaSuppressionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGNonMaximaSuppressionKernel &operator=(const NEHOGNonMaximaSuppressionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHOGNonMaximaSuppressionKernel(NEHOGNonMaximaSuppressionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHOGNonMaximaSuppressionKernel &operator=(NEHOGNonMaximaSuppressionKernel &&) = default; + /** Initialise the kernel's input, output and the euclidean minimum distance + * + * @param[in, out] input_output Input/Output array of @ref DetectionWindow + * @param[in] min_distance Radial Euclidean distance for non-maxima suppression + */ + void configure(IDetectionWindowArray *input_output, float min_distance); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + IDetectionWindowArray *_input_output; + float _min_distance; +}; +} + +#endif /* __ARM_COMPUTE_NEHOGNONMAXIMASUPPRESSIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h new file mode 100644 index 0000000000..34e45886ac --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEHARRISCORNERSKERNEL_H__ +#define __ARM_COMPUTE_NEHARRISCORNERSKERNEL_H__ + +#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" +#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/INEKernel.h" + +#include <cstdint> +#include <mutex> + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Common interface for all Harris Score kernels */ +class INEHarrisScoreKernel : public INEKernel +{ +public: + /** Default constructor */ + INEHarrisScoreKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEHarrisScoreKernel(const INEHarrisScoreKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEHarrisScoreKernel &operator=(const INEHarrisScoreKernel &) = delete; + /** Allow instances of this class to be moved */ + INEHarrisScoreKernel(INEHarrisScoreKernel &&) = default; + /** Allow instances of this class to be moved */ + INEHarrisScoreKernel &operator=(INEHarrisScoreKernel &&) = default; + /** Default destructor */ + ~INEHarrisScoreKernel() = default; + +public: + /** Setup the kernel parameters + * + * @param[in] input1 Source image (gradient X). Data types supported: S16, S32 + * @param[in] input2 Source image (gradient Y). Data types supported: same as @p input1 + * @param[out] output Destination image (harris score). Data types supported: F32 + * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) + * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + virtual void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) = 0; + +protected: + const IImage *_input1; /**< Source image - Gx component */ + const IImage *_input2; /**< Source image - Gy component */ + IImage *_output; /**< Destination image - Harris score */ + float _sensitivity; /**< Sensitivity value */ + float _strength_thresh; /**< Threshold value */ + float _norm_factor; /**< Normalization factor */ +}; + +/** Template NEON kernel to perform Harris Score. + * The implementation supports 3, 5, and 7 for the block_size + */ +template <int32_t block_size> +class NEHarrisScoreKernel : public INEHarrisScoreKernel +{ +public: + /** Default constructor */ + NEHarrisScoreKernel(); + // Inherited methods overridden: + void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override; + BorderSize border_size() const override; + void run(const Window &window) override; + +private: + /** Common signature for all the specialised harris score functions */ + using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, + float norm_factor, float sensitivity, float strength_thresh); + /** Harris Score function to use for the particular image types passed to configure() */ + HarrisScoreFunction *_func; +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** Template NEON kernel to perform Harris Score using F16 */ +template <int32_t block_size> +class NEHarrisScoreFP16Kernel : public INEHarrisScoreKernel +{ +public: + /** Default constructor */ + NEHarrisScoreFP16Kernel(); + // Inherited methods overridden: + void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override; + BorderSize border_size() const override; + void
run(const Window &window) override; + +private: + using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, + float norm_factor, float sensitivity, float strength_thresh); + /** Harris Score function to use for the particular image types passed to configure() */ + HarrisScoreFunction *_func; +}; +#else +template <int32_t block_size> +using NEHarrisScoreFP16Kernel = NEHarrisScoreKernel<block_size>; +#endif +} +#endif /* __ARM_COMPUTE_NEHARRISCORNERSKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEHistogramKernel.h b/arm_compute/core/NEON/kernels/NEHistogramKernel.h new file mode 100644 index 0000000000..e11b41f1d0 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEHistogramKernel.h @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEHISTOGRAMKERNEL_H__ +#define __ARM_COMPUTE_NEHISTOGRAMKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +#include <cstddef> +#include <cstdint> +#include <mutex> + +namespace arm_compute +{ +class IDistribution1D; +class ITensor; +using IImage = ITensor; + +/** Interface for the histogram kernel */ +class NEHistogramKernel : public INEKernel +{ +public: + /** Default constructor */ + NEHistogramKernel(); + /** Default destructor */ + ~NEHistogramKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogramKernel(const NEHistogramKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogramKernel &operator=(const NEHistogramKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHistogramKernel(NEHistogramKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHistogramKernel &operator=(NEHistogramKernel &&) = default; + + /** Set the input image and the distribution output. + * + * @param[in] input Source image. Data type supported: U8. + * @param[out] output Destination distribution. + * @param[in,out] local_hist Array that the threads use to save their local histograms. + * Its size should be equal to (number_of_threads * num_bins), + * and the Window::thread_id() is used to determine the part of the array + * used by each thread. + * @param[out] window_lut LUT with pre-calculated possible window values. + * The size of the LUT should be equal to max_range_size and it will be filled + * during the configure stage, while it is re-used in every run, and can therefore be + * safely shared among threads. + */ + void configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut); + /** Set the input image and the distribution output. + * + * @note Used for histogram of fixed size equal to 256 + * + * @param[in] input Source image. Data type supported: U8.
+ * @param[out] output Destination distribution which must have 256 bins. + */ + void configure(const IImage *input, IDistribution1D *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Function to merge multiple partial histograms. + * + * @param[out] global_hist Pointer to the final histogram. + * @param[in] local_hist Pointer to the partial histograms. + * @param[in] bins Number of bins. + */ + void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins); + /** Function to merge multiple minimum values of partial histograms. + * + * @param[out] global_min Pointer to the global min value. + * @param[in] local_min Local min value. + */ + void merge_min(uint8_t *global_min, const uint8_t &local_min); + /** Function to perform histogram on the given window + * + * @param[in] win Region on which to execute the kernel + */ + void histogram_U8(const Window &win); + /** Function to perform histogram on the given window where histogram is + * of fixed size 256 without ranges and offsets. + * + * @param[in] win Region on which to execute the kernel + */ + void histogram_fixed_U8(const Window &win); + /** Pre-calculate the pixel windowing for every possible pixel + * + * Calculate (V - offset) * numBins / range where V is every possible pixel value. + * + * @note We currently support U8 image thus possible pixel values are between 0 and 255 + */ + void calculate_window_lut() const; + /** Common signature for all the specialised Histogram functions + * + * @param[in] window Region on which to execute the kernel.
+ */ + using HistogramFunction = void (NEHistogramKernel::*)(const Window &window); + /** Histogram function to use for the particular image types passed to configure() */ + HistogramFunction _func; + +private: + const IImage *_input; + IDistribution1D *_output; + uint32_t *_local_hist; + uint32_t *_window_lut; + std::mutex _hist_mtx; + static constexpr unsigned int _max_range_size{ 256 }; //< 256 possible pixel values as we handle only U8 images +}; + +/** Interface for the histogram border handling kernel. + * + * @note If the image width is not a multiple of the number of elements processed by @ref NEHistogramKernel + * this kernel is used to handle the leftover columns. + */ +class NEHistogramBorderKernel : public INEKernel +{ +public: + /** Default constructor */ + NEHistogramBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogramBorderKernel(const NEHistogramBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogramBorderKernel &operator=(const NEHistogramBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHistogramBorderKernel(NEHistogramBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHistogramBorderKernel &operator=(NEHistogramBorderKernel &&) = default; + /** Default destructor */ + ~NEHistogramBorderKernel() = default; + + /** Set the input image and the distribution output. + * + * @param[in] input Source image. Data type supported: U8. + * @param[out] output Destination distribution. + * @param[in] window_lut LUT with precalculated possible window values. + * @param[in] hist_elements_per_thread Pixels per thread that the histogram kernel computes. + */ + void configure(const IImage *input, IDistribution1D *output, uint32_t *window_lut, const unsigned int hist_elements_per_thread); + /** Set the input image and the distribution output. 
+ * + * @note Used for histogram of fixed size equal to 256 + * + * @param[in] input Source image. Data type supported: U8. + * @param[out] output Destination distribution. + * @param[in] hist_elements_per_thread Pixels per thread that the histogram kernel computes. + */ + void configure(const IImage *input, IDistribution1D *output, const unsigned int hist_elements_per_thread); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + /** Function to perform histogram on the given window + * + * @param[in] win Region on which to execute the kernel + */ + void histogram_U8(const Window &win); + /** Function to perform histogram on the given window where histogram is + * of fixed size 256 without ranges and offsets. + * + * @param[in] win Region on which to execute the kernel + */ + void histogram_fixed_U8(const Window &win); + /** Common signature for all the specialised Histogram functions + * + * @param[in] window Region on which to execute the kernel. + */ + using HistogramBorderFunction = void (NEHistogramBorderKernel::*)(const Window &window); + /** Histogram function to use for the particular image types passed to configure() */ + HistogramBorderFunction _func; + +private: + const IImage *_input; + IDistribution1D *_output; + uint32_t *_window_lut; + static constexpr unsigned int _max_range_size{ 256 }; //< 256 possible pixel values as we handle only U8 images +}; +} + +#endif /*__ARM_COMPUTE_NEHISTOGRAMKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h new file mode 100644 index 0000000000..ba5077a487 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEIM2COLKERNEL_H__ +#define __ARM_COMPUTE_NEIM2COLKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the im2col reshape kernel. + * + * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. + * It is used to transform a convolution to a plain matrix multiplication. 
+ * + * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccc} + * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ + * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ + * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ + * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + */ +class NEIm2ColKernel : public INEKernel +{ +public: + /** Default constructor */ + NEIm2ColKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIm2ColKernel(const NEIm2ColKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIm2ColKernel &operator=(const NEIm2ColKernel &) = delete; + /** Allow instances of this class to be moved */ + NEIm2ColKernel(NEIm2ColKernel &&) = default; + /** Allow instances of this class to be moved */ + NEIm2ColKernel &operator=(NEIm2ColKernel &&) = default; + /** Default destructor */ + ~NEIm2ColKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F32 + * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] convolved_dims The convolved output dimensions. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. 
+ */ + void configure(const ITensor *input, ITensor *output, std::pair<unsigned int, unsigned int> convolved_dims, const PadStrideInfo &conv_info, bool has_bias); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Run the im2col used for the convolution layer case + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + void run_reduced(const Window &window); + /** Run the im2col optimised for the fully connected layer case + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + void run_generic(const Window &window); + /** Common signature for all the specialised im2col functions + * + * @param[in] window Region on which to execute the kernel. + */ + using Im2ColFunctionPtr = void (NEIm2ColKernel::*)(const Window &window); + + Im2ColFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + std::pair<unsigned int, unsigned int> _convolved_dims; + PadStrideInfo _conv_info; + unsigned int _kernel_size; + bool _has_bias; +}; +} +#endif /*__ARM_COMPUTE_NEIM2COLKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h b/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h new file mode 100644 index 0000000000..13647889ab --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H__ +#define __ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Kernel to perform an image integral on an image */ +class NEIntegralImageKernel : public INESimpleKernel +{ +public: + /** Set the source and destination of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor.
Data type supported: U32 + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + bool is_parallelisable() const override; +}; +} +#endif /*__ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h new file mode 100644 index 0000000000..e578a6afdb --- /dev/null +++ b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NELKTRACKERKERNEL_H__ +#define __ARM_COMPUTE_NELKTRACKERKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +#include +#include +#include +#include + +namespace arm_compute +{ +class ITensor; + +/** Internal keypoint structure for Lucas-Kanade Optical Flow */ +struct NELKInternalKeypoint +{ + float x{ 0.f }; /**< x coordinate of the keypoint */ + float y{ 0.f }; /**< y coordinate of the keypoint */ + bool tracking_status{ false }; /**< the tracking status of the keypoint */ +}; + +using INELKInternalKeypointArray = IArray; + +/** Interface for the Lucas-Kanade tracker kernel */ +class NELKTrackerKernel : public INEKernel +{ +public: + /** Default constructor */ + NELKTrackerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELKTrackerKernel(const NELKTrackerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELKTrackerKernel &operator=(const NELKTrackerKernel &) = delete; + /** Allow instances of this class to be moved */ + NELKTrackerKernel(NELKTrackerKernel &&) = default; + /** Allow instances of this class to be moved */ + NELKTrackerKernel &operator=(NELKTrackerKernel &&) = default; + /** Default destructor */ + ~NELKTrackerKernel() = default; + + /** Initialise the kernel input and output + * + * @param[in] input_old Pointer to the input old tensor. Data type supported: U8 + * @param[in] input_new Pointer to the input new tensor. Data type supported: U8 + * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data type supported: S16 + * @param[in] old_scharr_gy Pointer to the input scharr Y tensor.
Data type supported: S16 + * @param[in] old_points Pointer to the IKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points + * @param[out] new_points Pointer to the IKeyPointArray storing new key points + * @param[in,out] old_points_internal Pointer to the array of NELKInternalKeypoint for old points + * @param[out] new_points_internal Pointer to the array of NELKInternalKeypoint for new points + * @param[in] termination The criteria to terminate the search of each keypoint. + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + * @param[in] num_levels The number of pyramid levels + * @param[in] pyramid_scale Scale factor used for generating the pyramid + * @param[in] border_offset The offset used to define the boundary of the tracked pixels in different border modes + */ + void configure(const ITensor *input_old, const ITensor *input_new, const ITensor *old_scharr_gx, const ITensor *old_scharr_gy, + const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, IKeyPointArray *new_points, + INELKInternalKeypointArray *old_points_internal, INELKInternalKeypointArray *new_points_internal, + Termination termination, bool use_initial_estimate, float epsilon, unsigned int num_iterations, size_t window_dimension, + size_t level, size_t num_levels, float pyramid_scale, int32_t border_offset); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Initialise the array of keypoints in the provided range + * + * @param[in] start Index of first element in the
keypoints array to be initialised + * @param[in] end Index after last element in the keypoints array to be initialised + */ + void init_keypoints(int start, int end); + /** Compute the structure tensor A^T * A based on the scharr gradients I_x and I_y + * + * @param[in] keypoint Keypoint for which gradients are computed + * @param[out] bilinear_ix Intermediate interpolated data for X gradient + * @param[out] bilinear_iy Intermediate interpolated data for Y gradient + * + * @return Values A11, A12, A22 + */ + std::tuple compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int *bilinear_ix, int *bilinear_iy); + /** Compute the vector A^T * b, i.e. -sum(I_d * I_t) for d in {x,y} + * + * @param[in] old_keypoint Old keypoint for which gradient is computed + * @param[in] new_keypoint New keypoint for which gradient is computed + * @param[in] bilinear_ix Intermediate interpolated data for X gradient + * @param[in] bilinear_iy Intermediate interpolated data for Y gradient + * + * @return Values b1, b2 + */ + std::pair compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int *bilinear_ix, const int *bilinear_iy); + + const ITensor *_input_old; + const ITensor *_input_new; + const ITensor *_old_scharr_gx; + const ITensor *_old_scharr_gy; + IKeyPointArray *_new_points; + const IKeyPointArray *_new_points_estimates; + const IKeyPointArray *_old_points; + INELKInternalKeypointArray *_old_points_internal; + INELKInternalKeypointArray *_new_points_internal; + Termination _termination; + bool _use_initial_estimate; + float _pyramid_scale; + float _epsilon; + unsigned int _num_iterations; + int _window_dimension; + size_t _level; + size_t _num_levels; + int32_t _border_offset; +}; +} +#endif /*__ARM_COMPUTE_NELKTRACKERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h new file mode 100644 index
0000000000..5d49901dd0 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H__ +#define __ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Template interface for the kernel to compute magnitude and phase */ +template +class NEMagnitudePhaseKernel : public INEKernel +{ +public: + /** Default constructor */ + NEMagnitudePhaseKernel(); + /** Destructor */ + ~NEMagnitudePhaseKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitudePhaseKernel(const NEMagnitudePhaseKernel &) = delete; + /** Default move constructor */ + NEMagnitudePhaseKernel(NEMagnitudePhaseKernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitudePhaseKernel &operator=(const NEMagnitudePhaseKernel &) = delete; + /** Default move assignment operator */ + NEMagnitudePhaseKernel &operator=(NEMagnitudePhaseKernel &&) = default; + + /** Initialise the kernel's inputs and outputs. + * + * @note At least one of @p magnitude or @p phase must be set + * + * @param[in] gx Gradient X tensor. Data type supported: S16. + * @param[in] gy Gradient Y tensor. Data type supported: S16. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16. + * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8.
+ */ + void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Function to perform magnitude on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void magnitude(const Window &window); + /** Function to perform phase on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void phase(const Window &window); + /** Function to perform magnitude and phase on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void magnitude_phase(const Window &window); + +private: + /** Common signature for all the specialised MagnitudePhase functions + * + * @param[in] window Region on which to execute the kernel. + */ + using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseKernel::*)(const Window &window); + /** MagnitudePhase function to use for the particular formats passed to configure() */ + MagnitudePhaseFunctionPtr _func; + const ITensor *_gx; /**< Input gradient X */ + const ITensor *_gy; /**< Input gradient Y */ + ITensor *_magnitude; /**< Output - Magnitude */ + ITensor *_phase; /**< Output - Phase */ +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** Template interface for the kernel to compute magnitude and phase (declared only when ARM_COMPUTE_ENABLE_FP16 is defined) */ +template +class NEMagnitudePhaseFP16Kernel : public INEKernel +{ +public: + /** Default constructor */ + NEMagnitudePhaseFP16Kernel(); + /** Destructor */ + ~NEMagnitudePhaseFP16Kernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitudePhaseFP16Kernel(const NEMagnitudePhaseFP16Kernel &) = delete; + /** Default move constructor */ + NEMagnitudePhaseFP16Kernel(NEMagnitudePhaseFP16Kernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitudePhaseFP16Kernel &operator=(const NEMagnitudePhaseFP16Kernel &)
= delete; + /** Default move assignment operator */ + NEMagnitudePhaseFP16Kernel &operator=(NEMagnitudePhaseFP16Kernel &&) = default; + + /** Initialise the kernel's inputs and outputs. + * + * @note At least one of @p magnitude or @p phase must be set + * + * @param[in] gx Gradient X tensor. Data type supported: S16. + * @param[in] gy Gradient Y tensor. Data type supported: S16. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16. + * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8. + */ + void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Function to perform magnitude on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void magnitude(const Window &window); + /** Function to perform phase on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void phase(const Window &window); + /** Function to perform magnitude and phase on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void magnitude_phase(const Window &window); + + /** Common signature for all the specialised MagnitudePhase functions + * + * @param[in] window Region on which to execute the kernel.
+ */ + using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseFP16Kernel::*)(const Window &window); + /** MagnitudePhase function to use for the particular formats passed to configure() */ + MagnitudePhaseFunctionPtr _func; + const ITensor *_gx; /**< Input gradient X */ + const ITensor *_gy; /**< Input gradient Y */ + ITensor *_magnitude; /**< Output - Magnitude */ + ITensor *_phase; /**< Output - Phase */ +}; +#else +template +using NEMagnitudePhaseFP16Kernel = NEMagnitudePhaseKernel; +#endif +} +#endif /* __ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h new file mode 100644 index 0000000000..83407ccb7d --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEMEANSTDDEVKERNEL_H__ +#define __ARM_COMPUTE_NEMEANSTDDEVKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +#include +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */ +class NEMeanStdDevKernel : public INEKernel +{ +public: + /** Default constructor */ + NEMeanStdDevKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevKernel(const NEMeanStdDevKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevKernel &operator=(const NEMeanStdDevKernel &) = delete; + /** Allow instances of this class to be moved (NOTE(review): the std::mutex member is not movable, so this defaulted operation is presumably defined as deleted - confirm) */ + NEMeanStdDevKernel(NEMeanStdDevKernel &&) = default; + /** Allow instances of this class to be moved (NOTE(review): the std::mutex member is not movable, so this defaulted operation is presumably defined as deleted - confirm) */ + NEMeanStdDevKernel &operator=(NEMeanStdDevKernel &&) = default; + /** Default destructor */ + ~NEMeanStdDevKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input image. Data type supported: U8. + * @param[out] mean Output average pixel value. + * @param[out] global_sum Keeps global sum of pixel values. + * @param[out] stddev (Optional) Output standard deviation of pixel values. + * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values.
+ */ + void configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev = nullptr, uint64_t *global_sum_squared = nullptr); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IImage *_input; + float *_mean; + float *_stddev; + uint64_t *_global_sum; + uint64_t *_global_sum_squared; + std::mutex _mtx; +}; +} +#endif /* __ARM_COMPUTE_NEMEANSTDDEVKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h new file mode 100644 index 0000000000..dee1aadfb9 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEMEDIAN3x3KERNEL_H__ +#define __ARM_COMPUTE_NEMEDIAN3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Kernel to perform a 3x3 median filter on a tensor */ +class NEMedian3x3Kernel : public INESimpleKernel +{ +public: + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_NEMEDIAN3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h new file mode 100644 index 0000000000..b18dc001b0 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H__ +#define __ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/INEKernel.h" + +#include +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Interface for the kernel to perform min max search on an image. */ +class NEMinMaxKernel : public INEKernel +{ +public: + /** Default constructor */ + NEMinMaxKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxKernel(const NEMinMaxKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxKernel &operator=(const NEMinMaxKernel &) = delete; + /** Allow instances of this class to be moved (NOTE(review): the std::mutex member is not movable, so this defaulted operation is presumably defined as deleted - confirm) */ + NEMinMaxKernel(NEMinMaxKernel &&) = default; + /** Allow instances of this class to be moved (NOTE(review): the std::mutex member is not movable, so this defaulted operation is presumably defined as deleted - confirm) */ + NEMinMaxKernel &operator=(NEMinMaxKernel &&) = default; + /** Default destructor */ + ~NEMinMaxKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input Image. Data types supported: U8/S16. + * @param[out] min Minimum value of image. + * @param[out] max Maximum value of image. + */ + void configure(const IImage *input, int32_t *min, int32_t *max); + /** Resets global minimum and maximum.
*/ + void reset(); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Performs the min/max algorithm on U8 images on a given window. + * + * @param[in] win The window to run the algorithm on. + */ + void minmax_U8(const Window &win); + /** Performs the min/max algorithm on S16 images on a given window. + * + * @param[in] win The window to run the algorithm on. + */ + void minmax_S16(const Window &win); + /** Common signature for all the specialised MinMax functions + * + * @param[in] window Region on which to execute the kernel. + */ + using MinMaxFunction = void (NEMinMaxKernel::*)(const Window &window); + /** MinMax function to use for the particular image types passed to configure() */ + MinMaxFunction _func; + /** Helper to update min/max values */ + template + void update_min_max(T min, T max); + + const IImage *_input; /**< Input image. */ + int32_t *_min; /**< Minimum value. */ + int32_t *_max; /**< Maximum value. */ + int32_t _min_init; /**< Value to initialise global minimum value. */ + int32_t _max_init; /**< Value to initialise global maximum value. */ + std::mutex _mtx; /**< Mutex used for result reduction. */ +}; + +/** Interface for the kernel to find min max locations of an image.
*/ +class NEMinMaxLocationKernel : public INEKernel +{ +public: + /** Default constructor */ + NEMinMaxLocationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocationKernel(const NEMinMaxLocationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocationKernel &operator=(const NEMinMaxLocationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEMinMaxLocationKernel(NEMinMaxLocationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEMinMaxLocationKernel &operator=(NEMinMaxLocationKernel &&) = default; + /** Default destructor */ + ~NEMinMaxLocationKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input Image. Data types supported: U8 or S16. + * @param[out] min Minimum value of image. + * @param[out] max Maximum value of image. + * @param[out] min_loc (Optional) Array of minimum value locations. + * @param[out] max_loc (Optional) Array of maximum value locations. + * @param[out] min_count (Optional) Number of minimum value encounters. + * @param[out] max_count (Optional) Number of maximum value encounters. + */ + void configure(const IImage *input, int32_t *min, int32_t *max, + ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr, + uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + /** Performs the min/max location algorithm on T type images on a given window. + * + * @param[in] win The window to run the algorithm on. + */ + template + void minmax_loc(const Window &win); + /** Common signature for all the specialised MinMaxLoc functions + * + * @param[in] window Region on which to execute the kernel.
+ */ + using MinMaxLocFunction = void (NEMinMaxLocationKernel::*)(const Window &window); + /** MinMaxLoc function to use for the particular image types passed to configure() */ + MinMaxLocFunction _func; + /** Helper to create a function pointer table for the parameterized MinMaxLocation functions. */ + template + struct create_func_table; + + const IImage *_input; /**< Input image. */ + int32_t *_min; /**< Minimum value. */ + int32_t *_max; /**< Maximum value. */ + uint32_t *_min_count; /**< Count of minimum value encounters. */ + uint32_t *_max_count; /**< Count of maximum value encounters. */ + ICoordinates2DArray *_min_loc; /**< Locations of minimum values. */ + ICoordinates2DArray *_max_loc; /**< Locations of maximum values. */ + unsigned int _processed_elements; /**< Elements processed per iteration. */ +}; +} +#endif /*__ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h b/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h new file mode 100644 index 0000000000..ede0294a73 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NENONLINEARFILTERKERNEL_H__ +#define __ARM_COMPUTE_NENONLINEARFILTERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to apply a non-linear filter */ +class NENonLinearFilterKernel : public INEKernel +{ +public: + /** Default constructor */ + NENonLinearFilterKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonLinearFilterKernel(NENonLinearFilterKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &) = delete; + /** Allow instances of this class to be moved */ + NENonLinearFilterKernel(NENonLinearFilterKernel &&) = default; + /** Allow instances of this class to be moved */ + NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &&) = default; + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_undefined True if the border mode is undefined. 
False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Fill mask with the corresponding given pattern. + * + * @param[in,out] mask Mask to be filled according to pattern + * @param[in] cols Columns (width) of mask + * @param[in] rows Rows (height) of mask + * @param[in] pattern Pattern to fill the mask according to + */ + void fill_mask(uint8_t *mask, int cols, int rows, MatrixPattern pattern); + /** Apply a median filter when given mask pattern is defined as box. + * + * @param[in] win Window to apply the filter on. + */ + template + void median_filter_box(const Window &win); + /** Apply a min filter when given mask pattern is defined as box. + * + * @param[in] win Window to apply the filter on. + */ + template + void min_filter_box(const Window &win); + /** Apply a max filter when given mask pattern is defined as box. + * + * @param[in] win Window to apply the filter on. + */ + template + void max_filter_box(const Window &win); + /** Apply a median filter when given mask pattern is defined as cross. + * + * @param[in] win Window to apply the filter on. + */ + template + void median_filter_cross(const Window &win); + /** Apply a min filter when given mask pattern is defined as cross. + * + * @param[in] win Window to apply the filter on. + */ + template + void min_filter_cross(const Window &win); + /** Apply a max filter when given mask pattern is defined as cross. + * + * @param[in] win Window to apply the filter on. + */ + template + void max_filter_cross(const Window &win); + /** Apply a median filter when given mask pattern is defined as disk. + * + * @param[in] win Window to apply the filter on. 
+ */ + template + void median_filter_disk(const Window &win); + /** Apply a min filter when given mask pattern is defined as disk. + * + * @param[in] win Window to apply the filter on. + */ + template + void min_filter_disk(const Window &win); + /** Apply a max filter when given mask pattern is defined as disk. + * + * @param[in] win Window to apply the filter on. + */ + template + void max_filter_disk(const Window &win); + /** Apply a non-linear filter when given mask has user-defined pattern. + * + * @param[in] win Window to apply the filter on. + */ + template + void non_linear_filter_generic(const Window &win); + +private: + unsigned int _border_width; + const ITensor *_input; + ITensor *_output; + const uint8_t *_mask; + MatrixPattern _pattern; + NonLinearFilterFunction _function; + unsigned int _func_idx; + BorderSize _border_size; +}; +} +#endif /*__ARM_COMPUTE_NENONLINEARFILTERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h new file mode 100644 index 0000000000..e74f9c207d --- /dev/null +++ b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H__ +#define __ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Interface to perform Non-Maxima suppression over a 3x3 window using NEON + * + * @note Used by @ref NEFastCorners and @ref NEHarrisCorners + */ +class NENonMaximaSuppression3x3Kernel : public INEKernel +{ +public: + /** Default constructor */ + NENonMaximaSuppression3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonMaximaSuppression3x3Kernel(const NENonMaximaSuppression3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonMaximaSuppression3x3Kernel &operator=(const NENonMaximaSuppression3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NENonMaximaSuppression3x3Kernel(NENonMaximaSuppression3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NENonMaximaSuppression3x3Kernel &operator=(NENonMaximaSuppression3x3Kernel &&) = default; + /** Default destructor */ + ~NENonMaximaSuppression3x3Kernel() = default; + + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor) + * @param[out] output Destination tensor.
Data types supported: same as @p input + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +protected: + /** Common signature for all the specialised non-maxima suppression 3x3 functions + * + * @param[in] input_ptr Pointer to the input tensor. + * @param[out] output_ptr Pointer to the output tensor. + * @param[in] input_stride Stride of the input tensor. + */ + using NonMaxSuppr3x3Function = void(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride); + /** Non-Maxima suppression function to use for the particular tensor types passed to configure() */ + NonMaxSuppr3x3Function *_func; + + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** NEON kernel to perform Non-Maxima suppression 3x3; a distinct class only when ARM_COMPUTE_ENABLE_FP16 is defined, otherwise an alias of @ref NENonMaximaSuppression3x3Kernel + */ +class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel +{ +public: + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor) + * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); +}; +#else +using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel; +#endif +} +#endif /* __ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h new file mode 100644 index 0000000000..18d198c0e7 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the normalization layer kernel.
+ */ +class NENormalizationLayerKernel : public INEKernel +{ +public: + /** Default constructor */ + NENormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayerKernel(const NENormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete; + /** Default move constructor. */ + NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default; + /** Default destructor */ + ~NENormalizationLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F32. + * @param[in] input_squared Source tensor in which each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM]. + * Data type supported: same as @p input + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Function to perform normalization depending on the given template dimension. + * + * @note Only normalization across X and Z is currently supported and tested. + * + * @param[in] window Region on which to execute the kernel.
+ */ + template + void normalize(const Window &window); + /** Common signature for all the specialised normalization functions + * + * @param[in] window Region on which to execute the kernel. + */ + using NormalizationFunction = void (NENormalizationLayerKernel::*)(const Window &window); + +private: + NormalizationFunction _func; + const ITensor *_input; + const ITensor *_input_squared; + ITensor *_output; + NormalizationLayerInfo _norm_info; + BorderSize _border_size; +}; +} +#endif /*__ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h new file mode 100644 index 0000000000..0891d0c6be --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H__ +#define __ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform pixel-wise multiplication between two tensors */ +class NEPixelWiseMultiplicationKernel : public INEKernel +{ +public: + /** Default constructor */ + NEPixelWiseMultiplicationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPixelWiseMultiplicationKernel(const NEPixelWiseMultiplicationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPixelWiseMultiplicationKernel &operator=(const NEPixelWiseMultiplicationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPixelWiseMultiplicationKernel(NEPixelWiseMultiplicationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPixelWiseMultiplicationKernel &operator=(NEPixelWiseMultiplicationKernel &&) = default; + /** Default destructor */ + ~NEPixelWiseMultiplicationKernel() = default; + + /** Initialise the kernel's inputs, output, scale and overflow/rounding policies. + * + * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. + * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. + * + * @param[in] input1 An input tensor. Data types supported: U8, S16, F32. + * @param[in] input2 An input tensor. Data types supported: U8, S16, F32. + * @param[out] output The output tensor.
Data types supported: U8 (Only if both inputs are U8), S16, F32. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + * @param[in] overflow_policy Overflow policy. + * @param[in] rounding_policy Rounding policy. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised multiplication functions with integer scaling factor (the trailing @p scale argument) + * + * @param[in] input1_ptr Pointer to the first input tensor. + * @param[in] input2_ptr Pointer to the second input tensor. + * @param[out] output_ptr Pointer to the output tensor. + */ + using MulFunctionInt = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int scale); + /** Common signature for all the specialised multiplication functions with float scaling factor (the trailing @p scale argument) + * + * @param[in] input1_ptr Pointer to the first input tensor. + * @param[in] input2_ptr Pointer to the second input tensor. + * @param[out] output_ptr Pointer to the output tensor. 
+ */ + using MulFunctionFloat = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale); + + MulFunctionFloat *_func_float; + MulFunctionInt *_func_int; + +private: + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; + float _scale; + int _scale_exponent; +}; +} +#endif /*__ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h new file mode 100644 index 0000000000..728b2ffde3 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H__ +#define __ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the pooling layer kernel */ +class NEPoolingLayerKernel : public INEKernel +{ +public: + /** Default constructor */ + NEPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayerKernel(const NEPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayerKernel &operator=(const NEPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPoolingLayerKernel(NEPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPoolingLayerKernel &operator=(NEPoolingLayerKernel &&) = default; + /** Default destructor */ + ~NEPoolingLayerKernel() = default; + + /** Set the input and output tensors and the pooling configuration. + * + * @param[in] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Function to perform 2x2 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template + void pooling2(const Window &window_input, const Window &window); + /** Function to perform 3x3 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel.
+ */ + template + void pooling3(const Window &window_input, const Window &window); + /** Common signature for all the specialised pooling functions (matches pooling2() and pooling3()) + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + using PoolingFunction = void (NEPoolingLayerKernel::*)(const Window &window_input, const Window &window); + +private: + PoolingFunction _func; + const ITensor *_input; + ITensor *_output; + PoolingLayerInfo _pool_info; + BorderSize _border_size; +}; +} +#endif /*__ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NERemapKernel.h b/arm_compute/core/NEON/kernels/NERemapKernel.h new file mode 100644 index 0000000000..f9eae68ee8 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NERemapKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEREMAPKERNEL_H__ +#define __ARM_COMPUTE_NEREMAPKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a remap on a tensor */ +class NERemapKernel : public INEKernel +{ +public: + /** Default constructor */ + NERemapKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERemapKernel(const NERemapKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERemapKernel &operator=(const NERemapKernel &) = delete; + /** Allow instances of this class to be moved */ + NERemapKernel(NERemapKernel &&) = default; + /** Allow instances of this class to be moved */ + NERemapKernel &operator=(NERemapKernel &&) = default; + /** Default destructor */ + ~NERemapKernel() = default; + + /** Initialize the kernel's input, coordinate maps, output and interpolation policy. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] map_x Map for X coordinates. Data type supported: F32. + * @param[in] map_y Map for Y coordinates. Data type supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] policy The interpolation type.
+ */ + void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Function to perform nearest interpolation on the given window */ + void remap_nearest(const Window &window); + /** Function to perform bilinear interpolation on the given window */ + void remap_bilinear(const Window &window); + /** Remap function to use for the particular interpolation type passed to configure() */ + void (NERemapKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input image */ + ITensor *_output; /**< Output image */ + const ITensor *_map_x; /**< Input remap x coordinates */ + const ITensor *_map_y; /**< Input remap y coordinates */ +}; +} +#endif /*__ARM_COMPUTE_NEREMAPKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h new file mode 100644 index 0000000000..0f11e7e66e --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESCALEKERNEL_H__ +#define __ARM_COMPUTE_NESCALEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform scaling on a tensor */ +class NEScaleKernel : public INEKernel +{ +public: + /** Default constructor */ + NEScaleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScaleKernel(const NEScaleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScaleKernel &operator=(const NEScaleKernel &) = delete; + /** Allow instances of this class to be moved */ + NEScaleKernel(NEScaleKernel &&) = default; + /** Allow instances of this class to be moved */ + NEScaleKernel &operator=(NEScaleKernel &&) = default; + /** Default destructor */ + ~NEScaleKernel() = default; + + /** Initialise the kernel's inputs, output and interpolation policy + * + * @note dx, dy and offsets have the same dimensions (width and height) as the output tensor + * + * @param[in] input Source tensor. Data types supported: U8 or S16. + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer.
Data type supported: F32 + * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. + * @param[out] output Destination tensor. Data types supported: U8 or S16. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] policy Interpolation type to use + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, InterpolationPolicy policy, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Function to perform scale using nearest interpolation on the given window */ + void scale_nearest(const Window &window); + /** Function to perform scale using bilinear interpolation on the given window */ + void scale_bilinear(const Window &window); + /** Function to perform scale using area interpolation on the given window + * + * @note Used only in case of down-sampling. + */ + void scale_area(const Window &window); + /** Scale function to use for the particular interpolation type passed to configure() */ + void (NEScaleKernel::*_func)(const Window &window); + + const ITensor *_offsets; + const ITensor *_dx; + const ITensor *_dy; + const ITensor *_input; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_NESCALEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h b/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h new file mode 100644 index 0000000000..c618456d49 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESCHARR3x3KERNEL_H__ +#define __ARM_COMPUTE_NESCHARR3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. 
+ * +* @f[ +* \mathbf{G}_x=\begin{vmatrix} +* -3 & 0 & +3\\ +* -10& 0 & +10\\ +* -3 & 0 & +3 +* \end{vmatrix} +* @f] +*/ +class NEScharr3x3Kernel : public INEKernel +{ +public: + /** Default constructor */ + NEScharr3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScharr3x3Kernel(const NEScharr3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScharr3x3Kernel &operator=(const NEScharr3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEScharr3x3Kernel(NEScharr3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEScharr3x3Kernel &operator=(NEScharr3x3Kernel &&) = default; + /** Default destructor */ + ~NEScharr3x3Kernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of @p output_x or @p output_y must be set. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + bool _run_scharr_x; /**< Do we need to run Scharr X ? */ + bool _run_scharr_y; /**< Do we need to run Scharr Y ?
*/ + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< Output tensor for Scharr X */ + ITensor *_output_y; /**< Output tensor for Scharr Y */ +}; +} +#endif /*__ARM_COMPUTE_NESCHARR3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h b/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h new file mode 100644 index 0000000000..246dd83573 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOBEL3x3KERNEL_H__ +#define __ARM_COMPUTE_NESOBEL3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run a 3x3 Sobel filter (X and/or Y gradient) on a tensor.
+ * + * @f[ + * \mathbf{G}_x=\begin{vmatrix} + * -1 & 0 & +1\\ + * -2 & 0 & +2\\ + * -1 & 0 & +1 + * \end{vmatrix} + * @f] +*/ +class NESobel3x3Kernel : public INEKernel +{ +public: + /** Default constructor */ + NESobel3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel3x3Kernel(const NESobel3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel3x3Kernel &operator=(const NESobel3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel3x3Kernel(NESobel3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel3x3Kernel &operator=(NESobel3x3Kernel &&) = default; + /** Default destructor */ + ~NESobel3x3Kernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of @p output_x or @p output_y must be set. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ?
*/ + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< Output tensor for Sobel X */ + ITensor *_output_y; /**< Output tensor for Sobel Y */ +}; +} +#endif /*__ARM_COMPUTE_NESOBEL3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h b/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h new file mode 100644 index 0000000000..49c1c41e6d --- /dev/null +++ b/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOBEL5x5KERNEL_H__ +#define __ARM_COMPUTE_NESOBEL5x5KERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor.
+ * + */ +class NESobel5x5HorKernel : public INEKernel +{ +public: + /** Default constructor */ + NESobel5x5HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5HorKernel(const NESobel5x5HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5HorKernel &operator=(const NESobel5x5HorKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel5x5HorKernel(NESobel5x5HorKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel5x5HorKernel &operator=(NESobel5x5HorKernel &&) = default; + /** Default destructor */ + ~NESobel5x5HorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< X output of horizontal pass */ + ITensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor.
+ * +*/ +class NESobel5x5VertKernel : public INEKernel +{ +public: + /** Default constructor */ + NESobel5x5VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5VertKernel(const NESobel5x5VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5VertKernel &operator=(const NESobel5x5VertKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel5x5VertKernel(NESobel5x5VertKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel5x5VertKernel &operator=(NESobel5x5VertKernel &&) = default; + /** Default destructor */ + ~NESobel5x5VertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input_x Input for X (X output of hor pass). Data type supported: S16. + * @param[in] input_y Input for Y (Y output of hor pass). Data type supported: S16. + * @param[out] output_x Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + ITensor *_input_x; /**< X input (X output of the hor pass) */ + ITensor *_input_y; /**< Y input (Y output of the hor pass) */ + ITensor *_output_x; /**< X output of sobel */ + ITensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? 
*/ +}; +} +#endif /*__ARM_COMPUTE_NESOBEL5x5KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h b/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h new file mode 100644 index 0000000000..4bff8596b8 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOBEL7x7KERNEL_H__ +#define __ARM_COMPUTE_NESOBEL7x7KERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. 
+ * + */ +class NESobel7x7HorKernel : public INEKernel +{ +public: + /** Default constructor */ + NESobel7x7HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7HorKernel(const NESobel7x7HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7HorKernel &operator=(const NESobel7x7HorKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel7x7HorKernel(NESobel7x7HorKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel7x7HorKernel &operator=(NESobel7x7HorKernel &&) = default; + /** Default destructor */ + ~NESobel7x7HorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< X output of horizontal pass */ + ITensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor.
+ * +*/ +class NESobel7x7VertKernel : public INEKernel +{ +public: + /** Default constructor */ + NESobel7x7VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7VertKernel(const NESobel7x7VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7VertKernel &operator=(const NESobel7x7VertKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel7x7VertKernel(NESobel7x7VertKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel7x7VertKernel &operator=(NESobel7x7VertKernel &&) = default; + /** Default destructor */ + ~NESobel7x7VertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set + * @note If output_x is set then input_x must be set too + * @note If output_y is set then input_y must be set too + * + * @param[in] input_x (Optional) Input for X (X output of hor pass). Data type supported: S32. + * @param[in] input_y (Optional) Input for Y (Y output of hor pass). Data type supported: S32. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const ITensor *_input_x; /**< X input (X output of the hor pass) */ + const ITensor *_input_y; /**< Y input (Y output of the hor pass) */ + ITensor *_output_x; /**< X output of sobel */ + ITensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} +#endif /*__ARM_COMPUTE_NESOBEL7x7KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h new file mode 100644 index 0000000000..7df85581db --- /dev/null +++ b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H__ +#define __ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for identifying the max value of 1D Logits */ +class NELogits1DMaxKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NELogits1DMaxKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data types supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; +}; + +/** Interface for shifting the logits values around the max value and exponentiating the result */ +class NELogits1DShiftExpSumKernel : public INEKernel +{ +public: + /** Default constructor */ + NELogits1DShiftExpSumKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DShiftExpSumKernel(const NELogits1DShiftExpSumKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DShiftExpSumKernel &operator=(const NELogits1DShiftExpSumKernel &) = delete; + /** Allow instances of this class to be moved */ + NELogits1DShiftExpSumKernel(NELogits1DShiftExpSumKernel &&) = default; + /** Allow instances of this class to be moved */ + NELogits1DShiftExpSumKernel &operator=(NELogits1DShiftExpSumKernel &&) = default; + /** Default destructor */ + ~NELogits1DShiftExpSumKernel() = default; + + /** Set the input and output
tensors. + * + * @param[in] input Source tensor. Data types supported: F32. + * @param[in] max Max values tensor. Data types supported: same as @p input. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input. + */ + void configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input; + const ITensor *_max; + ITensor *_output; + ITensor *_sum; +}; + +/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */ +class NELogits1DNormKernel : public INEKernel +{ +public: + /** Default constructor */ + NELogits1DNormKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DNormKernel(const NELogits1DNormKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DNormKernel &operator=(const NELogits1DNormKernel &) = delete; + /** Allow instances of this class to be moved */ + NELogits1DNormKernel(NELogits1DNormKernel &&) = default; + /** Allow instances of this class to be moved */ + NELogits1DNormKernel &operator=(NELogits1DNormKernel &&) = default; + /** Default destructor */ + ~NELogits1DNormKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F32. + * @param[in] sum Sum tensor. The number of dimensions should be dim(input)-1. Data types supported: same as @p input. + * @param[out] output Destination tensor. Data types supported: same as @p input. 
+ */ + void configure(const ITensor *input, const ITensor *sum, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input; + const ITensor *_sum; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NETableLookupKernel.h b/arm_compute/core/NEON/kernels/NETableLookupKernel.h new file mode 100644 index 0000000000..499b87f0ba --- /dev/null +++ b/arm_compute/core/NEON/kernels/NETableLookupKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NETABLELOOKUPKERNEL_H__ +#define __ARM_COMPUTE_NETABLELOOKUPKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; +class ILut; + +/** Interface for the kernel to perform table lookup calculations. */ +class NETableLookupKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NETableLookupKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETableLookupKernel(const NETableLookupKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETableLookupKernel &operator=(const NETableLookupKernel &) = delete; + /** Allow instances of this class to be moved */ + NETableLookupKernel(NETableLookupKernel &&) = default; + /** Allow instances of this class to be moved */ + NETableLookupKernel &operator=(NETableLookupKernel &&) = default; + /** Initialise the kernel's input, lut and output. + * + * @param[in] input An input tensor. Data types supported: U8, S16. + * @param[in] lut The input LUT. + * @param[out] output The output tensor. Data types supported: same as @p input + */ + void configure(const ITensor *input, const ILut *lut, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Perform table lookup on a given window. + * + * @param[in] window Region on which to execute the kernel. + */ + template <class T> + void tableLookup(const Window &window); + /** Common signature for all the specialised lut functions + * + * @param[in] window Region on which to execute the kernel.
+ */ + using TableLookupFunction = void (NETableLookupKernel::*)(const Window &window); + /** Sub function to use for the particular tensor types passed to configure() */ + TableLookupFunction _func; + const ILut *_lut; +}; +} +#endif /* __ARM_COMPUTE_NETABLELOOKUPKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEThresholdKernel.h b/arm_compute/core/NEON/kernels/NEThresholdKernel.h new file mode 100644 index 0000000000..778176293f --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEThresholdKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NETHRESHOLDKERNEL_H__ +#define __ARM_COMPUTE_NETHRESHOLDKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Interface for the thresholding kernel + * + */ +class NEThresholdKernel : public INEKernel +{ +public: + /** Constructor + * Initialize all the pointers to nullptr and parameters to zero. + */ + NEThresholdKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEThresholdKernel(const NEThresholdKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEThresholdKernel &operator=(const NEThresholdKernel &) = delete; + /** Initialise the kernel's input, output and threshold parameters. + * + * @param[in] input An input tensor. Data type supported: U8 + * @param[out] output The output tensor. Data type supported: U8. + * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. + * @param[in] false_value value to set when the condition is not respected. + * @param[in] true_value value to set when the condition is respected. + * @param[in] type Thresholding type. Either RANGE or BINARY. + * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE.
+ */ + void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** run binary thresholding on the given window */ + void run_binary(const Window &window); + /** run range thresholding on the given window */ + void run_range(const Window &window); + + void (NEThresholdKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input */ + ITensor *_output; /**< Output */ + uint8_t _threshold; + uint8_t _false_value; + uint8_t _true_value; + uint8_t _upper; +}; +} +#endif /*__ARM_COMPUTE_NETHRESHOLDKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NETransposeKernel.h b/arm_compute/core/NEON/kernels/NETransposeKernel.h new file mode 100644 index 0000000000..4d8238366a --- /dev/null +++ b/arm_compute/core/NEON/kernels/NETransposeKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NETRANSPOSEKERNEL_H__ +#define __ARM_COMPUTE_NETRANSPOSEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel which transposes the elements of a matrix. + * + * [width, height, batch] -> [height, width, batch] + * + */ +class NETransposeKernel : public INEKernel +{ +public: + /** Default constructor */ + NETransposeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETransposeKernel(const NETransposeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETransposeKernel &operator=(const NETransposeKernel &) = delete; + /** Allow instances of this class to be moved */ + NETransposeKernel(NETransposeKernel &&) = default; + /** Allow instances of this class to be moved */ + NETransposeKernel &operator=(NETransposeKernel &&) = default; + /** Default destructor */ + ~NETransposeKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the transpose functions + * + * @param[in] input An input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[out] output The output tensor. Data type supported: same as @p input + * @param[in] window Region on which to execute the kernel. 
+ */ + using TransposeFunction = void(const ITensor *input, ITensor *output, const Window &window); + /** Transpose function to use for the particular tensor types passed to configure() */ + TransposeFunction *_func; + const ITensor *_input; + ITensor *_output; +}; +} +#endif /* __ARM_COMPUTE_NETRANSPOSEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEWarpKernel.h b/arm_compute/core/NEON/kernels/NEWarpKernel.h new file mode 100644 index 0000000000..10fed1d450 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEWarpKernel.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEWARPKERNEL_H__ +#define __ARM_COMPUTE_NEWARPKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Common interface for warp affine and warp perspective */ +class INEWarpKernel : public INEKernel +{ +public: + /** Default constructor */ + INEWarpKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEWarpKernel(const INEWarpKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEWarpKernel &operator=(const INEWarpKernel &) = delete; + /** Allow instances of this class to be moved */ + INEWarpKernel(INEWarpKernel &&) = default; + /** Allow instances of this class to be moved */ + INEWarpKernel &operator=(INEWarpKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] matrix The perspective or affine matrix to use. Must be 2x3 for affine and 3x3 for perspective of type float. + * @param[in] border_mode Strategy to use for borders + * @param[in] constant_border_value Constant value used for filling the border.
+ */ + virtual void configure(const ITensor *input, ITensor *output, const float *matrix, BorderMode border_mode, uint8_t constant_border_value); + + // Inherited methods overridden: + void run(const Window &window) override; + +protected: + /** function to perform warp affine or warp perspective on the given window when border mode == UNDEFINED + * + * @param[in] window Region on which to execute the kernel + */ + virtual void warp_undefined(const Window &window) = 0; + /** function to perform warp affine or warp perspective on the given window when border mode == CONSTANT + * + * @param[in] window Region on which to execute the kernel + */ + virtual void warp_constant(const Window &window) = 0; + /** function to perform warp affine or warp perspective on the given window when border mode == REPLICATE + * + * @param[in] window Region on which to execute the kernel + */ + virtual void warp_replicate(const Window &window) = 0; + /** Common signature for all the specialised warp functions + * + * @param[in] window Region on which to execute the kernel. + */ + void (INEWarpKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input Tensor */ + ITensor *_output; /**< Output Tensor */ + uint8_t _constant_border_value; /**< Constant value used for filling the border. This value is used for those pixels out of the ROI when the border mode is CONSTANT */ + const float *_matrix; /**< The affine or perspective matrix. Must be 2x3 for warp affine or 3x3 for warp perspective of type float. 
*/ +}; + +/** Template interface for the kernel to compute warp affine + * + */ +template <InterpolationPolicy interpolation> +class NEWarpAffineKernel : public INEWarpKernel +{ +private: + // Inherited methods overridden: + void warp_undefined(const Window &window) override; + void warp_constant(const Window &window) override; + void warp_replicate(const Window &window) override; +}; + +/** Template interface for the kernel to compute warp perspective + * + */ +template <InterpolationPolicy interpolation> +class NEWarpPerspectiveKernel : public INEWarpKernel +{ +private: + // Inherited methods overridden: + void warp_undefined(const Window &window) override; + void warp_constant(const Window &window) override; + void warp_replicate(const Window &window) override; +}; +} +#endif /*__ARM_COMPUTE_NEWARPKERNEL_H__ */ diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h new file mode 100644 index 0000000000..8a45444c57 --- /dev/null +++ b/arm_compute/core/PixelValue.h @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_PIXELVALUE_H__ +#define __ARM_COMPUTE_PIXELVALUE_H__ + +#include <cstdint> + +namespace arm_compute +{ +/** Class describing the value of a pixel for any image format. */ +class PixelValue +{ +public: + /** Default constructor: value initialized to 0 */ + PixelValue() + : value{ { 0 } } + { + } + /** Initialize the union with a U8 pixel value + * + * @param[in] v U8 value. + */ + PixelValue(uint8_t v) + : PixelValue() + { + value.u8 = v; + } + /** Initialize the union with a U16 pixel value + * + * @param[in] v U16 value. + */ + PixelValue(uint16_t v) + : PixelValue() + { + value.u16 = v; + } + /** Initialize the union with a S16 pixel value + * + * @param[in] v S16 value. + */ + PixelValue(int16_t v) + : PixelValue() + { + value.s16 = v; + } + /** Initialize the union with a U32 pixel value + * + * @param[in] v U32 value. + */ + PixelValue(uint32_t v) + : PixelValue() + { + value.u32 = v; + } + /** Initialize the union with a S32 pixel value + * + * @param[in] v S32 value. + */ + PixelValue(int32_t v) + : PixelValue() + { + value.s32 = v; + } + /** Initialize the union with a F32 pixel value + * + * @param[in] v F32 value. + */ + PixelValue(float v) + : PixelValue() + { + value.f32 = v; + } + /** Union which describes the value of a pixel for any image format.
+ * Use the field corresponding to the image format + */ + union + { + uint8_t rgb[3]; /**< 3 channels: RGB888 */ + uint8_t yuv[3]; /**< 3 channels: Any YUV format */ + uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ + float f32; /**< Single channel float 32 */ + uint8_t u8; /**< Single channel U8 */ + uint16_t u16; /**< Single channel U16 */ + int16_t s16; /**< Single channel S16 */ + uint32_t u32; /**< Single channel U32 */ + int32_t s32; /**< Single channel S32 */ + } value; + /** Interpret the pixel value as a U8 + * + * @param[out] v Returned value + */ + void get(uint8_t &v) const + { + v = value.u8; + } + /** Interpret the pixel value as a U16 + * + * @param[out] v Returned value + */ + void get(uint16_t &v) const + { + v = value.u16; + } + /** Interpret the pixel value as a S16 + * + * @param[out] v Returned value + */ + void get(int16_t &v) const + { + v = value.s16; + } + /** Interpret the pixel value as a U32 + * + * @param[out] v Returned value + */ + void get(uint32_t &v) const + { + v = value.u32; + } + /** Interpret the pixel value as a S32 + * + * @param[out] v Returned value + */ + void get(int32_t &v) const + { + v = value.s32; + } + /** Interpret the pixel value as a F32 + * + * @param[out] v Returned value + */ + void get(float &v) const + { + v = value.f32; + } +}; +} +#endif /* __ARM_COMPUTE_PIXELVALUE_H__ */ diff --git a/arm_compute/core/PyramidInfo.h b/arm_compute/core/PyramidInfo.h new file mode 100644 index 0000000000..917a14d504 --- /dev/null +++ b/arm_compute/core/PyramidInfo.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_PYRAMIDINFO_H__ +#define __ARM_COMPUTE_PYRAMIDINFO_H__ + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** Store the Pyramid's metadata */ +class PyramidInfo +{ +public: + /** Default constructor */ + PyramidInfo(); + /** Default destructor */ + virtual ~PyramidInfo() = default; + /** Allow instances of this class to be copy constructed */ + PyramidInfo(const PyramidInfo &) = default; + /** Allow instances of this class to be copied */ + PyramidInfo &operator=(const PyramidInfo &) = default; + /** Allow instances of this class to be move constructed */ + PyramidInfo(PyramidInfo &&) = default; + /** Allow instances of this class to be moved */ + PyramidInfo &operator=(PyramidInfo &&) = default; + /** Initialize pyramid's metadata for 2D tensors + * + * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value + * @param[in] scale Used to indicate the scale between the pyramid levels. + * This is required to be a non-zero positive value. + * @param[in] width The width of the 2D tensor at 0th pyramid level + * @param[in] height The height of the 2D tensor at 0th pyramid level + * @param[in] format The format of all 2D tensors in the pyramid + * NV12, NV21, IYUV, UYVY and YUYV formats are not supported. + */ + void init(size_t num_levels, float scale, size_t width, size_t height, Format format); + /** Initialize pyramid's metadata using TensorShape + * + * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value + * @param[in] scale Used to indicate the scale between the pyramid levels. + * This is required to be a non-zero positive value.
+ * @param[in] tensor_shape It specifies the size for each dimension of the tensor at the 0th pyramid level in number of elements + * @param[in] format The format of all tensors in the pyramid + */ + void init(size_t num_levels, float scale, const TensorShape &tensor_shape, Format format); + /** Return the number of the pyramid levels + * + * @return The number of the pyramid levels + */ + size_t num_levels() const; + /** Return the width of the 0th level tensor + * + * @return The width of the 0th level tensor + */ + size_t width() const; + /** Return the height of the 0th level tensor + * + * @return The height of the 0th level tensor + */ + size_t height() const; + /** Return the TensorShape of the 0th level tensor + * + * @return The TensorShape of the 0th level tensor + */ + const TensorShape &tensor_shape() const; + /** Return the image format of all tensors in the pyramid + * + * @return The image format + */ + Format format() const; + /** Return the scale factor of the pyramid + * + * @return The scale factor + */ + float scale() const; + +private: + size_t _num_levels; + TensorShape _tensor_shape; + Format _format; + float _scale; +}; +} +#endif /*__ARM_COMPUTE_PYRAMIDINFO_H__ */ diff --git a/arm_compute/core/Size2D.h b/arm_compute/core/Size2D.h new file mode 100644 index 0000000000..cb053ea2c4 --- /dev/null +++ b/arm_compute/core/Size2D.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_SIZE2D_H__ +#define __ARM_COMPUTE_SIZE2D_H__ + +#include + +namespace arm_compute +{ +/** Class for specifying the size of an image or rectangle */ +class Size2D +{ +public: + /** Default constructor: width and height are both set to 0 */ + Size2D() + : width(0), height(0) + { + } + /** Constructor. Initializes "width" and "height" respectively with "w" and "h" + * + * @param[in] w Width of the image or rectangle + * @param[in] h Height of the image or rectangle + */ + Size2D(size_t w, size_t h) + : width(w), height(h) + { + } + /** Copy constructor.
Initializes "width" and "height" with the dimensions of "size" + * + * @param[in] size Size2D object to copy + */ + Size2D(const Size2D &size) + : width(size.width), height(size.height) + { + } + /** Copy assignment + * + * @param[in] size Constant reference input "Size2D" data object to copy + * + * @return Reference to the newly altered left hand side "Size2D" data object + */ + Size2D &operator=(const Size2D &size) + { + width = size.width; + height = size.height; + return *this; + } + /** The area of the image or rectangle calculated as (width * height) + * + * @return Area (width * height) + * + */ + size_t area() const + { + return (width * height); + } + +public: + size_t width; /**< Width of the image region or rectangle */ + size_t height; /**< Height of the image region or rectangle */ +}; +} +#endif /*__ARM_COMPUTE_SIZE2D_H__ */ diff --git a/arm_compute/core/Steps.h b/arm_compute/core/Steps.h new file mode 100644 index 0000000000..4706c81696 --- /dev/null +++ b/arm_compute/core/Steps.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_STEPS_H__ +#define __ARM_COMPUTE_STEPS_H__ + +#include "arm_compute/core/Dimensions.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Types.h" + +#include +#include +#include + +namespace arm_compute +{ +/** Class to describe a number of elements in each dimension. Similar to @ref + * Strides, but expressed in number of elements rather than in bytes. + */ +class Steps : public Dimensions +{ +public: +#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ + /** Constructor to initialize the steps. + * + * @param[in] steps Values to initialize the steps; dimensions that are not provided are set to 1. + */ + template + Steps(Ts... steps) + : Dimensions{ steps... } + { + // Initialize empty dimensions to 1 + std::fill(_id.begin() + _num_dimensions, _id.end(), 1); + } +#endif + /** Allow instances of this class to be copy constructed */ + constexpr Steps(const Steps &) = default; + /** Allow instances of this class to be copied */ + Steps &operator=(const Steps &) = default; + /** Allow instances of this class to be move constructed */ + constexpr Steps(Steps &&) = default; + /** Allow instances of this class to be moved */ + Steps &operator=(Steps &&) = default; + /** Default destructor */ + ~Steps() = default; +}; +} +#endif /*__ARM_COMPUTE_STEPS_H__*/ diff --git a/arm_compute/core/Strides.h b/arm_compute/core/Strides.h new file mode 100644 index 0000000000..efdeb11bbb --- /dev/null +++ b/arm_compute/core/Strides.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_STRIDES_H__ +#define __ARM_COMPUTE_STRIDES_H__ + +#include "arm_compute/core/Dimensions.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Types.h" + +#include +#include +#include + +namespace arm_compute +{ +/** Strides of an item in bytes, one value per dimension */ +class Strides : public Dimensions +{ +public: +#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ + /** Constructor to initialize the strides. + * + * @param[in] strides Values to initialize the strides; they are forwarded unchanged to the Dimensions base class. + */ + template + constexpr Strides(Ts... strides) + : Dimensions{ strides... 
} + { + } +#endif + /** Allow instances of this class to be copy constructed */ + constexpr Strides(const Strides &) = default; + /** Allow instances of this class to be copied */ + Strides &operator=(const Strides &) = default; + /** Allow instances of this class to be move constructed */ + constexpr Strides(Strides &&) = default; + /** Allow instances of this class to be moved */ + Strides &operator=(Strides &&) = default; + /** Default destructor */ + ~Strides() = default; +}; +} +#endif /*__ARM_COMPUTE_STRIDES_H__*/ diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h new file mode 100644 index 0000000000..c0fbc2acac --- /dev/null +++ b/arm_compute/core/TensorInfo.h @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_TENSORINFO_H__ +#define __ARM_COMPUTE_TENSORINFO_H__ + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Strides.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" + +#include + +namespace arm_compute +{ +class HOGInfo; + +/** Store the tensor's metadata */ +class TensorInfo +{ +public: + /** Default constructor */ + TensorInfo(); + /** Default destructor */ + virtual ~TensorInfo() = default; + /** Allow instances of this class to be copy constructed */ + TensorInfo(const TensorInfo &) = default; + /** Allow instances of this class to be copied */ + TensorInfo &operator=(const TensorInfo &) = default; + /** Allow instances of this class to be move constructed */ + TensorInfo(TensorInfo &&) = default; + /** Allow instances of this class to be moved */ + TensorInfo &operator=(TensorInfo &&) = default; + /** 2D tensor constructor + * + * @param[in] width Width of the 2D tensor + * @param[in] height Height of the 2D tensor + * @param[in] format Single plane format of the tensor. + */ + TensorInfo(unsigned int width, unsigned int height, Format format); + /** Constructor + * + * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements. + * @param[in] format Single plane format of the tensor. + */ + TensorInfo(const TensorShape &tensor_shape, Format format); + /** Constructor + * + * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements. + * @param[in] num_channels It indicates the number of channels for each tensor element + * @param[in] data_type Data type to use for each tensor element + * @param[in] fixed_point_pos (Optional) It specifies the fixed point position when the tensor data type is INT8, INT16 or INT32.
(Default = 0) + If 0, calculations are performed in integer math + */ + TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, size_t fixed_point_pos = 0); + /** Constructor + * + * @param[in] hog_info HOG's metadata used to allocate normalized HOG space + * @param[in] width Width of the 2D tensor where the HOG descriptor will be computed on + * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on + */ + TensorInfo(const HOGInfo &hog_info, unsigned int width, unsigned int height); + /** Initialize the metadata structure with the given parameters + * + * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. + * @param[in] format Single plane format of the tensor. + */ + void init(const TensorShape &tensor_shape, Format format); + /** Initialize the metadata structure with the given parameters + * + * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. + * @param[in] format Single plane format of the tensor. + * @param[in] strides_in_bytes Stride in bytes for accessing each dimension of the tensor. + * @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element. + * @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element). + */ + void init(const TensorShape &tensor_shape, Format format, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, size_t total_size_in_bytes); + /** Initialize the metadata structure with the given parameters + * + * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. + * @param[in] num_channels Desired number of channels for each tensor element. + * @param[in] data_type Data type to use for each tensor element. + * @param[in] fixed_point_pos (Optional) Fixed point position when the tensor data type is INT8, INT16 or INT32 (default = 0).
+ * If 0, calculations are performed in integer arithmetic. + */ + void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, size_t fixed_point_pos = 0); + /** Initialize the metadata structure with the given parameters + * + * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. + * @param[in] num_channels Desired number of channels for each tensor element. + * @param[in] data_type Data type to use for each tensor element. + * @param[in] strides_in_bytes Stride in bytes for accessing each dimension of the tensor. + * @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element. + * @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element). + * @param[in] fixed_point_pos (Optional) Fixed point position when the tensor data type is INT8, INT16 or INT32 (default = 0). + * If 0, calculations are performed in integer arithmetic. + */ + void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, + size_t total_size_in_bytes, size_t fixed_point_pos = 0); + /** Initialize the metadata structure for the given HOG's metadata + * + * @param[in] hog_info HOG's metadata used to allocate normalized HOG space + * @param[in] width Width of the 2D tensor where the HOG descriptor will be computed on + * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on + */ + void init(const HOGInfo &hog_info, unsigned int width, unsigned int height); + /** Initialize the metadata structure for the given tensor shape and single-plane format, (Padding is automatically calculated) + * + * @note The padding used by this method is really conservative so that the tensor can be used for most functions.
+ * + * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements + * @param[in] format Single plane format of the image. + * + * @return Total allocation size including padding in bytes. + */ + size_t init_auto_padding(const TensorShape &tensor_shape, Format format); + /** Initialize the metadata structure for the given tensor shape, number of channels, + * data type and fixed point position. (Padding is automatically calculated) + * + * @note The padding used by this method is really conservative so that the tensor can be used for most functions. + * + * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements + * @param[in] num_channels It indicates the number of channels for each tensor element + * @param[in] data_type Data type to use for each tensor element + * @param[in] fixed_point_pos (Optional) It specifies the fixed point position when the tensor data type is INT8, INT16 or INT32. (Default = 0) + * If 0, calculations are performed in integer math + * + * @return Total allocation size including padding in bytes. + */ + size_t init_auto_padding(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, size_t fixed_point_pos = 0); + /** Initialize the metadata structure for the given HOG's metadata + * + * @note init_auto_padding will be used for the tensor initialization. + * + * @param[in] hog_info HOG's metadata used to allocate normalized HOG space + * @param[in] width Width of the 2D tensor where the HOG descriptor will be computed on + * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on + */ + size_t init_auto_padding(const HOGInfo &hog_info, unsigned int width, unsigned int height); + /** Update the offset to the first element and the strides to automatically computed values. + * + * @note The padding used by this method is really conservative so that the tensor can be used for most functions.
+ * + * @return True if the strides or the offset to the first element have changed. + */ + bool auto_padding(); + /** Update the offset to the first element, the strides and the total size. + * + * @note This function can only increase the offset, strides and total size. + * + * @param[in] padding Padding around the XY plane in number of elements. + * + * @return True if the strides, offset and total size have changed. + */ + bool extend_padding(const PaddingSize &padding); + /** Set the format of an already initialized tensor. + * + * @note The passed format must be compatible with the existing number of channels and data type of the tensor. + * + * @param[in] format Single-plane format of the tensor. + */ + void set_format(Format format); + /** Return the size of the requested dimension + * + * @param[in] index Index of the dimension + * + * @return Size of the requested dimension + */ + size_t dimension(size_t index) const + { + return _tensor_shape[index]; + } + /** The strides in bytes for accessing each dimension of the tensor + * + * @return Strides in bytes for each tensor dimension + */ + const Strides &strides_in_bytes() const + { + return _strides_in_bytes; + } + /** The offset from the beginning of the memory allocation to the first element of the tensor. + * This can be used to access efficiently elements in a 2D tensor + * + * @return The offset in bytes to access the first element of the tensor. + */ + size_t offset_first_element_in_bytes() const + { + return _offset_first_element_in_bytes; + } + /** The offset in bytes from the beginning of the memory allocation to access the element at position (x, y, z ...) + * + * @param[in] pos Vector with the coordinates of the element to access. + * The size of this vector must be equal to the number of dimensions of the tensor + * + * @return Offset in bytes from the beginning of the memory allocation to access the element (x, y, z, ...)
+ */ + size_t offset_element_in_bytes(const Coordinates &pos) const; + /** Fixed point position used when the tensor data type is S8, S16 or S32. + * + * @return The fixed point position + */ + size_t fixed_point_pos() const + { + return _fixed_point_pos; + } + /** Element size in bytes calculated as data_size_from_type(data_type()) * num_channels() + * + * @return The size of one element in bytes + */ + size_t element_size() const + { + return data_size_from_type(_data_type) * _num_channels; + } + /** The number of dimensions of the tensor (rank) + * + * @return The number of dimensions of the tensor (rank) + */ + size_t num_dimensions() const + { + return _tensor_shape.num_dimensions(); + } + /** The number of channels for each tensor element + * + * @return The number of channels for each tensor element + */ + size_t num_channels() const + { + return _num_channels; + } + /** Size for each dimension of the tensor + * + * @return A vector with the size for each dimension of the tensor + */ + const TensorShape &tensor_shape() const + { + return _tensor_shape; + } + /** Data type used for each element of the tensor + * + * @return Tensor data type + */ + DataType data_type() const + { + return _data_type; + } + /** Colour format of the image + * + * @return Colour format of the image + */ + Format format() const + { + return _format; + } + /** Returns the total size of the tensor in bytes. + * + * @return Total size of the tensor in bytes. + */ + size_t total_size() const + { + return _total_size; + } + /** Checks if the tensor has been allocated with padding or not (total_size() differs from the unpadded element count times element_size()). + * + * @return True if padding is allocated in the tensor, otherwise false. + */ + bool has_padding() const + { + return (this->total_size() != (this->tensor_shape().total_size() * this->element_size())); + } + /** Flag indicating whether the size of the tensor can be changed. 
+ * + * @return True if the tensor size can be changed.
+ */ + bool is_resizable() const + { + return _is_resizable; + } + /** Set the flag whether the tensor size can be changed. */ + void set_is_resizable(bool is_resizable) + { + _is_resizable = is_resizable; + } + /** Valid region of the tensor. All elements in the valid region have defined values, i.e. are not undefined. + * + * @return The valid region. + */ + ValidRegion valid_region() const + { + return _valid_region; + } + /** Set the valid region of the tensor. */ + void set_valid_region(ValidRegion valid_region) + { + _valid_region = std::move(valid_region); + } + +private: + /** Calculates strides, offset and total size resulting from the specified padding around the XY plane. + * + * @param[in] padding Padding around the XY plane in elements. + */ + std::tuple calculate_padding_requirements(const PaddingSize &padding); + + size_t _total_size; + size_t _fixed_point_pos; + size_t _offset_first_element_in_bytes; + Strides _strides_in_bytes; + size_t _num_channels; + TensorShape _tensor_shape; + DataType _data_type; + Format _format; + bool _is_resizable; + ValidRegion _valid_region; +}; +} +#endif /*__ARM_COMPUTE_TENSORINFO_H__ */ diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h new file mode 100644 index 0000000000..3ac629846a --- /dev/null +++ b/arm_compute/core/TensorShape.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TENSORSHAPE_H__ +#define __ARM_COMPUTE_TENSORSHAPE_H__ + +#include "arm_compute/core/Dimensions.h" +#include "arm_compute/core/Error.h" + +#include +#include +#include +#include + +namespace arm_compute +{ +/** Shape of a tensor */ +class TensorShape : public Dimensions +{ +public: +#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ + /** Constructor to initialize the tensor shape. + * + * @param[in] dims Values to initialize the dimensions. + */ + template + TensorShape(Ts... dims) + : Dimensions{ dims... 
} + { + // Initialize empty dimensions to 1 + std::fill(_id.begin() + _num_dimensions, _id.end(), 1); + } +#endif + /** Allow instances of this class to be copy constructed */ + TensorShape(const TensorShape &) = default; + /** Allow instances of this class to be copied */ + TensorShape &operator=(const TensorShape &) = default; + /** Allow instances of this class to be move constructed */ + TensorShape(TensorShape &&) = default; + /** Allow instances of this class to be moved */ + TensorShape &operator=(TensorShape &&) = default; + /** Default destructor */ + ~TensorShape() = default; + /** Collapses all dimensions to a single linear total size. + * + * @return The total tensor size in terms of elements. + */ + size_t total_size() const + { + const size_t size = std::accumulate(_id.begin(), _id.end(), 1, std::multiplies()); + ARM_COMPUTE_ERROR_ON(0 == size); + return size; + } + /** Collapses given dimension and above. + * + * @note Precondition: dimension < TensorShape::num_max_dimensions + * + * @param[in] dimension Size of the wanted dimension + * + * @return The linear size of the collapsed dimensions + */ + size_t total_size_upper(size_t dimension) const + { + const size_t size = std::accumulate(_id.begin() + dimension, _id.end(), 1, std::multiplies()); + ARM_COMPUTE_ERROR_ON(0 == size); + return size; + } +}; +} +#endif /*__ARM_COMPUTE_TENSORSHAPE_H__*/ diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h new file mode 100644 index 0000000000..a6a74a82a5 --- /dev/null +++ b/arm_compute/core/Types.h @@ -0,0 +1,534 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TYPES_H__ +#define __ARM_COMPUTE_TYPES_H__ + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/TensorShape.h" + +#include +#include +#include + +namespace arm_compute +{ +/** Image colour formats */ +enum class Format +{ + UNKNOWN, /** Unknown image format */ + U8, /** 1 channel, 1 U8 per channel */ + S16, /** 1 channel, 1 S16 per channel */ + U16, /** 1 channel, 1 U16 per channel */ + S32, /** 1 channel, 1 S32 per channel */ + U32, /** 1 channel, 1 U32 per channel */ + F16, /** 1 channel, 1 F16 per channel */ + F32, /** 1 channel, 1 F32 per channel */ + UV88, /** 2 channel, 1 U8 per channel */ + RGB888, /** 3 channels, 1 U8 per channel */ + RGBA8888, /** 4 channels, 1 U8 per channel */ + YUV444, /** A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */ + YUYV422, /** A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */ + NV12, /** A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */ + NV21, /** A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */ + IYUV, /** A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */ + UYVY422 /** A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte */ +}; + +/** Available data types */ +enum class DataType +{ + UNKNOWN, + U8, + S8, + U16, + S16, + U32, + S32, + U64, + S64, + F16, + F32, + F64, + SIZET +}; + +/** Constant value of the border pixels when using BorderMode::CONSTANT */ +constexpr uint8_t CONSTANT_BORDER_VALUE = 199; + +/* Constant value used to indicate a half-scale pyramid */ +constexpr float SCALE_PYRAMID_HALF = 0.5f; + +/* Constant value used to indicate a ORB scaled pyramid */ +constexpr float SCALE_PYRAMID_ORB = 8.408964152537146130583778358414e-01; + +struct ValidRegion +{ + ValidRegion() + : anchor{}, shape{} + { + } + + ValidRegion(const ValidRegion &) = default; + ValidRegion(ValidRegion &&) = default; + ValidRegion &operator=(const ValidRegion &) = default; + ValidRegion &operator=(ValidRegion &&) = 
default;
+    ~ValidRegion() = default;
+
+    ValidRegion(Coordinates anchor, TensorShape shape)
+        : anchor{ anchor }, shape{ shape }
+    {
+    }
+
+    Coordinates anchor;
+    TensorShape shape;
+};
+
+/** Methods available to handle borders */
+enum class BorderMode
+{
+    UNDEFINED, /**< Borders are left undefined */
+    CONSTANT,  /**< Pixels outside the image are assumed to have a constant value */
+    REPLICATE  /**< Pixels outside the image are assumed to have the same value as the closest image pixel */
+};
+
+/** Container for 2D border size */
+struct BorderSize
+{
+    /** Empty border, i.e. no border */
+    constexpr BorderSize()
+        : top{ 0 }, right{ 0 }, bottom{ 0 }, left{ 0 }
+    {
+    }
+
+    /** Border with equal size around the 2D plane */
+    constexpr BorderSize(unsigned int size)
+        : top{ size }, right{ size }, bottom{ size }, left{ size }
+    {
+    }
+
+    /** Border with same size for top/bottom and left/right */
+    constexpr BorderSize(unsigned int top_bottom, unsigned int left_right)
+        : top{ top_bottom }, right{ left_right }, bottom{ top_bottom }, left{ left_right }
+    {
+    }
+
+    /** Border with different sizes */
+    constexpr BorderSize(unsigned int top, unsigned int right, unsigned int bottom, unsigned int left)
+        : top{ top }, right{ right }, bottom{ bottom }, left{ left }
+    {
+    }
+
+    /** Check if the entire border is zero */
+    constexpr bool empty() const
+    {
+        return top == 0 && right == 0 && bottom == 0 && left == 0;
+    }
+
+    /** Check if the border is the same size on all sides */
+    constexpr bool uniform() const
+    {
+        return top == right && top == bottom && top == left;
+    }
+
+    /** Scale every side of the border in place.
+     *
+     * Each side is multiplied by @p scale in floating point and converted back to unsigned int.
+     *
+     * @param[in] scale Factor applied to the four sides.
+     *
+     * @return Reference to this (now scaled) border.
+     */
+    BorderSize &operator*=(float scale)
+    {
+        top *= scale;
+        right *= scale;
+        bottom *= scale;
+        left *= scale;
+
+        return *this;
+    }
+
+    /** Return a scaled copy of the border; this object is left untouched.
+     *
+     * @param[in] scale Factor applied to the four sides.
+     *
+     * @return The scaled border.
+     */
+    BorderSize operator*(float scale) const
+    {
+        BorderSize size = *this;
+        size *= scale;
+
+        return size;
+    }
+
+    unsigned int top;
+    unsigned int right;
+    unsigned int bottom;
+    unsigned int left;
+};
+
+using PaddingSize = BorderSize;
+
+/** Policy to handle overflow */
+enum
class ConvertPolicy +{ + WRAP, /**< Wrap around */ + SATURATE /**< Saturate */ +}; + +/** Interpolation method */ +enum class InterpolationPolicy +{ + NEAREST_NEIGHBOR, /**< Output values are defined to match the source pixel whose center is nearest to the sample position */ + BILINEAR, /**< Output values are defined by bilinear interpolation between the pixels */ + AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */ +}; + +/** Bilinear Interpolation method used by LKTracker */ +enum class BilinearInterpolation +{ + BILINEAR_OLD_NEW, + BILINEAR_SCHARR +}; + +/** Threshold mode */ +enum class ThresholdType +{ + BINARY, /**< Threshold with one value */ + RANGE /**< Threshold with two values*/ +}; + +/** Rounding method */ +enum class RoundingPolicy +{ + TO_ZERO, /**< Truncates the least significand values that are lost in operations. */ + TO_NEAREST_EVEN /**< Rounds to nearest even output value */ +}; + +/** Termination criteria */ +enum class Termination +{ + TERM_CRITERIA_EPSILON, + TERM_CRITERIA_ITERATIONS, + TERM_CRITERIA_BOTH +}; + +/** Magnitude calculation type. */ +enum class MagnitudeType +{ + L1NORM, /**< L1 normalization type */ + L2NORM /**< L2 normalization type */ +}; + +/** Phase calculation type. 
+ * + * @note When PhaseType == SIGNED, each angle is mapped to the range 0 to 255 inclusive otherwise angles between 0 and 180 + */ +enum class PhaseType +{ + SIGNED, /**< Angle range: [0, 360] */ + UNSIGNED /**< Angle range: [0, 180] */ +}; + +/** Keypoint type */ +struct KeyPoint +{ + int32_t x{ 0 }; /**< X coordinates */ + int32_t y{ 0 }; /**< Y coordinates */ + float strength{ 0.f }; /**< Strength of the point */ + float scale{ 0.f }; /**< Scale initialized to 0 by the corner detector */ + float orientation{ 0.f }; /**< Orientation initialized to 0 by the corner detector */ + int32_t tracking_status{ 0 }; /**< Status initialized to 1 by the corner detector, set to 0 when the point is lost */ + float error{ 0.f }; /**< Tracking error initialized to 0 by the corner detector */ +}; + +using InternalKeypoint = std::tuple; /* x,y,strength */ + +/** Rectangle type */ +struct Rectangle +{ + uint16_t x; /**< Top-left x coordinate */ + uint16_t y; /**< Top-left y coordinate */ + uint16_t width; /**< Width of the rectangle */ + uint16_t height; /**< Height of the rectangle */ +}; + +/** Coordinate type */ +struct Coordinates2D +{ + int32_t x; /**< X coordinates */ + int32_t y; /**< Y coordinates */ +}; + +/** Coordinate type */ +struct Coordinates3D +{ + uint32_t x; /**< X coordinates */ + uint32_t y; /**< Y coordinates */ + uint32_t z; /**< Z coordinates */ +}; + +/** Available channels */ +enum class Channel +{ + UNKNOWN, /** Unknown channel format */ + C0, /**< First channel (used by formats with unknown channel types). */ + C1, /**< Second channel (used by formats with unknown channel types). */ + C2, /**< Third channel (used by formats with unknown channel types). */ + C3, /**< Fourth channel (used by formats with unknown channel types). */ + R, /**< Red channel. */ + G, /**< Green channel. */ + B, /**< Blue channel. */ + A, /**< Alpha channel. */ + Y, /**< Luma channel. */ + U, /**< Cb/U channel. */ + V /**< Cr/V/Value channel. 
*/ +}; + +/** Available matrix patterns */ +enum class MatrixPattern +{ + BOX, /**< Box pattern matrix. */ + CROSS, /**< Cross pattern matrix. */ + DISK, /**< Disk pattern matrix. */ + OTHER /**< Any other matrix pattern. */ +}; + +/** Available non linear functions. */ +enum class NonLinearFilterFunction : unsigned +{ + MEDIAN = 0, /**< Non linear median filter. */ + MIN = 1, /**< Non linear erode. */ + MAX = 2, /**< Non linear dilate. */ +}; + +/** The normalization type used for the normalization layer */ +enum class NormType +{ + IN_MAP, /* Normalization applied within the same map */ + CROSS_MAP /* Normalization applied cross maps */ +}; + +/** Normalization type for Histogram of Oriented Gradients (HOG) */ +enum class HOGNormType +{ + L2_NORM, /**< L2-norm */ + L2HYS_NORM, /**< L2-norm followed by clipping */ + L1_NORM, /**< L1 norm */ + L1SQRT_NORM /**< L1 norm with SQRT */ +}; + +/** Detection window used for the object detection. The detection window keeps the following information: + * + * -# Geometry of the rectangular window (x/y of top-left corner and width/height) + * -# Index of the class used for evaluating which class the detection window belongs to + * -# Confidence value (score) obtained with the classifier + */ +struct DetectionWindow +{ + uint16_t x{ 0 }; /**< Top-left x coordinate */ + uint16_t y{ 0 }; /**< Top-left y coordinate */ + uint16_t width{ 0 }; /**< Width of the detection window */ + uint16_t height{ 0 }; /**< Height of the detection window */ + uint16_t idx_class{ 0 }; /**< Index of the class */ + float score{ 0.f }; /**< Confidence value for the detection window */ +}; + +/** Dimension rounding type when down-scaling on CNNs + * @note Used in pooling and convolution layer + */ +enum class DimensionRoundingType +{ + FLOOR, /**< Floor rounding */ + CEIL /**< Ceil rounding */ +}; + +/** Available pooling types */ +enum class PoolingType +{ + MAX, /**< Max Pooling */ + AVG /**< Average Pooling */ +}; + +/** Padding and stride 
information class */ +class PadStrideInfo +{ +public: + /** Constructor + * + * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1. + * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1. + * @param[in] pad_x (Optional) Padding, in elements, across x. Defaults to 0. + * @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0. + * @param[in] round (Optional) Dimensions rounding. Defaults to @ref FLOOR. + */ + PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1, + unsigned int pad_x = 0, unsigned int pad_y = 0, + DimensionRoundingType round = DimensionRoundingType::FLOOR) + : _stride(std::make_pair(stride_x, stride_y)), + _pad(std::make_pair(pad_x, pad_y)), + _round_type(round) + { + } + std::pair stride() const + { + return _stride; + } + std::pair pad() const + { + return _pad; + } + DimensionRoundingType round() const + { + return _round_type; + } + +private: + std::pair _stride; + std::pair _pad; + DimensionRoundingType _round_type; +}; + +/** Pooling Layer Information class */ +class PoolingLayerInfo +{ +public: + /** Default Constructor + * + * @param[in] pool_type Pooling type @ref PoolingType. Defaults to @ref PoolingType::MAX + * @param[in] pool_size (Optional) Pooling size, in elements, across x and y. Defaults to 2. 
+ * @param[in] pad_stride_info (Optional) Padding and stride information @ref PadStrideInfo + */ + PoolingLayerInfo(PoolingType pool_type = PoolingType::MAX, unsigned int pool_size = 2, PadStrideInfo pad_stride_info = PadStrideInfo()) + : _pool_type(pool_type), _pool_size(pool_size), _pad_stride_info(pad_stride_info) + { + } + PoolingType pool_type() const + { + return _pool_type; + } + unsigned int pool_size() const + { + return _pool_size; + } + PadStrideInfo pad_stride_info() const + { + return _pad_stride_info; + } + +private: + PoolingType _pool_type; + unsigned int _pool_size; + PadStrideInfo _pad_stride_info; +}; + +/** Activation Layer Information class */ +class ActivationLayerInfo +{ +public: + /** Available activation functions */ + enum class ActivationFunction + { + LOGISTIC, /**< Logistic */ + TANH, /**< Hyperbolic tangent */ + RELU, /**< Rectifier */ + BOUNDED_RELU, /**< Bounded Rectifier */ + SOFT_RELU, /**< Soft Rectifier */ + ABS, /**< Absolute */ + SQUARE, /**< Square */ + SQRT, /**< Square root */ + LINEAR /**< Linear */ + }; + +public: + /** Default Constructor + * + * @param[in] f The activation function to use. + * @param[in] a (Optional) The alpha parameter used by some activation functions + * (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH). + * @param[in] b (Optional) The beta parameter used by some activation functions (@ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH). + */ + ActivationLayerInfo(ActivationFunction f, uint32_t a = 0, uint32_t b = 0) + : _act(f), _a(a), _b(b) + { + } + ActivationFunction activation() const + { + return _act; + } + uint32_t a() const + { + return _a; + } + uint32_t b() const + { + return _b; + } + +private: + ActivationFunction _act; + uint32_t _a; + uint32_t _b; +}; + +/** Normalization Layer Information class */ +class NormalizationLayerInfo +{ +public: + /** Default Constructor + * + * @param[in] type The normalization type. 
Can be @ref NormType::IN_MAP or @ref NormType::CROSS_MAP
+     * @param[in] norm_size The normalization size is the number of elements to normalize across. Defaults to 5.
+     * @param[in] alpha     Alpha parameter used by normalization equation. Defaults to 0.0001.
+     * @param[in] beta      Beta parameter used by normalization equation. Defaults to 0.5.
+     * @param[in] kappa     Kappa parameter used by [Krizhevsky 2012] Across Channel Local Brightness Normalization equation. Defaults to 1.
+     */
+    NormalizationLayerInfo(NormType type, uint32_t norm_size = 5, float alpha = 0.0001, float beta = 0.5, float kappa = 1.f)
+        : _type(type), _norm_size(norm_size), _alpha(alpha), _beta(beta), _kappa(kappa)
+    {
+    }
+    /** @return The normalization type. */
+    NormType type() const
+    {
+        return _type;
+    }
+    /** @return The number of elements to normalize across. */
+    uint32_t norm_size() const
+    {
+        return _norm_size;
+    }
+    /** @return The alpha parameter. */
+    float alpha() const
+    {
+        return _alpha;
+    }
+    /** @return The beta parameter. */
+    float beta() const
+    {
+        return _beta;
+    }
+    /** @return The kappa parameter, as the float it is stored as (no integer truncation). */
+    float kappa() const
+    {
+        return _kappa;
+    }
+    /** Return the scaling factor of the normalization function. If kappa is not 1 then [Krizhevsky 2012] normalization scaling is specified.
+     * @return The normalization scaling factor: alpha / norm_size when kappa is 1, alpha otherwise.
+     */
+    float scale_coeff() const
+    {
+        return (_kappa == 1.f) ? (_alpha / _norm_size) : _alpha;
+    }
+
+private:
+    NormType _type;
+    uint32_t _norm_size;
+    float    _alpha;
+    float    _beta;
+    float    _kappa;
+};
+}
+#endif /* __ARM_COMPUTE_TYPES_H__ */
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
new file mode 100644
index 0000000000..9378ef6edf
--- /dev/null
+++ b/arm_compute/core/Utils.h
@@ -0,0 +1,633 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_UTILS_H__ +#define __ARM_COMPUTE_UTILS_H__ + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Types.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace arm_compute +{ +/** Computes the smallest number larger or equal to value that is a multiple of divisor. */ +template +inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor) +{ + ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0); + return ((value + divisor - 1) / divisor) * divisor; +} + +/** Computes the largest number smaller or equal to value that is a multiple of divisor. 
*/
+template <typename S, typename T>
+inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor)
+{
+    ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
+    return (value / divisor) * divisor;
+}
+
+/** Calculate the rounded up quotient of val / m. */
+template <typename S, typename T>
+constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m)
+{
+    return (val + m - 1) / m;
+}
+
+/** Returns the arm_compute library build information
+ *
+ * Contains the version number and the build options used to build the library
+ *
+ * @return The arm_compute library build information
+ */
+std::string build_information();
+
+/** Load an entire file in memory
+ *
+ * @param[in] filename Name of the file to read.
+ * @param[in] binary   Is it a binary file ?
+ *
+ * @return The content of the file.
+ */
+std::string read_file(const std::string &filename, bool binary);
+
+/** Return a value as a string
+ *
+ * @param[in] val Input value.
+ *
+ * @return Value represented as a string
+ */
+template <typename T>
+const std::string val_to_string(T val)
+{
+    // The insertion yields a std::ostream reference; cast back to the string stream to read its buffer.
+    return static_cast<const std::ostringstream &>(std::ostringstream() << val).str();
+}
+
+/** The size in bytes of the data type
+ *
+ * @param[in] data_type Input data type
+ *
+ * @return The size in bytes of the data type
+ */
+inline size_t data_size_from_type(DataType data_type)
+{
+    switch(data_type)
+    {
+        case DataType::U8:
+        case DataType::S8:
+            return 1;
+        case DataType::U16:
+        case DataType::S16:
+        case DataType::F16:
+            return 2;
+        case DataType::F32:
+        case DataType::U32:
+        case DataType::S32:
+            return 4;
+        case DataType::F64:
+        case DataType::U64:
+        case DataType::S64:
+            return 8;
+        case DataType::SIZET:
+            return sizeof(size_t);
+        default:
+            ARM_COMPUTE_ERROR("Invalid data type");
+            return 0;
+    }
+}
+
+/** The size in bytes of the pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The size in bytes of the pixel format
+ */
+inline size_t pixel_size_from_format(Format format)
+{
+    switch(format)
+    {
+        case Format::U8:
+            return 1;
+        case Format::U16:
+        case Format::S16:
+        case Format::F16:
+        case Format::UV88:
+        case Format::YUYV422:
+        case Format::UYVY422:
+            return 2;
+        case Format::RGB888:
+            return 3;
+        case Format::RGBA8888:
+            return 4;
+        case Format::U32:
+        case Format::S32:
+        case Format::F32:
+            return 4;
+        //Doesn't make sense for planar formats:
+        case Format::NV12:
+        case Format::NV21:
+        case Format::IYUV:
+        case Format::YUV444:
+        default:
+            ARM_COMPUTE_ERROR("Undefined pixel size for given format");
+            return 0;
+    }
+}
+
+/** The size in bytes of a tensor element of the given data type
+ *
+ * @note Only 8, 16 and 32 bit element types are accepted; any other type raises an error.
+ *
+ * @param[in] dt Input data type
+ *
+ * @return The size in bytes of the data type
+ */
+inline size_t element_size_from_data_type(DataType dt)
+{
+    switch(dt)
+    {
+        case DataType::U8:
+        case DataType::S8: // Same width as U8, in line with data_size_from_type()
+            return 1;
+        case DataType::U16:
+        case DataType::S16:
+        case DataType::F16:
+            return 2;
+        case DataType::U32:
+        case DataType::S32:
+        case DataType::F32:
+            return 4;
+        default:
+            ARM_COMPUTE_ERROR("Undefined element size for given data type");
+            return 0;
+    }
+}
+
+/** Return the data type used by a given single-planar pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The data type of the elements stored by the pixel format
+ */
+inline DataType data_type_from_format(Format format)
+{
+    switch(format)
+    {
+        case Format::U8:
+        case Format::UV88:
+        case Format::RGB888:
+        case Format::RGBA8888:
+        case Format::YUYV422:
+        case Format::UYVY422:
+            return DataType::U8;
+        case Format::U16:
+            return DataType::U16;
+        case Format::S16:
+            return DataType::S16;
+        case Format::U32:
+            return DataType::U32;
+        case Format::S32:
+            return DataType::S32;
+        case Format::F16:
+            return DataType::F16;
+        case Format::F32:
+            return DataType::F32;
+        //Doesn't make sense for planar formats:
+        case Format::NV12:
+        case Format::NV21:
+        case Format::IYUV:
+        case Format::YUV444:
+        default:
+            ARM_COMPUTE_ERROR("Not supported data_type for given format");
+            return DataType::UNKNOWN;
+    }
+}
+
+/** Return the plane index of a given channel given an input format.
+ *
+ * @param[in] format  Input format
+ * @param[in] channel Input channel
+ *
+ * @return The plane index of the specific channel of the specific format
+ */
+inline int plane_idx_from_channel(Format format, Channel channel)
+{
+    switch(format)
+    {
+        // Two planes: Y on its own, U and V interleaved in the second plane
+        case Format::NV12:
+        case Format::NV21:
+        {
+            switch(channel)
+            {
+                case Channel::Y:
+                    return 0;
+                case Channel::U:
+                case Channel::V:
+                    return 1;
+                default:
+                    ARM_COMPUTE_ERROR("Not supported channel");
+                    return 0;
+            }
+        }
+        // Three planes: one each for Y, U and V
+        case Format::IYUV:
+        case Format::YUV444:
+        {
+            switch(channel)
+            {
+                case Channel::Y:
+                    return 0;
+                case Channel::U:
+                    return 1;
+                case Channel::V:
+                    return 2;
+                default:
+                    ARM_COMPUTE_ERROR("Not supported channel");
+                    return 0;
+            }
+        }
+        default:
+            ARM_COMPUTE_ERROR("Not supported format");
+            return 0;
+    }
+}
+
+/** Return the number of planes for a given format
+ *
+ * @param[in] format Input format
+ *
+ * @return The number of planes for a given image format.
+ */
+inline size_t num_planes_from_format(Format format)
+{
+    switch(format)
+    {
+        case Format::U8:
+        case Format::S16:
+        case Format::U16:
+        case Format::S32:
+        case Format::U32:
+        case Format::F16:
+        case Format::F32:
+        case Format::UV88: // Treated as a single-plane format by the pixel-size, data-type and channel-count helpers
+        case Format::RGB888:
+        case Format::RGBA8888:
+        case Format::YUYV422:
+        case Format::UYVY422:
+            return 1;
+        case Format::NV12:
+        case Format::NV21:
+            return 2;
+        case Format::IYUV:
+        case Format::YUV444:
+            return 3;
+        default:
+            ARM_COMPUTE_ERROR("Not supported format");
+            return 0;
+    }
+}
+
+/** Return the number of channels for a given single-planar pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The number of channels for a given image format.
+ */
+inline size_t num_channels_from_format(Format format)
+{
+    switch(format)
+    {
+        case Format::U8:
+        case Format::U16:
+        case Format::S16:
+        case Format::U32:
+        case Format::S32:
+        case Format::F16:
+        case Format::F32:
+            return 1;
+        // Because the U and V channels are subsampled
+        // these formats appear like having only 2 channels:
+        case Format::YUYV422:
+        case Format::UYVY422:
+            return 2;
+        case Format::UV88:
+            return 2;
+        case Format::RGB888:
+            return 3;
+        case Format::RGBA8888:
+            return 4;
+        //Doesn't make sense for planar formats:
+        case Format::NV12:
+        case Format::NV21:
+        case Format::IYUV:
+        case Format::YUV444:
+        default:
+            return 0;
+    }
+}
+
+/** Separate a 2D convolution into two 1D convolutions
+*
+* The column of the first row holding the non-zero coefficient of smallest magnitude is used as
+* the vertical vector; every other column must then be an exact integer multiple of it.
+*
+* @param[in]  conv     2D convolution (size x size coefficients, row after row)
+* @param[out] conv_col 1D vertical convolution
+* @param[out] conv_row 1D horizontal convolution
+* @param[in]  size     Size of the 2D convolution
+*
+* @return true if the separation was successful
+*/
+inline bool separate_matrix(const int16_t *conv, int16_t *conv_col, int16_t *conv_row, uint8_t size)
+{
+    int32_t min_col     = -1;
+    int16_t min_col_val = -1;
+
+    // Pick the reference column: smallest non-zero magnitude in the first row
+    for(int32_t i = 0; i < size; ++i)
+    {
+        if(conv[i] != 0 && (min_col < 0 || abs(min_col_val) > abs(conv[i])))
+        {
+            min_col     = i;
+            min_col_val = conv[i];
+        }
+    }
+
+    // First row is all zeros: no reference column to divide by
+    if(min_col < 0)
+    {
+        return false;
+    }
+
+    for(uint32_t j = 0; j < size; ++j)
+    {
+        conv_col[j] = conv[min_col + j * size];
+    }
+
+    for(uint32_t i = 0; i < size; i++)
+    {
+        if(static_cast<int32_t>(i) == min_col)
+        {
+            conv_row[i] = 1;
+        }
+        else
+        {
+            int16_t coeff = conv[i] / conv[min_col];
+
+            // Start at row 0: the division above truncates, so the first row must be verified too,
+            // otherwise a column that is not an exact multiple would be accepted.
+            for(uint32_t j = 0; j < size; ++j)
+            {
+                if(conv[i + j * size] != (conv_col[j] * coeff))
+                {
+                    return false;
+                }
+            }
+
+            conv_row[i] = coeff;
+        }
+    }
+
+    return true;
+}
+
+/** Calculate the scale of the given square matrix
+ *
+ * The scale is the absolute value of the sum of all the coefficients in the matrix.
+ *
+ * @note If the coefficients add up to 0 then the scale is set to 1.
+ * + * @param[in] matrix Matrix coefficients + * @param[in] matrix_size Number of elements per side of the square matrix. (Number of coefficients = matrix_size * matrix_size). + * + * @return The absolute value of the sum of the coefficients if they don't add up to 0, otherwise 1. + */ +inline uint32_t calculate_matrix_scale(const int16_t *matrix, unsigned int matrix_size) +{ + const size_t size = matrix_size * matrix_size; + + return std::max(1, std::abs(std::accumulate(matrix, matrix + size, 0))); +} + +/** Calculate accurary required by the horizontal and vertical convolution computations + * + * @param[in] conv_col Pointer to the vertical vector of the separated convolution filter + * @param[in] conv_row Pointer to the horizontal vector of the convolution filter + * @param[in] size Number of elements per vector of the separated matrix + * + * @return The return type is a pair. The first element of the pair is the biggest data type needed for the first stage. The second + * element of the pair is the biggest data type needed for the second stage. + */ +inline std::pair data_type_for_convolution(const int16_t *conv_col, const int16_t *conv_row, size_t size) +{ + DataType first_stage = DataType::UNKNOWN; + DataType second_stage = DataType::UNKNOWN; + + auto gez = [](const int16_t &v) + { + return v >= 0; + }; + + auto accu_neg = [](const int &first, const int &second) + { + return first + (second < 0 ? second : 0); + }; + + auto accu_pos = [](const int &first, const int &second) + { + return first + (second > 0 ? second : 0); + }; + + const bool only_positive_coefficients = std::all_of(conv_row, conv_row + size, gez) && std::all_of(conv_col, conv_col + size, gez); + + if(only_positive_coefficients) + { + const int max_row_value = std::accumulate(conv_row, conv_row + size, 0) * UINT8_MAX; + const int max_value = std::accumulate(conv_col, conv_col + size, 0) * max_row_value; + + first_stage = (max_row_value <= UINT16_MAX) ? 
DataType::U16 : DataType::S32; + + second_stage = (max_value <= UINT16_MAX) ? DataType::U16 : DataType::S32; + } + else + { + const int min_row_value = std::accumulate(conv_row, conv_row + size, 0, accu_neg) * UINT8_MAX; + const int max_row_value = std::accumulate(conv_row, conv_row + size, 0, accu_pos) * UINT8_MAX; + const int neg_coeffs_sum = std::accumulate(conv_col, conv_col + size, 0, accu_neg); + const int pos_coeffs_sum = std::accumulate(conv_col, conv_col + size, 0, accu_pos); + const int min_value = neg_coeffs_sum * max_row_value + pos_coeffs_sum * min_row_value; + const int max_value = neg_coeffs_sum * min_row_value + pos_coeffs_sum * max_row_value; + + first_stage = ((INT16_MIN <= min_row_value) && (max_row_value <= INT16_MAX)) ? DataType::S16 : DataType::S32; + + second_stage = ((INT16_MIN <= min_value) && (max_value <= INT16_MAX)) ? DataType::S16 : DataType::S32; + } + + return std::make_pair(first_stage, second_stage); +} + +/** Calculate the accuracy required by the squared convolution calculation. + * + * + * @param[in] conv Pointer to the squared convolution matrix + * @param[in] size The total size of the convolution matrix + * + * @return The return is the biggest data type needed to do the convolution + */ +inline DataType data_type_for_convolution_matrix(const int16_t *conv, size_t size) +{ + auto gez = [](const int16_t v) + { + return v >= 0; + }; + + const bool only_positive_coefficients = std::all_of(conv, conv + size, gez); + + if(only_positive_coefficients) + { + const int max_conv_value = std::accumulate(conv, conv + size, 0) * UINT8_MAX; + if(max_conv_value <= UINT16_MAX) + { + return DataType::U16; + } + else + { + return DataType::S32; + } + } + else + { + const int min_value = std::accumulate(conv, conv + size, 0, [](int a, int b) + { + return b < 0 ? a + b : a; + }) + * UINT8_MAX; + + const int max_value = std::accumulate(conv, conv + size, 0, [](int a, int b) + { + return b > 0 ? 
a + b : a; + }) + * UINT8_MAX; + + if((INT16_MIN <= min_value) && (INT16_MAX >= max_value)) + { + return DataType::S16; + } + else + { + return DataType::S32; + } + } +} + +/** Returns expected width and height of output scaled tensor depending on dimensions rounding mode. + * + * @param width Width of input tensor (Number of columns) + * @param height Height of input tensor (Number of rows) + * @param kernel_size Kernel size. + * @param stride_x Stride of the operation in the x dimension. + * @param stride_y Stride of the operation in the y dimension. + * @param pad_x Padding size in the x dimension. + * @param pad_y Padding size in the y dimension. + * @param round_type Dimensions rounding mode. + * + * @return A pair with the new width in the first position and the new height in the second. + */ +const std::pair scaled_dimensions(unsigned int width, unsigned int height, unsigned int kernel_size, + unsigned int stride_x, unsigned int stride_y, + unsigned int pad_x, unsigned int pad_y, + DimensionRoundingType round_type); + +/** Convert a tensor format into a string. + * + * @param[in] format @ref Format to be translated to string. + * + * @return The string describing the format. + */ +const std::string &string_from_format(Format format); + +/** Convert a channel identity into a string. + * + * @param[in] channel @ref Channel to be translated to string. + * + * @return The string describing the channel. + */ +const std::string &string_from_channel(Channel channel); + +/** Convert a data type identity into a string. + * + * @param[in] dt @ref DataType to be translated to string. + * + * @return The string describing the data type. + */ +const std::string &string_from_data_type(DataType dt); +/** Convert a matrix pattern into a string. + * + * @param[in] pattern @ref MatrixPattern to be translated to string. + * + * @return The string describing the matrix pattern. 
+ */ +const std::string &string_from_matrix_pattern(MatrixPattern pattern); +/** Translates a given activation function to a string. + * + * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string. + * + * @return The string describing the activation function. + */ +const std::string &string_from_activation_func(ActivationLayerInfo::ActivationFunction act); +/** Translates a given non linear function to a string. + * + * @param[in] function @ref NonLinearFilterFunction to be translated to string. + * + * @return The string describing the non linear function. + */ +const std::string &string_from_non_linear_filter_function(NonLinearFilterFunction function); +/** Translates a given interpolation policy to a string. + * + * @param[in] policy @ref InterpolationPolicy to be translated to string. + * + * @return The string describing the interpolation policy. + */ +const std::string &string_from_interpolation_policy(InterpolationPolicy policy); +/** Translates a given border mode policy to a string. + * + * @param[in] border_mode @ref BorderMode to be translated to string. + * + * @return The string describing the border mode. + */ +const std::string &string_from_border_mode(BorderMode border_mode); +/** Lower a given string. + * + * @param val Given string to lower. + * + * @return The lowered string + */ +std::string lower_string(std::string val); + +inline bool is_data_type_float(DataType dt) +{ + switch(dt) + { + case DataType::F16: + case DataType::F32: + return true; + default: + return false; + } +} +} +#endif /*__ARM_COMPUTE_UTILS_H__ */ diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h new file mode 100644 index 0000000000..a07d9d99aa --- /dev/null +++ b/arm_compute/core/Validate.h @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_VALIDATE_H__ +#define __ARM_COMPUTE_VALIDATE_H__ + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/HOGInfo.h" +#include "arm_compute/core/IKernel.h" +#include "arm_compute/core/IMultiHOG.h" +#include "arm_compute/core/IMultiImage.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/MultiImageInfo.h" +#include "arm_compute/core/Window.h" + +#include + +namespace arm_compute +{ +/** Throw an error if the passed window is invalid. + * + * The subwindow is invalid if: + * - It is not a valid window. + * - Its dimensions don't match the full window's ones + * - The step for each of its dimension is not identical to the corresponding one of the full window. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. 
+ * @param[in] line Line on which the error occurred. + * @param[in] full Full size window + * @param[in] win Window to validate. + */ +void error_on_mismatching_windows(const char *function, const char *file, const int line, + const Window &full, const Window &win); +#define ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(f, w) ::arm_compute::error_on_mismatching_windows(__func__, __FILE__, __LINE__, f, w) + +/** Throw an error if the passed subwindow is invalid. + * + * The subwindow is invalid if: + * - It is not a valid window. + * - It is not fully contained inside the full window + * - The step for each of its dimension is not identical to the corresponding one of the full window. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] full Full size window + * @param[in] sub Sub-window to validate. + */ +void error_on_invalid_subwindow(const char *function, const char *file, const int line, + const Window &full, const Window &sub); +#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s) ::arm_compute::error_on_invalid_subwindow(__func__, __FILE__, __LINE__, f, s) + +/** Throw an error if the passed coordinates have too many dimensions. + * + * The coordinates have too many dimensions if any of the dimensions greater or equal to max_dim is different from 0. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] pos Coordinates to validate + * @param[in] max_dim Maximum number of dimensions allowed. 
+ */ +void error_on_coordinates_dimensions_gte(const char *function, const char *file, const int line, + const Coordinates &pos, unsigned int max_dim); +#define ARM_COMPUTE_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) ::arm_compute::error_on_coordinates_dimensions_gte(__func__, __FILE__, __LINE__, p, md) + +/** Throw an error if the passed window has too many dimensions. + * + * The window has too many dimensions if any of the dimensions greater or equal to max_dim is different from 0. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] win Window to validate. + * @param[in] max_dim Maximum number of dimensions allowed. + */ +void error_on_window_dimensions_gte(const char *function, const char *file, const int line, + const Window &win, unsigned int max_dim); +#define ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) ::arm_compute::error_on_window_dimensions_gte(__func__, __FILE__, __LINE__, w, md) + +/** Check whether two tensors have different shapes. + * + * @param[in] tensor_1 First tensor to be compared. It is dereferenced unconditionally, so it must not be nullptr. + * @param[in] tensor_2 Second tensor to be compared. It is dereferenced unconditionally, so it must not be nullptr. + * + * @return true if the tensors differ in any of the first Coordinates::num_max_dimensions dimensions, false otherwise. + */ +inline bool have_different_shapes(const ITensor *tensor_1, const ITensor *tensor_2) +{ + for(size_t i = 0; i < arm_compute::Coordinates::num_max_dimensions; ++i) + { + if(tensor_1->info()->dimension(i) != tensor_2->info()->dimension(i)) + { + return true; + } + } + + return false; +} + +/** Throw an error if the two passed tensors (or any further tensor) have different shapes. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor_1 The first tensor to be compared. + * @param[in] tensor_2 The second tensor to be compared. 
+ * @param[in] tensors (Optional) Further allowed tensors. + */ +template +void error_on_mismatching_shapes(const char *function, const char *file, const int line, + const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +{ + ARM_COMPUTE_UNUSED(function); + ARM_COMPUTE_UNUSED(file); + ARM_COMPUTE_UNUSED(line); + ARM_COMPUTE_UNUSED(tensor_1); + ARM_COMPUTE_UNUSED(tensor_2); + + const std::array tensors_array{ { std::forward(tensors)... } }; + ARM_COMPUTE_UNUSED(tensors_array); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(have_different_shapes(tensor_1, tensor_2) || std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor) + { + return have_different_shapes(tensor_1, tensor); + }), + function, file, line, "Tensors have different shapes"); +} +#define ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(...) ::arm_compute::error_on_mismatching_shapes(__func__, __FILE__, __LINE__, __VA_ARGS__) + +/** Throw an error if the passed two tensors have different data types + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor_1 The first tensor to be compared. + * @param[in] tensor_2 The second tensor to be compared. + * @param[in] tensors (Optional) Further allowed tensors. + */ +template +void error_on_mismatching_data_types(const char *function, const char *file, const int line, + const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +{ + ARM_COMPUTE_UNUSED(function); + ARM_COMPUTE_UNUSED(file); + ARM_COMPUTE_UNUSED(line); + ARM_COMPUTE_UNUSED(tensor_1); + ARM_COMPUTE_UNUSED(tensor_2); + + DataType &&first_data_type = tensor_1->info()->data_type(); + ARM_COMPUTE_UNUSED(first_data_type); + + const std::array tensors_array{ { std::forward(tensors)... 
} }; + ARM_COMPUTE_UNUSED(tensors_array); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(tensor_2->info()->data_type() != first_data_type || std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor) + { + return tensor->info()->data_type() != first_data_type; + }), + function, file, line, "Tensors have different data types"); +} + +#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...) ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__) + +/** Throw an error if the format of the passed tensor/multi-image does not match any of the formats provided. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] object Tensor/multi-image to validate. + * @param[in] format First format allowed. + * @param[in] formats (Optional) Further allowed formats. + */ +template +void error_on_format_not_in(const char *function, const char *file, const int line, + const T *object, F &&format, Fs &&... formats) +{ + ARM_COMPUTE_ERROR_ON_LOC(object == nullptr, function, file, line); + + Format &&object_format = object->info()->format(); + ARM_COMPUTE_UNUSED(object_format); + + ARM_COMPUTE_ERROR_ON_LOC(object_format == Format::UNKNOWN, function, file, line); + + const std::array formats_array{ { std::forward(formats)... } }; + ARM_COMPUTE_UNUSED(formats_array); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(object_format != format && std::none_of(formats_array.begin(), formats_array.end(), [&](const F & f) + { + return f == object_format; + }), + function, file, line, "Format %s not supported by this kernel", string_from_format(object_format).c_str()); +} +#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t, ...) ::arm_compute::error_on_format_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__) + +/** Throw an error if the data type of the passed tensor does not match any of the data types provided. 
+ * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor Tensor to validate. + * @param[in] dt First data type allowed. + * @param[in] dts (Optional) Further allowed data types. + */ +template +void error_on_data_type_not_in(const char *function, const char *file, const int line, + const ITensor *tensor, T &&dt, Ts &&... dts) +{ + ARM_COMPUTE_ERROR_ON_LOC(tensor == nullptr, function, file, line); + + DataType &&tensor_dt = tensor->info()->data_type(); + ARM_COMPUTE_UNUSED(tensor_dt); + + ARM_COMPUTE_ERROR_ON_LOC(tensor_dt == DataType::UNKNOWN, function, file, line); + + const std::array dts_array{ { std::forward(dts)... } }; + ARM_COMPUTE_UNUSED(dts_array); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(tensor_dt != dt && std::none_of(dts_array.begin(), dts_array.end(), [&](const T & d) + { + return d == tensor_dt; + }), + function, file, line, "ITensor data type %s not supported by this kernel", string_from_data_type(tensor_dt).c_str()); +} +#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(d, ...) ::arm_compute::error_on_data_type_not_in(__func__, __FILE__, __LINE__, d, __VA_ARGS__) + +/** Throw an error if the data type or the number of channels of the passed tensor does not match any of the data types and number of channels provided. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor Tensor to validate. + * @param[in] num_channels Number of channels to check + * @param[in] dt First data type allowed. + * @param[in] dts (Optional) Further allowed data types. + */ +template +void error_on_data_type_channel_not_in(const char *function, const char *file, const int line, + const ITensor *tensor, size_t num_channels, T &&dt, Ts &&... 
dts) +{ + error_on_data_type_not_in(function, file, line, tensor, std::forward(dt), std::forward(dts)...); + + const size_t tensor_nc = tensor->info()->num_channels(); + ARM_COMPUTE_UNUSED(tensor_nc); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(tensor_nc != num_channels, function, file, line, "Number of channels %d. Required number of channels %d", tensor_nc, num_channels); +} +#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(d, c, ...) ::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, d, c, __VA_ARGS__) + +/** Throw an error if the tensor is not 2D. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor Tensor to validate. + */ +void error_on_tensor_not_2d(const char *function, const char *file, const int line, + const ITensor *tensor); +#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t) ::arm_compute::error_on_tensor_not_2d(__func__, __FILE__, __LINE__, t) + +/** Throw an error if the channel is not in channels. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] cn Input channel + * @param[in] channel First channel allowed. + * @param[in] channels (Optional) Further allowed channels. + */ +template +void error_on_channel_not_in(const char *function, const char *file, const int line, + T cn, T &&channel, Ts &&... channels) +{ + ARM_COMPUTE_ERROR_ON_LOC(cn == Channel::UNKNOWN, function, file, line); + + const std::array channels_array{ { std::forward(channels)... } }; + ARM_COMPUTE_UNUSED(channels_array); + ARM_COMPUTE_ERROR_ON_LOC(channel != cn && std::none_of(channels_array.begin(), channels_array.end(), [&](const T & f) + { + return f == cn; + }), + function, file, line); +} +#define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN(c, ...) 
::arm_compute::error_on_channel_not_in(__func__, __FILE__, __LINE__, c, __VA_ARGS__) + +/** Throw an error if the channel is not part of the given format. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] fmt Input format. + * @param[in] cn Channel to check against the format. + */ +void error_on_channel_not_in_known_format(const char *function, const char *file, const int line, + Format fmt, Channel cn); +#define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(f, c) ::arm_compute::error_on_channel_not_in_known_format(__func__, __FILE__, __LINE__, f, c) + +/** Throw an error if the @ref IMultiHOG container is invalid + * + * An @ref IMultiHOG container is invalid if: + * + * -# it is a nullptr + * -# it doesn't contain models + * -# its HOG data objects do not all share the same phase_type, normalization_type and l2_hyst_threshold (if normalization_type == L2HYS_NORM) + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] multi_hog IMultiHOG container to validate. + */ +void error_on_invalid_multi_hog(const char *function, const char *file, const int line, + const IMultiHOG *multi_hog); +#define ARM_COMPUTE_ERROR_ON_INVALID_MULTI_HOG(m) ::arm_compute::error_on_invalid_multi_hog(__func__, __FILE__, __LINE__, m) + +/** Throw an error if the kernel is not configured. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] kernel Kernel to validate. 
+ */ +void error_on_unconfigured_kernel(const char *function, const char *file, const int line, + const IKernel *kernel); +#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k) ::arm_compute::error_on_unconfigured_kernel(__func__, __FILE__, __LINE__, k) +} +#endif /* __ARM_COMPUTE_VALIDATE_H__*/ diff --git a/arm_compute/core/Window.h b/arm_compute/core/Window.h new file mode 100644 index 0000000000..727a4890ba --- /dev/null +++ b/arm_compute/core/Window.h @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_WINDOW_H__ +#define __ARM_COMPUTE_WINDOW_H__ + +#include +#include +#include + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" + +namespace arm_compute +{ +/** Describe a multidimensional execution window. 
*/ +class Window +{ +public: + /** Alias for dimension 0 also known as X dimension */ + static constexpr size_t DimX = 0; + /** Alias for dimension 1 also known as Y dimension */ + static constexpr size_t DimY = 1; + /** Alias for dimension 2 also known as Z dimension */ + static constexpr size_t DimZ = 2; + + /** Default constructor: create a window containing a single element. */ + constexpr Window() + : _dims(), _thread_id(0), _num_threads(1) + { + } + /** Copy constructor + * + * @param[in] src Copy the values from src to a new object + */ + Window(const Window &src); + + /** Describe one of the image's dimensions with a start, end and step. + * + * Iteration through the elements of the dimension is done like this: + * for(int v = start(); v < end(); v += step()) + * { + * ... + * } + */ + class Dimension + { + public: + /** Constructor, by default creates a dimension of 1. + * + * @param[in] start Start of the dimension + * @param[in] end End of the dimension + * @param[in] step Step between two elements of the dimension when iterating. 
+ * + */ + constexpr Dimension(int start = 0, int end = 1, int step = 1) + : _start(start), _end(end), _step(step) + { + } + /** Default assignment operator to allow dimensions to be copied */ + Dimension &operator=(const Dimension &d) = default; + /** Return the start of the dimension (first value visited when iterating) */ + constexpr int start() const + { + return _start; + } + /** Return the end of the dimension (exclusive: iteration stops before reaching it) */ + constexpr int end() const + { + return _end; + } + /** Return the step of the dimension (increment between two visited values) */ + constexpr int step() const + { + return _step; + } + /** Set the dimension's step + * + * @param[in] step The new step + */ + void set_step(int step) + { + _step = step; + } + + private: + int _start; /**< Start of the dimension */ + int _end; /**< End of the dimension (exclusive) */ + int _step; /**< Step between two elements of the dimension when iterating */ + }; + + /** Read only access to a given dimension of the window + * + * @note Precondition: dimension < Coordinates::num_max_dimensions + * + * @param[in] dimension The dimension to access + * + * @return The requested dimension + */ + constexpr const Dimension &operator[](size_t dimension) const; + + /** Alias to access the first dimension of the window + * + * @return First dimension of the window + */ + constexpr const Dimension &x() const + { + return _dims.at(Window::DimX); + } + + /** Alias to access the second dimension of the window + * + * @return Second dimension of the window + */ + constexpr const Dimension &y() const + { + return _dims.at(Window::DimY); + } + + /** Alias to access the third dimension of the window + * + * @return Third dimension of the window + */ + constexpr const Dimension &z() const + { + return _dims.at(Window::DimZ); + } + + /** Set the values of a given dimension + * + * @param[in] dimension The dimension to set + * @param[in] dim The values to set the dimension to + */ + void set(size_t dimension, const Dimension &dim); + + /** Use the tensor's dimensions to fill the window dimensions. + * + * @param[in] info Tensor information to copy the dimensions from. 
+ * @param[in] first_dimension Only copy dimensions which are greater or equal to this value. + */ + void use_tensor_dimensions(const TensorInfo *info, size_t first_dimension = Window::DimX); + + /** Shift the values of a given dimension by the given shift_value + * + * @param[in] dimension The dimension to shift + * @param[in] shift_value Value to shift the start and end values of. + */ + void shift(size_t dimension, int shift_value); + + /** Scale the values of a given dimension by the given scale_value + * + * @note The end of the window is rounded up to be a multiple of step after the scaling. + * + * @param[in] dimension The dimension to scale + * @param[in] scale_value Value to scale the start, end and step values of. + */ + void scale(size_t dimension, float scale_value); + + /** Set the step of a given dimension. + * + * @param[in] dimension Dimension to update + * @param[in] step The new dimension's step value + */ + void set_dimension_step(size_t dimension, int step); + + /** Will validate all the window's dimensions' values when asserts are enabled + * + * No-op when asserts are disabled + */ + void validate() const; + + /** Return the number of iterations needed to iterate through a given dimension + * + * @param[in] dimension The requested dimension + * + * @return The number of iterations + */ + constexpr size_t num_iterations(size_t dimension) const; + + /** Split a window into a set of sub windows along a given dimension + * + * For example to split a window into 3 sub-windows along the Y axis, you would have to do:
+ * Window sub0 = window.split_window( 1, 0, 3);
+ * Window sub1 = window.split_window( 1, 1, 3);
+ * Window sub2 = window.split_window( 1, 2, 3);
+ * + * @param[in] dimension Dimension along which the split will be performed + * @param[in] id Id of the sub-window to return. Must be in the range [0, total-1] + * @param[in] total Total number of sub-windows the window will be split into. + * + * @return The subwindow "id" out of "total" + */ + Window split_window(size_t dimension, size_t id, size_t total) const; + /** First 1D slice of the window + * + * @return The first slice of the window: dimension 0 is kept as is, every higher dimension is reduced to its first element. + */ + Window first_slice_window_1D() const + { + return first_slice_window<1>(); + }; + /** First 2D slice of the window + * + * @return The first slice of the window: dimensions 0 and 1 are kept as is, every higher dimension is reduced to its first element. + */ + Window first_slice_window_2D() const + { + return first_slice_window<2>(); + }; + /** First 3D slice of the window + * + * @return The first slice of the window: dimensions 0 to 2 are kept as is, every higher dimension is reduced to its first element. + */ + Window first_slice_window_3D() const + { + return first_slice_window<3>(); + }; + /** Slide the passed 1D window slice. + * + * If slice contains the last slice then it will remain unchanged and false will be returned. + * + * @param[in,out] slice Current slice, to be updated to the next slice. + * + * @return true if slice contains a new slice, false if slice already contained the last slice + */ + bool slide_window_slice_1D(Window &slice) const + { + return slide_window_slice<1>(slice); + } + /** Slide the passed 2D window slice. + * + * If slice contains the last slice then it will remain unchanged and false will be returned. + * + * @param[in,out] slice Current slice, to be updated to the next slice. + * + * @return true if slice contains a new slice, false if slice already contained the last slice + */ + bool slide_window_slice_2D(Window &slice) const + { + return slide_window_slice<2>(slice); + } + /** Slide the passed 3D window slice. + * + * If slice contains the last slice then it will remain unchanged and false will be returned. + * + * @param[in,out] slice Current slice, to be updated to the next slice. 
+ * + * @return true if slice contains a new slice, false if slice already contained the last slice + */ + bool slide_window_slice_3D(Window &slice) const + { + return slide_window_slice<3>(slice); + } + /** Sets the ID of the thread that the window is associated with. + * + * @param id ID of the thread that the window is associated with. + */ + void set_thread_id(unsigned int id) + { + _thread_id = id; + } + /** Sets the number of threads dispatched that the window is associated with. + * + * @param num_threads The number of threads dispatched that the window is associated with. + */ + void set_num_threads(unsigned int num_threads) + { + _num_threads = num_threads; + } + /** Get the ID of the thread that the window is associated with. + * + * @return ID of the thread that the window is associated with. + */ + constexpr unsigned int thread_id() const + { + return _thread_id; + } + /** Get the number of threads dispatched that the window is associated with. + * + * @return The number of threads dispatched that the window is associated with. + */ + constexpr unsigned int num_threads() const + { + return _num_threads; + } + +private: + /** First slice of the window + * + * @return The first slice of the window. + */ + template + Window first_slice_window() const; + + /** Slide the passed window slice. + * + * If slice contains the last slice then it will remain unchanged and false will be returned. + * + * @param[in,out] slice Current slice, to be updated to the next slice. 
+ * + * @return true if slice contains a new slice, false if slice already contained the last slice + */ + template + bool slide_window_slice(Window &slice) const; + +private: + std::array _dims; + unsigned int _thread_id; + unsigned int _num_threads; +}; +} +#include "Window.inl" +#endif /*__ARM_COMPUTE_WINDOW_H__ */ diff --git a/arm_compute/core/Window.inl b/arm_compute/core/Window.inl new file mode 100644 index 0000000000..71bcaa3c9a --- /dev/null +++ b/arm_compute/core/Window.inl @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +namespace arm_compute +{ +inline Window::Window(const Window &src) + : _dims(), _thread_id(src._thread_id), _num_threads(src._num_threads) +{ + for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i) + { + set(i, src[i]); + } +} + +inline constexpr const Window::Dimension &Window::operator[](const size_t dimension) const +{ + // Precondition: dimension < Coordinates::num_max_dimensions + return _dims.at(dimension); +} +inline void Window::set(const size_t dimension, const Window::Dimension &dim) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); + _dims[dimension] = dim; +} + +inline void Window::shift(const size_t dimension, const int shift_value) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); + Window::Dimension &d = _dims[dimension]; + d = Window::Dimension(d.start() + shift_value, d.end() + shift_value, d.step()); +} + +inline void Window::scale(const size_t dimension, float scale_value) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); + Window::Dimension &d = _dims[dimension]; + const int scaled_step = d.step() * scale_value; + const int scaled_end = ceil_to_multiple(d.end() * scale_value, scaled_step); + d = Window::Dimension(d.start() * scale_value, scaled_end, scaled_step); +} + +inline void Window::set_dimension_step(const size_t dimension, const int step) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); + _dims[dimension].set_step(step); +} + +inline void Window::validate() const +{ + for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i) + { + ARM_COMPUTE_ERROR_ON(_dims[i].step() == 0); + ARM_COMPUTE_ERROR_ON(_dims[i].end() <= _dims[i].start()); + ARM_COMPUTE_ERROR_ON((_dims[i].end() - _dims[i].start()) % _dims[i].step()); + } +} + +inline constexpr size_t Window::num_iterations(size_t dimension) const +{ + // Precondition: dimension < Coordinates::num_max_dimensions + // Precondition: (end - start) % step == 0 + return 
(_dims.at(dimension).end() - _dims.at(dimension).start()) / _dims.at(dimension).step(); +} + +inline Window Window::split_window(const size_t dimension, const size_t id, const size_t total) const +{ + ARM_COMPUTE_ERROR_ON(id >= total); + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); + + Window out; + + for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d) + { + if(d == dimension) + { + int start = _dims[d].start(); + int end = _dims[d].end(); + int per_sub_window = (num_iterations(d) / total) * _dims[d].step(); + + start += id * per_sub_window; + + if(id != total - 1) + { + end = start + per_sub_window; + } + + out.set(d, Dimension(start, end, _dims[d].step())); + } + else + { + out.set(d, _dims[d]); + } + } + + return out; +} + +template +inline bool Window::slide_window_slice(Window &slice) const +{ + for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) + { + // Did we reach the end of this dimension? + const int v = slice._dims[n].start() + 1; + + if(v < _dims[n].end()) + { + // No: increment + slice._dims[n] = Dimension(v, v + 1, 1); + + // Reset lower dimensions: + for(unsigned int lower = window_dimension; lower < n; ++lower) + { + slice._dims[lower] = Dimension(_dims[lower].start(), _dims[lower].start() + 1, 1); + } + return true; + } + } + + // It was the last slice + return false; // Iteration over +} + +template +inline Window Window::first_slice_window() const +{ + Window slice; + + std::copy_n(_dims.begin(), window_dimension, slice._dims.begin()); + + //Initialise higher dimensions to be the first slice. 
+ for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) + { + slice._dims[n] = Dimension(_dims[n].start(), _dims[n].start() + 1, 1); + } + + return slice; +} + +inline void Window::use_tensor_dimensions(const TensorInfo *info, const size_t first_dimension) +{ + for(unsigned int n = first_dimension; n < info->num_dimensions(); ++n) + { + set(n, Window::Dimension(0, std::max(info->dimension(n), static_cast(1)))); + } +} +} diff --git a/arm_compute/runtime/Array.h b/arm_compute/runtime/Array.h new file mode 100644 index 0000000000..c8a240e428 --- /dev/null +++ b/arm_compute/runtime/Array.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_ARRAY_H__ +#define __ARM_COMPUTE_ARRAY_H__ + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/Types.h" + +#include <memory> + +namespace arm_compute +{ +/** Basic implementation of the IArray interface which allocates a static number of T values */ +template <class T> +class Array : public IArray<T> +{ +public: + /** Default constructor: empty array (no storage is allocated) */ + Array() + : IArray<T>(0), _values(nullptr) + { + } + /** Constructor: initializes an array which can contain up to max_num_values values + * + * @param[in] max_num_values Maximum number of values the array will be able to store + */ + Array(size_t max_num_values) + : IArray<T>(max_num_values), _values(arm_compute::cpp14::make_unique<T[]>(max_num_values)) + { + } + + // Inherited methods overridden: + T *buffer() const override + { + return _values.get(); + } + +private: + std::unique_ptr<T[]> _values; /**< Owned storage for the values (nullptr for a default-constructed array) */ +}; + +using KeyPointArray = Array<KeyPoint>; +using Coordinates2DArray = Array<Coordinates2D>; +using DetectionWindowArray = Array<DetectionWindow>; +using Size2DArray = Array<Size2D>; +using UInt8Array = Array<uint8_t>; +using UInt16Array = Array<uint16_t>; +using UInt32Array = Array<uint32_t>; +using Int16Array = Array<int16_t>; +using Int32Array = Array<int32_t>; +using FloatArray = Array<float>; +} +#endif /* __ARM_COMPUTE_ARRAY_H__ */ diff --git a/arm_compute/runtime/CL/CLArray.h b/arm_compute/runtime/CL/CLArray.h new file mode 100644 index 0000000000..f4c2ef06d9 --- /dev/null +++ b/arm_compute/runtime/CL/CLArray.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLARRAY_H__ +#define __ARM_COMPUTE_CLARRAY_H__ + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +namespace arm_compute +{ +/** CLArray implementation */ +template +class CLArray : public ICLArray +{ +public: + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArray(const CLArray &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers) */ + const CLArray &operator=(const CLArray &) = delete; + /** Constructor: initializes an array which can contain up to max_num_values values + * + * @param[in] max_num_values Maximum number of values the array will be able to store + */ + CLArray(size_t max_num_values) + : ICLArray(max_num_values), _buffer(cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, max_num_values * sizeof(T))) + { + } + /** Enqueue a map operation of the allocated buffer. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true) + { + ICLArray::map(CLScheduler::get().queue(), blocking); + } + using ICLArray::map; + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. 
+ */ + void unmap() + { + ICLArray::unmap(CLScheduler::get().queue()); + } + using ICLArray::unmap; + + // Inherited methods overridden: + const cl::Buffer &cl_buffer() const override + { + return _buffer; + } + +protected: + // Inherited methods overridden: + uint8_t *do_map(cl::CommandQueue &q, bool blocking) override + { + ARM_COMPUTE_ERROR_ON(nullptr == _buffer.get()); + return static_cast(q.enqueueMapBuffer(_buffer, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, this->max_num_values() * sizeof(T))); + } + void do_unmap(cl::CommandQueue &q, uint8_t *mapping) override + { + ARM_COMPUTE_ERROR_ON(nullptr == _buffer.get()); + q.enqueueUnmapMemObject(_buffer, mapping); + } + +private: + cl::Buffer _buffer; +}; + +using CLKeyPointArray = CLArray; +using CLCoordinates2DArray = CLArray; +using CLDetectionWindowArray = CLArray; +using CLSize2DArray = CLArray; +using CLUInt8Array = CLArray; +using CLUInt16Array = CLArray; +using CLUInt32Array = CLArray; +using CLInt16Array = CLArray; +using CLInt32Array = CLArray; +using CLFloatArray = CLArray; +} +#endif /* __ARM_COMPUTE_CLARRAY_H__ */ diff --git a/arm_compute/runtime/CL/CLDistribution1D.h b/arm_compute/runtime/CL/CLDistribution1D.h new file mode 100644 index 0000000000..55dd1247ed --- /dev/null +++ b/arm_compute/runtime/CL/CLDistribution1D.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDISTRIBUTION1D_H__ +#define __ARM_COMPUTE_CLDISTRIBUTION1D_H__ + +#include "arm_compute/core/CL/ICLDistribution1D.h" +#include "arm_compute/core/CL/OpenCL.h" + +#include +#include + +namespace arm_compute +{ +/** CLDistribution1D object class: implementation of the ICLDistribution1D interface which holds a cl::Buffer */ +class CLDistribution1D : public ICLDistribution1D +{ +public: + /** Constructor: Creates a 1D CLDistribution of a consecutive interval [offset, offset + range - 1] + * defined by a start offset and valid range, divided equally into num_bins parts. + * + * @param[in] num_bins The number of bins the distribution is divided in. + * @param[in] offset The start of the values to use. + * @param[in] range The total number of the consecutive values of the distribution interval. 
+ */ + CLDistribution1D(size_t num_bins, int32_t offset, uint32_t range); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLDistribution1D(const CLDistribution1D &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + CLDistribution1D &operator=(const CLDistribution1D &) = delete; + /** Enqueue a map operation of the allocated buffer. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLDistribution1D::map; + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + void unmap(); + using ICLDistribution1D::unmap; + + // Inherited methods overridden: + cl::Buffer &cl_buffer() override; + +protected: + // Inherited methods overridden: + uint32_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + cl::Buffer _mem; +}; +} +#endif /* __ARM_COMPUTE_CLDISTRIBUTION1D_H__ */ diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h new file mode 100644 index 0000000000..fa501728d5 --- /dev/null +++ b/arm_compute/runtime/CL/CLFunctions.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLFUNCTIONS_H__ +#define __ARM_COMPUTE_CLFUNCTIONS_H__ + +/* Header regrouping all the CL functions */ + +#include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h" +#include "arm_compute/runtime/CL/functions/CLAccumulate.h" +#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" +#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h" +#include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseNot.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseOr.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseXor.h" +#include "arm_compute/runtime/CL/functions/CLBox3x3.h" +#include "arm_compute/runtime/CL/functions/CLCannyEdge.h" +#include "arm_compute/runtime/CL/functions/CLChannelCombine.h" +#include "arm_compute/runtime/CL/functions/CLChannelExtract.h" +#include "arm_compute/runtime/CL/functions/CLColorConvert.h" +#include "arm_compute/runtime/CL/functions/CLConvolution.h" +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLDepthConvert.h" +#include "arm_compute/runtime/CL/functions/CLDerivative.h" +#include "arm_compute/runtime/CL/functions/CLDilate.h" +#include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h" +#include "arm_compute/runtime/CL/functions/CLErode.h" +#include "arm_compute/runtime/CL/functions/CLFastCorners.h" +#include "arm_compute/runtime/CL/functions/CLFillBorder.h" +#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" +#include "arm_compute/runtime/CL/functions/CLGEMM.h" +#include "arm_compute/runtime/CL/functions/CLGEMMLowp.h" +#include "arm_compute/runtime/CL/functions/CLGaussian3x3.h" +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" +#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h" +#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h" +#include 
"arm_compute/runtime/CL/functions/CLHistogram.h" +#include "arm_compute/runtime/CL/functions/CLIntegralImage.h" +#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h" +#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h" +#include "arm_compute/runtime/CL/functions/CLMagnitude.h" +#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h" +#include "arm_compute/runtime/CL/functions/CLMedian3x3.h" +#include "arm_compute/runtime/CL/functions/CLMinMaxLocation.h" +#include "arm_compute/runtime/CL/functions/CLNonLinearFilter.h" +#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h" +#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h" +#include "arm_compute/runtime/CL/functions/CLOpticalFlow.h" +#include "arm_compute/runtime/CL/functions/CLPhase.h" +#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h" +#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" +#include "arm_compute/runtime/CL/functions/CLRemap.h" +#include "arm_compute/runtime/CL/functions/CLScale.h" +#include "arm_compute/runtime/CL/functions/CLScharr3x3.h" +#include "arm_compute/runtime/CL/functions/CLSobel3x3.h" +#include "arm_compute/runtime/CL/functions/CLSobel5x5.h" +#include "arm_compute/runtime/CL/functions/CLSobel7x7.h" +#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h" +#include "arm_compute/runtime/CL/functions/CLTableLookup.h" +#include "arm_compute/runtime/CL/functions/CLThreshold.h" +#include "arm_compute/runtime/CL/functions/CLTranspose.h" +#include "arm_compute/runtime/CL/functions/CLWarpAffine.h" +#include "arm_compute/runtime/CL/functions/CLWarpPerspective.h" + +#endif /* __ARM_COMPUTE_CLFUNCTIONS_H__ */ diff --git a/arm_compute/runtime/CL/CLLut.h b/arm_compute/runtime/CL/CLLut.h new file mode 100644 index 0000000000..9bac2b44c3 --- /dev/null +++ b/arm_compute/runtime/CL/CLLut.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLLUT_H__ +#define __ARM_COMPUTE_CLLUT_H__ + +#include "arm_compute/core/CL/ICLLut.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLLutAllocator.h" + +#include +#include + +namespace arm_compute +{ +class ILutAllocator; + +/** Basic implementation of the OpenCL lut interface */ +class CLLut : public ICLLut +{ +public: + /** Constructor */ + CLLut(); + /** Constructor: initializes a LUT which can contain num_elements values of data_type type. + * + * @param[in] num_elements Number of elements of the LUT. + * @param[in] data_type Data type of each element. 
+ */ + CLLut(size_t num_elements, DataType data_type); + /** Return a pointer to the lut's allocator + * + * @return A pointer to the lut's allocator + */ + ILutAllocator *allocator(); + /** Enqueue a map operation of the allocated buffer. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLLut::map; + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + void unmap(); + using ICLLut::unmap; + + // Inherited methods overridden: + size_t num_elements() const override; + uint32_t index_offset() const override; + size_t size_in_bytes() const override; + DataType type() const override; + const cl::Buffer &cl_buffer() const override; + void clear() override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + CLLutAllocator _allocator; /**< Instance of the OpenCL lut allocator */ +}; +} +#endif /*__ARM_COMPUTE_CLLUT_H__ */ diff --git a/arm_compute/runtime/CL/CLLutAllocator.h b/arm_compute/runtime/CL/CLLutAllocator.h new file mode 100644 index 0000000000..4648ffb51f --- /dev/null +++ b/arm_compute/runtime/CL/CLLutAllocator.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLLUTALLOCATOR_H__ +#define __ARM_COMPUTE_CLLUTALLOCATOR_H__ + +#include "arm_compute/runtime/ILutAllocator.h" + +#include "arm_compute/core/CL/OpenCL.h" + +#include + +namespace arm_compute +{ +/** Basic implementation of a CL memory LUT allocator. */ +class CLLutAllocator : public ILutAllocator +{ +public: + /** Default constructor. */ + CLLutAllocator(); + /** Default destructor. */ + ~CLLutAllocator() = default; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLLutAllocator(const CLLutAllocator &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + const CLLutAllocator &operator=(const CLLutAllocator &) = delete; + /** Interface to be implemented by the child class to return the pointer to the mapped data. 
*/ + uint8_t *data(); + /** Interface to be implemented by the child class to return the pointer to the CL data. */ + const cl::Buffer &cl_data() const; + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + * + * @return The mapping address. + */ + uint8_t *map(cl::CommandQueue &q, bool blocking); + /** Enqueue an unmap operation of the allocated buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the unmapping operation. + * @param[in] mapping The cpu mapping to unmap. + */ + void unmap(cl::CommandQueue &q, uint8_t *mapping); + +protected: + /** Allocate num_elements() * sizeof(type()) of OpenCL memory. */ + void allocate() override; + /** Call map() on the OpenCL buffer. + * + * @return A pointer to the beginning of the LUT's allocation. + */ + uint8_t *lock() override; + /** Call unmap() on the OpenCL buffer. */ + void unlock() override; + +private: + cl::Buffer _buffer; /**< OpenCL buffer containing the LUT data. */ + uint8_t *_mapping; /**< Pointer to the CPU mapping of the OpenCL buffer. */ +}; +} + +#endif /* __ARM_COMPUTE_CLLUTALLOCATOR_H__ */ diff --git a/arm_compute/runtime/CL/CLMultiImage.h b/arm_compute/runtime/CL/CLMultiImage.h new file mode 100644 index 0000000000..f70929db07 --- /dev/null +++ b/arm_compute/runtime/CL/CLMultiImage.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLMULTIIMAGE_H__ +#define __ARM_COMPUTE_CLMULTIIMAGE_H__ + +#include "arm_compute/core/CL/ICLMultiImage.h" +#include "arm_compute/core/MultiImageInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic implementation of the CL multi-planar image interface */ +class CLMultiImage : public ICLMultiImage +{ +public: + /** Constructor */ + CLMultiImage(); + /** Init the multi-planar image + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + */ + void init(unsigned int width, unsigned int height, Format format); + /** Init the multi-planar image + * + * @note Uses conservative padding strategy which fits all kernels. + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + */ + void init_auto_padding(unsigned int width, unsigned int height, Format format); + /** Allocate a previously initialised multi image + * + * @note The multi image must not already be allocated when calling this function. 
+ * + **/ + void allocate(); + + // Inherited methods overridden: + const MultiImageInfo *info() const override; + CLImage *cl_plane(unsigned int index) override; + const CLImage *cl_plane(unsigned int index) const override; + +private: + /** Init the multi-planar image + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + * @param[in] auto_padding Specifies whether the image uses auto padding + */ + void internal_init(unsigned int width, unsigned int height, Format format, bool auto_padding); + + MultiImageInfo _info; /** Instance of the multi-planar image's meta data */ + std::array _plane; /* Instances of CLImage to hold the planes' information */ +}; +} +#endif /*__ARM_COMPUTE_CLMULTIIMAGE_H__ */ diff --git a/arm_compute/runtime/CL/CLPyramid.h b/arm_compute/runtime/CL/CLPyramid.h new file mode 100644 index 0000000000..5e0afb3c63 --- /dev/null +++ b/arm_compute/runtime/CL/CLPyramid.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLPYRAMID_H__ +#define __ARM_COMPUTE_CLPYRAMID_H__ + +#include "arm_compute/core/IPyramid.h" +#include "arm_compute/core/PyramidInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include +#include + +namespace arm_compute +{ +class CLTensor; + +/** Basic implementation of the OpenCL pyramid interface */ +class CLPyramid : public IPyramid +{ +public: + /** Default constructor */ + CLPyramid(); + /** Initialize pyramid data-object using the given Pyramid's metadata + * + * @param[in] info Pyramid's metadata + */ + void init(const PyramidInfo &info); + + /** Initialize pyramid data-object using the given Pyramid's metadata + * + * @note Uses conservative padding strategy which fits all kernels. + * + * @param[in] info Pyramid's metadata + */ + void init_auto_padding(const PyramidInfo &info); + + /** Allocate the planes in the pyramid + * + * @note The pyramid must not already be allocated when calling this function. 
+ * + **/ + void allocate(); + + // Inherited method overridden + const PyramidInfo *info() const override; + CLTensor *get_pyramid_level(size_t index) const override; + +private: + /** Initialize pyramid data-object using the given Pyramid's metadata + * + * @param[in] info Pyramid's metadata + * @param[in] auto_padding Specifies whether the images in the pyramid use auto padding + */ + void internal_init(const PyramidInfo &info, bool auto_padding); + + PyramidInfo _info; + std::unique_ptr _pyramid; +}; +} +#endif /*__ARM_COMPUTE_CLPYRAMID_H__ */ diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h new file mode 100644 index 0000000000..71baa55ce2 --- /dev/null +++ b/arm_compute/runtime/CL/CLScheduler.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLSCHEDULER_H__ +#define __ARM_COMPUTE_CLSCHEDULER_H__ + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/OpenCL.h" + +namespace arm_compute +{ +class ICLKernel; + +/** Provides global access to a CL context and command queue. */ +class CLScheduler +{ +private: + /** Constructor */ + CLScheduler(); + +public: + /** Access the scheduler singleton. + * + * @return The scheduler + */ + static CLScheduler &get(); + /** Initialises the context and command queue used by the scheduler to default values + * and sets a default device and kernel path for the @ref CLKernelLibrary. + */ + void default_init() + { + CLKernelLibrary::get().init("./cl_kernels/", cl::Context::getDefault(), cl::Device::getDefault()); + init(cl::Context::getDefault(), cl::CommandQueue::getDefault()); + } + /** Schedule the execution of the passed kernel if possible. + * + * @param[in] kernel Kernel to execute. + * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. + */ + void enqueue(ICLKernel &kernel, bool flush = true); + + /** Initialises the context and command queue to be used by the scheduler. + * + * @param[in] context A CL context. + * @param[in] queue A CL command queue. + */ + void init(cl::Context context = cl::Context::getDefault(), cl::CommandQueue queue = cl::CommandQueue::getDefault()) + { + _context = std::move(context); + _queue = std::move(queue); + } + + /** Accessor for the associated CL context. + * + * @return A CL context. + */ + cl::Context &context() + { + return _context; + } + + /** Accessor to set the CL context to be used by the scheduler. + * + * @param[in] context A CL context. + */ + void set_context(cl::Context context) + { + _context = std::move(context); + } + + /** Accessor for the associated CL command queue. + * + * @return A CL command queue. 
+ */ + cl::CommandQueue &queue() + { + return _queue; + } + + /** Accessor to set the CL command queue to be used by the scheduler. + * + * @param[in] queue A CL command queue. + */ + void set_queue(cl::CommandQueue queue) + { + _queue = std::move(queue); + } + + /** Blocks until all commands in the associated command queue have finished. */ + void sync() + { + _queue.finish(); + } + + /** Enqueues a marker into the associated command queue and returns the event. + * + * @return An event that can be waited on to block the executing thread. + */ + cl::Event enqueue_sync_event() + { + cl::Event event; + _queue.enqueueMarker(&event); + + return event; + } + +private: + cl::Context _context; + cl::CommandQueue _queue; +}; +} +#endif /* __ARM_COMPUTE_CLSCHEDULER_H__ */ diff --git a/arm_compute/runtime/CL/CLTensor.h b/arm_compute/runtime/CL/CLTensor.h new file mode 100644 index 0000000000..2c685d1ed1 --- /dev/null +++ b/arm_compute/runtime/CL/CLTensor.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTENSOR_H__ +#define __ARM_COMPUTE_CLTENSOR_H__ + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" + +#include + +namespace arm_compute +{ +class ITensorAllocator; +class ITensorInfo; + +/** Basic implementation of the OpenCL tensor interface */ +class CLTensor : public ICLTensor +{ +public: + /** Constructor */ + CLTensor(); + /** Return a pointer to the tensor's allocator + * + * @return A pointer to the tensor's allocator + */ + ITensorAllocator *allocator(); + /** Enqueue a map operation of the allocated buffer. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLTensor::map; + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. 
+ */ + void unmap(); + using ICLTensor::unmap; + + // Inherited methods overridden: + TensorInfo *info() const override; + TensorInfo *info() override; + const cl::Buffer &cl_buffer() const override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + mutable CLTensorAllocator _allocator; /**< Instance of the OpenCL tensor allocator */ +}; + +using CLImage = CLTensor; /**< CLImage is an alias of CLTensor */ +} +#endif /*__ARM_COMPUTE_CLTENSOR_H__ */ diff --git a/arm_compute/runtime/CL/CLTensorAllocator.h b/arm_compute/runtime/CL/CLTensorAllocator.h new file mode 100644 index 0000000000..a9fe207f60 --- /dev/null +++ b/arm_compute/runtime/CL/CLTensorAllocator.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLTENSORALLOCATOR_H__ +#define __ARM_COMPUTE_CLTENSORALLOCATOR_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +#include + +namespace arm_compute +{ +/** Basic implementation of a CL memory tensor allocator. */ +class CLTensorAllocator : public ITensorAllocator +{ +public: + /** Default constructor. */ + CLTensorAllocator(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLTensorAllocator(const CLTensorAllocator &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + CLTensorAllocator &operator=(const CLTensorAllocator &) = delete; + /** Allow instances of this class to be moved */ + CLTensorAllocator(CLTensorAllocator &&) = default; + /** Allow instances of this class to be moved */ + CLTensorAllocator &operator=(CLTensorAllocator &&) = default; + /** Default destructor */ + ~CLTensorAllocator() = default; + + /** Interface to be implemented by the child class to return the pointer to the mapped data. */ + uint8_t *data(); + /** Interface to be implemented by the child class to return the pointer to the CL data. */ + const cl::Buffer &cl_data() const; + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + * + * @return The mapping address. + */ + uint8_t *map(cl::CommandQueue &q, bool blocking); + /** Enqueue an unmap operation of the allocated buffer on the given queue. 
+ * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] mapping The CPU mapping to unmap. + */ + void unmap(cl::CommandQueue &q, uint8_t *mapping); + /** Allocate size specified by TensorInfo of OpenCL memory. + * + * @note: The tensor must not already be allocated when calling this function. + * + * */ + void allocate() override; + +protected: + /** Call map() on the OpenCL buffer. + * + * @return A pointer to the beginning of the tensor's allocation. + */ + uint8_t *lock() override; + /** Call unmap() on the OpenCL buffer. */ + void unlock() override; + +private: + cl::Buffer _buffer; /**< OpenCL buffer containing the tensor data. */ + uint8_t *_mapping; /**< Pointer to the CPU mapping of the OpenCL buffer. */ +}; +} +#endif /* __ARM_COMPUTE_CLTENSORALLOCATOR_H__ */ diff --git a/arm_compute/runtime/CL/ICLSimpleFunction.h b/arm_compute/runtime/CL/ICLSimpleFunction.h new file mode 100644 index 0000000000..130c58a98c --- /dev/null +++ b/arm_compute/runtime/CL/ICLSimpleFunction.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLSIMPLEFUNCTION_H__ +#define __ARM_COMPUTE_ICLSIMPLEFUNCTION_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +/** Basic interface for functions which have a single OpenCL kernel */ +class ICLSimpleFunction : public IFunction +{ +public: + /** Default constructor */ + ICLSimpleFunction(); + + // Inherited methods overridden: + void run() override final; + +protected: + std::unique_ptr _kernel; /**< Kernel to run */ + CLFillBorderKernel _border_handler; /**< Kernel to handle borders */ +}; +} +#endif /*__ARM_COMPUTE_ICLSIMPLEFUNCTION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h new file mode 100644 index 0000000000..40ee396644 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H__ +#define __ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLAbsoluteDifferenceKernel + * + * @note The tensor data types for the inputs must be U8 or S16. + * @note The function calculates the absolute difference also when the 2 inputs have different tensor data types. + */ +class CLAbsoluteDifference : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 First input tensor. Data types supported: U8, S16 + * @param[in] input2 Second input tensor. Data types supported: U8, S16 + * @param[out] output Output tensor. 
Data types supported: U8, S16 + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLAccumulate.h b/arm_compute/runtime/CL/functions/CLAccumulate.h new file mode 100644 index 0000000000..51f6df9acb --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLAccumulate.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLACCUMULATE_H__ +#define __ARM_COMPUTE_CLACCUMULATE_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLAccumulateKernel */ +class CLAccumulate : public ICLSimpleFunction +{ +public: + /** Set the input and accumulation tensors. 
+ * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] accum Destination tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, ICLTensor *accum); +}; + +/** Basic function to run @ref CLAccumulateWeightedKernel */ +class CLAccumulateWeighted : public ICLSimpleFunction +{ +public: + /** Set the input and accumulation tensors, and the scale value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] alpha The input scalar value with a value in the range of [0, 1.0]. Data types supported: F32. + * @param[in,out] accum Accumulated tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, float alpha, ICLTensor *accum); +}; + +/** Basic function to run @ref CLAccumulateSquaredKernel */ +class CLAccumulateSquared : public ICLSimpleFunction +{ +public: + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] shift The input with a value in the range of [0, 15]. Data types supported: U32. + * @param[in,out] accum Accumulated tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum); +}; +} +#endif /*__ARM_COMPUTE_CLACCUMULATE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h new file mode 100644 index 0000000000..6468c996a2 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLACTIVATIONLAYER_H__ +#define __ARM_COMPUTE_CLACTIVATIONLAYER_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLActivationLayerKernel + * + * @note The function simulates an activation layer with the specified activation function. + */ +class CLActivationLayer : public ICLSimpleFunction +{ +public: + /** Set the input and output tensor. + * + * @param[in] input Source tensor. Data types supported: F16, F32, U16, S16. + * @param[out] output Destination tensor. Data type should match the input data type. + * @param[in] act_info Activation layer parameters. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); +}; +} +#endif /* __ARM_COMPUTE_CLACTIVATIONLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLArithmeticAddition.h b/arm_compute/runtime/CL/functions/CLArithmeticAddition.h new file mode 100644 index 0000000000..feadf39820 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLArithmeticAddition.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLARITHMETICADDITION_H__ +#define __ARM_COMPUTE_CLARITHMETICADDITION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLArithmeticAdditionKernel + * + * @note The tensor data type for the inputs must be U8, S16, F16, F32. 
+ * @note The function performs an arithmetic addition between two tensors. + */ +class CLArithmeticAddition : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] policy Policy to use to handle overflow. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); +}; +} +#endif /* __ARM_COMPUTE_CLARITHMETICADDITION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h b/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h new file mode 100644 index 0000000000..d7bb21144e --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__ +#define __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLArithmeticSubtractionKernel + * + * @note The tensor data type for the inputs must be U8, S16, F16, F32. + * @note The function performs an arithmetic subtraction between two tensors. + */ +class CLArithmeticSubtraction : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] policy Policy to use to handle overflow. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); +}; +} +#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h new file mode 100644 index 0000000000..a4a523baaa --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEAND_H__ +#define __ARM_COMPUTE_CLBITWISEAND_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBitwiseAndKernel. + * + * @note The tensor data type for the inputs must be U8. + * @note The function performs a bitwise AND operation using the two input tensors. + */ +class CLBitwiseAnd : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 Input tensor. Data types supported: U8. + * @param[in] input2 Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEAND_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseNot.h b/arm_compute/runtime/CL/functions/CLBitwiseNot.h new file mode 100644 index 0000000000..0ff16af870 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBitwiseNot.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISENOT_H__ +#define __ARM_COMPUTE_CLBITWISENOT_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBitwiseNotKernel. + * + * @note The tensor data type for the inputs must be U8. + * @note The function performs a bitwise NOT operation on input tensor. 
+ */ +class CLBitwiseNot : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISENOT_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseOr.h b/arm_compute/runtime/CL/functions/CLBitwiseOr.h new file mode 100644 index 0000000000..880c4762be --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBitwiseOr.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEOR_H__ +#define __ARM_COMPUTE_CLBITWISEOR_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBitwiseOrKernel. 
+ * + * @note The tensor data type for the inputs must be U8. + * @note The function performs a bitwise OR operation using the two input tensors. + */ +class CLBitwiseOr : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 Input tensor. Data types supported: U8. + * @param[in] input2 Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEOR_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseXor.h b/arm_compute/runtime/CL/functions/CLBitwiseXor.h new file mode 100644 index 0000000000..772dec22ea --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBitwiseXor.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLBITWISEXOR_H__ +#define __ARM_COMPUTE_CLBITWISEXOR_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBitwiseXorKernel. + * + * @note The tensor data type for the inputs must be U8. + * @note The function performs a bitwise XOR operation using the two input tensors. + */ +class CLBitwiseXor : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 Input tensor. Data types supported: U8. + * @param[in] input2 Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEXOR_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBox3x3.h b/arm_compute/runtime/CL/functions/CLBox3x3.h new file mode 100644 index 0000000000..5e51c1a390 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBox3x3.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBOX3X3_H__ +#define __ARM_COMPUTE_CLBOX3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute box filter 3x3. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLBox3x3Kernel + * + */ +class CLBox3x3 : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLBOX3X3_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLCannyEdge.h b/arm_compute/runtime/CL/functions/CLCannyEdge.h new file mode 100644 index 0000000000..09b8b5500a --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLCannyEdge.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCANNYEDGE_H__ +#define __ARM_COMPUTE_CLCANNYEDGE_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include <memory> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute canny edge on OpenCL.
This function calls the following OpenCL kernels and functions: + * + * -# @ref CLFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT) + * -# @ref CLSobel3x3 (if gradient_size == 3) or @ref CLSobel5x5 (if gradient_size == 5) or @ref CLSobel7x7 (if gradient_size == 7) + * -# @ref CLGradientKernel + * -# @ref CLEdgeNonMaxSuppressionKernel + * -# @ref CLEdgeTraceKernel + * + */ +class CLCannyEdge : public IFunction +{ +public: + /** Constructor */ + CLCannyEdge(); + /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] upper_thr Upper threshold used for the hysteresis. + * @param[in] lower_thr Lower threshold used for the hysteresis. + * @param[in] gradient_size Gradient size (3, 5 or 7). + * @param[in] norm_type Normalization type. If 1, L1-Norm; otherwise L2-Norm. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, + BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + virtual void run() override; + +private: + std::unique_ptr<IFunction> _sobel; /**< Pointer to the Sobel function (CLSobel3x3, CLSobel5x5 or CLSobel7x7, selected by gradient_size). */ + CLGradientKernel _gradient; /**< Gradient kernel. */ + CLEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel. */ + CLEdgeTraceKernel _edge_trace; /**< Edge tracing kernel. */ + CLImage _gx; /**< Source tensor - Gx component. */ + CLImage _gy; /**< Source tensor - Gy component. */ + CLImage _mag; /**< Source tensor - Magnitude. */ + CLImage _phase; /**< Source tensor - Phase.
*/ + CLImage _nonmax; /**< Source tensor - Non-Maxima suppressed. */ + CLImage _visited, _recorded, _l1_list_counter, _l1_stack; /**< Temporary tensors */ +}; +} + +#endif /* __ARM_COMPUTE_CLCANNYEDGE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLChannelCombine.h b/arm_compute/runtime/CL/functions/CLChannelCombine.h new file mode 100644 index 0000000000..337e6b4820 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLChannelCombine.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCHANNELCOMBINE_H__ +#define __ARM_COMPUTE_CLCHANNELCOMBINE_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to run @ref CLChannelCombineKernel to perform channel combination.
*/ +class CLChannelCombine : public ICLSimpleFunction +{ +public: + /** Initialize the function's inputs and output (four input planes, single-planar output). + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. + * @param[out] output The single-planar output tensor. + */ + void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); + /** Initialize the function's inputs and output (three input planes, multi-planar output). + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[out] output The multi-planar output image. + */ + void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); +}; +} +#endif /*__ARM_COMPUTE_CLCHANNELCOMBINE_H__*/ diff --git a/arm_compute/runtime/CL/functions/CLChannelExtract.h b/arm_compute/runtime/CL/functions/CLChannelExtract.h new file mode 100644 index 0000000000..1753374622 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLChannelExtract.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCHANNELEXTRACT_H__ +#define __ARM_COMPUTE_CLCHANNELEXTRACT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to run @ref CLChannelExtractKernel to perform channel extraction. */ +class CLChannelExtract : public ICLSimpleFunction +{ +public: + /** Initialize the function's source and destination (single-planar input). + * + * @param[in] input The input tensor to extract the channel from. Formats supported: Any single-planar format. + * @param[in] channel The channel to extract. + * @param[out] output The extracted channel. Must be of U8 format.
+ */ + void configure(const ICLTensor *input, Channel channel, ICLTensor *output); + /** Initialize the function's source and destination (multi-planar input). + * + * @param[in] input The multi-planar input image to extract the channel from. + * @param[in] channel The channel to extract. + * @param[out] output The extracted 2D channel. Must be of U8 format. + */ + void configure(const ICLMultiImage *input, Channel channel, ICLImage *output); +}; +} +#endif /*__ARM_COMPUTE_CLCHANNELEXTRACT_H__*/ diff --git a/arm_compute/runtime/CL/functions/CLColorConvert.h b/arm_compute/runtime/CL/functions/CLColorConvert.h new file mode 100644 index 0000000000..12457a0cf2 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLColorConvert.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_CLCOLORCONVERT_H__ +#define __ARM_COMPUTE_CLCOLORCONVERT_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to run @ref CLColorConvertKernel + * + * @note The function performs color convert between images. + */ +class CLColorConvert : public ICLSimpleFunction +{ +public: + /** Initialize the function's source and destination (single-planar to single-planar). + * + * @param[in] input The input single-planar tensor from which to convert + * @param[out] output The converted single-planar output tensor + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Initialize the function's source and destination (multi-planar to single-planar). + * + * @param[in] input The multi-planar input image from which to convert + * @param[out] output The converted single-planar output image + */ + void configure(const ICLMultiImage *input, ICLImage *output); + /** Initialize the function's source and destination (single-planar to multi-planar). + * + * @param[in] input The single-planar input image from which to convert + * @param[out] output The converted multi-planar output image + */ + void configure(const ICLImage *input, ICLMultiImage *output); + /** Initialize the function's source and destination (multi-planar to multi-planar). + * + * @param[in] input The multi-planar input image from which to convert + * @param[out] output The converted multi-planar output image + */ + void configure(const ICLMultiImage *input, ICLMultiImage *output); +}; +} +#endif /* __ARM_COMPUTE_CLCOLORCONVERT_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLConvolution.h b/arm_compute/runtime/CL/functions/CLConvolution.h new file mode 100644 index 0000000000..f526f6ff4a --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLConvolution.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCONVOLUTION_H__ +#define __ARM_COMPUTE_CLCONVOLUTION_H__ + +#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include "arm_compute/runtime/IFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute convolution of size 3x3. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLConvolution3x3Kernel + * + */ +class CLConvolution3x3 : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, conv and border_mode.
+ * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); +}; + +/** Basic function to execute square convolution. Currently it supports 5x5, 7x7, 9x9. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLConvolutionKernel or
+ * @ref CLSeparableConvolutionHorKernel and @ref CLSeparableConvolutionVertKernel (if convolution matrix is separable) + * + */ +template +class CLConvolutionSquare : public IFunction +{ +public: + /** Default constructor */ + CLConvolutionSquare(); + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overriden: + void run() override; + +private: + CLTensor _tmp; /**< temporary buffer for output of horizontal pass */ + bool _is_separable; /**< true if the convolution can be separated */ + CLSeparableConvolutionHorKernel _kernel_hor; /**< kernel for horizontal pass of separated convolution */ + CLSeparableConvolutionVertKernel _kernel_vert; /**< kernel for vertical pass of separated convolution */ + CLConvolutionKernel _kernel; /**< kernel for non-separated convolution **/ + CLFillBorderKernel _border_handler; /**< kernel for border handling */ +}; + +/** Basic function to run 5x5 convolution. */ +using CLConvolution5x5 = CLConvolutionSquare<5>; +/** Basic function to run 7x7 convolution. */ +using CLConvolution7x7 = CLConvolutionSquare<7>; +/** Basic function to run 9x9 convolution. 
*/ +using CLConvolution9x9 = CLConvolutionSquare<9>; + +/** Basic function to execute non-square convolution. This function calls the following CL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLConvolutionRectangleKernel
+ * + * @note Convolution rectangle should have dimensions of 3, 5, 7, 9 + */ +class CLConvolutionRectangle : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[in] conv @p rows x @p cols S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] rows Rows of convolution kernel. + * @param[in] cols Columns of convolution kernel. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLCONVOLUTION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h new file mode 100644 index 0000000000..2a9b487fe4 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ +#define __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" +#include "arm_compute/core/CL/kernels/CLConvolutionLayerWeightsReshapeKernel.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" +#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to compute the convolution layer. This function calls the following OpenCL kernels: + * + * -# @ref CLConvolutionLayerWeightsReshapeKernel (executed only once for each configuration) + * -# @ref CLGEMMTranspose1xWKernel (executed only once for each configuration) + * -# @ref CLIm2ColKernel + * -# @ref CLGEMMInterleave4x4Kernel + * -# @ref CLGEMMMatrixMultiplyKernel + * -# @ref CLCol2ImKernel + */ +class CLConvolutionLayer : public IFunction +{ +public: + /** Default constructor */ + CLConvolutionLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represents a batch of inputs. + * Data types supported: F16, F32. + * @param[in] weights Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Destination tensor.
3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + CLIm2ColKernel _input_im2col_kernel; + CLConvolutionLayerWeightsReshapeKernel _weights_reshape_kernel; + CLGEMMInterleave4x4Kernel _input_interleave_kernel; + CLGEMMTranspose1xWKernel _weights_transposed_kernel; + CLGEMMMatrixMultiplyKernel _mm_kernel; + CLCol2ImKernel _output_col2im_kernel; + CLTensor _input_im2col_reshaped; + CLTensor _input_interleaved_reshaped; + CLTensor _weights_reshaped; + CLTensor _weights_transposed; + CLTensor _gemm_output; + bool _is_first_run; + bool _has_bias; + bool _is_fc; +}; +} +#endif /* __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLDepthConvert.h b/arm_compute/runtime/CL/functions/CLDepthConvert.h new file mode 100644 index 0000000000..f11027656d --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLDepthConvert.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDEPTHCONVERT_H__ +#define __ARM_COMPUTE_CLDEPTHCONVERT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLDepthConvertKernel. */ +class CLDepthConvert : public ICLSimpleFunction +{ +public: + /** Initialize the function's source and destination. + * + * Input data type must be different from output data type. + * + * Valid conversions Input -> Output : + * + * - U8 -> U16, S16, U32, S32 + * - U16 -> U8, U32, S32 + * - S16 -> U8, U32, S32 + * - U32 -> U8, U16, S16 + * - S32 -> U8, U16, S16 + * + * @param[in] input The input tensor to convert. Data types supported: U8, U16, S16, U32 or S32. + * @param[out] output The output tensor. Data types supported: U8, U16, S16, U32 or S32. + * @param[in] policy Conversion policy. + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + */ + void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); +}; +} +#endif /*__ARM_COMPUTE_CLDEPTHCONVERT_H__*/ diff --git a/arm_compute/runtime/CL/functions/CLDerivative.h b/arm_compute/runtime/CL/functions/CLDerivative.h new file mode 100644 index 0000000000..05033e8172 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLDerivative.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDERIVATIVE_H__ +#define __ARM_COMPUTE_CLDERIVATIVE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute first order derivative operator. This function calls the following CL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLDerivativeKernel + * + */ +class CLDerivative : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination tensor. 
Derivative along the X direction. Data types supported: S16. + * @param[out] output_y (optional) Destination tensor. Derivative along the Y direction. Data types supported: S16. + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /* __ARM_COMPUTE_CLDERIVATIVE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLDilate.h b/arm_compute/runtime/CL/functions/CLDilate.h new file mode 100644 index 0000000000..8534139c86 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLDilate.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLDILATE_H__ +#define __ARM_COMPUTE_CLDILATE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute dilate. This function calls the following OpenCL kernels: +* +* -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) +* -# @ref CLDilateKernel +* +*/ +class CLDilate : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and border mode. + * + * @param[in,out] input First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value); +}; +} +#endif /*__ARM_COMPUTE_CLDILATE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h new file mode 100644 index 0000000000..d7182756b5 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H__ +#define __ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H__ + +#include "arm_compute/core/CL/kernels/CLHistogramKernel.h" +#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h" +#include "arm_compute/runtime/CL/CLDistribution1D.h" +#include "arm_compute/runtime/CL/CLLut.h" +#include "arm_compute/runtime/IFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute histogram equalization. This function calls the following CL kernels: + * + * -# @ref CLHistogramKernel + * -# @ref CLTableLookupKernel + * + */ +class CLEqualizeHistogram : public IFunction +{ +public: + /** Default Constructor. */ + CLEqualizeHistogram(); + /** Initialise the kernel's inputs. + * + * @param[in] input Input image. Data types supported: U8.
+ * @param[out] output Output of same data type with equalized brightness and contrast. + */ + void configure(const ICLImage *input, ICLImage *output); + + // Inherited methods overridden: + void run() override; + +private: + CLHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. */ + CLHistogramBorderKernel _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */ + CLTableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ + CLDistribution1D _hist; /**< Distribution that holds the histogram of the input image. */ + CLDistribution1D _cum_dist; /**< Distribution that holds the cumulative distribution of the input histogram. */ + CLLut _cd_lut; /**< Holds the equalization lookup table. */ + static const uint32_t max_range = 256; /**< Histogram range of the internal histograms. */ + static const uint32_t nr_bins = 256; /**< Histogram bins of the internal histograms. */ +}; +} +#endif /*__ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLErode.h b/arm_compute/runtime/CL/functions/CLErode.h new file mode 100644 index 0000000000..cd2f5516e2 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLErode.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLERODE_H__ +#define __ARM_COMPUTE_CLERODE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute erode. This function calls the following OpenCL kernels: +* +* -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) +* -# @ref CLErodeKernel +* +*/ +class CLErode : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and border mode + * + * @param[in,out] input First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. Defaults to 0. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLERODE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLFastCorners.h b/arm_compute/runtime/CL/functions/CLFastCorners.h new file mode 100644 index 0000000000..79d82af462 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLFastCorners.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLFASTCORNERS_H__ +#define __ARM_COMPUTE_CLFASTCORNERS_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/runtime/CL/CLArray.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h" +#include "arm_compute/runtime/IFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute fast corners.
This function calls the following CL kernels and functions: + * + * -# @ref CLFastCornersKernel + * -# @ref CLNonMaximaSuppression3x3 (executed if nonmax_suppression == true) + * -# @ref CLCopyToArrayKernel + * + */ +class CLFastCorners : public IFunction +{ +public: + /** Constructor */ + CLFastCorners(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFastCorners(const CLFastCorners &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFastCorners &operator=(const CLFastCorners &) = delete; + /** Initialize the function's source, threshold, output array and border_mode. + * + * @param[in] input Source image. Data types supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] nonmax_suppression If true, non-maximum suppression is applied to detected corners before being placed in the array. + * @param[out] corners Array of keypoints to store the results. + * @param[in,out] num_corners Record number of corners in the array + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ */ + void configure(const ICLImage *input, float threshold, bool nonmax_suppression, CLKeyPointArray *corners, unsigned int *num_corners, + BorderMode border_mode, uint8_t constant_border_value = 0); + // Inherited methods overridden: + void run() override; + +private: + CLFastCornersKernel _fast_corners_kernel; + CLNonMaximaSuppression3x3 _suppr_func; + CLCopyToArrayKernel _copy_array_kernel; + CLImage _output; + CLImage _suppr; + Window _win; + bool _non_max; + unsigned int *_num_corners; + cl::Buffer _num_buffer; + CLKeyPointArray *_corners; + uint8_t _constant_border_value; +}; +} +#endif /*__ARM_COMPUTE_CLFASTCORNERS_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h new file mode 100644 index 0000000000..b4855475c3 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLFillBorder.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLFILLBORDER_H__ +#define __ARM_COMPUTE_CLFILLBORDER_H__ + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLFillBorderKernel */ +class CLFillBorder : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in,out] tensor Source tensor. Data types supported: U8, S16 + * @param[in] border_width The border width + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); +}; +} +#endif /*__ARM_COMPUTE_CLFILLBORDER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h new file mode 100644 index 0000000000..1edbdbc951 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ +#define __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ + +#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" +#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLGEMM.h" + +namespace arm_compute +{ +/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following OpenCL functions and kernels: + * -# @ref CLConvolutionLayer (called when the weights have 4 dimensions.
Pass the stride as 1 and padding as 0) + * -# @ref CLGEMM (called when the weights have 2 dimensions) + * -# @ref CLTransposeKernel (called when the weights have 2 dimensions) + * -# @ref CLGEMMMatrixAccumulateBiasesKernel (called when the weights have 2 dimensions) + * + * @note The fully connected layer accepts "weights" tensors only with 2 or 4 dimensions. In particular, the weights tensor has 4 dimensions, + * if the fully connected layer is computed after a convolution layer otherwise the tensor has 2 dimensions if the fully connected layer + * is computed after another fully connected layer + */ +class CLFullyConnectedLayer : public IFunction +{ +public: + /** Constructor */ + CLFullyConnectedLayer(); + /** Set the input and output tensors. + * + * @param[in,out] input Source tensor. Data type supported: F16, F32. (Written to only if @ref CLGEMM needs to pad with zeros the tensor) + * @param[in,out] weights Weights tensor. The weights can be 2 dimensional or 4 dimensional. Data type supported: Same as @p input. (Written to only if @ref CLGEMM needs to pad with zeros the tensor) + * @param[in] biases Bias tensor. Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ */ + void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *biases, ICLTensor *output); + + // Inherited methods overridden: + void run() override; + +private: + /** Run the case where the fully connected layer follows a convolution layer */ + void run_conv(); + /** Run the case where the fully connected layer follows another fully connected layer */ + void run_fc(); + /** Common signature for the functions to run */ + using FullyConnectedLayerFunction = void (CLFullyConnectedLayer::*)(void); + +private: + CLConvolutionLayer _conv_function; + CLGEMM _gemm_function; + CLTransposeKernel _transpose_kernel; + CLGEMMMatrixAccumulateBiasesKernel _acc_biases_kernel; + FullyConnectedLayerFunction _run_func; + CLTensor _weights_transpose; + bool _is_first_run; + bool _run_acc_biases; +}; +} +#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h new file mode 100644 index 0000000000..043b2b8115 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGEMM.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGEMM_H__ +#define __ARM_COMPUTE_CLGEMM_H__ + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute GEMM on OpenCL. Data types supported: F32, F16. This function calls the following OpenCL kernels: + * + * -# @ref CLGEMMInterleave4x4Kernel (if the output tensor is a matrix) + * -# @ref CLGEMMTranspose1xWKernel (if the output tensor is a matrix) + * -# @ref CLGEMMMatrixMultiplyKernel + * -# @ref CLGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0) + * + */ +class CLGEMM : public IFunction +{ +public: + /** Default constructor. */ + CLGEMM(); + /** Initialise the kernel's inputs and output + * + * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. + * + * @note All tensors must have the same data type. Data types supported: F32, F16 + * + * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix + * + * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F32, F16 + * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a. + * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
+ * @param[out] output Output tensor. Data type supported: same as @p a + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of matrix C + */ + void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta); + + // Inherited methods overridden: + void run() override; + +private: + CLGEMMInterleave4x4Kernel _interleave_kernel; /**< Interleave 4x4 kernel */ + CLGEMMTranspose1xWKernel _transpose_kernel; /**< Transpose 1xW kernel */ + CLGEMMMatrixMultiplyKernel _mm_kernel; /**< Matrix multiply kernel */ + CLGEMMMatrixAdditionKernel _ma_kernel; /**< Matrix addition kernel */ + CLTensor _tmp_a; /**< Internal temporary tensor (presumably the reshaped copy of A - confirm in CLGEMM.cpp) */ + CLTensor _tmp_b; /**< Internal temporary tensor (presumably the reshaped copy of B - confirm in CLGEMM.cpp) */ + bool _run_vector_matrix_multiplication; /**< Presumably true when the first input is a vector - confirm in CLGEMM.cpp */ + bool _run_addition; /**< Presumably true when the matrix addition kernel has to run (c != nullptr and beta != 0.0) - confirm in CLGEMM.cpp */ +}; +} + +#endif /* __ARM_COMPUTE_CLGEMM_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowp.h b/arm_compute/runtime/CL/functions/CLGEMMLowp.h new file mode 100644 index 0000000000..da8883c3f8 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGEMMLowp.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGEMMLOWP_H__ +#define __ARM_COMPUTE_CLGEMMLOWP_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute GEMMLowp on OpenCL. This function calls the following OpenCL kernels: +* +* -# @ref CLGEMMInterleave4x4Kernel +* -# @ref CLGEMMTranspose1xWKernel +* -# @ref CLGEMMLowpMatrixMultiplyKernel +* +*/ +class CLGEMMLowp : public IFunction +{ +public: + /** Constructor */ + CLGEMMLowp(); + /** Initialise the kernel's inputs, output + * + * @note GEMM_LOWP: low precision matrix multiply kernel + * This kernel performs the following computation: + * + * -# Convert a values from uint8 to int32 and add a_offset to each of them. + * -# Convert b values from uint8 to int32 and add b_offset to each of them. + * -# Compute the int32 matrix product of the resulting a * b. + * -# Add output_offset to each entry of the result. + * -# Multiply each entry of the result by output_mult_int and round to the nearest integer + * -# Clamp the resulting int32 values to the [0..255] range and cast to uint8. + * + * @param[in] a First input tensor (Matrix A). Data types supported: U8. + * @param[in] b Second input tensor (Matrix B). Data types supported: same as @p a. + * @param[out] output Output tensor. Data types supported: same as @p a. + * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_offset Offset to be added to each element of the output matrix + * @param[in] output_mult_int Multiplied with each element of the output matrix + * @param[in] shift Number of bits to shift right the result. + */ + void configure(const ICLTensor *a, const ICLTensor *b, ICLTensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift); + + // Inherited methods overridden: + void run() override; + +private: + CLGEMMInterleave4x4Kernel _interleave_kernel; + CLGEMMTranspose1xWKernel _transpose_kernel; + CLGEMMLowpMatrixMultiplyKernel _mm_kernel; + CLTensor _tmp_a; + CLTensor _tmp_b; +}; +} +#endif /*__ARM_COMPUTE_CLGEMMLOWP_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGaussian3x3.h b/arm_compute/runtime/CL/functions/CLGaussian3x3.h new file mode 100644 index 0000000000..f8223bc5f5 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGaussian3x3.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGAUSSIAN3X3_H__ +#define __ARM_COMPUTE_CLGAUSSIAN3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute gaussian filter 3x3. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLGaussian3x3Kernel + * + */ +class CLGaussian3x3 : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIAN3X3_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGaussian5x5.h b/arm_compute/runtime/CL/functions/CLGaussian5x5.h new file mode 100644 index 0000000000..148b9a9924 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGaussian5x5.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGAUSSIAN5X5_H__ +#define __ARM_COMPUTE_CLGAUSSIAN5X5_H__ + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute gaussian filter 5x5. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLGaussian5x5HorKernel + * -# @ref CLGaussian5x5VertKernel + * + */ +class CLGaussian5x5 : public IFunction +{ +public: + /** Default Constructor. */ + CLGaussian5x5(); + /** Initialise the function's source, destinations and border mode.
+ * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + CLGaussian5x5HorKernel _kernel_hor; /**< Horizontal pass kernel */ + CLGaussian5x5VertKernel _kernel_vert; /**< Vertical pass kernel */ + CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + CLImage _tmp; /**< Temporary buffer */ +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIAN5X5_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h new file mode 100644 index 0000000000..d7f53c1e04 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGAUSSIANPYRAMID_H__ +#define __ARM_COMPUTE_CLGAUSSIANPYRAMID_H__ + +#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h" + +#include "arm_compute/core/CL/kernels/CLScaleKernel.h" +#include "arm_compute/core/IPyramid.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLPyramid.h" +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" +#include "arm_compute/runtime/IFunction.h" + +#include <cstdint> +#include <memory> + +namespace arm_compute +{ +class ICLTensor; + +/** Common interface for all Gaussian pyramid functions + */ +class CLGaussianPyramid : public IFunction +{ +public: + /** Constructor */ + CLGaussianPyramid(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramid(const CLGaussianPyramid &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramid &operator=(const CLGaussianPyramid &) = delete; + /** Initialise the function's source, destinations and border mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] pyramid Destination pyramid tensors. Data types supported at each level: U8. + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ * + */ + virtual void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value = 0) = 0; + +protected: + ICLTensor *_input; + CLPyramid *_pyramid; + CLPyramid _tmp; +}; + +/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLGaussianPyramidHorKernel + * -# @ref CLGaussianPyramidVertKernel + */ +class CLGaussianPyramidHalf : public CLGaussianPyramid +{ +public: + /** Constructor */ + CLGaussianPyramidHalf(); + + // Inherited methods overridden: + void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; + void run() override; + +private: + std::unique_ptr<CLFillBorderKernel[]> _border_handler; + std::unique_ptr<CLGaussianPyramidHorKernel[]> _horizontal_reduction; + std::unique_ptr<CLGaussianPyramidVertKernel[]> _vertical_reduction; +}; + +/** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following OpenCL kernels and functions: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLGaussian5x5 + * -# @ref CLScaleKernel + */ +class CLGaussianPyramidOrb : public CLGaussianPyramid +{ +public: + /** Constructor */ + CLGaussianPyramidOrb(); + + // Inherited methods overridden: + void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; + void run() override; + +private: + std::unique_ptr<CLGaussian5x5[]> _gauss5x5; + std::unique_ptr<CLScaleKernel[]> _scale_nearest; +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIANPYRAMID_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h new file mode 100644 index 0000000000..90da687435 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHARRISCORNERS_H__ +#define __ARM_COMPUTE_CLHARRISCORNERS_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" +#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include + +#include + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute harris corners detection. 
This function calls the following CL and NEON kernels and functions: + * + * @note Requires CPU support for the kernels: CPPCornerCandidatesKernel and CPPSortEuclideanDistanceKernel. + * + * -# @ref CLSobel3x3 (if gradient_size == 3) or
+ * @ref CLSobel5x5 (if gradient_size == 5) or
+ * @ref CLSobel7x7 (if gradient_size == 7) + * -# @ref CLFillBorderKernel + * -# @ref CLHarrisScoreKernel + * -# @ref CLNonMaximaSuppression3x3 + * -# @ref CPPCornerCandidatesKernel + * -# @ref CPPSortEuclideanDistanceKernel + */ +class CLHarrisCorners : public IFunction +{ +public: + /** Constructor */ + CLHarrisCorners(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHarrisCorners(const CLHarrisCorners &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + const CLHarrisCorners &operator=(const CLHarrisCorners &) = delete; + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source image. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[in] threshold Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] min_dist Radial Euclidean distance for the euclidean distance stage. + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation + * @param[in] gradient_size The gradient window size to use on the input. The implementation supports 3, 5, and 7 + * @param[in] block_size The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7. + * @param[out] corners Array of keypoints to store the results. + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(ICLImage *input, float threshold, float min_dist, float sensitivity, + int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners, + BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr _sobel; /**< Sobel function */ + CLHarrisScoreKernel _harris_score; /**< Harris score kernel */ + CLNonMaximaSuppression3x3Kernel _non_max_suppr; /**< Non-maxima suppression kernel */ + CPPCornerCandidatesKernel _candidates; /**< Corner candidates kernel */ + CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Sort by euclidean distance kernel */ + CLFillBorderKernel _border_gx; /**< Border handler before running harris score */ + CLFillBorderKernel _border_gy; /**< Border handler before running harris score */ + CLImage _gx; /**< Source image - Gx component */ + CLImage _gy; /**< Source image - Gy component */ + CLImage _score; /**< Source image - Harris score */ + CLImage _nonmax; /**< Source image - Non-Maxima suppressed image */ + std::unique_ptr _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */ + int32_t _num_corner_candidates; /**< Number of potential corner candidates */ + ICLKeyPointArray *_corners; /**< Output corners array */ +}; +} +#endif /*__ARM_COMPUTE_CLHARRISCORNERS_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLHistogram.h b/arm_compute/runtime/CL/functions/CLHistogram.h new file mode 100644 index 0000000000..455b61812d --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLHistogram.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHISTOGRAM_H__ +#define __ARM_COMPUTE_CLHISTOGRAM_H__ + +#include "arm_compute/core/CL/kernels/CLHistogramKernel.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLDistribution1D; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute histogram.
This function calls the following OpenCL kernels: + * + * -# @ref CLHistogramKernel + * -# @ref CLHistogramBorderKernel + * + */ +class CLHistogram : public IFunction +{ +public: + /** + * Default constructor + */ + CLHistogram(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogram(const CLHistogram &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + const CLHistogram &operator=(const CLHistogram &) = delete; + /** Initialize the function + * + * @param[in] input Source image. Data types supported: U8 + * @param[out] output Output distribution. + */ + void configure(const ICLImage *input, ICLDistribution1D *output); + + // Inherited methods overridden: + void run() override; + +private: + CLHistogramKernel _kernel; /**< Kernel to run */ + CLHistogramBorderKernel _kernel_border; /**< Border kernel to run */ +}; +} +#endif /*__ARM_COMPUTE_CLHISTOGRAM_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLIntegralImage.h b/arm_compute/runtime/CL/functions/CLIntegralImage.h new file mode 100644 index 0000000000..25fc549b29 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLIntegralImage.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLINTEGRALIMAGE_H__ +#define __ARM_COMPUTE_CLINTEGRALIMAGE_H__ + +#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute integral image. This function calls the following OpenCL kernels: + * + * -# @ref CLIntegralImageHorKernel + * -# @ref CLIntegralImageVertKernel + * + */ +class CLIntegralImage : public IFunction +{ +public: + /** Default Constructor. */ + CLIntegralImage(); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U32. + */ + void configure(const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run() override; + +protected: + CLIntegralImageHorKernel _integral_hor; /**< Integral Image Horizontal kernel */ + CLIntegralImageVertKernel _integral_vert; /**< Integral Image Vertical kernel */ +}; +} +#endif /*__ARM_COMPUTE_CLINTEGRALIMAGE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h new file mode 100644 index 0000000000..0c6708aa73 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLLAPLACIANPYRAMID_H__ +#define __ARM_COMPUTE_CLLAPLACIANPYRAMID_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLPyramid.h" +#include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h" +#include "arm_compute/runtime/CL/functions/CLDepthConvert.h" +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" +#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h" +#include "arm_compute/runtime/IFunction.h" + +#include +#include +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute laplacian pyramid. This function calls the following OpenCL kernels and functions: + * + * -# @ref CLGaussianPyramidHalf + * -# @ref CLGaussian5x5 + * -# @ref CLArithmeticSubtraction + * + * First a Gaussian pyramid is created. 
Then, for each level i, the corresponding tensor I(i) is blurred with the Gaussian 5x5 filter, and the + * difference between the two tensors is the corresponding level L(i) of the Laplacian pyramid. + * L(i) = I(i) - Gaussian5x5(I(i)) + * Level 0 always has the same first two dimensions as the input tensor. +*/ +class CLLaplacianPyramid : public IFunction +{ +public: + /** Constructor */ + CLLaplacianPyramid(); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] pyramid Destination pyramid tensors, Data types supported at each level: S16. + * @param[out] output The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data types supported: S16. + * The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is: + * output.width() = input.width() / pow(2,pyramid_levels-1) and output.height() = input.height() / pow(2,pyramid_levels-1) + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value); + + // Inherited methods overridden: + void run() override; + +private: + size_t _num_levels; + CLGaussianPyramidHalf _gaussian_pyr_function; + std::unique_ptr _convf; + std::unique_ptr _subf; + CLDepthConvert _depth_function; + CLPyramid _gauss_pyr; + CLPyramid _conv_pyr; +}; +} +#endif /*__ARM_COMPUTE_CLLAPLACIANPYRAMID_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h new file mode 100644 index 0000000000..4bc7eb65ce --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__ +#define __ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLPyramid.h" +#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h" +#include "arm_compute/runtime/CL/functions/CLDepthConvert.h" +#include "arm_compute/runtime/CL/functions/CLScale.h" +#include "arm_compute/runtime/IFunction.h" + +#include +#include + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute laplacian reconstruction. This function calls the following OpenCL kernels and functions: + * + * -# @ref CLArithmeticAddition + * -# @ref CLScale + * -# @ref CLDepthConvert + * + * This function reconstructs the original image from a Laplacian Image Pyramid. 
+ * + * The input image is added to the last level of the Laplacian pyramid L(n-1); the resulting image is upsampled to the + * resolution of the next pyramid level. + * + * I(n-2) = upsample(input + L(n-1)) + * + * For each pyramid level i, except i=0 and i=n-1: + * I(i-1) = upsample(I(i) + L(i)) + * + * output = I(0) + L(0) +*/ +class CLLaplacianReconstruct : public IFunction +{ +public: + /** Constructor */ + CLLaplacianReconstruct(); + /** Initialise the function's source, destinations and border mode. + * + * The Output image must have the same size as the first level of the pyramid. + * The Input image must have the same size as the last level of the pyramid. + * + * The idea is to reconstruct the original hi-res image from a low-res representation of it and the laplacian pyramid. + * + * @param[in] pyramid Laplacian pyramid tensors, Data types supported at each level: S16. + * @param[in] input Source tensor. Data types supported: S16. + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(const CLPyramid *pyramid, const ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value); + + // Inherited methods overridden: + void run() override; + +private: + CLPyramid _tmp_pyr; + std::unique_ptr _addf; + std::unique_ptr _scalef; + CLDepthConvert _depthf; +}; +} +#endif /*__ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLMagnitude.h b/arm_compute/runtime/CL/functions/CLMagnitude.h new file mode 100644 index 0000000000..dc5f9139b3 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLMagnitude.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMAGNITUDE_H__ +#define __ARM_COMPUTE_CLMAGNITUDE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLMagnitudePhaseKernel. */ +class CLMagnitude : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs. + * + * @param[in] input1 First tensor input. Data types supported: S16. + * @param[in] input2 Second tensor input. Data types supported: S16. + * @param[out] output Output tensor. Data types supported: S16. + * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM); +}; +} +#endif /*__ARM_COMPUTE_CLMAGNITUDE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h new file mode 100644 index 0000000000..e33bcdd779 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLMeanStdDev.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLMEANSTDDEV_H__ +#define __ARM_COMPUTE_CLMEANSTDDEV_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +/** Basic function to execute mean and standard deviation by calling @ref CLMeanStdDevKernel */ +class CLMeanStdDev : public IFunction +{ +public: + /** Default Constructor. */ + CLMeanStdDev(); + /** Initialise the kernel's inputs and outputs. + * + * @param[in] input Input image. Data types supported: U8. + * @param[out] mean Output average pixel value. + * @param[out] stddev (Optional) Output standard deviation of pixel values. + */ + void configure(const ICLImage *input, float *mean, float *stddev = nullptr); + + // Inherited methods overridden: + void run() override; + +private: + CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that performs the mean and standard deviation calculation. */ + cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ + cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ +}; +} +#endif /*__ARM_COMPUTE_CLMEANSTDDEV_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLMedian3x3.h b/arm_compute/runtime/CL/functions/CLMedian3x3.h new file mode 100644 index 0000000000..af84ba7289 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLMedian3x3.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMEDIAN3X3_H__ +#define __ARM_COMPUTE_CLMEDIAN3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute median filter. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLMedian3x3Kernel + * + */ +class CLMedian3x3 : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. 
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLMEDIAN3X3_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h new file mode 100644 index 0000000000..84fd67515b --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLMINMAXLOCATION_H__ +#define __ARM_COMPUTE_CLMINMAXLOCATION_H__ + +#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h" +#include "arm_compute/runtime/CL/CLArray.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute min and max location. This function calls the following OpenCL kernels: + * + * -# @ref CLMinMaxKernel + * -# @ref CLMinMaxLocationKernel + */ +class CLMinMaxLocation : public IFunction +{ +public: + /** Constructor */ + CLMinMaxLocation(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLocation(const CLMinMaxLocation &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLocation &operator=(const CLMinMaxLocation &) = delete; + /** Allow instances of this class to be moved */ + CLMinMaxLocation(CLMinMaxLocation &&) = default; + /** Allow instances of this class to be moved */ + CLMinMaxLocation &operator=(CLMinMaxLocation &&) = default; + /** Initialise the kernel's inputs and outputs. + * + * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. + * + * @param[in] input Input image. Data types supported: U8 or S16. + * @param[out] min Minimum value of image. + * @param[out] max Maximum value of image. + * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. + * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. + * @param[out] min_count (Optional) Number of minimum value encounters. + * @param[out] max_count (Optional) Number of maximum value encounters. 
+ */ + void configure(const ICLImage *input, int32_t *min, int32_t *max, + CLCoordinates2DArray *min_loc = nullptr, CLCoordinates2DArray *max_loc = nullptr, + uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); + + // Inherited methods overridden: + void run() override; + +private: + CLMinMaxKernel _min_max_kernel; /**< Kernel that performs min/max */ + CLMinMaxLocationKernel _min_max_loc_kernel; /**< Kernel that counts min/max occurrences and identifies their positions */ + cl::Buffer _min_max_vals; /**< Buffer to collect min, max values */ + cl::Buffer _min_max_count_vals; /**< Buffer to collect min, max occurrence counts */ + int32_t *_min; /**< Minimum value. */ + int32_t *_max; /**< Maximum value. */ + uint32_t *_min_count; /**< Minimum value occurrences. */ + uint32_t *_max_count; /**< Maximum value occurrences. */ + CLCoordinates2DArray *_min_loc; /**< Minimum value occurrences coordinates. */ + CLCoordinates2DArray *_max_loc; /**< Maximum value occurrences coordinates. */ +}; +} +#endif /*__ARM_COMPUTE_CLMINMAXLOCATION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h new file mode 100644 index 0000000000..9eee33e0ba --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLNONLINEARFILTER_H__ +#define __ARM_COMPUTE_CLNONLINEARFILTER_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute non linear filter. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLNonLinearFilterKernel + * + * @note Supported mask dimensions squares of sizes 3, 5 + */ +class CLNonLinearFilter : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, + BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLNONLINEARFILTER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h new file mode 100644 index 0000000000..ebb32ed217 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H__ +#define __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following CL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLNonMaximaSuppression3x3Kernel + */ +class CLNonMaximaSuppression3x3 : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT + * The constant value used with CONSTANT border mode is 0 + * + * @param[in,out] input Source tensor. Data types supported: U8, F32. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination for the 3x3 non-maxima suppression. Data types supported: U8, F32. Must be the same data type as @p input. + * @param[in] border_mode Border mode to use for non-maxima suppression. + * The implementation supports just 2 border modes: UNDEFINED and CONSTANT + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode); +}; +} +#endif /* __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h new file mode 100644 index 0000000000..a4dae85c1d --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_CLNORMALIZATIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h" +#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to simulate a normalization layer. This function calls the following CL kernels: + * + * -# @ref CLPixelWiseMultiplicationKernel + * -# @ref CLFillBorderKernel + * -# @ref CLNormalizationLayerKernel + * + */ +class CLNormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + CLNormalizationLayer(); + /** Set the input and output tensors. 
+ * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F16, F32. Number of channels must be 1. + * @param[out] output Destination tensor. Dimensions, data type and number of channels must match the input ones. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run() override; + +private: + CLTensor _squared_input; /**< The intermediate buffer which stores results of squaring input*/ + CLNormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel to run */ + CLPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel to run */ + CLFillBorderKernel _border_handler; /**< Kernel to handle borders */ +}; +} +#endif /* __ARM_COMPUTE_CLNORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLOpticalFlow.h b/arm_compute/runtime/CL/functions/CLOpticalFlow.h new file mode 100644 index 0000000000..ca3f86100e --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLOpticalFlow.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLOPTICALFLOW_H__ +#define __ARM_COMPUTE_CLOPTICALFLOW_H__ + +#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h" + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLArray.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLScharr3x3.h" +#include "arm_compute/runtime/IFunction.h" + +#include +#include +#include + +namespace arm_compute +{ +class CLPyramid; + +using CLLKInternalKeypointArray = CLArray; +using CLCoefficientTableArray = CLArray; +using CLOldValueArray = CLArray; + +/** Basic function to execute optical flow. 
This function calls the following OpenCL kernels and functions: + * + * -# @ref CLScharr3x3 + * -# @ref CLLKTrackerInitKernel + * -# @ref CLLKTrackerStage0Kernel + * -# @ref CLLKTrackerStage1Kernel + * -# @ref CLLKTrackerFinalizeKernel + */ +class CLOpticalFlow : public IFunction +{ +public: + /** Default constructor */ + CLOpticalFlow(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLOpticalFlow(const CLOpticalFlow &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLOpticalFlow &operator=(const CLOpticalFlow &) = delete; + /** Allow instances of this class to be moved */ + CLOpticalFlow(CLOpticalFlow &&) = default; + /** Allow instances of this class to be moved */ + CLOpticalFlow &operator=(CLOpticalFlow &&) = default; + /** Initialise the function input and output + * + * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data types supported U8 + * @param[in] new_pyramid Pointer to the pyramid for the new tensor. Data types supported U8 + * @param[in] old_points Pointer to the IKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points + * @param[out] new_points Pointer to the IKeyPointArray storing new key points + * @param[in] termination The criteria to terminate the search of each keypoint. 
+ * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] border_mode The border mode applied at scharr kernel stage + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT + * + */ + void configure(const CLPyramid *old_pyramid, const CLPyramid *new_pyramid, + const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate, + BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr _tracker_init_kernel; + std::unique_ptr _tracker_stage0_kernel; + std::unique_ptr _tracker_stage1_kernel; + CLLKTrackerFinalizeKernel _tracker_finalize_kernel; + std::unique_ptr _func_scharr; + std::unique_ptr _scharr_gx; + std::unique_ptr _scharr_gy; + const ICLKeyPointArray *_old_points; + const ICLKeyPointArray *_new_points_estimates; + ICLKeyPointArray *_new_points; + std::unique_ptr _old_points_internal; + std::unique_ptr _new_points_internal; + std::unique_ptr _coefficient_table; + std::unique_ptr _old_values; + size_t _num_levels; +}; +} +#endif /*__ARM_COMPUTE_CLOPTICALFLOW_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLPhase.h b/arm_compute/runtime/CL/functions/CLPhase.h new file mode 100644 index 0000000000..7cdfab16e2 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLPhase.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLPHASE_H__ +#define __ARM_COMPUTE_CLPHASE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute an @ref CLMagnitudePhaseKernel. */ +class CLPhase : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output. + * + * @param[in] input1 First tensor input. Data types supported: S16. + * @param[in] input2 Second tensor input. Data types supported: S16. + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type = PhaseType::SIGNED); +}; +} +#endif /*__ARM_COMPUTE_CLPHASE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h new file mode 100644 index 0000000000..71754fc3f4 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H__ +#define __ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLPixelWiseMultiplicationKernel. 
*/ +class CLPixelWiseMultiplication : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16 or F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16 or F32. + * @param[out] output Output tensor. Data types supported: U8 (only if both inputs are U8), S16, F16 or F32. + * @param[in] scale Scale to apply after multiplication. Must be positive. + * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); +}; +} +#endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h new file mode 100644 index 0000000000..f92860e5b2 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLPOOLINGLAYER_H__ +#define __ARM_COMPUTE_CLPOOLINGLAYER_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if padding size is different from zero) + * -# @ref CLPoolingLayerKernel + */ +class CLPoolingLayer : public ICLSimpleFunction +{ +public: + /** Set the input and output tensors. + * + * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: F16, F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info); +}; +} +#endif /* __ARM_COMPUTE_CLPOOLINGLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLRemap.h b/arm_compute/runtime/CL/functions/CLRemap.h new file mode 100644 index 0000000000..4cb2be90e7 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLRemap.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLREMAP_H__ +#define __ARM_COMPUTE_CLREMAP_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute remap. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLRemapKernel + */ +class CLRemap : public ICLSimpleFunction +{ +public: + /** Initialise the function's sources, destination, interpolation policy and border mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[in] map_x Map for X coords. Data types supported: F32. + * @param[in] map_y Map for Y coords. Data types supported: F32. 
+ * @param[out] output Output tensor. Data types supported: U8. + * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported. + * @param[in] border_mode Border mode to use on the input tensor. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, + InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLREMAP_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLScale.h b/arm_compute/runtime/CL/functions/CLScale.h new file mode 100644 index 0000000000..c2438ddf9b --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLScale.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLSCALE_H__ +#define __ARM_COMPUTE_CLSCALE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLScaleKernel */ +class CLScale : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, interpolation type and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8, S16. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8, S16 (Must be the same as the input tensor). + * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLSCALE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLScharr3x3.h b/arm_compute/runtime/CL/functions/CLScharr3x3.h new file mode 100644 index 0000000000..3ea0b84624 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLScharr3x3.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSCHARR3X3_H__ +#define __ARM_COMPUTE_CLSCHARR3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute scharr 3x3 filter. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLScharr3x3Kernel + * + */ +class CLScharr3x3 : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in,out] input Source tensor. Data types supported: U8. 
(Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Scharr 3x3 convolution along the X axis. Data types supported: S16. + * @param[out] output_y (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data types supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLSCHARR3X3_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLSobel3x3.h b/arm_compute/runtime/CL/functions/CLSobel3x3.h new file mode 100644 index 0000000000..7a4f47d0ed --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLSobel3x3.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOBEL3X3_H__ +#define __ARM_COMPUTE_CLSOBEL3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute sobel 3x3 filter. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLSobel3x3Kernel + * + */ +class CLSobel3x3 : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Sobel 3x3 convolution along the X axis. Data types supported: S16. + * @param[out] output_y (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data types supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLSOBEL3X3_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLSobel5x5.h b/arm_compute/runtime/CL/functions/CLSobel5x5.h new file mode 100644 index 0000000000..ad1f72faf8 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLSobel5x5.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOBEL5X5_H__ +#define __ARM_COMPUTE_CLSOBEL5X5_H__ + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute sobel 5x5 filter. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLSobel5x5HorKernel + * -# @ref CLSobel5x5VertKernel + * + */ +class CLSobel5x5 : public IFunction +{ +public: + /** Default Constructor. */ + CLSobel5x5(); + /** Initialise the function's source, destinations and border mode. 
+ * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Sobel 5x5 convolution along the X axis. Data types supported: S16. + * @param[out] output_y (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data types supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + CLSobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ + CLSobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */ + CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + CLImage _tmp_x; /**< Temporary buffer for Sobel X */ + CLImage _tmp_y; /**< Temporary buffer for Sobel Y */ +}; +} +#endif /*__ARM_COMPUTE_CLSOBEL5X5_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLSobel7x7.h b/arm_compute/runtime/CL/functions/CLSobel7x7.h new file mode 100644 index 0000000000..1a3fe1a50a --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLSobel7x7.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOBEL7X7_H__ +#define __ARM_COMPUTE_CLSOBEL7X7_H__ + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute sobel 7x7 filter. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLSobel7x7HorKernel + * -# @ref CLSobel7x7VertKernel + * + */ +class CLSobel7x7 : public IFunction +{ +public: + /** Default Constructor. */ + CLSobel7x7(); + /** Initialise the function's source, destinations and border mode. 
+ * + * @note At least one of output_x or output_y must not be NULL. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (Optional) Destination for the Sobel 7x7 convolution along the X axis. Data types supported: S32. + * @param[out] output_y (Optional) Destination for the Sobel 7x7 convolution along the Y axis. Data types supported: S32. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + CLSobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ + CLSobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */ + CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + CLImage _tmp_x; /**< Temporary buffer for Sobel X */ + CLImage _tmp_y; /**< Temporary buffer for Sobel Y */ +}; +} +#endif /*__ARM_COMPUTE_CLSOBEL7X7_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h new file mode 100644 index 0000000000..42cfc06fc4 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOFTMAXLAYER_H__ +#define __ARM_COMPUTE_CLSOFTMAXLAYER_H__ + +#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to compute a SoftmaxLayer. + * + * Softmax is calculated by : + * @f[ out = exp(x - max(x)) / sum(exp(x - max(x))) @f] + * + * This function runs the following kernels: + * -# @ref CLLogits1DMaxKernel + * -# @ref CLLogits1DShiftExpSumKernel + * -# @ref CLLogits1DNormKernel + */ +class CLSoftmaxLayer : public IFunction +{ +public: + /** Constructor */ + CLSoftmaxLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16, F32. Number of channels must be 1. + * @param[out] output Destination tensor. 
Matching input type and channel number. + */ + void configure(const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run() override; + +private: + CLLogits1DMaxKernel _max_kernel; + CLLogits1DShiftExpSumKernel _shift_exp_sum_kernel; + CLLogits1DNormKernel _norm_kernel; + CLTensor _max; + CLTensor _sum; + CLTensor _tmp; +}; +} +#endif /* __ARM_COMPUTE_CLSOFTMAXLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLTableLookup.h b/arm_compute/runtime/CL/functions/CLTableLookup.h new file mode 100644 index 0000000000..ebe6593b6a --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLTableLookup.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLTABLELOOKUP_H__ +#define __ARM_COMPUTE_CLTABLELOOKUP_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; +class ICLLut; + +/** Basic function to run @ref CLTableLookupKernel */ +class CLTableLookup : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input First tensor input. Data types supported: U8 and S16 + * @param[in] lut Input lookup table. Data types supported: U8 and S16 + * @param[out] output Output tensor. Data types supported: U8 and S16 + */ + void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output); +}; +} +#endif /*__ARM_COMPUTE_CLTABLELOOKUP_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLThreshold.h b/arm_compute/runtime/CL/functions/CLThreshold.h new file mode 100644 index 0000000000..14c05786c1 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLThreshold.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTHRESHOLD_H__ +#define __ARM_COMPUTE_CLTHRESHOLD_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLThresholdKernel */ +class CLThreshold : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destination, thresholds and threshold type + * + * @param[in] input First tensor input. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] threshold Threshold. If upper threshold is specified, this will be used as the lower threshold. + * @param[in] false_value Value to assign when the condition is false. + * @param[in] true_value Value to assign when the condition is true. + * @param[in] type Thresholding type. Can either be BINARY or RANGE. + * @param[in] upper Upper threshold. Only used with RANGE thresholding. + */ + void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, + uint8_t false_value = 0, uint8_t true_value = 0, + ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0); +}; +} +#endif /*__ARM_COMPUTE_CLTHRESHOLD_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h new file mode 100644 index 0000000000..9b57fe00a8 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLTranspose.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTRANSPOSE_H__ +#define __ARM_COMPUTE_CLTRANSPOSE_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to transpose a matrix on OpenCL. This function calls the following OpenCL kernel: + * + * -# @ref CLTransposeKernel + * + */ +class CLTranspose : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. 
Data type supported: Same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; +} + +#endif /* __ARM_COMPUTE_CLTRANSPOSE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLWarpAffine.h b/arm_compute/runtime/CL/functions/CLWarpAffine.h new file mode 100644 index 0000000000..aeab3f7b22 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLWarpAffine.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLWARPAFFINE_H__ +#define __ARM_COMPUTE_CLWARPAFFINE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLWarpAffineKernel for AFFINE transformation */ +class CLWarpAffine : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] matrix The affine matrix. Must be 2x3 of type float. + * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLWARPAFFINE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLWarpPerspective.h b/arm_compute/runtime/CL/functions/CLWarpPerspective.h new file mode 100644 index 0000000000..80237017aa --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLWarpPerspective.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLWARPPERSPECTIVE_H__ +#define __ARM_COMPUTE_CLWARPPERSPECTIVE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLWarpPerspectiveKernel for PERSPECTIVE transformation */ +class CLWarpPerspective : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] matrix The perspective matrix. Must be 3x3 of type float. + * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLWARPPERSPECTIVE_H__ */ diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h new file mode 100644 index 0000000000..0828af6015 --- /dev/null +++ b/arm_compute/runtime/CPP/CPPScheduler.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CPPSCHEDULER_H__ +#define __ARM_COMPUTE_CPPSCHEDULER_H__ + +#include +#include + +namespace arm_compute +{ +class ICPPKernel; +class Thread; + +/** Pool of threads to automatically split a kernel's execution among several threads. 
*/ +class CPPScheduler +{ +private: + /** Constructor: create a pool of threads. */ + CPPScheduler(); + +public: + /** Force the re-creation of the pool of threads to use the specified number of threads. + * + * @param[in] num_threads If set to 0, then std::thread::hardware_concurrency() threads will be used, otherwise the number of threads specified. + */ + void force_number_of_threads(int num_threads); + /** Returns the number of threads that the CPPScheduler has in its pool. + * + * @return Number of threads available in CPPScheduler. + */ + int num_threads() const + { + return _num_threads; + } + /** Access the scheduler singleton + * + * @return The scheduler + */ + static CPPScheduler &get(); + /** Multithread the execution of the passed kernel if possible. + * + * The kernel will run on a single thread if any of these conditions is true: + * - ICPPKernel::is_parallelisable() returns false + * - The scheduler has been initialized with only one thread. + * + * @param[in] kernel Kernel to execute. + * @param[in] split_dimension Dimension along which to split the kernel's execution window (By default 1/Y) + */ + void multithread(ICPPKernel *kernel, size_t split_dimension = 1); + +private: + int _num_threads; + std::unique_ptr _threads; +}; +} +#endif /* __ARM_COMPUTE_CPPSCHEDULER_H__ */ diff --git a/arm_compute/runtime/Distribution1D.h b/arm_compute/runtime/Distribution1D.h new file mode 100644 index 0000000000..7080e88075 --- /dev/null +++ b/arm_compute/runtime/Distribution1D.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_DISTRIBUTION1D_H__ +#define __ARM_COMPUTE_DISTRIBUTION1D_H__ + +#include "arm_compute/core/IDistribution1D.h" + +#include +#include +#include + +namespace arm_compute +{ +/** Basic implementation of the 1D distribution interface */ +class Distribution1D : public IDistribution1D +{ +public: + /** Constructor: Creates a 1D Distribution of a consecutive interval [offset, offset + range - 1] + * defined by a start offset and valid range, divided equally into num_bins parts. + * + * @param[in] num_bins The number of bins the distribution is divided in. + * @param[in] offset The start of the values to use. + * @param[in] range The total number of the consecutive values of the distribution interval. 
+ */ + Distribution1D(size_t num_bins, int32_t offset, uint32_t range); + + // Inherited methods overridden: + uint32_t *buffer() const override; + +private: + std::unique_ptr _data; /**< The distribution data. */ +}; +} +#endif /* __ARM_COMPUTE_DISTRIBUTION1D_H__ */ diff --git a/arm_compute/runtime/HOG.h b/arm_compute/runtime/HOG.h new file mode 100644 index 0000000000..70d8034bef --- /dev/null +++ b/arm_compute/runtime/HOG.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_HOG_H__ +#define __ARM_COMPUTE_HOG_H__ + +#include "arm_compute/core/HOGInfo.h" +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** CPU implementation of HOG data-object */ +class HOG : public IHOG +{ +public: + /** Default constructor */ + HOG(); + /** Allocate the HOG descriptor using the given HOG's metadata + * + * @param[in] input HOG's metadata used to allocate the HOG descriptor + */ + void init(const HOGInfo &input); + + // Inherited method overridden: + const HOGInfo *info() const override; + float *descriptor() const override; + +private: + HOGInfo _info; + std::unique_ptr _descriptor; +}; +} +#endif /* __ARM_COMPUTE_HOG_H__ */ diff --git a/arm_compute/runtime/IFunction.h b/arm_compute/runtime/IFunction.h new file mode 100644 index 0000000000..0cd21b99ac --- /dev/null +++ b/arm_compute/runtime/IFunction.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IFUNCTION_H__ +#define __ARM_COMPUTE_IFUNCTION_H__ + +namespace arm_compute +{ +/** Base class for all functions */ +class IFunction +{ +public: + /** Run the kernels contained in the function + * + * For NEON kernels: + * - Multi-threading is used for the kernels which are parallelisable. + * - By default std::thread::hardware_concurrency() threads are used. + * + * @note @ref CPPScheduler::force_number_of_threads() can be used to manually set the number of threads + * + * For OpenCL kernels: + * - All the kernels are enqueued on the queue associated with CLScheduler. + * - The queue is then flushed. + * + * @note The function will not block until the kernels are executed. It is the user's responsibility to wait. + */ + virtual void run() = 0; + /** Destructor + * + */ + virtual ~IFunction() = default; +}; +} +#endif /*__ARM_COMPUTE_IFUNCTION_H__ */ diff --git a/arm_compute/runtime/ILutAllocator.h b/arm_compute/runtime/ILutAllocator.h new file mode 100644 index 0000000000..f23fbd2154 --- /dev/null +++ b/arm_compute/runtime/ILutAllocator.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ILUTALLOCATOR_H__ +#define __ARM_COMPUTE_ILUTALLOCATOR_H__ + +#include "arm_compute/core/Types.h" + +#include +#include + +namespace arm_compute +{ +/** Basic interface to allocate LUTs */ +class ILutAllocator +{ +public: + /** Default constructor */ + ILutAllocator(); + /** Default virtual destructor */ + virtual ~ILutAllocator() = default; + /** Allow instances of this class to be move constructed */ + ILutAllocator(ILutAllocator &&) = default; + /** Allow instances of this class to be moved */ + ILutAllocator &operator=(ILutAllocator &&) = default; + /** Allocate an LUT of the requested number of elements and data_type. + * + * @param[in] num_elements Number of elements of the LUT. + * @param[in] data_type Data type of each element.
+ */ + void init(size_t num_elements, DataType data_type); + /** Returns the total number of elements in the LUT. + * + * @return Total number of elements. + */ + size_t num_elements() const; + /** Returns the type of the LUT. + * + * @return The type of the LUT. + */ + DataType type() const; + /** Returns the total size in bytes of the LUT. + * + * @return Total size of the LUT in bytes. + */ + size_t size() const; + +protected: + /** Interface to be implemented by the child class to allocate the LUT. */ + virtual void allocate() = 0; + /** Interface to be implemented by the child class to lock the memory allocation for the CPU to access. + * + * @return Pointer to a CPU mapping of the memory + */ + virtual uint8_t *lock() = 0; + /** Interface to be implemented by the child class to unlock the memory allocation after the CPU is done accessing it. */ + virtual void unlock() = 0; + +private: + size_t _num_elements; /**< Number of elements allocated */ + DataType _data_type; /**< Data type of LUT elements. */ +}; +} +#endif /* __ARM_COMPUTE_ILUTALLOCATOR_H__ */ diff --git a/arm_compute/runtime/ITensorAllocator.h b/arm_compute/runtime/ITensorAllocator.h new file mode 100644 index 0000000000..a246634c88 --- /dev/null +++ b/arm_compute/runtime/ITensorAllocator.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ITENSORALLOCATOR_H__ +#define __ARM_COMPUTE_ITENSORALLOCATOR_H__ + +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" + +#include <cstdint> + +namespace arm_compute +{ +/** Interface to allocate tensors */ +class ITensorAllocator +{ +public: + /** Default constructor. */ + ITensorAllocator(); + /** Allow instances of this class to be copy constructed */ + ITensorAllocator(const ITensorAllocator &) = default; + /** Allow instances of this class to be copied */ + ITensorAllocator &operator=(const ITensorAllocator &) = default; + /** Allow instances of this class to be move constructed */ + ITensorAllocator(ITensorAllocator &&) = default; + /** Allow instances of this class to be moved */ + ITensorAllocator &operator=(ITensorAllocator &&) = default; + /** Default virtual destructor. */ + virtual ~ITensorAllocator() = default; + + /** Initialize a tensor based on the passed @ref TensorInfo. + * + * @param[in] input TensorInfo object containing the description of the tensor to initialize. + */ + void init(const TensorInfo &input); + /** Return a reference to the tensor's metadata + * + * @return Reference to the tensor's metadata. + */ + TensorInfo &info(); + /** Return a constant reference to the tensor's metadata + * + * @return Constant reference to the tensor's metadata. + */ + const TensorInfo &info() const; + /** Interface to be implemented by the child class to allocate the tensor.
+ * + * @note The child is expected to use the TensorInfo to get the size of the memory allocation. + * @warning The tensor must not already be allocated. Otherwise calling the function will fail. + */ + virtual void allocate() = 0; + +protected: + /** Interface to be implemented by the child class to lock the memory allocation for the CPU to access. + * + * @return Pointer to a CPU mapping of the memory. + */ + virtual uint8_t *lock() = 0; + /** Interface to be implemented by the child class to unlock the memory allocation after the CPU is done accessing it. */ + virtual void unlock() = 0; + +private: + TensorInfo _info; /**< Tensor's metadata. */ +}; +} +#endif /*__ARM_COMPUTE_ITENSORALLOCATOR_H__ */ diff --git a/arm_compute/runtime/Lut.h b/arm_compute/runtime/Lut.h new file mode 100644 index 0000000000..87431feee4 --- /dev/null +++ b/arm_compute/runtime/Lut.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_LUT_H__ +#define __ARM_COMPUTE_LUT_H__ + +#include "arm_compute/core/ILut.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/LutAllocator.h" + +#include <cstddef> +#include <cstdint> + +namespace arm_compute +{ +class ILutAllocator; + +/** Basic implementation of the LUT interface */ +class Lut : public ILut +{ +public: + /** Constructor */ + Lut(); + /** Constructor: initializes a LUT which can contain num_elements values of type data_type. + * + * @param[in] num_elements Number of elements of the LUT. + * @param[in] data_type Data type of each element. + */ + Lut(size_t num_elements, DataType data_type); + /** Return a pointer to the lut's allocator + * + * @return A pointer to the lut's allocator + */ + ILutAllocator *allocator(); + + // Inherited methods overridden: + size_t num_elements() const override; + uint32_t index_offset() const override; + size_t size_in_bytes() const override; + DataType type() const override; + uint8_t *buffer() const override; + void clear() override; + +private: + LutAllocator _allocator; /**< Instance of the basic CPU allocator.*/ +}; +} +#endif /* __ARM_COMPUTE_LUT_H__ */ diff --git a/arm_compute/runtime/LutAllocator.h b/arm_compute/runtime/LutAllocator.h new file mode 100644 index 0000000000..76b596bfa0 --- /dev/null +++ b/arm_compute/runtime/LutAllocator.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_LUTALLOCATOR_H__ +#define __ARM_COMPUTE_LUTALLOCATOR_H__ + +#include "arm_compute/runtime/ILutAllocator.h" + +#include <cstdint> +#include <memory> + +namespace arm_compute +{ +/** Basic implementation of a CPU memory LUT allocator. */ +class LutAllocator : public ILutAllocator +{ +public: + /** Default constructor. */ + LutAllocator(); + /** Return the pointer to the allocated data. */ + uint8_t *data() const; + +protected: + /** Allocate num_elements() * sizeof(type()) of CPU memory. */ + void allocate() override; + /** No-op for CPU memory + * + * @return A pointer to the beginning of the look up table's allocation. + */ + uint8_t *lock() override; + /** No-op for CPU memory. */ + void unlock() override; + +private: + std::unique_ptr<uint8_t[]> _buffer; /**< CPU memory allocation.
*/ +}; +} +#endif /* __ARM_COMPUTE_LUTALLOCATOR_H__ */ diff --git a/arm_compute/runtime/MultiHOG.h b/arm_compute/runtime/MultiHOG.h new file mode 100644 index 0000000000..486ae141d4 --- /dev/null +++ b/arm_compute/runtime/MultiHOG.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_MULTIHOG_H__ +#define __ARM_COMPUTE_MULTIHOG_H__ + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IMultiHOG.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/HOG.h" + +namespace arm_compute +{ +/** CPU implementation of multi HOG data-object */ +class MultiHOG : public IMultiHOG +{ +public: + /** Constructor + * + * @param[in] num_models Number of HOG data objects to contain + * + */ + MultiHOG(size_t num_models); + + // Inherited methods overridden: + size_t num_models() const override; + IHOG *model(size_t index) override; + const IHOG *model(size_t index) const override; + +private: + size_t _num_models; /**< Number of HOG data objects contained */ + std::unique_ptr<HOG[]> _model; /**< Storage for the contained HOG data objects */ +}; +} + +#endif /* __ARM_COMPUTE_MULTIHOG_H__ */ diff --git a/arm_compute/runtime/MultiImage.h b/arm_compute/runtime/MultiImage.h new file mode 100644 index 0000000000..917e586ef8 --- /dev/null +++ b/arm_compute/runtime/MultiImage.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_MULTIIMAGE_H__ +#define __ARM_COMPUTE_MULTIIMAGE_H__ + +#include "arm_compute/core/IMultiImage.h" +#include "arm_compute/core/MultiImageInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" + +#include <array> + +namespace arm_compute +{ +class Coordinates; +class ITensor; +using IImage = ITensor; + +/** Basic implementation of the multi-planar image interface */ +class MultiImage : public IMultiImage +{ +public: + /** Constructor */ + MultiImage(); + /** Initialise the multi-planar image + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + */ + void init(unsigned int width, unsigned int height, Format format); + /** Initialise the multi-planar image + * + * @note Uses conservative padding strategy which fits all kernels. + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + */ + void init_auto_padding(unsigned int width, unsigned int height, Format format); + /** Allocate a previously initialised multi image + * + * @note The multi image must not already be allocated when calling this function. + * + **/ + void allocate(); + /** Create a subimage from an existing MultiImage. + * + * @param[in] image Image to use backing memory from + * @param[in] coords Starting coordinates of the new image.
Should be within the parent image sizes + * @param[in] width The width of the subimage + * @param[in] height The height of the subimage + */ + void create_subimage(MultiImage *image, const Coordinates &coords, unsigned int width, unsigned int height); + + // Inherited methods overridden: + const MultiImageInfo *info() const override; + Image *plane(unsigned int index) override; + const Image *plane(unsigned int index) const override; + +private: + /** Init the multi-planar image + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + * @param[in] auto_padding Specifies whether the image uses auto padding + */ + void internal_init(unsigned int width, unsigned int height, Format format, bool auto_padding); + + MultiImageInfo _info; /**< Instance of the multi-planar image's meta data */ + std::array<Image, 3> _plane; /**< Image instances holding each plane's information */ +}; +} +#endif /*__ARM_COMPUTE_MULTIIMAGE_H__ */ diff --git a/arm_compute/runtime/NEON/INESimpleFunction.h b/arm_compute/runtime/NEON/INESimpleFunction.h new file mode 100644 index 0000000000..6e000d8fd8 --- /dev/null +++ b/arm_compute/runtime/NEON/INESimpleFunction.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_INESIMPLEFUNCTION_H__ +#define __ARM_COMPUTE_INESIMPLEFUNCTION_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> + +namespace arm_compute +{ +/** Basic interface for functions which have a single NEON kernel */ +class INESimpleFunction : public IFunction +{ +public: + /** Constructor */ + INESimpleFunction(); + + // Inherited methods overridden: + void run() override final; + +protected: + std::unique_ptr<INEKernel> _kernel; /**< Kernel to run */ + NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ +}; +} +#endif /*__ARM_COMPUTE_INESIMPLEFUNCTION_H__ */ diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h new file mode 100644 index 0000000000..d1dd15045a --- /dev/null +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEFUNCTIONS_H__ +#define __ARM_COMPUTE_NEFUNCTIONS_H__ + +/* Header regrouping all the NEON functions */ +#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h" +#include "arm_compute/runtime/NEON/functions/NEAccumulate.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h" +#include "arm_compute/runtime/NEON/functions/NEBox3x3.h" +#include "arm_compute/runtime/NEON/functions/NECannyEdge.h" +#include "arm_compute/runtime/NEON/functions/NEChannelCombine.h" +#include "arm_compute/runtime/NEON/functions/NEChannelExtract.h" +#include "arm_compute/runtime/NEON/functions/NEColorConvert.h" +#include "arm_compute/runtime/NEON/functions/NEConvolution.h" +#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" +#include "arm_compute/runtime/NEON/functions/NEDerivative.h" +#include "arm_compute/runtime/NEON/functions/NEDilate.h" +#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h" +#include "arm_compute/runtime/NEON/functions/NEErode.h" +#include "arm_compute/runtime/NEON/functions/NEFastCorners.h" +#include "arm_compute/runtime/NEON/functions/NEFillBorder.h" +#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" +#include "arm_compute/runtime/NEON/functions/NEGEMM.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMLowp.h" +#include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h" +#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" +#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" +#include 
"arm_compute/runtime/NEON/functions/NEHOGDescriptor.h" +#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" +#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" +#include "arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h" +#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h" +#include "arm_compute/runtime/NEON/functions/NEHistogram.h" +#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" +#include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h" +#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h" +#include "arm_compute/runtime/NEON/functions/NEMagnitude.h" +#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h" +#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h" +#include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h" +#include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h" +#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h" +#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEOpticalFlow.h" +#include "arm_compute/runtime/NEON/functions/NEPhase.h" +#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" +#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" +#include "arm_compute/runtime/NEON/functions/NERemap.h" +#include "arm_compute/runtime/NEON/functions/NEScale.h" +#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" +#include "arm_compute/runtime/NEON/functions/NESobel3x3.h" +#include "arm_compute/runtime/NEON/functions/NESobel5x5.h" +#include "arm_compute/runtime/NEON/functions/NESobel7x7.h" +#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" +#include "arm_compute/runtime/NEON/functions/NETableLookup.h" +#include "arm_compute/runtime/NEON/functions/NEThreshold.h" +#include "arm_compute/runtime/NEON/functions/NETranspose.h" +#include "arm_compute/runtime/NEON/functions/NEWarpAffine.h" +#include 
"arm_compute/runtime/NEON/functions/NEWarpPerspective.h" + +#endif /* __ARM_COMPUTE_NEFUNCTIONS_H__ */ diff --git a/arm_compute/runtime/NEON/NEScheduler.h b/arm_compute/runtime/NEON/NEScheduler.h new file mode 100644 index 0000000000..c65d6b7b17 --- /dev/null +++ b/arm_compute/runtime/NEON/NEScheduler.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NESCHEDULER_H__ +#define __ARM_COMPUTE_NESCHEDULER_H__ + +#include "arm_compute/runtime/CPP/CPPScheduler.h" + +namespace arm_compute +{ +using NEScheduler = CPPScheduler; /**< NEScheduler is an alias of @ref CPPScheduler */ +} +#endif /*__ARM_COMPUTE_NESCHEDULER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h new file mode 100644 index 0000000000..266a27586a --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H__ +#define __ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEAbsoluteDifferenceKernel + * + * @note The image data type for the inputs must be U8 or S16 + * @note The function also calculates the absolute difference when the two inputs have different image data types + */ +class NEAbsoluteDifference : public INESimpleFunction +{ +public: + /** Set the inputs and output images + * + * @param[in] input1 First source tensor. Data types supported: U8/S16. + * @param[in] input2 Second source tensor. Data types supported: U8/S16. + * @param[out] output Destination tensor. Data types supported: U8/S16. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h new file mode 100644 index 0000000000..de532c37a0 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEAccumulate.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEACCUMULATE_H__ +#define __ARM_COMPUTE_NEACCUMULATE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEAccumulateKernel */ +class NEAccumulate : public INESimpleFunction +{ +public: + /** Set the input and accumulation tensors + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: S16. + */ + void configure(const ITensor *input, ITensor *output); +}; + +/** Basic function to run @ref NEAccumulateWeightedKernel */ +class NEAccumulateWeighted : public INESimpleFunction +{ +public: + /** Set the input and accumulation tensors, and the scale value + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] alpha The input scalar value, in the range [0, 1.0] + * @param[in,out] output Accumulated tensor. Data type supported: U8. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. + */ + void configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16 = false); +}; + +/** Basic function to run @ref NEAccumulateSquaredKernel */ +class NEAccumulateSquared : public INESimpleFunction +{ +public: + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data type supported: U8.
+ * @param[in] shift The input shift value, in the range [0, 15] + * @param[in,out] output Accumulated tensor. Data type supported: S16. + */ + void configure(const ITensor *input, uint32_t shift, ITensor *output); +}; +} +#endif /*__ARM_COMPUTE_NEACCUMULATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h new file mode 100644 index 0000000000..3fb3e20261 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEACTIVATIONLAYER_H__ +#define __ARM_COMPUTE_NEACTIVATIONLAYER_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEActivationLayerKernel + * + * @note The function simulates an activation layer with the specified activation function. + */ +class NEActivationLayer : public INESimpleFunction +{ +public: + /** Set the input and output tensors and the activation layer parameters. + * + * @param[in] input Source tensor. Data type supported: F32. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] activation_info Activation layer parameters. + */ + void configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info); +}; +} +#endif /* __ARM_COMPUTE_NEACTIVATIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h new file mode 100644 index 0000000000..8f66a6dba9 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEARITHMETICADDITION_H__ +#define __ARM_COMPUTE_NEARITHMETICADDITION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEArithmeticAdditionKernel */ +class NEArithmeticAddition : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8 or S16. + * @param[in] input2 Second tensor input. Data types supported: U8 or S16. + * @param[out] output Output tensor. Data types supported: U8 or S16. + * @param[in] policy Policy to use to handle overflow. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); +}; +} +#endif /*__ARM_COMPUTE_NEARITHMETICADDITION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h new file mode 100644 index 0000000000..d0eaff7612 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEARITHMETICSUBTRACTION_H__ +#define __ARM_COMPUTE_NEARITHMETICSUBTRACTION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEArithmeticSubtractionKernel */ +class NEArithmeticSubtraction : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8 or S16. + * @param[in] input2 Second tensor input. Data types supported: U8 or S16. + * @param[out] output Output tensor. Data types supported: U8 or S16. + * @param[in] policy Policy to use to handle overflow.
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); +}; +} +#endif /* __ARM_COMPUTE_NEARITHMETICSUBTRACTION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h new file mode 100644 index 0000000000..0250293e97 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISEAND_H__ +#define __ARM_COMPUTE_NEBITWISEAND_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBitwiseAndKernel */ +class NEBitwiseAnd : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs and output. + * + * @param[in] input1 First tensor input.
Data type supported: U8. + * @param[in] input2 Second tensor input. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEAND_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h new file mode 100644 index 0000000000..62c08ffcf9 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEBITWISENOT_H__ +#define __ARM_COMPUTE_NEBITWISENOT_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBitwiseNotKernel */ +class NEBitwiseNot : public INESimpleFunction +{ +public: + /** Initialise the kernel's input and output + * + * @param[in] input Input tensor. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEBITWISENOT_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h new file mode 100644 index 0000000000..1c9a2f9d2e --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISEOR_H__ +#define __ARM_COMPUTE_NEBITWISEOR_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBitwiseOrKernel */ +class NEBitwiseOr : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input1 First tensor input. Data type supported: U8. + * @param[in] input2 Second tensor input. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h new file mode 100644 index 0000000000..4690f0a4e3 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISEXOR_H__ +#define __ARM_COMPUTE_NEBITWISEXOR_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBitwiseXorKernel */ +class NEBitwiseXor : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input1 First tensor input. Data type supported: U8. + * @param[in] input2 Second tensor input. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEXOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBox3x3.h b/arm_compute/runtime/NEON/functions/NEBox3x3.h new file mode 100644 index 0000000000..2b5440a74c --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBox3x3.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBOX3x3_H__ +#define __ARM_COMPUTE_NEBOX3x3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute box filter 3x3. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEBox3x3Kernel + * + */ +class NEBox3x3 : public INESimpleFunction +{ +public: + /** Initialise the function's input, output and border mode. + * + * @note The border handler is run on the input tensor. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data type supported: U8.
+ * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false); +}; +} +#endif /*__ARM_COMPUTE_NEBOX3x3_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NECannyEdge.h b/arm_compute/runtime/NEON/functions/NECannyEdge.h new file mode 100644 index 0000000000..fbf2d90740 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NECannyEdge.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NECANNYEDGE_H__ +#define __ARM_COMPUTE_NECANNYEDGE_H__ + +#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include <cstdint> +#include <memory> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute canny edge on NEON. This function calls the following NEON kernels and functions: + * + * -# @ref NEFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT) + * -# @ref NESobel3x3 (if gradient_size == 3) or + * @ref NESobel5x5 (if gradient_size == 5) or + * @ref NESobel7x7 (if gradient_size == 7) + * -# @ref NEGradientKernel + * -# @ref NEEdgeNonMaxSuppressionKernel + * -# @ref NEEdgeTraceKernel + * + */ +class NECannyEdge : public IFunction +{ +public: + /** Constructor + * + * Initialize Sobel kernel to nullptr. + */ + NECannyEdge(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECannyEdge(const NECannyEdge &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECannyEdge &operator=(const NECannyEdge &) = delete; + /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis. + * @param[in] gradient_size Gradient size (3, 5 or 7) + * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm + * @param[in] border_mode Border mode to use for the convolution.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. + * + */ + void configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value = 0, + bool use_fp16 = false); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel */ + std::unique_ptr<INEKernel> _gradient; /**< Gradient kernel */ + NEEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel */ + NEEdgeTraceKernel _edge_trace; /**< Edge tracing kernel */ + NEFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ + NEFillBorderKernel _border_edge_trace; /**< Fill border before edge trace */ + Tensor _gx; /**< Source tensor - Gx component */ + Tensor _gy; /**< Source tensor - Gy component */ + Tensor _magnitude; /**< Source tensor - Magnitude */ + Tensor _phase; /**< Source tensor - Phase */ + Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */ + ITensor *_output; /**< Output tensor provided by the user. */ +}; +} +#endif /* __ARM_COMPUTE_NECANNYEDGE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h new file mode 100644 index 0000000000..7133553e1d --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEChannelCombine.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECHANNELCOMBINE_H__ +#define __ARM_COMPUTE_NECHANNELCOMBINE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/**Basic function to run @ref NEChannelCombineKernel to perform channel combination. */ +class NEChannelCombine : public INESimpleFunction +{ +public: + /** Initialize function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 + * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8 + * @param[out] output The single planar output tensor. 
Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + */ + void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); + /** Initialize function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 + * @param[out] output The multi planar output image. Formats supported: NV12/NV21/IYUV/YUV444 + */ + void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); +}; +} +#endif /*__ARM_COMPUTE_NECHANNELCOMBINE_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h new file mode 100644 index 0000000000..5e46eef3a6 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEChannelExtract.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECHANNELEXTRACT_H__ +#define __ARM_COMPUTE_NECHANNELEXTRACT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/**Basic function to run @ref NEChannelExtractKernel to perform channel extraction. */ +class NEChannelExtract : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination + * + * @param[in] input The input tensor to extract the channel from. Formats supported: Any single planar. + * @param[in] channel The channel to extract. + * @param[out] output The extracted channel. Format supported: U8 + */ + void configure(const ITensor *input, Channel channel, ITensor *output); + /** Initialize the function's source, destination + * + * @param[in] input The multi-planar input image to extract channel from. + * @param[in] channel The channel to extract. + * @param[out] output The extracted channel. Format supported: U8 + */ + void configure(const IMultiImage *input, Channel channel, IImage *output); +}; +} +#endif /*__ARM_COMPUTE_NECHANNELEXTRACT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h new file mode 100644 index 0000000000..2997778ed5 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEColorConvert.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NECOLORCONVERT_H__ +#define __ARM_COMPUTE_NECOLORCONVERT_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; +class IMultiImage; +using IImage = ITensor; + +/**Basic function to run @ref NEColorConvertKernel to perform color conversion */ +class NEColorConvert : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination + * + * @param[in] input The input single-planar tensor from which to convert + * @param[out] output The converted single-planar output tensor + */ + void configure(const ITensor *input, ITensor *output); + /** Initialize the function's source, destination + * + * @param[in] input The multi-planar input image from which to convert + * @param[out] output The converted single-planar output image + */ + void configure(const IMultiImage *input, IImage *output); + /** Initialize the function's source, destination + * + * @param[in] input The single-planar input image from which to convert + * @param[out] output The converted multi-planar output image + */ + void configure(const IImage *input, IMultiImage *output); + /** Initialize the function's source, destination + * + * @param[in] input The multi-planar input image from which to convert + * @param[out] output The converted multi-planar output image + */ + void configure(const IMultiImage *input, IMultiImage *output); +}; +} +#endif /*__ARM_COMPUTE_NECOLORCONVERT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h new file mode 100644 index 0000000000..7127dee9f3 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEConvolution.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECONVOLUTION_H__ +#define __ARM_COMPUTE_NECONVOLUTION_H__ + +#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute convolution of size 3x3. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEConvolution3x3Kernel + * + */ +class NEConvolution3x3 : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination, conv and border_mode.
+ * + * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); +}; + +/** Basic function to execute convolution of size 5x5. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEConvolution5x5Kernel or
+ * @ref NESeparableConvolution5x5HorKernel and @ref NESeparableConvolution5x5VertKernel (if convolution matrix is separable) + * + */ +class NEConvolution5x5 : public IFunction +{ +public: + /** Default constructor */ + NEConvolution5x5(); + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + Tensor _tmp; /**< temporary buffer for output of horizontal pass */ + bool _is_separable; /**< true if the convolution can be separated */ + NESeparableConvolution5x5HorKernel _kernel_hor; /**< kernel for horizontal pass of separated convolution */ + NESeparableConvolution5x5VertKernel _kernel_vert; /**< kernel for vertical pass of separated convolution */ + NEConvolution5x5Kernel _kernel; /**< kernel for non-separated convolution **/ + NEFillBorderKernel _border_handler; /**< kernel for border handling */ +}; + +/** Basic function to execute convolution of size 7x7. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEConvolution7x7Kernel or
+ * @ref NESeparableConvolution7x7HorKernel and @ref NESeparableConvolution7x7VertKernel (if convolution matrix is separable) + * + */ +class NEConvolution7x7 : public IFunction +{ +public: + /** Default constructor */ + NEConvolution7x7(); + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + Tensor _tmp; /**< temporary buffer for output of horizontal pass */ + bool _is_separable; /**< true if the convolution can be separated */ + NESeparableConvolution7x7HorKernel _kernel_hor; /**< kernel for horizontal pass of separated convolution */ + NESeparableConvolution7x7VertKernel _kernel_vert; /**< kernel for vertical pass of separated convolution */ + NEConvolution7x7Kernel _kernel; /**< kernel for non-separated convolution **/ + NEFillBorderKernel _border_handler; /**< kernel for border handling */ +}; + +/** Basic function to execute convolution of size 9x9. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEConvolution9x9Kernel or
+ * @ref NESeparableConvolution9x9HorKernel and @ref NESeparableConvolution9x9VertKernel (if convolution matrix is separable) + * + */ +class NEConvolution9x9 : public IFunction +{ +public: + /** Default constructor */ + NEConvolution9x9(); + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + Tensor _tmp; /**< temporary buffer for output of horizontal pass */ + bool _is_separable; /**< true if the convolution can be separated */ + NESeparableConvolution9x9HorKernel _kernel_hor; /**< kernel for horizontal pass of separated convolution */ + NESeparableConvolution9x9VertKernel _kernel_vert; /**< kernel for vertical pass of separated convolution */ + NEConvolution9x9Kernel _kernel; /**< kernel for non-separated convolution **/ + NEFillBorderKernel _border_handler; /**< kernel for border handling */ +}; + +/** Basic function to execute non-square convolution. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEConvolutionRectangleKernel or
+ * + * @note Convolution rectangle should have dimensions of 3, 5, 7 or 9. + */ +class NEConvolutionRectangle : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8 or S16. + * @param[in] conv @p rows x @p cols S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] rows Rows of convolution kernel. + * @param[in] cols Columns of convolution kernel. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NECONVOLUTION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h new file mode 100644 index 0000000000..abc0b6cef2 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECONVOLUTIONLAYER_H__ +#define __ARM_COMPUTE_NECONVOLUTIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" +#include "arm_compute/core/NEON/kernels/NEConvolutionLayerWeightsReshapeKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to simulate a convolution layer. 
This function calls the following NEON kernels: + * -# @ref NEConvolutionLayerWeightsReshapeKernel (executed only once for each configuration) + * -# @ref NEGEMMTranspose1xWKernel (executed only once for each configuration) + * -# @ref NEIm2ColKernel + * -# @ref NEGEMMInterleave4x4Kernel + * -# @ref NEGEMMMatrixMultiplyKernel + * -# @ref NECol2ImKernel + */ +class NEConvolutionLayer : public IFunction +{ +public: + /** Constructor */ + NEConvolutionLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represents a batch of inputs. + * Data types supported: F32. + * @param[in] weights Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ */ + void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + NEIm2ColKernel _input_im2col_kernel; + NEGEMMInterleave4x4Kernel _input_interleave_kernel; + NEConvolutionLayerWeightsReshapeKernel _weights_reshape_kernel; + NEGEMMTranspose1xWKernel _weights_transposed_kernel; + NEGEMMMatrixMultiplyKernel _mm_kernel; + NECol2ImKernel _output_col2im_kernel; + Tensor _input_im2col_reshaped; + Tensor _input_interleaved_reshaped; + Tensor _weights_reshaped; + Tensor _weights_transposed; + Tensor _gemm_output; + bool _is_first_run; + bool _has_bias; + bool _is_fc; +}; +} +#endif /* __ARM_COMPUTE_NECONVOLUTIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvert.h b/arm_compute/runtime/NEON/functions/NEDepthConvert.h new file mode 100644 index 0000000000..21ccca30c8 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDepthConvert.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDEPTHCONVERT_H__ +#define __ARM_COMPUTE_NEDEPTHCONVERT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEDepthConvertKernel */ +class NEDepthConvert : public INESimpleFunction +{ +public: + /** Constructor */ + NEDepthConvert() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConvert(const NEDepthConvert &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + const NEDepthConvert &operator=(const NEDepthConvert &) = delete; + /** Initialize the function's source and destination. + * + * Input format must be different than output format. + * + * Valid conversions Input -> Output : + * U8 -> U16, S16, U32, S32 + * U16 -> U8, U32, S32 + * S16 -> U8, U32, S32 + * U32 -> U8, U16, S16 + * S32 -> U8, U16, S16 + * + * + * @param[in] input The input tensor to convert. Data type supported: U8, U16, S16, U32 or S32. + * @param[out] output The output tensor. Data type supported: U8, U16, S16, U32 or S32. + * @param[in] policy Conversion policy. + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
+ */ + void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift); +}; +} +#endif /*__ARM_COMPUTE_NEDEPTHCONVERT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEDerivative.h b/arm_compute/runtime/NEON/functions/NEDerivative.h new file mode 100644 index 0000000000..57b7409b39 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDerivative.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDERIVATIVE_H__ +#define __ARM_COMPUTE_NEDERIVATIVE_H__ + +#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute first order derivative operator.
This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEDerivativeKernel + * + */ +class NEDerivative : public IFunction +{ +public: + /** Default constructor */ + NEDerivative(); + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must not be NULL. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (Optional) Destination tensor. Derivative along the X direction. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor. Derivative along the Y direction. Data type supported: S16. + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + NEDerivativeKernel _kernel; /**< Derivative kernel */ + NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ +}; +} +#endif /* __ARM_COMPUTE_NEDERIVATIVE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDilate.h b/arm_compute/runtime/NEON/functions/NEDilate.h new file mode 100644 index 0000000000..17bdb3363e --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDilate.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDILATE_H__ +#define __ARM_COMPUTE_NEDILATE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute dilate. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEDilateKernel + * + */ +class NEDilate : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and border mode. + * + * @param[in, out] input First tensor input. Data type supported: U8.(Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor. Data type supported: U8. + * @param[in] border_mode Border mode to use for the convolution. 
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); +}; +} +#endif /*__ARM_COMPUTE_NEDILATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h new file mode 100644 index 0000000000..2415233182 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H__ +#define __ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H__ + +#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" +#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" +#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" +#include "arm_compute/runtime/Distribution1D.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Lut.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Basic function to execute histogram equalization. This function calls the following NEON kernels: + * + * -# @ref NEHistogramKernel + * -# @ref NECumulativeDistributionKernel + * -# @ref NETableLookupKernel + * + */ +class NEEqualizeHistogram : public IFunction +{ +public: + /** Default Constructor. */ + NEEqualizeHistogram(); + /** Initialise the kernel's inputs. + * + * @note Currently the width of the input image must be a multiple of 16. + * + * @param[in] input Input image. Data type supported: U8. + * @param[out] output Output image. Data type supported: same as @p input. + */ + void configure(const IImage *input, IImage *output); + + // Inherited methods overridden: + void run() override; + +private: + NEHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. */ + NEHistogramBorderKernel _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */ + NECumulativeDistributionKernel _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution + and creates the relevant LookupTable. */ + NETableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ + Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */ + Distribution1D _cum_dist; /**< Distribution that holds the cumulative distribution of the input histogram. */ + Lut _cd_lut; /**< Holds the equalization lookup table.
*/ + bool _run_border_hist; /**< Boolean that specifies if a separate histogram kernel has to run on the borders */ +private: + static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */ + static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */ +}; +} +#endif /*__ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEErode.h b/arm_compute/runtime/NEON/functions/NEErode.h new file mode 100644 index 0000000000..940ae18471 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEErode.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEERODE_H__ +#define __ARM_COMPUTE_NEERODE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute erode. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEErodeKernel + * + */ +class NEErode : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and border mode + * + * @param[in, out] input First tensor input. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor. Data type supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); +}; +} +#endif /*__ARM_COMPUTE_NEERODE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEFastCorners.h b/arm_compute/runtime/NEON/functions/NEFastCorners.h new file mode 100644 index 0000000000..d2006d8687 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEFastCorners.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEFASTCORNERS_H__ +#define __ARM_COMPUTE_NEFASTCORNERS_H__ + +#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Array.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Basic function to execute fast corners.
This function calls the following NEON kernels: + * + * -# @ref NEFastCornersKernel + * -# @ref NEFillInnerBorderKernel (executed if nonmax_suppression == true) + * -# @ref NENonMaximaSuppression3x3Kernel (executed if nonmax_suppression == true) + * -# @ref NEFillArrayKernel + * + */ +class NEFastCorners : public IFunction +{ +public: + /** Constructor */ + NEFastCorners(); + /** Initialize the function's source, threshold, non-maximum suppression flag, output corners array and border_mode. + * + * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] nonmax_suppression If true, non-maximum suppression is applied to detected corners before being placed in the array. + * @param[out] corners Array of keypoints to store the results. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(IImage *input, float threshold, bool nonmax_suppression, KeyPointArray *corners, + BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + NEFastCornersKernel _fast_corners_kernel; + NEFillBorderKernel _border_handler; + NENonMaximaSuppression3x3Kernel _nonmax_kernel; + NEFillArrayKernel _fill_kernel; + NEFillInnerBorderKernel _out_border_handler_kernel; + Image _output; + Image _suppressed; + bool _non_max; +}; +} +#endif /*__ARM_COMPUTE_NEFASTCORNERS_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h new file mode 100644 index 0000000000..c69c285574 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEFILLBORDER_H__ +#define __ARM_COMPUTE_NEFILLBORDER_H__ + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEFillBorderKernel */ +class NEFillBorder : public IFunction +{ +public: + /** Initialize the function's source tensor, border width and border_mode. + * + * @note This function fills the borders within the XY-planes. + * + * @param[in, out] input Source tensor. Data type supported: U8, S16, S32, F32. + * @param[in] border_width Width of the tensor border in pixels. + * @param[in] border_mode Strategy to use for borders.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + + // Inherited methods overridden: + void run() override; + +private: + NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ +}; +} +#endif /*__ARM_COMPUTE_NEFILLBORDER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h new file mode 100644 index 0000000000..cdd72e5f92 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ +#define __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ + +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h" +#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" +#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEGEMM.h" + +namespace arm_compute +{ +/** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON functions and kernels: + * -# @ref NEConvolutionLayer (called when the weights have 4 dimensions. Pass the stride as 1 and padding as 0) + * -# @ref NEGEMM (called when the weights have 2 dimensions) + * -# @ref NETransposeKernel (called when the weights have 2 dimensions) + * -# @ref NEGEMMMatrixAccumulateBiasesKernel (called when the weights have 2 dimensions) + * + * @note The fully connected layer accepts "weights" tensors only with 2 or 4 dimensions. In particular, the weights tensor has 4 dimensions + * if the fully connected layer is computed after a convolution layer; otherwise the tensor has 2 dimensions if the fully connected layer + * is computed after another fully connected layer. + */ +class NEFullyConnectedLayer : public IFunction +{ +public: + /** Constructor */ + NEFullyConnectedLayer(); + /** Set the input and output tensors. + * + * @param[in, out] input Source tensor. Data type supported: F32. (Written to only if @ref NEGEMM needs to pad the tensor with zeros) + * @param[in, out] weights Weights tensor. The weights can be 2 dimensional or 4 dimensional. Data type supported: Same as @p input. (Written to only if @ref NEGEMM needs to pad the tensor with zeros) + * @param[in] biases Bias tensor. Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ */ + void configure(ITensor *input, ITensor *weights, const ITensor *biases, ITensor *output); + + // Inherited methods overridden: + void run() override; + +private: + /** Run the case where the fully connected layer follows a convolution layer */ + void run_conv(); + /** Run the case where the fully connected layer follows another fully connected layer */ + void run_fc(); + /** Common signature for the functions to run */ + using FullyConnectedLayerFunctionPtr = void (NEFullyConnectedLayer::*)(void); + +private: + NEConvolutionLayer _conv_function; + NEGEMM _gemm_function; + NETransposeKernel _transpose_kernel; + NEGEMMMatrixAccumulateBiasesKernel _acc_biases_kernel; + FullyConnectedLayerFunctionPtr _run_func; + Tensor _weights_transposed; + bool _is_first_run; + bool _run_acc_biases; +}; +} +#endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h new file mode 100644 index 0000000000..b9346e777e --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGEMM_H__ +#define __ARM_COMPUTE_NEGEMM_H__ + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +/** Basic function to execute GEMM on NEON. This function calls the following NEON kernels: + * + * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix) + * -# @ref NEGEMMTranspose1xWKernel (if the output tensor is a matrix) + * -# @ref NEGEMMMatrixMultiplyKernel + * -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0) + * + */ +class NEGEMM : public IFunction +{ +public: + /** Constructor */ + NEGEMM(); + /** Initialise the function's inputs and output. + * + * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. + * @note GEMM: The tensors a, b, c, d must have the same data type. All are either F32 or F16. You should not mix data types when calling this function. + * + * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: F32, F16. + * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a + * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a + * @param[out] d Output tensor. 
Data type supported: same as @p a + * @param[in] alpha Weight of the matrix product (the A * B term of the formula above) + * @param[in] beta Weight of matrix C (the C term of the formula above) + */ + void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta); + + // Inherited methods overridden: + void run() override; + +private: + NEGEMMInterleave4x4Kernel _interleave_kernel; + NEGEMMTranspose1xWKernel _transpose_kernel; + NEGEMMMatrixMultiplyKernel _mm_kernel; + NEGEMMMatrixAdditionKernel _ma_kernel; + Tensor _tmp_a; + Tensor _tmp_b; + bool _run_vector_matrix_multiplication; + bool _run_addition; +}; +} +#endif /*__ARM_COMPUTE_NEGEMM_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowp.h b/arm_compute/runtime/NEON/functions/NEGEMMLowp.h new file mode 100644 index 0000000000..bfb1a494b8 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowp.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGEMMLOWP_H__ +#define __ARM_COMPUTE_NEGEMMLOWP_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute GEMMLowp on NEON. This function calls the following NEON kernels: +* +* -# @ref NEGEMMInterleave4x4Kernel +* -# @ref NEGEMMTranspose1xWKernel +* -# @ref NEGEMMLowpMatrixMultiplyKernel +* +*/ +class NEGEMMLowp : public IFunction +{ +public: + /** Constructor */ + NEGEMMLowp(); + /** Initialise the function's inputs and output. + * + * @note GEMM_LOWP: low precision GEMM kernel + * This kernel performs the following computation: + * + * -# Convert a values from uint8 to int32 and add a_offset to each of them. + * -# Convert b values from uint8 to int32 and add b_offset to each of them. + * -# Compute the int32 matrix product of the resulting a * b. + * -# Add output_offset to each entry of the result. + * -# Multiply each entry of the result by output_mult_int and round to the nearest integer + * -# Clamp the resulting int32 values to the [0..255] range and cast to uint8. + * + * @param[in] a First input tensor (Matrix A). Data type supported: U8. + * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a + * @param[out] output Output tensor. Data type supported: same as @p a. 
+ * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_offset Offset to be added to each element of the output matrix + * @param[in] output_mult_int Value by which each element of the output matrix is multiplied + * @param[in] shift Number of bits to shift right the result. + */ + void configure(const ITensor *a, const ITensor *b, ITensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift); + // Inherited methods overridden: + void run() override; + +private: + NEGEMMInterleave4x4Kernel _interleave_kernel; + NEGEMMTranspose1xWKernel _transpose_kernel; + NEGEMMLowpMatrixMultiplyKernel _mm_kernel; + Tensor _tmp_a; + Tensor _tmp_b; +}; +} +#endif /*__ARM_COMPUTE_NEGEMMLOWP_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h new file mode 100644 index 0000000000..a237e6f0e5 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGAUSSIAN3x3_H__ +#define __ARM_COMPUTE_NEGAUSSIAN3x3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include /* NOTE(review): header name missing after #include (angle-bracket text appears stripped); uint8_t is used below, so presumably <cstdint> - confirm against upstream */ + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute gaussian filter 3x3. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEGaussian3x3Kernel + * + */ +class NEGaussian3x3 : public INESimpleFunction +{ +public: + /** Initialise the function's input, output and border mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. Defaults to 0. + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NEGAUSSIAN3x3_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h new file mode 100644 index 0000000000..7487f66bea --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGAUSSIAN5x5_H__ +#define __ARM_COMPUTE_NEGAUSSIAN5x5_H__ + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include /* NOTE(review): header name missing after #include (angle-bracket text appears stripped); uint8_t is used below, so presumably <cstdint> - confirm against upstream */ + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute gaussian filter 5x5. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEGaussian5x5HorKernel + * -# @ref NEGaussian5x5VertKernel + * + */ +class NEGaussian5x5 : public IFunction +{ +public: + /** Default constructor + */ + NEGaussian5x5(); + /** Initialise the function's input, output and border mode. 
+ * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. Defaults to 0. + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + NEGaussian5x5HorKernel _kernel_hor; /**< kernel for horizontal pass */ + NEGaussian5x5VertKernel _kernel_vert; /**< kernel for vertical pass */ + Tensor _tmp; /**< temporary buffer for output of horizontal pass */ + NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */ +}; +} +#endif /*__ARM_COMPUTE_NEGAUSSIAN5x5_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h new file mode 100644 index 0000000000..9b5b1c9fa0 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGAUSSIANPYRAMID_H__ +#define __ARM_COMPUTE_NEGAUSSIANPYRAMID_H__ + +#include "arm_compute/core/IPyramid.h" +#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" +#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" +#include "arm_compute/runtime/Pyramid.h" +#include "arm_compute/runtime/Tensor.h" + +#include +#include /* NOTE(review): both header names above are missing (angle-bracket text appears stripped); uint8_t and std::unique_ptr are used below, so presumably <cstdint> and <memory> - confirm against upstream */ + +namespace arm_compute +{ +class ITensor; + +/** Common interface for all Gaussian pyramid functions */ +class NEGaussianPyramid : public IFunction +{ +public: + /** Constructor */ + NEGaussianPyramid(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramid(const NEGaussianPyramid &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramid &operator=(const NEGaussianPyramid &) = delete; + /** Initialise the function's source, destinations and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] pyramid Destination pyramid tensors. Data type supported at each level: U8. + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. (No default is declared, so it must always be passed.) 
+ * + */ + virtual void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) = 0; + +protected: + const ITensor *_input; + IPyramid *_pyramid; + Pyramid _tmp; +}; + +/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEGaussianPyramidHorKernel + * -# @ref NEGaussianPyramidVertKernel + * + */ +class NEGaussianPyramidHalf : public NEGaussianPyramid +{ +public: + /** Constructor */ + NEGaussianPyramidHalf(); + + // Inherited methods overridden: + void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; + void run() override; + +private: + std::unique_ptr _border_handler; /* NOTE(review): the std::unique_ptr members of this class have no template argument (angle-bracket text appears stripped) - restore from upstream */ + std::unique_ptr _horizontal_reduction; + std::unique_ptr _vertical_reduction; +}; + +/** Basic function to execute gaussian pyramid with ORB scale factor. 
This function calls the following NEON kernels and functions: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEGaussian5x5 + * -# @ref NEScaleKernel + * + */ +class NEGaussianPyramidOrb : public NEGaussianPyramid +{ +public: + /** Constructor */ + NEGaussianPyramidOrb(); + + // Inherited methods overridden: + void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; + void run() override; + +private: + std::unique_ptr _offsets; /* NOTE(review): the std::unique_ptr members of this class have no template argument (angle-bracket text appears stripped) - restore from upstream */ + std::unique_ptr _gaus5x5; + std::unique_ptr _scale_nearest; +}; +} +#endif /*__ARM_COMPUTE_NEGAUSSIANPYRAMID_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h new file mode 100644 index 0000000000..b7b4909060 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEHOGDESCRIPTOR_H__ +#define __ARM_COMPUTE_NEHOGDESCRIPTOR_H__ + +#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +class IHOG; +/** Basic function to calculate HOG descriptor. This function calls the following NEON kernels and functions: + * + * -# @ref NEHOGGradient + * -# @ref NEHOGOrientationBinningKernel + * -# @ref NEHOGBlockNormalizationKernel + * + */ +class NEHOGDescriptor : public IFunction +{ +public: + /** Default constructor */ + NEHOGDescriptor(); + /** Initialise the function's source, destination, HOG data-object and border mode + * + * @param[in, out] input Input tensor. Data type supported: U8 + * (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor which stores the HOG descriptor. Data type supported: F32. The number of channels is equal to the number of histogram bins per block + * @param[in] hog HOG data object which describes the HOG descriptor + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. Defaults to 0. 
+ */ + void configure(ITensor *input, ITensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited method overridden: + void run() override; + +private: + NEHOGGradient _gradient; + NEHOGOrientationBinningKernel _orient_bin; + NEHOGBlockNormalizationKernel _block_norm; + Tensor _mag; + Tensor _phase; + Tensor _hog_space; +}; +} + +#endif /* __ARM_COMPUTE_NEHOGDESCRIPTOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h new file mode 100644 index 0000000000..46ab72c4de --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEHOGDetector.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEHOGDETECTOR_H__ +#define __ARM_COMPUTE_NEHOGDETECTOR_H__ + +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +/** Basic function to execute HOG detector based on linear SVM. This function calls the following NEON kernel: + * + * -# @ref NEHOGDetectorKernel + * + */ +class NEHOGDetector : public INESimpleFunction +{ +public: + /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class + * + * @param[in] input Input tensor. It is the output of @ref NEHOGDescriptor. Data type supported: F32 + * @param[in] hog HOG data-object that describes the HOG descriptor + * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. + * It must be a multiple of the block stride stored in hog + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane. Defaults to 0.0f. + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to. Defaults to 0. + */ + void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0); +}; +} + +#endif /* __ARM_COMPUTE_NEHOGDETECTOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGGradient.h b/arm_compute/runtime/NEON/functions/NEHOGGradient.h new file mode 100644 index 0000000000..dd2d99adfe --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEHOGGradient.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEHOGGRADIENT_H__ +#define __ARM_COMPUTE_NEHOGGRADIENT_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEDerivative.h" +#include "arm_compute/runtime/Tensor.h" + +#include +#include /* NOTE(review): both header names above are missing (angle-bracket text appears stripped); uint8_t and std::unique_ptr are used below, so presumably <cstdint> and <memory> - confirm against upstream */ + +namespace arm_compute +{ +class ITensor; +/** Basic function to calculate the gradient for HOG. This function calls the following NEON kernels and functions: + * + * -# @ref NEDerivative + * -# NEMagnitudePhaseKernel + * + */ +class NEHOGGradient : public IFunction +{ +public: + /** Default constructor */ + NEHOGGradient(); + /** Initialise the function's source, destinations, phase type and border mode + * + * @param[in, out] input Input tensor. Data type supported: U8. 
+ * (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16. + * @param[out] output_phase Output tensor (phase). Data type supported: U8 + * @param[in] phase_type Type of @ref PhaseType + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. Defaults to 0. + */ + void configure(ITensor *input, ITensor *output_magnitude, ITensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited method overridden: + void run() override; + +private: + NEDerivative _derivative; + std::unique_ptr _mag_phase; /* NOTE(review): std::unique_ptr has no template argument here (angle-bracket text appears stripped) - restore from upstream */ + Tensor _gx; + Tensor _gy; +}; +} +#endif /*__ARM_COMPUTE_NEHOGGRADIENT_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h new file mode 100644 index 0000000000..9440ee0c21 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEHOGMULTIDETECTION_H__ +#define __ARM_COMPUTE_NEHOGMULTIDETECTION_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/IMultiHOG.h" +#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" +#include "arm_compute/core/NEON/kernels/NEHOGNonMaximaSuppressionKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" +#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following NEON kernels and functions: + * + * -# @ref NEHOGGradient + * -# @ref NEHOGOrientationBinningKernel + * -# @ref NEHOGBlockNormalizationKernel + * -# @ref NEHOGDetector + * -# @ref NEHOGNonMaximaSuppressionKernel (executed if non_maxima_suppression == true) + * + * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same: + * -# Phase type + * -# Normalization type + * -# L2 hysteresis threshold if the normalization type is L2HYS_NORM + * + */ +class NEHOGMultiDetection : public IFunction +{ +public: + /** Default constructor */ + NEHOGMultiDetection(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGMultiDetection(const NEHOGMultiDetection &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGMultiDetection &operator=(const NEHOGMultiDetection &) = delete; + /** Initialise the function's source, destination, detection window strides, border mode, 
threshold and non-maxima suppression + * + * @param[in, out] input Input tensor. Data type supported: U8 + * (Written to only for @p border_mode != UNDEFINED) + * @param[in] multi_hog Container of multiple HOG data object. Each HOG data object describes one HOG model to detect. + * This container should store the HOG data-objects in descending or ascending cell_size width order. + * This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects + * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects + * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object + * The dimension of this array must be the same as multi_hog->num_models() + * The i-th detection_window_stride of this array must be a multiple of the block_stride stored in the i-th multi_hog array + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. Defaults to 0. + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane. Defaults to 0.0f. + * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not. Defaults to false. 
+ * True if the non-maxima suppression stage has to be computed + * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage. Defaults to 1.0f. + * + */ + void configure(ITensor *input, const IMultiHOG *multi_hog, IDetectionWindowArray *detection_windows, const ISize2DArray *detection_window_strides, BorderMode border_mode, + uint8_t constant_border_value = 0, + float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f); + + // Inherited method overridden: + void run() override; + +private: + NEHOGGradient _gradient_kernel; + std::unique_ptr _orient_bin_kernel; /* NOTE(review): the std::unique_ptr members of this class have no template argument (angle-bracket text appears stripped) - restore from upstream */ + std::unique_ptr _block_norm_kernel; + std::unique_ptr _hog_detect_kernel; + std::unique_ptr _non_maxima_kernel; + std::unique_ptr _hog_space; + std::unique_ptr _hog_norm_space; + IDetectionWindowArray *_detection_windows; + Tensor _mag; + Tensor _phase; + bool _non_maxima_suppression; + size_t _num_orient_bin_kernel; + size_t _num_block_norm_kernel; + size_t _num_hog_detect_kernel; +}; +} + +#endif /* __ARM_COMPUTE_NEHOGMULTIDETECTION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h new file mode 100644 index 0000000000..a709871153 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEHARRISCORNERS_H__ +#define __ARM_COMPUTE_NEHARRISCORNERS_H__ + +#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" +#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Array.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h" +#include "arm_compute/runtime/Tensor.h" + +#include +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Basic function to execute harris corners detection. This function calls the following NEON kernels and functions: + * + * -# @ref NESobel3x3 (if gradient_size == 3) or
+ * @ref NESobel5x5 (if gradient_size == 5) or
+ * @ref NESobel7x7 (if gradient_size == 7) + * -# @ref NEFillBorderKernel + * -# NEHarrisScoreKernel<3> (if block_size == 3) or
+ * NEHarrisScoreKernel<5> (if block_size == 5) or
+ * NEHarrisScoreKernel<7> (if block_size == 7) + * -# @ref NENonMaximaSuppression3x3 + * -# @ref CPPCornerCandidatesKernel + * -# @ref CPPSortEuclideanDistanceKernel + * + */ +class NEHarrisCorners : public IFunction +{ +public: + /** Constructor + * + * Initialize _sobel, _harris_score and _corners_list to nullptr. + */ + NEHarrisCorners(); + /** Initialize the function's source image, Harris corner parameters, output corner array and border_mode. + * + * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[in] threshold Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] min_dist Radial Euclidean distance for the Euclidean distance stage + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation + * @param[in] gradient_size The gradient window size to use on the input. The implementation supports 3, 5, and 7 + * @param[in] block_size The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7. + * @param[out] corners Array of keypoints to store the results. + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. 
+ */ void configure(IImage *input, float threshold, float min_dist, float sensitivity, + int32_t gradient_size, int32_t block_size, KeyPointArray *corners, + BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr _sobel; /**< Sobel function */ + std::unique_ptr _harris_score; /**< Harris score kernel */ + NENonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */ + CPPCornerCandidatesKernel _candidates; /**< Corner candidates kernel */ + CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Sort and Euclidean distance kernel */ + NEFillBorderKernel _border_gx; /**< Border handler before running harris score */ + NEFillBorderKernel _border_gy; /**< Border handler before running harris score */ + Image _gx; /**< Source image - Gx component */ + Image _gy; /**< Source image - Gy component */ + Image _score; /**< Source image - Harris score */ + Image _nonmax; /**< Source image - Non-Maxima suppressed image */ + std::unique_ptr _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */ + int32_t _num_corner_candidates; /**< Number of potential corner candidates */ +}; +} +#endif /*__ARM_COMPUTE_NEHARRISCORNERS_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEHistogram.h b/arm_compute/runtime/NEON/functions/NEHistogram.h new file mode 100644 index 0000000000..105a38ace1 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEHistogram.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEHISTOGRAM_H__ +#define __ARM_COMPUTE_NEHISTOGRAM_H__ + +#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" +#include "arm_compute/runtime/IFunction.h" + +#include +#include +#include + +namespace arm_compute +{ +class IDistribution1D; + +/** Basic function to execute histogram. This function calls the following NEON kernels: + * + * -# @ref NEHistogramKernel + * -# @ref NEHistogramBorderKernel + * + */ +class NEHistogram : public IFunction +{ +public: + /** Default Constructor. */ + NEHistogram(); + /** Initialise the function's input image and output distribution. + * + * @note Currently the width of the input image must be a multiple of 16. + * + * @param[in] input Input image. Data type supported: U8. + * @param[out] output Output distribution. 
+ */ + void configure(const IImage *input, IDistribution1D *output); + + // Inherited methods overridden: + void run() override; + +private: + NEHistogramKernel _histogram_kernel; /**< Histogram kernel */ + NEHistogramBorderKernel _border_histogram_kernel; /**< Histogram border kernel */ + std::unique_ptr _local_hist; + std::unique_ptr _window_lut; + size_t _local_hist_size; /**< NOTE(review): presumably the size of _local_hist - confirm in the implementation */ + bool _run_border_hist; /**< NOTE(review): presumably whether _border_histogram_kernel has to run - confirm in the implementation */ + /** 256 possible pixel values as we handle only U8 images */ + static constexpr unsigned int window_lut_default_size = 256; +}; +} +#endif /*__ARM_COMPUTE_NEHISTOGRAM_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEIntegralImage.h b/arm_compute/runtime/NEON/functions/NEIntegralImage.h new file mode 100644 index 0000000000..6d7dd697e8 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEIntegralImage.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEINTEGRALIMAGE_H__ +#define __ARM_COMPUTE_NEINTEGRALIMAGE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run a @ref NEIntegralImageKernel */ +class NEIntegralImage : public INESimpleFunction +{ +public: + /** Initialise the function's source and destination tensors. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U32. + */ + void configure(const ITensor *input, ITensor *output); +}; +} +#endif /*__ARM_COMPUTE_NEINTEGRALIMAGE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h new file mode 100644 index 0000000000..991ae7c293 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NELAPLACIANPYRAMID_H__ +#define __ARM_COMPUTE_NELAPLACIANPYRAMID_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" +#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" +#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" +#include "arm_compute/runtime/Pyramid.h" + +#include +#include +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute laplacian pyramid. This function calls the following NEON kernels and functions: + * + * -# @ref NEGaussianPyramidHalf + * -# @ref NEGaussian5x5 + * -# @ref NEArithmeticSubtraction + * + * First a Gaussian pyramid is created. Then, for each level i, the corresponding tensor I(i) is blurred with the Gaussian 5x5 filter, and the + * difference between the two tensors is the corresponding level L(i) of the Laplacian pyramid. + * L(i) = I(i) - Gaussian5x5(I(i)) + * Level 0 always has the same first two dimensions as the input tensor. +*/ +class NELaplacianPyramid : public IFunction +{ +public: + /** Constructor */ + NELaplacianPyramid(); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] pyramid Destination pyramid tensors, Data type supported at each level: S16. + * @param[out] output The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data type supported: S16. 
+ * The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is: + * out.width = in.width() / pow(2,pyramid_levels-1) and out.height = in.height() / pow(2,pyramid_levels-1) + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. No default is declared, so it must always be passed. + * + */ + void configure(const ITensor *input, IPyramid *pyramid, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); + + // Inherited methods overridden: + void run() override; + +private: + size_t _num_levels; + NEGaussianPyramidHalf _gaussian_pyr_function; + std::unique_ptr _convf; + std::unique_ptr _subf; + Pyramid _gauss_pyr; + Pyramid _conv_pyr; + NEDepthConvert _depth_function; +}; +} +#endif /*__ARM_COMPUTE_NELAPLACIANPYRAMID_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h new file mode 100644 index 0000000000..4139733499 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H__ +#define __ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" +#include "arm_compute/runtime/NEON/functions/NEScale.h" +#include "arm_compute/runtime/Pyramid.h" + +#include +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Basic function to execute laplacian reconstruction. This function calls the following NEON kernels and functions: + * + * -# @ref NEArithmeticAddition + * -# @ref NEScale + * -# @ref NEDepthConvert + * + * This function reconstructs the original image from a Laplacian Image Pyramid. + * + * The input image is added to the last level of the Laplacian pyramid L(n-1), the resulting image is upsampled to the + * resolution of the next pyramid level. + * + * I(n-2) = upsample(input + L(n-1)) + * + * For each pyramid level i, except i=0 and i=n-1: + * I(i-1) = upsample(I(i) + L(i)) + * + * output = I(0) + L(0) +*/ +class NELaplacianReconstruct : public IFunction +{ +public: + /** Constructor */ + NELaplacianReconstruct(); + /** Initialise the function's source, destinations and border mode. + * + * The Output image must have the same size as the first level of the pyramid. + * The Input image must have the same size as the last level of the pyramid. 
+ * + * The idea is to reconstruct the original hi-res image from a low-res representation of it and the laplacian pyramid. + * + * @param[in] pyramid Laplacian pyramid tensors, Data type supported at each level: S16. + * @param[in] input Source tensor. Data type supported: S16. + * @param[out] output Output tensor. Data type supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. No default is declared, so it must always be passed. + * + */ + void configure(const IPyramid *pyramid, const ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); + + // Inherited methods overridden: + void run() override; + +private: + Pyramid _tmp_pyr; + std::unique_ptr _addf; + std::unique_ptr _scalef; + NEDepthConvert _depthf; +}; +} +#endif /*__ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h new file mode 100644 index 0000000000..6c1f988ef0 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEMagnitude.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEMAGNITUDE_H__ +#define __ARM_COMPUTE_NEMAGNITUDE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run NEMagnitudePhaseKernel */ +class NEMagnitude : public INESimpleFunction +{ +public: + /** Initialise the function's two input tensors and its output tensor. + * + * @param[in] input1 First tensor input. Data type supported: S16. + * @param[in] input2 Second tensor input. Data type supported: S16. + * @param[out] output Output tensor. Data type supported: S16. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, bool use_fp16 = false); +}; +} +#endif /*__ARM_COMPUTE_NEMAGNITUDE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h new file mode 100644 index 0000000000..3770b2a270 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEMEANSTDDEV_H__ +#define __ARM_COMPUTE_NEMEANSTDDEV_H__ + +#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +/** Basic function to execute mean and standard deviation. This function calls the following NEON kernels: + * + * @ref NEMeanStdDevKernel + * + */ +class NEMeanStdDev : public IFunction +{ +public: + /** Default Constructor. */ + NEMeanStdDev(); + /** Initialise the function's input image and output values. + * + * @param[in] input Input image. Data type supported: U8. + * @param[out] mean Output average pixel value. + * @param[out] stddev (Optional) Output standard deviation of pixel values. 
+ */ + void configure(const IImage *input, float *mean, float *stddev = nullptr); + + // Inherited methods overridden: + void run() override; + +private: + NEMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that performs the mean and standard deviation calculation. */ + uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ + uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ +}; +} +#endif /*__ARM_COMPUTE_NEMEANSTDDEV_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEMedian3x3.h b/arm_compute/runtime/NEON/functions/NEMedian3x3.h new file mode 100644 index 0000000000..a3df687a35 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEMedian3x3.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEMEDIAN3x3_H__ +#define __ARM_COMPUTE_NEMEDIAN3x3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute median filter. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEMedian3x3Kernel + * + */ +class NEMedian3x3 : public INESimpleFunction +{ +public: + /** Initialise the function's source, destination and border mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_mode Border mode to use for the median filter. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NEMEDIAN3x3_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h new file mode 100644 index 0000000000..e60349a004 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEMINMAXLOCATION_H__ +#define __ARM_COMPUTE_NEMINMAXLOCATION_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" +#include "arm_compute/runtime/Array.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Basic function to compute the minimum and maximum values of an image and, optionally, their locations and counts. This function calls the following NEON kernels: + * + * -# NEMinMaxKernel + * -# NEMinMaxLocationKernel + */ +class NEMinMaxLocation : public IFunction +{ +public: + /** Constructor */ + NEMinMaxLocation(); + /** Initialise the function's input image and outputs. + * + * @param[in] input Input image. Data types supported: U8 or S16. + * @param[out] min Minimum value of image. + * @param[out] max Maximum value of image. 
+ * @param[out] min_loc (Optional) Array of minimum value locations. + * @param[out] max_loc (Optional) Array of maximum value locations. + * @param[out] min_count (Optional) Number of occurrences of the minimum value. + * @param[out] max_count (Optional) Number of occurrences of the maximum value. + */ + void configure(const IImage *input, int32_t *min, int32_t *max, + ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr, + uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); + + // Inherited methods overridden: + void run() override; + +private: + NEMinMaxKernel _min_max; /**< Kernel that performs min/max */ + NEMinMaxLocationKernel _min_max_loc; /**< Kernel that extracts min/max locations */ +}; +} +#endif /*__ARM_COMPUTE_NEMINMAXLOCATION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h new file mode 100644 index 0000000000..d8a9eaebfb --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NENONLINEARFILTER_H__ +#define __ARM_COMPUTE_NENONLINEARFILTER_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute non linear filter. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NENonLinearFilterKernel + * + * @note Supported mask dimensions: squares of sizes 3 and 5 + */ +class NENonLinearFilter : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination, filter function, mask and border_mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Used only if pattern is set to PATTERN_OTHER + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, + uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NENONLINEARFILTER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h new file mode 100644 index 0000000000..d86ef2815e --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H__ +#define __ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NENonMaximaSuppression3x3Kernel + * + */ +class NENonMaximaSuppression3x3 : public INESimpleFunction +{ +public: + /** Initialise the function's source, destination and border mode. + * + * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT + * The constant value used with CONSTANT border mode is 0 + * + * @param[in, out] input Source tensor. Data type supported: F32. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination for the Non-Maxima suppression 3x3. Data type supported: F32. + * @param[in] border_mode Border mode to use for non-maxima suppression. The implementation supports just 2 border modes: UNDEFINED and CONSTANT + * + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode); +}; +} +#endif /* __ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h new file mode 100644 index 0000000000..b7be34d006 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NENORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_NENORMALIZATIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" +#include "arm_compute/runtime/Tensor.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to simulate a normalization layer. This function calls the following NEON kernels: + * + * -# @ref NEPixelWiseMultiplicationKernel + * -# @ref NEFillBorderKernel + * -# @ref NENormalizationLayerKernel + * + */ +class NENormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + NENormalizationLayer(); + /** Set the input and output tensors. 
+ * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data type supported: F32. Number of channels must be 1. + * @param[out] output Destination with the same dimensions, data type and number of channels as @p input + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const ITensor *input, ITensor *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run() override; + +private: + NENormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel */ + NEPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel */ + NEFillBorderKernel _border_handler; /**< Kernel to handle borders */ + Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ +}; +} +#endif /* __ARM_COMPUTE_NENORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h new file mode 100644 index 0000000000..4b7c4038f4 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEOPTICALFLOW_H__ +#define __ARM_COMPUTE_NEOPTICALFLOW_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Array.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" +#include "arm_compute/runtime/Tensor.h" + +#include <cstddef> +#include <cstdint> +#include <memory> + +namespace arm_compute +{ +class Pyramid; + +using LKInternalKeypointArray = Array<NELKInternalKeypoint>; +/** Basic function to execute optical flow. This function calls the following NEON kernels and functions: + * + * -# @ref NEScharr3x3 + * -# @ref NELKTrackerKernel + * + */ +class NEOpticalFlow : public IFunction +{ +public: + /** Constructor */ + NEOpticalFlow(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEOpticalFlow(const NEOpticalFlow &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEOpticalFlow &operator=(const NEOpticalFlow &) = delete; + /** Initialise the function input and output + * + * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data type supported U8 + * @param[in] new_pyramid Pointer to the pyramid for the new tensor. 
Data type supported U8 + * @param[in] old_points Pointer to the IKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points + * @param[out] new_points Pointer to the IKeyPointArray storing new key points + * @param[in] termination The criteria to terminate the search of each keypoint. + * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] border_mode The border mode applied at scharr kernel stage + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT + * + */ + void configure(const Pyramid *old_pyramid, const Pyramid *new_pyramid, const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, + IKeyPointArray *new_points, Termination termination, float epsilon, unsigned int num_iterations, size_t window_dimension, + bool use_initial_estimate, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr<NEScharr3x3[]> _func_scharr; + std::unique_ptr<NELKTrackerKernel[]> _kernel_tracker; + std::unique_ptr<Tensor[]> _scharr_gx; + std::unique_ptr<Tensor[]> _scharr_gy; + IKeyPointArray *_new_points; + const IKeyPointArray *_new_points_estimates; + const IKeyPointArray *_old_points; + LKInternalKeypointArray _new_points_internal; + LKInternalKeypointArray _old_points_internal; + size_t _num_levels; +}; +} +#endif /*__ARM_COMPUTE_NEOPTICALFLOW_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h new file mode 100644 index 0000000000..985ba84c4c --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEPhase.h @@ -0,0 
+1,46 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEPHASE_H__ +#define __ARM_COMPUTE_NEPHASE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEMagnitudePhaseKernel */ +class NEPhase : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs and output. + * + * @param[in] input1 First tensor input. Data type supported: S16. + * @param[in] input2 Second tensor input. Data type supported: S16. + * @param[out] output Output tensor. Data type supported: U8. 
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /*__ARM_COMPUTE_NEPHASE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h new file mode 100644 index 0000000000..835bd13f6c --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H__ +#define __ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEPixelWiseMultiplicationKernel */ +class NEPixelWiseMultiplication : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8 or S16. + * @param[in] input2 Second tensor input. Data types supported: U8 or S16. + * @param[out] output Output tensor. Data types supported: U8 or S16. + * @param[in] scale Scale to apply after multiplication. Must be positive. + * @param[in] overflow_policy Overflow policy. + * @param[in] rounding_policy Rounding policy. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); +}; +} +#endif /*__ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h new file mode 100644 index 0000000000..5d67830be6 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEPOOLINGLAYER_H__ +#define __ARM_COMPUTE_NEPOOLINGLAYER_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if padding size is different from zero) + * -# @ref NEPoolingLayerKernel + */ +class NEPoolingLayer : public INESimpleFunction +{ +public: + /** Set the input and output tensors. + * + * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. 
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info); +}; +} +#endif /* __ARM_COMPUTE_NEPOOLINGLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h new file mode 100644 index 0000000000..b1ec559817 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NERemap.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEREMAP_H__ +#define __ARM_COMPUTE_NEREMAP_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute remap. 
This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NERemapKernel + */ +class NERemap : public INESimpleFunction +{ +public: + /** Initialise the function's sources, destination, interpolation policy and border mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[in] map_x Map for X coordinates. Data type supported: F32. + * @param[in] map_y Map for Y coordinates. Data type supported: F32. + * @param[out] output Output tensor. Data type supported: U8. + * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported. + * @param[in] border_mode Border mode to use on the input tensor. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, + InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NEREMAP_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h new file mode 100644 index 0000000000..e1da891dcf --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEScale.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESCALEIMAGE_H__ +#define __ARM_COMPUTE_NESCALEIMAGE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEScaleKernel */ +class NEScale : public INESimpleFunction +{ +public: + /** Constructor + * + * Initialize NEScale + */ + NEScale(); + /** Initialize the function's source, destination, interpolation type and border_mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. 
+ * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); + +private: + Tensor _offsets; /**< Offset to access the element with NEAREST interpolation or the top-left element with BILINEAR interpolation in the input tensor */ + Tensor _dx; /**< Element's distance between the X real coordinate and the smallest X following integer */ + Tensor _dy; /**< Element's distance between the Y real coordinate and the smallest Y following integer */ +}; +} +#endif /*__ARM_COMPUTE_NESCALEIMAGE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEScharr3x3.h b/arm_compute/runtime/NEON/functions/NEScharr3x3.h new file mode 100644 index 0000000000..db24723902 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEScharr3x3.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESCHARR3x3_H__ +#define __ARM_COMPUTE_NESCHARR3x3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute scharr 3x3 filter. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEScharr3x3Kernel + * + */ +class NEScharr3x3 : public INESimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Scharr 3x3 convolution along the X axis. Data type supported: S16. + * @param[out] output_y (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data type supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NESCHARR3x3_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NESobel3x3.h b/arm_compute/runtime/NEON/functions/NESobel3x3.h new file mode 100644 index 0000000000..e2896ba058 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NESobel3x3.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOBEL3x3_H__ +#define __ARM_COMPUTE_NESOBEL3x3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute sobel 3x3 filter. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NESobel3x3Kernel + * + */ +class NESobel3x3 : public INESimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must not be NULL. + * + * @param[in, out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Sobel 3x3 convolution along the X axis. Data type supported: S16. + * @param[out] output_y (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data type supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NESOBEL3x3_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NESobel5x5.h b/arm_compute/runtime/NEON/functions/NESobel5x5.h new file mode 100644 index 0000000000..fc4d665a70 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NESobel5x5.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOBEL5x5_H__ +#define __ARM_COMPUTE_NESOBEL5x5_H__ + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute sobel 5x5 filter. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NESobel5x5HorKernel + * -# @ref NESobel5x5VertKernel + * + */ +class NESobel5x5 : public IFunction +{ +public: + /** Default constructor */ + NESobel5x5(); + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Sobel 5x5 convolution along the X axis. Data type supported: S16. + * @param[out] output_y (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data type supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ * + */ + void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + NESobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ + NESobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */ + Tensor _tmp_x; /**< Temporary buffer for Sobel X */ + Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ + NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ +}; +} +#endif /*__ARM_COMPUTE_NESOBEL5x5_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NESobel7x7.h b/arm_compute/runtime/NEON/functions/NESobel7x7.h new file mode 100644 index 0000000000..06b7c80ad6 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NESobel7x7.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOBEL7x7_H__ +#define __ARM_COMPUTE_NESOBEL7x7_H__ + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute a Sobel 7x7 filter. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NESobel7x7HorKernel + * -# @ref NESobel7x7VertKernel + * + */ +class NESobel7x7 : public IFunction +{ +public: + /** Default constructor */ + NESobel7x7(); + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must not be NULL. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (Optional) Destination for the Sobel 7x7 convolution along the X axis. Data type supported: S32. + * @param[out] output_y (Optional) Destination for the Sobel 7x7 convolution along the Y axis. Data type supported: S32. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ * + */ + void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + NESobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ + NESobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */ + Tensor _tmp_x; /**< Temporary buffer for Sobel X */ + Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ + NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ +}; +} +#endif /*__ARM_COMPUTE_NESOBEL7x7_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h new file mode 100644 index 0000000000..82e015d86d --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOFTMAXLAYER_H__ +#define __ARM_COMPUTE_NESOFTMAXLAYER_H__ + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to compute a SoftmaxLayer. + * + * Softmax is calculated by: + * @f[ out = exp(x - max(x)) / sum(exp(x - max(x))) @f] + * + * This function runs the following kernels: + * -# @ref NELogits1DMaxKernel + * -# @ref NELogits1DShiftExpSumKernel + * -# @ref NELogits1DNormKernel + */ +class NESoftmaxLayer : public IFunction +{ +public: + /** Constructor */ + NESoftmaxLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. + */ + void configure(ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run() override; + +private: + NELogits1DMaxKernel _max_kernel; /**< Logits 1D max kernel */ + NELogits1DShiftExpSumKernel _shift_exp_sum_kernel; /**< Logits 1D shift-exp-sum kernel */ + NELogits1DNormKernel _norm_kernel; /**< Logits 1D norm kernel */ + NEFillBorderKernel _fill_border_kernel; /**< Kernel to handle tensor borders (not in the kernel list of the class description - TODO confirm where it is run) */ + Tensor _max; /**< Intermediate tensor, presumably max(x) - TODO confirm */ + Tensor _sum; /**< Intermediate tensor, presumably sum(exp(x - max(x))) - TODO confirm */ + Tensor _tmp; /**< Intermediate tensor, presumably exp(x - max(x)) - TODO confirm */ +}; +} +#endif /* __ARM_COMPUTE_NESOFTMAXLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h new file mode 100644 index 0000000000..d2f9d307f0 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NETableLookup.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NETABLELOOKUP_H__ +#define __ARM_COMPUTE_NETABLELOOKUP_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; +class ILut; + +/** Basic function to run @ref NETableLookupKernel */ +class NETableLookup : public INESimpleFunction +{ +public: + /** Initialise the function's input tensor, lookup table and output tensor + * + * @param[in] input Input tensor. Data types supported: U8 and S16. + * @param[in] lut Input lookup table. + * @param[out] output Output tensor. Data types supported: U8 and S16.
+ */ + void configure(const ITensor *input, const ILut *lut, ITensor *output); +}; +} +#endif /*__ARM_COMPUTE_NETABLELOOKUP_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h new file mode 100644 index 0000000000..d407ee5b15 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEThreshold.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NETHRESHOLD_H__ +#define __ARM_COMPUTE_NETHRESHOLD_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEThresholdKernel */ +class NEThreshold : public INESimpleFunction +{ +public: + /** Initialise the function's source, destination, thresholds and threshold type + * + * @param[in] input Input tensor. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + * @param[in] threshold Threshold. If upper threshold is specified, this will be used as the lower threshold. + * @param[in] false_value (Optional) Value to assign when the condition is false. Defaults to 0. + * @param[in] true_value (Optional) Value to assign when the condition is true. Defaults to 0. + * @param[in] type (Optional) Thresholding type. Can either be BINARY or RANGE. Defaults to BINARY. + * @param[in] upper (Optional) Upper threshold. Only used with RANGE thresholding. Defaults to 0. + */ + void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value = 0, uint8_t true_value = 0, + ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0); +}; +} +#endif /*__ARM_COMPUTE_NETHRESHOLD_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h new file mode 100644 index 0000000000..1b88715f78 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NETranspose.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NETRANSPOSE_H__ +#define __ARM_COMPUTE_NETRANSPOSE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to transpose a matrix on NEON. This function calls the following NEON kernel: + * + * -# @ref NETransposeKernel + * + */ +class NETranspose : public INESimpleFunction +{ +public: + /** Initialise the function's input and output + * + * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32. + * @param[out] output Output tensor.
Data type supported: Same as @p input + */ + void configure(const ITensor *input, ITensor *output); +}; +} + +#endif /* __ARM_COMPUTE_NETRANSPOSE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEWarpAffine.h b/arm_compute/runtime/NEON/functions/NEWarpAffine.h new file mode 100644 index 0000000000..f8eebe8d2a --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEWarpAffine.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEWARPAFFINE_H__ +#define __ARM_COMPUTE_NEWARPAFFINE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEWarpAffineKernel */ +class NEWarpAffine : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination, affine matrix, interpolation policy and border_mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] matrix The affine matrix. Must be 2x3 of type float. + * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NEWARPAFFINE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h new file mode 100644 index 0000000000..d0699291b1 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEWARPPERSPECTIVE_H__ +#define __ARM_COMPUTE_NEWARPPERSPECTIVE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEWarpPerspectiveKernel */ +class NEWarpPerspective : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination, perspective matrix, interpolation policy and border_mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] matrix The perspective matrix. Must be 3x3 of type float. + * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NEWARPPERSPECTIVE_H__ */ diff --git a/arm_compute/runtime/Pyramid.h b/arm_compute/runtime/Pyramid.h new file mode 100644 index 0000000000..2e7613759f --- /dev/null +++ b/arm_compute/runtime/Pyramid.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_PYRAMID_H__ +#define __ARM_COMPUTE_PYRAMID_H__ + +#include "arm_compute/core/IPyramid.h" +#include "arm_compute/core/PyramidInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" + +#include +#include + +namespace arm_compute +{ +class Tensor; + +/** Basic implementation of the pyramid interface */ +class Pyramid : public IPyramid +{ +public: + /** Initialize pyramid data-object using the given Pyramid's metadata + * + * @param[in] info Pyramid's metadata + */ + void init(const PyramidInfo &info); + + /** Initialize pyramid data-object using the given Pyramid's metadata + * + * @note Uses a conservative padding strategy which fits all kernels. + * + * @param[in] info Pyramid's metadata + */ + void init_auto_padding(const PyramidInfo &info); + + /** Allocate the planes in the pyramid */ + void allocate(); + + // Inherited methods overridden: + const PyramidInfo *info() const override; + Tensor *get_pyramid_level(size_t index) const override; + +private: + /** Initialize pyramid data-object using the given Pyramid's metadata + * + * @param[in] info Pyramid's metadata + * @param[in] auto_padding Specifies whether the images in the pyramid use auto padding + */ + void internal_init(const PyramidInfo &info, bool auto_padding); + + PyramidInfo _info{}; /**< Pyramid's metadata */ + std::unique_ptr _pyramid{ nullptr }; /**< Owning pointer to the pyramid storage, presumably the per-level tensors - TODO confirm */ +}; +} +#endif /*__ARM_COMPUTE_PYRAMID_H__ */ diff --git a/arm_compute/runtime/Tensor.h b/arm_compute/runtime/Tensor.h new file mode 100644 index 0000000000..e491635e9f --- /dev/null +++ b/arm_compute/runtime/Tensor.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TENSOR_H__ +#define __ARM_COMPUTE_TENSOR_H__ + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include + +namespace arm_compute +{ +class TensorInfo; + +/** Basic implementation of the tensor interface */ +class Tensor : public ITensor +{ +public: + /** Constructor */ + Tensor(); + /** Destructor: free the tensor's memory */ + ~Tensor() = default; + /** Allow instances of this class to be move constructed (declaring the move operations leaves the copy operations implicitly deleted) */ + Tensor(Tensor &&) = default; + /** Allow instances of this class to be move assigned */ + Tensor &operator=(Tensor &&) = default; + /** Return a pointer to the tensor's allocator + * + * @return A pointer to the tensor's allocator + */ + TensorAllocator *allocator(); + + // Inherited methods overridden: + TensorInfo *info() const override; + TensorInfo *info() override; + uint8_t *buffer() const override; + +private: + mutable TensorAllocator _allocator; /**< Instance of the basic CPU allocator. */ +}; + +using Image = Tensor; /**< Image is an alias of Tensor */ +} +#endif /*__ARM_COMPUTE_TENSOR_H__ */ diff --git a/arm_compute/runtime/TensorAllocator.h b/arm_compute/runtime/TensorAllocator.h new file mode 100644 index 0000000000..cd7aabff33 --- /dev/null +++ b/arm_compute/runtime/TensorAllocator.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TENSORALLOCATOR_H__ +#define __ARM_COMPUTE_TENSORALLOCATOR_H__ + +#include "arm_compute/runtime/ITensorAllocator.h" + +#include +#include +#include + +namespace arm_compute +{ +class Coordinates; +class TensorInfo; + +/** Basic implementation of a CPU memory tensor allocator. */ +class TensorAllocator : public ITensorAllocator +{ +public: + /** Default constructor. */ + TensorAllocator(); + + /** Make ITensorAllocator's init methods available */ + using ITensorAllocator::init; + + /** Shares the same backing memory with another tensor allocator, while the tensor info might be different. + * In other words this can be used to create a sub-tensor from another tensor while sharing the same memory. + * + * @note Both TensorAllocator objects have to be of the same specialized type. + * + * @param[in] allocator The allocator that owns the backing memory to be shared. Ownership becomes shared afterwards. + * @param[in] coords The starting coordinates of the new tensor inside the parent tensor. + * @param[in] sub_info The new tensor information (e.g. shape, etc.) + */ + void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo sub_info); + + /** Returns the pointer to the allocated data. */ + uint8_t *data() const; + + /** Allocate CPU memory of the size specified by TensorInfo. + * + * @note The tensor must not already be allocated when calling this function.
+ * + **/ + void allocate() override; + +protected: + /** No-op for CPU memory + * + * @return A pointer to the beginning of the tensor's allocation. + */ + uint8_t *lock() override; + + /** No-op for CPU memory. */ + void unlock() override; + +private: + std::shared_ptr> _buffer; /**< CPU memory allocation. */ +}; +} +#endif /* __ARM_COMPUTE_TENSORALLOCATOR_H__ */ diff --git a/docs/Doxyfile b/docs/Doxyfile new file mode 100644 index 0000000000..c3400cf4a3 --- /dev/null +++ b/docs/Doxyfile @@ -0,0 +1,2455 @@ +# Doxyfile 1.8.9.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places.
+# The default value is: My Project. + +PROJECT_NAME = "ARM Compute Library" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = 17.03.1 + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give the viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = build/arm_compute/ + +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise cause +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO.
+ +#ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. 
If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. 
+ +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful if your file system doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = YES + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO.
+ +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new +# page for each member. If set to NO, the documentation of a member will be part +# of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. 
For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: +# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: +# Fortran. In the latter case the parser tries to guess whether the code is fixed +# or free formatted code, this is the default for Fortran type files), VHDL. For +# instance to make doxygen treat .inc files as Fortran files (default is PHP), +# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = cl=C + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibility issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match function declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type.
If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. 
So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. 
+ +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO, +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = NO + +# This flag is only useful for Objective-C code. If set to YES, local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO, only methods in the interface are +# included. +# The default value is: NO. + +EXTRACT_LOCAL_METHODS = YES + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. 
+ +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO, these classes will be included in the various overviews. This option +# has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO, these declarations will be +# included in the documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO, these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES, upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES, the +# scope will be hidden. +# The default value is: NO. 
+ +HIDE_SCOPE_NAMES = YES + +# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will +# append additional text to a page's title, such as Class Reference. If set to +# YES the compound reference will be hidden. +# The default value is: NO. + +#HIDE_COMPOUND_REFERENCE= NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +#SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. 
+ +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = YES + +# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo +# list. 
This list is created by putting \todo commands in the documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test +# list. This list is created by putting \test commands in the documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if ... \endif and \cond +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely. The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES, the +# list will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. 
This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. 
+# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. See also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. + +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO, doxygen will only warn about wrong or incomplete +# parameter documentation, but not about the absence of documentation. +# The default value is: NO. 
+ +WARN_NO_PARAMDOC = YES + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line:[DOXY_WARN] $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. +# Note: If this tag is empty the current directory is searched. + +INPUT = ./docs/arm_compute.dox \ + ./arm_compute/ \ + ./src/core/CL/cl_kernels/ \ + ./examples/ \ + ./test_helpers/ + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. 
If left blank the +# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, +# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, +# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, +# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, +# *.qsf, *.as and *.js. + +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.idl \ + *.ddl \ + *.odl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.cs \ + *.d \ + *.php \ + *.php4 \ + *.php5 \ + *.phtml \ + *.inc \ + *.m \ + *.markdown \ + *.md \ + *.mm \ + *.dox \ + *.py \ + *.f90 \ + *.f \ + *.for \ + *.tcl \ + *.vhd \ + *.vhdl \ + *.ucf \ + *.qsf \ + *.as \ + *.js \ + *.cl + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. 
+# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = examples/ \ + . \ + + +# "." is Needed by the release script + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = ./docs/ + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# <filter> <input-file> +# +# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the +# name of an input file.
Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. 
+ +USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = YES + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = YES + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. 
+ +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = YES + +# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the +# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the +# cost of reduced performance.
This can be particularly helpful with template +# rich C++ code for which doxygen's built-in parser lacks the necessary type +# information. +# Note: The availability of this option depends on whether or not doxygen was +# compiled with the --with-libclang option. +# The default value is: NO. + +#CLANG_ASSISTED_PARSING = NO + +# If clang assisted parsing is enabled you can provide the compiler with command +# line options that you would normally use when invoking the compiler. Note that +# the include paths will already be set by doxygen for the files and directories +# specified with INPUT and INCLUDE_PATH. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +#CLANG_OPTIONS = + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. 
+ +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .xhtml + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file that includes any scripts and style sheets +# that doxygen needs, which is dependent on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_HEADER = ./docs/header.html + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# cascading style sheets that are included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). For an example see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the style sheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80.
+# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to NO can help when comparing the output of multiple runs. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries to 1 will produce a fully collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a fully expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory.
Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. 
The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler (hhc.exe). If non-empty, +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated +# (YES) or that it should be included in the master .chm file (NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
+ +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated +# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. 
+ +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = YES + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES.
+ +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# http://www.mathjax.org) which uses client side Javascript for the rendering +# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX +# installed or if you want formulas to look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = YES + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from http://www.mathjax.org before deployment. +# The default value is: http://cdn.mathjax.org/mathjax/latest. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering.
For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use the access key + S +# (what the access key is depends on the OS and browser, but it is typically +# CTRL, ALT/option