diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/benchmark.h b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/benchmark.h index 3d5c0cda3..ed42c1acb 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/benchmark.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/benchmark.h @@ -24,6 +24,9 @@ namespace ncnn { // get now timestamp in ms NCNN_EXPORT double get_current_time(); +// sleep milliseconds +NCNN_EXPORT void sleep(unsigned long long int milliseconds = 1000); + #if NCNN_BENCHMARK NCNN_EXPORT void benchmark(const Layer* layer, double start, double end); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/cpu.h b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/cpu.h index d03e7e8b3..7d6bfce11 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/cpu.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/cpu.h @@ -60,6 +60,8 @@ NCNN_EXPORT int cpu_support_arm_neon(); NCNN_EXPORT int cpu_support_arm_vfpv4(); // asimdhp = aarch64 asimd half precision NCNN_EXPORT int cpu_support_arm_asimdhp(); +// cpuid = aarch64 cpuid info +NCNN_EXPORT int cpu_support_arm_cpuid(); // asimddp = aarch64 asimd dot product NCNN_EXPORT int cpu_support_arm_asimddp(); // asimdfhm = aarch64 asimd fhm @@ -147,6 +149,9 @@ NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave); // set explicit thread affinity NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask); +// runtime thread affinity info +NCNN_EXPORT int is_current_thread_running_on_a53_a55(); + // misc function wrapper for openmp routines NCNN_EXPORT int get_omp_num_threads(); NCNN_EXPORT void set_omp_num_threads(int num_threads); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/gpu.h 
b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/gpu.h index 1cda18200..345329f7d 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/gpu.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/gpu.h @@ -28,7 +28,19 @@ namespace ncnn { // instance + +// Create VkInstance and initialize some objects that need to be calculated by GPU +// Creates a VkInstance object, Checks the extended attributes supported by the Vulkan instance concerned, +// Initializes, and creates Vulkan validation layers (if ENABLE_VALIDATION_LAYER is enabled), +// Iterates over all supported physical devices, etc. NCNN_EXPORT int create_gpu_instance(); + +// Get global VkInstance variable +// Must be called after create_gpu_instance() and before destroy_gpu_instance() +NCNN_EXPORT VkInstance get_gpu_instance(); + +// Destroy VkInstance object and free the memory of the associated object +// Usually called in the destructor of the main program exit NCNN_EXPORT void destroy_gpu_instance(); // instance extension capability @@ -37,6 +49,8 @@ extern int support_VK_KHR_get_physical_device_properties2; extern int support_VK_KHR_get_surface_capabilities2; extern int support_VK_KHR_surface; extern int support_VK_EXT_debug_utils; +extern int support_VK_EXT_validation_features; +extern int support_VK_EXT_validation_flags; #if __ANDROID_API__ >= 26 extern int support_VK_KHR_android_surface; #endif // __ANDROID_API__ >= 26 @@ -167,6 +181,7 @@ class NCNN_EXPORT GpuInfo int support_VK_KHR_8bit_storage() const; int support_VK_KHR_16bit_storage() const; int support_VK_KHR_bind_memory2() const; + int support_VK_KHR_buffer_device_address() const; int support_VK_KHR_create_renderpass2() const; int support_VK_KHR_dedicated_allocation() const; int support_VK_KHR_descriptor_update_template() const; @@ -183,9 +198,12 @@ class NCNN_EXPORT GpuInfo int support_VK_KHR_shader_float_controls() const; int 
support_VK_KHR_storage_buffer_storage_class() const; int support_VK_KHR_swapchain() const; + int support_VK_EXT_buffer_device_address() const; int support_VK_EXT_descriptor_indexing() const; int support_VK_EXT_memory_budget() const; + int support_VK_EXT_memory_priority() const; int support_VK_EXT_queue_family_foreign() const; + int support_VK_AMD_device_coherent_memory() const; #if __ANDROID_API__ >= 26 int support_VK_ANDROID_external_memory_android_hardware_buffer() const; #endif // __ANDROID_API__ >= 26 @@ -269,6 +287,11 @@ class NCNN_EXPORT VulkanDevice PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR; PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR; + // VK_KHR_buffer_device_address + PFN_vkGetBufferDeviceAddressKHR vkGetBufferDeviceAddressKHR; + PFN_vkGetBufferOpaqueCaptureAddressKHR vkGetBufferOpaqueCaptureAddressKHR; + PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR vkGetDeviceMemoryOpaqueCaptureAddressKHR; + // VK_KHR_create_renderpass2 PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR; PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR; @@ -306,6 +329,9 @@ class NCNN_EXPORT VulkanDevice PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; PFN_vkQueuePresentKHR vkQueuePresentKHR; + // VK_EXT_buffer_device_address + PFN_vkGetBufferDeviceAddressEXT vkGetBufferDeviceAddressEXT; + #if __ANDROID_API__ >= 26 // VK_ANDROID_external_memory_android_hardware_buffer PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/layer.h b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/layer.h index d02f65bbc..ae4a8430d 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/layer.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/layer.h @@ -188,6 +188,16 @@ struct custom_layer_registry_entry void* userdata; }; +struct overwrite_builtin_layer_registry_entry +{ + // layer type index 
+ int typeindex; + // layer factory entry + layer_creator_func creator; + layer_destroyer_func destroyer; + void* userdata; +}; + #if NCNN_STRING // get layer type from type name NCNN_EXPORT int layer_to_index(const char* type); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/layer_shader_type_enum.h b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/layer_shader_type_enum.h index bad560545..916ceb200 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/layer_shader_type_enum.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/layer_shader_type_enum.h @@ -364,6 +364,15 @@ mish_pack8 = 358, swish = 359, swish_pack4 = 360, swish_pack8 = 361, -convert_ycbcr = 362, -vulkan_activation = 363, +gemm = 362, +multiheadattention_qk_cross = 363, +multiheadattention_qk_cross_pack1to4 = 364, +multiheadattention_qk_cross_pack4 = 365, +multiheadattention_qk_cross_pack4to1 = 366, +multiheadattention_qkv_cross = 367, +multiheadattention_qkv_cross_pack1to4 = 368, +multiheadattention_qkv_cross_pack4 = 369, +multiheadattention_qkv_cross_pack4to1 = 370, +convert_ycbcr = 371, +vulkan_activation = 372, diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/modelbin.h b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/modelbin.h index 15d2b9c0d..aada5f61c 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/modelbin.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/modelbin.h @@ -31,7 +31,7 @@ class NCNN_EXPORT ModelBin // 2 = float16 // 3 = int8 // load vec - virtual Mat load(int w, int type) const = 0; + virtual Mat load(int w, int type) const; // load image virtual Mat load(int w, int h, int type) const; // load dim diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/net.h 
b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/net.h index 94070422f..98e3ec335 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/net.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/net.h @@ -58,12 +58,12 @@ class NCNN_EXPORT Net #endif // NCNN_VULKAN #if NCNN_STRING - // register custom layer by layer type name + // register custom layer or overwrite built-in layer by layer type name // return 0 if success int register_custom_layer(const char* type, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); virtual int custom_layer_to_index(const char* type); #endif // NCNN_STRING - // register custom layer by layer type + // register custom layer or overwrite built-in layer by layer type // return 0 if success int register_custom_layer(int index, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); @@ -149,8 +149,10 @@ class NCNN_EXPORT Net int find_blob_index_by_name(const char* name) const; int find_layer_index_by_name(const char* name) const; virtual Layer* create_custom_layer(const char* type); + virtual Layer* create_overwrite_builtin_layer(const char* type); #endif // NCNN_STRING virtual Layer* create_custom_layer(int index); + virtual Layer* create_overwrite_builtin_layer(int typeindex); private: Net(const Net&); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/option.h b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/option.h index 3fda80893..7d0cc60ba 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/option.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/option.h @@ -140,7 +140,10 @@ class NCNN_EXPORT Option bool use_winograd43_convolution; bool use_winograd63_convolution; - bool use_reserved_6; + // this option is turned on for A53/A55 automatically + // but you can force this on/off 
if you wish + bool use_a53_a55_optimized_kernel; + bool use_reserved_7; bool use_reserved_8; bool use_reserved_9; diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/platform.h b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/platform.h index 5ad57afbf..037e8dfe2 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/platform.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/platform.h @@ -42,8 +42,7 @@ #define NCNN_AVX512VNNI 0 #define NCNN_AVX512BF16 0 #define NCNN_AVX512FP16 0 -#define NCNN_VFPV4 0 -#if __aarch64__ +#define NCNN_VFPV4 1 #define NCNN_ARM82 1 #define NCNN_ARM82DOT 1 #define NCNN_ARM82FP16FML 1 @@ -54,7 +53,6 @@ #define NCNN_ARM86SVEBF16 1 #define NCNN_ARM86SVEI8MM 1 #define NCNN_ARM86SVEF32MM 1 -#endif // __aarch64__ #define NCNN_MSA 0 #define NCNN_LSX 0 #define NCNN_MMI 0 @@ -63,7 +61,7 @@ #define NCNN_BF16 1 #define NCNN_FORCE_INLINE 1 -#define NCNN_VERSION_STRING "1.0.20230223" +#define NCNN_VERSION_STRING "1.0.20230517" #include "ncnn_export.h" diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/simpleocv.h b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/simpleocv.h index 3f41076c8..54b22d9f9 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/simpleocv.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/simpleocv.h @@ -448,6 +448,8 @@ enum ImreadModes NCNN_EXPORT Mat imread(const std::string& path, int flags = IMREAD_COLOR); +NCNN_EXPORT Mat imdecode(const std::vector<uchar>& buf, int flags = IMREAD_COLOR); + enum ImwriteFlags { IMWRITE_JPEG_QUALITY = 1 @@ -498,4 +500,4 @@ NCNN_EXPORT Size getTextSize(const std::string& text, int fontFace, double fontS #endif // NCNN_SIMPLEOCV -#endif // NCNN_SIMPLEOCV_H \ No newline at end of file +#endif // NCNN_SIMPLEOCV_H diff --git
a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/vulkan_header_fix.h b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/vulkan_header_fix.h index 103ac3e5e..cd1efed46 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/vulkan_header_fix.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/include/ncnn/vulkan_header_fix.h @@ -177,6 +177,15 @@ typedef struct VkPhysicalDeviceFloat16Int8FeaturesKHR #if VK_HEADER_VERSION < 97 #define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT (VkStructureType)1000237000 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT (VkStructureType)1000238000 +#define VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT (VkStructureType)1000238001 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT (VkStructureType)1000244000 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT (VkStructureType)1000244001 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT (VkStructureType)1000244002 +#define VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT (VkStructureType)1000247000 +#define VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_EXT (VkBufferCreateFlagBits)0x00000010 +#define VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT (VkBufferUsageFlagBits)0x00020000 +typedef uint64_t VkDeviceAddress; typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT { VkStructureType sType; @@ -184,6 +193,71 @@ typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; } VkPhysicalDeviceMemoryBudgetPropertiesEXT; +typedef struct VkPhysicalDeviceMemoryPriorityFeaturesEXT +{ + VkStructureType sType; + void* pNext; + VkBool32 memoryPriority; +} VkPhysicalDeviceMemoryPriorityFeaturesEXT; +typedef struct VkMemoryPriorityAllocateInfoEXT +{ + VkStructureType sType; + const void* pNext; + float priority; +}
VkMemoryPriorityAllocateInfoEXT; +typedef struct VkPhysicalDeviceBufferAddressFeaturesEXT +{ + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} VkPhysicalDeviceBufferAddressFeaturesEXT; +typedef struct VkBufferDeviceAddressInfoEXT +{ + VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferDeviceAddressInfoEXT; +typedef struct VkBufferDeviceAddressCreateInfoEXT +{ + VkStructureType sType; + const void* pNext; + VkDeviceSize deviceAddress; +} VkBufferDeviceAddressCreateInfoEXT; +typedef VkDeviceAddress(VKAPI_PTR* PFN_vkGetBufferDeviceAddressEXT)(VkDevice device, const VkBufferDeviceAddressInfoEXT* pInfo); +typedef enum VkValidationFeatureEnableEXT +{ + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT = 0, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT = 1, + VK_VALIDATION_FEATURE_ENABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, + VK_VALIDATION_FEATURE_ENABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, + VK_VALIDATION_FEATURE_ENABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT - VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT + 1), + VK_VALIDATION_FEATURE_ENABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureEnableEXT; +typedef enum VkValidationFeatureDisableEXT +{ + VK_VALIDATION_FEATURE_DISABLE_ALL_EXT = 0, + VK_VALIDATION_FEATURE_DISABLE_SHADERS_EXT = 1, + VK_VALIDATION_FEATURE_DISABLE_THREAD_SAFETY_EXT = 2, + VK_VALIDATION_FEATURE_DISABLE_API_PARAMETERS_EXT = 3, + VK_VALIDATION_FEATURE_DISABLE_OBJECT_LIFETIMES_EXT = 4, + VK_VALIDATION_FEATURE_DISABLE_CORE_CHECKS_EXT = 5, + VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT = 6, + VK_VALIDATION_FEATURE_DISABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_ALL_EXT, + VK_VALIDATION_FEATURE_DISABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT, + 
VK_VALIDATION_FEATURE_DISABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT - VK_VALIDATION_FEATURE_DISABLE_ALL_EXT + 1), + VK_VALIDATION_FEATURE_DISABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureDisableEXT; +typedef struct VkValidationFeaturesEXT +{ + VkStructureType sType; + const void* pNext; + uint32_t enabledValidationFeatureCount; + const VkValidationFeatureEnableEXT* pEnabledValidationFeatures; + uint32_t disabledValidationFeatureCount; + const VkValidationFeatureDisableEXT* pDisabledValidationFeatures; +} VkValidationFeaturesEXT; #endif // VK_HEADER_VERSION < 97 #if VK_HEADER_VERSION < 101 @@ -248,6 +322,65 @@ typedef struct VkPhysicalDeviceCooperativeMatrixPropertiesNV typedef VkResult(VKAPI_PTR* PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixPropertiesNV* pProperties); #endif // VK_HEADER_VERSION < 101 +#if VK_HEADER_VERSION < 121 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD (VkStructureType)1000229000 +#define VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD (VkMemoryPropertyFlagBits)0x00000040 +#define VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD (VkMemoryPropertyFlagBits)0x00000080 +typedef struct VkPhysicalDeviceCoherentMemoryFeaturesAMD +{ + VkStructureType sType; + void* pNext; + VkBool32 deviceCoherentMemory; +} VkPhysicalDeviceCoherentMemoryFeaturesAMD; +#endif // VK_HEADER_VERSION < 121 + +#if VK_HEADER_VERSION < 129 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR (VkStructureType)1000257000 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR (VkStructureType)1000244001 +#define VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO_KHR (VkStructureType)1000257002 +#define VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO_KHR (VkStructureType)1000257003 +#define VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO_KHR (VkStructureType)1000257004 +#define
VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR (VkBufferCreateFlagBits)0x00000010 +#define VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR (VkBufferUsageFlagBits)0x00020000 +#define VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR (VkMemoryAllocateFlagBits)0x00000002 +#define VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR (VkMemoryAllocateFlagBits)0x00000004 +typedef struct VkPhysicalDeviceBufferDeviceAddressFeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} VkPhysicalDeviceBufferDeviceAddressFeaturesKHR; +typedef struct VkBufferDeviceAddressInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferDeviceAddressInfoKHR; +typedef struct VkBufferOpaqueCaptureAddressCreateInfoKHR +{ + VkStructureType sType; + const void* pNext; + uint64_t opaqueCaptureAddress; +} VkBufferOpaqueCaptureAddressCreateInfoKHR; +typedef struct VkMemoryOpaqueCaptureAddressAllocateInfoKHR +{ + VkStructureType sType; + const void* pNext; + uint64_t opaqueCaptureAddress; +} VkMemoryOpaqueCaptureAddressAllocateInfoKHR; +typedef struct VkDeviceMemoryOpaqueCaptureAddressInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkDeviceMemory memory; +} VkDeviceMemoryOpaqueCaptureAddressInfoKHR; +typedef VkDeviceAddress(VKAPI_PTR* PFN_vkGetBufferDeviceAddressKHR)(VkDevice device, const VkBufferDeviceAddressInfoKHR* pInfo); +typedef uint64_t(VKAPI_PTR* PFN_vkGetBufferOpaqueCaptureAddressKHR)(VkDevice device, const VkBufferDeviceAddressInfoKHR* pInfo); +typedef uint64_t(VKAPI_PTR* PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR)(VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfoKHR* pInfo); +#endif // VK_HEADER_VERSION < 129 + #if VK_HEADER_VERSION < 208 typedef enum VkInstanceCreateFlagBits { diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/cmake/glslang/glslang-config-version.cmake
b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/cmake/glslang/glslang-config-version.cmake index c7acdbaf5..9119e1093 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/cmake/glslang/glslang-config-version.cmake +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/cmake/glslang/glslang-config-version.cmake @@ -52,11 +52,6 @@ else() endif() -# if the installed project requested no architecture check, don't perform the check -if("FALSE") - return() -endif() - # if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it: if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "8" STREQUAL "") return() diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/cmake/glslang/glslang-targets.cmake b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/cmake/glslang/glslang-targets.cmake index 2173b87cd..d346d4be2 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/cmake/glslang/glslang-targets.cmake +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/cmake/glslang/glslang-targets.cmake @@ -7,7 +7,7 @@ if(CMAKE_VERSION VERSION_LESS "2.8.3") message(FATAL_ERROR "CMake >= 2.8.3 required") endif() cmake_policy(PUSH) -cmake_policy(VERSION 2.8.3...3.23) +cmake_policy(VERSION 2.8.3...3.24) #---------------------------------------------------------------- # Generated CMake target import file. 
#---------------------------------------------------------------- diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/cmake/ncnn/ncnn.cmake b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/cmake/ncnn/ncnn.cmake index f224c1b8e..2da90b1a1 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/cmake/ncnn/ncnn.cmake +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/cmake/ncnn/ncnn.cmake @@ -7,7 +7,7 @@ if(CMAKE_VERSION VERSION_LESS "2.8.3") message(FATAL_ERROR "CMake >= 2.8.3 required") endif() cmake_policy(PUSH) -cmake_policy(VERSION 2.8.3...3.23) +cmake_policy(VERSION 2.8.3...3.24) #---------------------------------------------------------------- # Generated CMake target import file. #---------------------------------------------------------------- diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/libncnn.a b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/libncnn.a index 84c302ac0..ab11f8fbd 100644 Binary files a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/libncnn.a and b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/libncnn.a differ diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/pkgconfig/ncnn.pc b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/pkgconfig/ncnn.pc index e683e4ccb..02c7d718b 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/pkgconfig/ncnn.pc +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/arm64-v8a/lib/pkgconfig/ncnn.pc @@ -4,7 +4,7 @@ includedir=${prefix}/include Name: ncnn Description: high-performance neural network inference framework optimized for the mobile platform -Version: 1.0.20230223 +Version: 1.0.20230517 URL: https://github.com/Tencent/ncnn Libs: -L"${librarydir}" -lncnn Cflags: -I"${includedir}" diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/benchmark.h 
b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/benchmark.h index 3d5c0cda3..ed42c1acb 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/benchmark.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/benchmark.h @@ -24,6 +24,9 @@ namespace ncnn { // get now timestamp in ms NCNN_EXPORT double get_current_time(); +// sleep milliseconds +NCNN_EXPORT void sleep(unsigned long long int milliseconds = 1000); + #if NCNN_BENCHMARK NCNN_EXPORT void benchmark(const Layer* layer, double start, double end); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/cpu.h b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/cpu.h index d03e7e8b3..7d6bfce11 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/cpu.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/cpu.h @@ -60,6 +60,8 @@ NCNN_EXPORT int cpu_support_arm_neon(); NCNN_EXPORT int cpu_support_arm_vfpv4(); // asimdhp = aarch64 asimd half precision NCNN_EXPORT int cpu_support_arm_asimdhp(); +// cpuid = aarch64 cpuid info +NCNN_EXPORT int cpu_support_arm_cpuid(); // asimddp = aarch64 asimd dot product NCNN_EXPORT int cpu_support_arm_asimddp(); // asimdfhm = aarch64 asimd fhm @@ -147,6 +149,9 @@ NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave); // set explicit thread affinity NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask); +// runtime thread affinity info +NCNN_EXPORT int is_current_thread_running_on_a53_a55(); + // misc function wrapper for openmp routines NCNN_EXPORT int get_omp_num_threads(); NCNN_EXPORT void set_omp_num_threads(int num_threads); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/gpu.h b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/gpu.h index 1cda18200..345329f7d 100644 --- 
a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/gpu.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/gpu.h @@ -28,7 +28,19 @@ namespace ncnn { // instance + +// Create VkInstance and initialize some objects that need to be calculated by GPU +// Creates a VkInstance object, Checks the extended attributes supported by the Vulkan instance concerned, +// Initializes, and creates Vulkan validation layers (if ENABLE_VALIDATION_LAYER is enabled), +// Iterates over all supported physical devices, etc. NCNN_EXPORT int create_gpu_instance(); + +// Get global VkInstance variable +// Must be called after create_gpu_instance() and before destroy_gpu_instance() +NCNN_EXPORT VkInstance get_gpu_instance(); + +// Destroy VkInstance object and free the memory of the associated object +// Usually called in the destructor of the main program exit NCNN_EXPORT void destroy_gpu_instance(); // instance extension capability @@ -37,6 +49,8 @@ extern int support_VK_KHR_get_physical_device_properties2; extern int support_VK_KHR_get_surface_capabilities2; extern int support_VK_KHR_surface; extern int support_VK_EXT_debug_utils; +extern int support_VK_EXT_validation_features; +extern int support_VK_EXT_validation_flags; #if __ANDROID_API__ >= 26 extern int support_VK_KHR_android_surface; #endif // __ANDROID_API__ >= 26 @@ -167,6 +181,7 @@ class NCNN_EXPORT GpuInfo int support_VK_KHR_8bit_storage() const; int support_VK_KHR_16bit_storage() const; int support_VK_KHR_bind_memory2() const; + int support_VK_KHR_buffer_device_address() const; int support_VK_KHR_create_renderpass2() const; int support_VK_KHR_dedicated_allocation() const; int support_VK_KHR_descriptor_update_template() const; @@ -183,9 +198,12 @@ class NCNN_EXPORT GpuInfo int support_VK_KHR_shader_float_controls() const; int support_VK_KHR_storage_buffer_storage_class() const; int support_VK_KHR_swapchain() const; + int support_VK_EXT_buffer_device_address() const; int 
support_VK_EXT_descriptor_indexing() const; int support_VK_EXT_memory_budget() const; + int support_VK_EXT_memory_priority() const; int support_VK_EXT_queue_family_foreign() const; + int support_VK_AMD_device_coherent_memory() const; #if __ANDROID_API__ >= 26 int support_VK_ANDROID_external_memory_android_hardware_buffer() const; #endif // __ANDROID_API__ >= 26 @@ -269,6 +287,11 @@ class NCNN_EXPORT VulkanDevice PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR; PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR; + // VK_KHR_buffer_device_address + PFN_vkGetBufferDeviceAddressKHR vkGetBufferDeviceAddressKHR; + PFN_vkGetBufferOpaqueCaptureAddressKHR vkGetBufferOpaqueCaptureAddressKHR; + PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR vkGetDeviceMemoryOpaqueCaptureAddressKHR; + // VK_KHR_create_renderpass2 PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR; PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR; @@ -306,6 +329,9 @@ class NCNN_EXPORT VulkanDevice PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; PFN_vkQueuePresentKHR vkQueuePresentKHR; + // VK_EXT_buffer_device_address + PFN_vkGetBufferDeviceAddressEXT vkGetBufferDeviceAddressEXT; + #if __ANDROID_API__ >= 26 // VK_ANDROID_external_memory_android_hardware_buffer PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/layer.h b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/layer.h index d02f65bbc..ae4a8430d 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/layer.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/layer.h @@ -188,6 +188,16 @@ struct custom_layer_registry_entry void* userdata; }; +struct overwrite_builtin_layer_registry_entry +{ + // layer type index + int typeindex; + // layer factory entry + layer_creator_func creator; + layer_destroyer_func destroyer; + void* userdata; +}; + #if 
NCNN_STRING // get layer type from type name NCNN_EXPORT int layer_to_index(const char* type); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/layer_shader_type_enum.h b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/layer_shader_type_enum.h index bad560545..916ceb200 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/layer_shader_type_enum.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/layer_shader_type_enum.h @@ -364,6 +364,15 @@ mish_pack8 = 358, swish = 359, swish_pack4 = 360, swish_pack8 = 361, -convert_ycbcr = 362, -vulkan_activation = 363, +gemm = 362, +multiheadattention_qk_cross = 363, +multiheadattention_qk_cross_pack1to4 = 364, +multiheadattention_qk_cross_pack4 = 365, +multiheadattention_qk_cross_pack4to1 = 366, +multiheadattention_qkv_cross = 367, +multiheadattention_qkv_cross_pack1to4 = 368, +multiheadattention_qkv_cross_pack4 = 369, +multiheadattention_qkv_cross_pack4to1 = 370, +convert_ycbcr = 371, +vulkan_activation = 372, diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/modelbin.h b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/modelbin.h index 15d2b9c0d..aada5f61c 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/modelbin.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/modelbin.h @@ -31,7 +31,7 @@ class NCNN_EXPORT ModelBin // 2 = float16 // 3 = int8 // load vec - virtual Mat load(int w, int type) const = 0; + virtual Mat load(int w, int type) const; // load image virtual Mat load(int w, int h, int type) const; // load dim diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/net.h b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/net.h index 94070422f..98e3ec335 100644 --- 
a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/net.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/net.h @@ -58,12 +58,12 @@ class NCNN_EXPORT Net #endif // NCNN_VULKAN #if NCNN_STRING - // register custom layer by layer type name + // register custom layer or overwrite built-in layer by layer type name // return 0 if success int register_custom_layer(const char* type, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); virtual int custom_layer_to_index(const char* type); #endif // NCNN_STRING - // register custom layer by layer type + // register custom layer or overwrite built-in layer by layer type // return 0 if success int register_custom_layer(int index, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); @@ -149,8 +149,10 @@ class NCNN_EXPORT Net int find_blob_index_by_name(const char* name) const; int find_layer_index_by_name(const char* name) const; virtual Layer* create_custom_layer(const char* type); + virtual Layer* create_overwrite_builtin_layer(const char* type); #endif // NCNN_STRING virtual Layer* create_custom_layer(int index); + virtual Layer* create_overwrite_builtin_layer(int typeindex); private: Net(const Net&); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/option.h b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/option.h index 3fda80893..7d0cc60ba 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/option.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/option.h @@ -140,7 +140,10 @@ class NCNN_EXPORT Option bool use_winograd43_convolution; bool use_winograd63_convolution; - bool use_reserved_6; + // this option is turned on for A53/A55 automatically + // but you can force this on/off if you wish + bool use_a53_a55_optimized_kernel; + bool use_reserved_7; bool use_reserved_8; bool 
use_reserved_9; diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/platform.h b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/platform.h index 76c6ff74f..580fdb22d 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/platform.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/platform.h @@ -43,7 +43,6 @@ #define NCNN_AVX512BF16 0 #define NCNN_AVX512FP16 0 #define NCNN_VFPV4 1 -#if __aarch64__ #define NCNN_ARM82 0 #define NCNN_ARM82DOT 0 #define NCNN_ARM82FP16FML 0 @@ -54,7 +53,6 @@ #define NCNN_ARM86SVEBF16 0 #define NCNN_ARM86SVEI8MM 0 #define NCNN_ARM86SVEF32MM 0 -#endif // __aarch64__ #define NCNN_MSA 0 #define NCNN_LSX 0 #define NCNN_MMI 0 @@ -63,7 +61,7 @@ #define NCNN_BF16 1 #define NCNN_FORCE_INLINE 1 -#define NCNN_VERSION_STRING "1.0.20230223" +#define NCNN_VERSION_STRING "1.0.20230517" #include "ncnn_export.h" diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/simpleocv.h b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/simpleocv.h index 55ede15b7..54b22d9f9 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/simpleocv.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/simpleocv.h @@ -448,6 +448,8 @@ enum ImreadModes NCNN_EXPORT Mat imread(const std::string& path, int flags = IMREAD_COLOR); +NCNN_EXPORT Mat imdecode(const std::vector& buf, int flags = IMREAD_COLOR); + enum ImwriteFlags { IMWRITE_JPEG_QUALITY = 1 diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/vulkan_header_fix.h b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/vulkan_header_fix.h index 103ac3e5e..cd1efed46 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/vulkan_header_fix.h +++ 
b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/include/ncnn/vulkan_header_fix.h @@ -177,6 +177,15 @@ typedef struct VkPhysicalDeviceFloat16Int8FeaturesKHR #if VK_HEADER_VERSION < 97 #define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT (VkStructureType)1000237000 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT (VkStructureType)1000238000 +#define VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT (VkStructureType)1000238001 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT (VkStructureType)1000244000 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT (VkStructureType)1000244001 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT (VkStructureType)1000244002 +#define VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT (VkStructureType)1000247000 +#define VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_EXT (VkBufferCreateFlagBits)0x00020000 +#define VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT (VkBufferUsageFlagBits)0x00020000 +typedef uint64_t VkDeviceAddress; typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT { VkStructureType sType; @@ -184,6 +193,71 @@ typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; } VkPhysicalDeviceMemoryBudgetPropertiesEXT; +typedef struct VkPhysicalDeviceMemoryPriorityFeaturesEXT +{ + VkStructureType sType; + void* pNext; + VkBool32 memoryPriority; +} VkPhysicalDeviceMemoryPriorityFeaturesEXT; +typedef struct VkMemoryPriorityAllocateInfoEXT +{ + VkStructureType sType; + const void* pNext; + float priority; +} VkMemoryPriorityAllocateInfoEXT; +typedef struct VkPhysicalDeviceBufferAddressFeaturesEXT +{ + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} VkPhysicalDeviceBufferAddressFeaturesEXT; +typedef struct VkBufferDeviceAddressInfoEXT +{ 
+ VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferDeviceAddressInfoEXT; +typedef struct VkBufferDeviceAddressCreateInfoEXT +{ + VkStructureType sType; + const void* pNext; + VkDeviceSize deviceAddress; +} VkBufferDeviceAddressCreateInfoEXT; +typedef VkDeviceAddress(VKAPI_PTR* PFN_vkGetBufferDeviceAddressEXT)(VkDevice device, const VkBufferDeviceAddressInfoEXT* pInfo); +typedef enum VkValidationFeatureEnableEXT +{ + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT = 0, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT = 1, + VK_VALIDATION_FEATURE_ENABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, + VK_VALIDATION_FEATURE_ENABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, + VK_VALIDATION_FEATURE_ENABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT - VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT + 1), + VK_VALIDATION_FEATURE_ENABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureEnableEXT; +typedef enum VkValidationFeatureDisableEXT +{ + VK_VALIDATION_FEATURE_DISABLE_ALL_EXT = 0, + VK_VALIDATION_FEATURE_DISABLE_SHADERS_EXT = 1, + VK_VALIDATION_FEATURE_DISABLE_THREAD_SAFETY_EXT = 2, + VK_VALIDATION_FEATURE_DISABLE_API_PARAMETERS_EXT = 3, + VK_VALIDATION_FEATURE_DISABLE_OBJECT_LIFETIMES_EXT = 4, + VK_VALIDATION_FEATURE_DISABLE_CORE_CHECKS_EXT = 5, + VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT = 6, + VK_VALIDATION_FEATURE_DISABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_ALL_EXT, + VK_VALIDATION_FEATURE_DISABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT, + VK_VALIDATION_FEATURE_DISABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT - VK_VALIDATION_FEATURE_DISABLE_ALL_EXT + 1), + VK_VALIDATION_FEATURE_DISABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureDisableEXT; +typedef struct VkValidationFeaturesEXT +{ + VkStructureType sType; + const void* pNext; + uint32_t 
enabledValidationFeatureCount; + const VkValidationFeatureEnableEXT* pEnabledValidationFeatures; + uint32_t disabledValidationFeatureCount; + const VkValidationFeatureDisableEXT* pDisabledValidationFeatures; +} VkValidationFeaturesEXT; #endif // VK_HEADER_VERSION < 97 #if VK_HEADER_VERSION < 101 @@ -248,6 +322,65 @@ typedef struct VkPhysicalDeviceCooperativeMatrixPropertiesNV typedef VkResult(VKAPI_PTR* PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixPropertiesNV* pProperties); #endif // VK_HEADER_VERSION < 101 +#if VK_HEADER_VERSION < 121 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD (VkStructureType)1000229000 +#define VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD (VkMemoryPropertyFlagBits)0x00000040 +#define VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD (VkMemoryPropertyFlagBits)0x00000040 +typedef struct VkPhysicalDeviceCoherentMemoryFeaturesAMD +{ + VkStructureType sType; + void* pNext; + VkBool32 deviceCoherentMemory; +} VkPhysicalDeviceCoherentMemoryFeaturesAMD; +#endif // VK_HEADER_VERSION < 121 + +#if VK_HEADER_VERSION < 129 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR (VkStructureType)1000257000 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR (VkStructureType)1000244001 +#define VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO_KHR (VkStructureType)1000257002 +#define VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO_KHR (VkStructureType)1000257003 +#define VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO_KHR (VkStructureType)1000257004 +#define VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR (VkBufferCreateFlagBits)0x00020000 +#define VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR (VkBufferUsageFlagBits)0x00020000 +#define VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR (VkMemoryAllocateFlagBits)0x00000002 +#define VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR 
(VkMemoryAllocateFlagBits)0x00000004 +typedef struct VkPhysicalDeviceBufferDeviceAddressFeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} VkPhysicalDeviceBufferDeviceAddressFeaturesKHR; +typedef struct VkBufferDeviceAddressInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferDeviceAddressInfoKHR; +typedef struct VkBufferOpaqueCaptureAddressCreateInfoKHR +{ + VkStructureType sType; + const void* pNext; + uint64_t opaqueCaptureAddress; +} VkBufferOpaqueCaptureAddressCreateInfoKHR; +typedef struct VkMemoryOpaqueCaptureAddressAllocateInfoKHR +{ + VkStructureType sType; + const void* pNext; + uint64_t opaqueCaptureAddress; +} VkMemoryOpaqueCaptureAddressAllocateInfoKHR; +typedef struct VkDeviceMemoryOpaqueCaptureAddressInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkDeviceMemory memory; +} VkDeviceMemoryOpaqueCaptureAddressInfoKHR; +typedef VkDeviceAddress(VKAPI_PTR* PFN_vkGetBufferDeviceAddressKHR)(VkDevice device, const VkBufferDeviceAddressInfoKHR* pInfo); +typedef uint64_t(VKAPI_PTR* PFN_vkGetBufferOpaqueCaptureAddressKHR)(VkDevice device, const VkBufferDeviceAddressInfoKHR* pInfo); +typedef uint64_t(VKAPI_PTR* PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR)(VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfoKHR* pInfo); +#endif // VK_HEADER_VERSION < 129 + #if VK_HEADER_VERSION < 208 typedef enum VkInstanceCreateFlagBits { diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/cmake/glslang/glslang-config-version.cmake b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/cmake/glslang/glslang-config-version.cmake index 83b16db66..c906a2331 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/cmake/glslang/glslang-config-version.cmake +++ 
b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/cmake/glslang/glslang-config-version.cmake @@ -52,11 +52,6 @@ else() endif() -# if the installed project requested no architecture check, don't perform the check -if("FALSE") - return() -endif() - # if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it: if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "4" STREQUAL "") return() diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/cmake/glslang/glslang-targets.cmake b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/cmake/glslang/glslang-targets.cmake index 2173b87cd..d346d4be2 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/cmake/glslang/glslang-targets.cmake +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/cmake/glslang/glslang-targets.cmake @@ -7,7 +7,7 @@ if(CMAKE_VERSION VERSION_LESS "2.8.3") message(FATAL_ERROR "CMake >= 2.8.3 required") endif() cmake_policy(PUSH) -cmake_policy(VERSION 2.8.3...3.23) +cmake_policy(VERSION 2.8.3...3.24) #---------------------------------------------------------------- # Generated CMake target import file. #---------------------------------------------------------------- diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/cmake/ncnn/ncnn.cmake b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/cmake/ncnn/ncnn.cmake index f224c1b8e..2da90b1a1 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/cmake/ncnn/ncnn.cmake +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/cmake/ncnn/ncnn.cmake @@ -7,7 +7,7 @@ if(CMAKE_VERSION VERSION_LESS "2.8.3") message(FATAL_ERROR "CMake >= 2.8.3 required") endif() cmake_policy(PUSH) -cmake_policy(VERSION 2.8.3...3.23) +cmake_policy(VERSION 2.8.3...3.24) #---------------------------------------------------------------- # Generated CMake target import file. 
#---------------------------------------------------------------- diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/libncnn.a b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/libncnn.a index 98340ab73..4350b3150 100644 Binary files a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/libncnn.a and b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/libncnn.a differ diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/pkgconfig/ncnn.pc b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/pkgconfig/ncnn.pc index e683e4ccb..02c7d718b 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/pkgconfig/ncnn.pc +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/armeabi-v7a/lib/pkgconfig/ncnn.pc @@ -4,7 +4,7 @@ includedir=${prefix}/include Name: ncnn Description: high-performance neural network inference framework optimized for the mobile platform -Version: 1.0.20230223 +Version: 1.0.20230517 URL: https://github.com/Tencent/ncnn Libs: -L"${librarydir}" -lncnn Cflags: -I"${includedir}" diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/benchmark.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/benchmark.h index 3d5c0cda3..ed42c1acb 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/benchmark.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/benchmark.h @@ -24,6 +24,9 @@ namespace ncnn { // get now timestamp in ms NCNN_EXPORT double get_current_time(); +// sleep milliseconds +NCNN_EXPORT void sleep(unsigned long long int milliseconds = 1000); + #if NCNN_BENCHMARK NCNN_EXPORT void benchmark(const Layer* layer, double start, double end); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/cpu.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/cpu.h index d03e7e8b3..7d6bfce11 100644 --- 
a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/cpu.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/cpu.h @@ -60,6 +60,8 @@ NCNN_EXPORT int cpu_support_arm_neon(); NCNN_EXPORT int cpu_support_arm_vfpv4(); // asimdhp = aarch64 asimd half precision NCNN_EXPORT int cpu_support_arm_asimdhp(); +// cpuid = aarch64 cpuid info +NCNN_EXPORT int cpu_support_arm_cpuid(); // asimddp = aarch64 asimd dot product NCNN_EXPORT int cpu_support_arm_asimddp(); // asimdfhm = aarch64 asimd fhm @@ -147,6 +149,9 @@ NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave); // set explicit thread affinity NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask); +// runtime thread affinity info +NCNN_EXPORT int is_current_thread_running_on_a53_a55(); + // misc function wrapper for openmp routines NCNN_EXPORT int get_omp_num_threads(); NCNN_EXPORT void set_omp_num_threads(int num_threads); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/gpu.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/gpu.h index 1cda18200..345329f7d 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/gpu.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/gpu.h @@ -28,7 +28,19 @@ namespace ncnn { // instance + +// Create VkInstance and initialize some objects that need to be calculated by GPU +// Creates a VkInstance object, Checks the extended attributes supported by the Vulkan instance concerned, +// Initializes, and creates Vulkan validation layers (if ENABLE_VALIDATION_LAYER is enabled), +// Iterates over all supported physical devices, etc. 
NCNN_EXPORT int create_gpu_instance(); + +// Get global VkInstance variable +// Must be called after create_gpu_instance() and before destroy_gpu_instance() +NCNN_EXPORT VkInstance get_gpu_instance(); + +// Destroy VkInstance object and free the memory of the associated object +// Usually called in the destructor of the main program exit NCNN_EXPORT void destroy_gpu_instance(); // instance extension capability @@ -37,6 +49,8 @@ extern int support_VK_KHR_get_physical_device_properties2; extern int support_VK_KHR_get_surface_capabilities2; extern int support_VK_KHR_surface; extern int support_VK_EXT_debug_utils; +extern int support_VK_EXT_validation_features; +extern int support_VK_EXT_validation_flags; #if __ANDROID_API__ >= 26 extern int support_VK_KHR_android_surface; #endif // __ANDROID_API__ >= 26 @@ -167,6 +181,7 @@ class NCNN_EXPORT GpuInfo int support_VK_KHR_8bit_storage() const; int support_VK_KHR_16bit_storage() const; int support_VK_KHR_bind_memory2() const; + int support_VK_KHR_buffer_device_address() const; int support_VK_KHR_create_renderpass2() const; int support_VK_KHR_dedicated_allocation() const; int support_VK_KHR_descriptor_update_template() const; @@ -183,9 +198,12 @@ class NCNN_EXPORT GpuInfo int support_VK_KHR_shader_float_controls() const; int support_VK_KHR_storage_buffer_storage_class() const; int support_VK_KHR_swapchain() const; + int support_VK_EXT_buffer_device_address() const; int support_VK_EXT_descriptor_indexing() const; int support_VK_EXT_memory_budget() const; + int support_VK_EXT_memory_priority() const; int support_VK_EXT_queue_family_foreign() const; + int support_VK_AMD_device_coherent_memory() const; #if __ANDROID_API__ >= 26 int support_VK_ANDROID_external_memory_android_hardware_buffer() const; #endif // __ANDROID_API__ >= 26 @@ -269,6 +287,11 @@ class NCNN_EXPORT VulkanDevice PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR; PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR; + // VK_KHR_buffer_device_address + 
PFN_vkGetBufferDeviceAddressKHR vkGetBufferDeviceAddressKHR; + PFN_vkGetBufferOpaqueCaptureAddressKHR vkGetBufferOpaqueCaptureAddressKHR; + PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR vkGetDeviceMemoryOpaqueCaptureAddressKHR; + // VK_KHR_create_renderpass2 PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR; PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR; @@ -306,6 +329,9 @@ class NCNN_EXPORT VulkanDevice PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; PFN_vkQueuePresentKHR vkQueuePresentKHR; + // VK_EXT_buffer_device_address + PFN_vkGetBufferDeviceAddressEXT vkGetBufferDeviceAddressEXT; + #if __ANDROID_API__ >= 26 // VK_ANDROID_external_memory_android_hardware_buffer PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/layer.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/layer.h index d02f65bbc..ae4a8430d 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/layer.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/layer.h @@ -188,6 +188,16 @@ struct custom_layer_registry_entry void* userdata; }; +struct overwrite_builtin_layer_registry_entry +{ + // layer type index + int typeindex; + // layer factory entry + layer_creator_func creator; + layer_destroyer_func destroyer; + void* userdata; +}; + #if NCNN_STRING // get layer type from type name NCNN_EXPORT int layer_to_index(const char* type); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/layer_shader_type_enum.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/layer_shader_type_enum.h index bad560545..916ceb200 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/layer_shader_type_enum.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/layer_shader_type_enum.h @@ -364,6 +364,15 @@ mish_pack8 = 358, swish = 359, swish_pack4 = 360, 
swish_pack8 = 361, -convert_ycbcr = 362, -vulkan_activation = 363, +gemm = 362, +multiheadattention_qk_cross = 363, +multiheadattention_qk_cross_pack1to4 = 364, +multiheadattention_qk_cross_pack4 = 365, +multiheadattention_qk_cross_pack4to1 = 366, +multiheadattention_qkv_cross = 367, +multiheadattention_qkv_cross_pack1to4 = 368, +multiheadattention_qkv_cross_pack4 = 369, +multiheadattention_qkv_cross_pack4to1 = 370, +convert_ycbcr = 371, +vulkan_activation = 372, diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/modelbin.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/modelbin.h index 15d2b9c0d..aada5f61c 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/modelbin.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/modelbin.h @@ -31,7 +31,7 @@ class NCNN_EXPORT ModelBin // 2 = float16 // 3 = int8 // load vec - virtual Mat load(int w, int type) const = 0; + virtual Mat load(int w, int type) const; // load image virtual Mat load(int w, int h, int type) const; // load dim diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/net.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/net.h index 94070422f..98e3ec335 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/net.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/net.h @@ -58,12 +58,12 @@ class NCNN_EXPORT Net #endif // NCNN_VULKAN #if NCNN_STRING - // register custom layer by layer type name + // register custom layer or overwrite built-in layer by layer type name // return 0 if success int register_custom_layer(const char* type, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); virtual int custom_layer_to_index(const char* type); #endif // NCNN_STRING - // register custom layer by layer type + // register custom layer or overwrite built-in layer by layer type // return 0 if success int 
register_custom_layer(int index, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); @@ -149,8 +149,10 @@ class NCNN_EXPORT Net int find_blob_index_by_name(const char* name) const; int find_layer_index_by_name(const char* name) const; virtual Layer* create_custom_layer(const char* type); + virtual Layer* create_overwrite_builtin_layer(const char* type); #endif // NCNN_STRING virtual Layer* create_custom_layer(int index); + virtual Layer* create_overwrite_builtin_layer(int typeindex); private: Net(const Net&); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/option.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/option.h index 3fda80893..7d0cc60ba 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/option.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/option.h @@ -140,7 +140,10 @@ class NCNN_EXPORT Option bool use_winograd43_convolution; bool use_winograd63_convolution; - bool use_reserved_6; + // this option is turned on for A53/A55 automatically + // but you can force this on/off if you wish + bool use_a53_a55_optimized_kernel; + bool use_reserved_7; bool use_reserved_8; bool use_reserved_9; diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/platform.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/platform.h index 295703694..f6634be8d 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/platform.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/platform.h @@ -43,7 +43,6 @@ #define NCNN_AVX512BF16 1 #define NCNN_AVX512FP16 1 #define NCNN_VFPV4 0 -#if __aarch64__ #define NCNN_ARM82 0 #define NCNN_ARM82DOT 0 #define NCNN_ARM82FP16FML 0 @@ -54,7 +53,6 @@ #define NCNN_ARM86SVEBF16 0 #define NCNN_ARM86SVEI8MM 0 #define NCNN_ARM86SVEF32MM 0 -#endif // __aarch64__ #define NCNN_MSA 0 #define NCNN_LSX 0 #define NCNN_MMI 0 @@ -63,7 +61,7 @@ #define 
NCNN_BF16 1 #define NCNN_FORCE_INLINE 1 -#define NCNN_VERSION_STRING "1.0.20230223" +#define NCNN_VERSION_STRING "1.0.20230517" #include "ncnn_export.h" diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/simpleocv.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/simpleocv.h index 55ede15b7..54b22d9f9 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/simpleocv.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/simpleocv.h @@ -448,6 +448,8 @@ enum ImreadModes NCNN_EXPORT Mat imread(const std::string& path, int flags = IMREAD_COLOR); +NCNN_EXPORT Mat imdecode(const std::vector& buf, int flags = IMREAD_COLOR); + enum ImwriteFlags { IMWRITE_JPEG_QUALITY = 1 diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/vulkan_header_fix.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/vulkan_header_fix.h index 103ac3e5e..cd1efed46 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/vulkan_header_fix.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/include/ncnn/vulkan_header_fix.h @@ -177,6 +177,15 @@ typedef struct VkPhysicalDeviceFloat16Int8FeaturesKHR #if VK_HEADER_VERSION < 97 #define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT (VkStructureType)1000237000 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT (VkStructureType)1000238000 +#define VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT (VkStructureType)1000238001 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT (VkStructureType)1000244000 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT (VkStructureType)1000244001 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT (VkStructureType)1000244002 +#define VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT (VkStructureType)1000247000 +#define VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_EXT 
(VkBufferCreateFlagBits)0x00020000 +#define VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT (VkBufferUsageFlagBits)0x00020000 +typedef uint64_t VkDeviceAddress; typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT { VkStructureType sType; @@ -184,6 +193,71 @@ typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; } VkPhysicalDeviceMemoryBudgetPropertiesEXT; +typedef struct VkPhysicalDeviceMemoryPriorityFeaturesEXT +{ + VkStructureType sType; + void* pNext; + VkBool32 memoryPriority; +} VkPhysicalDeviceMemoryPriorityFeaturesEXT; +typedef struct VkMemoryPriorityAllocateInfoEXT +{ + VkStructureType sType; + const void* pNext; + float priority; +} VkMemoryPriorityAllocateInfoEXT; +typedef struct VkPhysicalDeviceBufferAddressFeaturesEXT +{ + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} VkPhysicalDeviceBufferAddressFeaturesEXT; +typedef struct VkBufferDeviceAddressInfoEXT +{ + VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferDeviceAddressInfoEXT; +typedef struct VkBufferDeviceAddressCreateInfoEXT +{ + VkStructureType sType; + const void* pNext; + VkDeviceSize deviceAddress; +} VkBufferDeviceAddressCreateInfoEXT; +typedef VkDeviceAddress(VKAPI_PTR* PFN_vkGetBufferDeviceAddressEXT)(VkDevice device, const VkBufferDeviceAddressInfoEXT* pInfo); +typedef enum VkValidationFeatureEnableEXT +{ + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT = 0, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT = 1, + VK_VALIDATION_FEATURE_ENABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, + VK_VALIDATION_FEATURE_ENABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, + VK_VALIDATION_FEATURE_ENABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT - 
VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT + 1), + VK_VALIDATION_FEATURE_ENABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureEnableEXT; +typedef enum VkValidationFeatureDisableEXT +{ + VK_VALIDATION_FEATURE_DISABLE_ALL_EXT = 0, + VK_VALIDATION_FEATURE_DISABLE_SHADERS_EXT = 1, + VK_VALIDATION_FEATURE_DISABLE_THREAD_SAFETY_EXT = 2, + VK_VALIDATION_FEATURE_DISABLE_API_PARAMETERS_EXT = 3, + VK_VALIDATION_FEATURE_DISABLE_OBJECT_LIFETIMES_EXT = 4, + VK_VALIDATION_FEATURE_DISABLE_CORE_CHECKS_EXT = 5, + VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT = 6, + VK_VALIDATION_FEATURE_DISABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_ALL_EXT, + VK_VALIDATION_FEATURE_DISABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT, + VK_VALIDATION_FEATURE_DISABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT - VK_VALIDATION_FEATURE_DISABLE_ALL_EXT + 1), + VK_VALIDATION_FEATURE_DISABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureDisableEXT; +typedef struct VkValidationFeaturesEXT +{ + VkStructureType sType; + const void* pNext; + uint32_t enabledValidationFeatureCount; + const VkValidationFeatureEnableEXT* pEnabledValidationFeatures; + uint32_t disabledValidationFeatureCount; + const VkValidationFeatureDisableEXT* pDisabledValidationFeatures; +} VkValidationFeaturesEXT; #endif // VK_HEADER_VERSION < 97 #if VK_HEADER_VERSION < 101 @@ -248,6 +322,65 @@ typedef struct VkPhysicalDeviceCooperativeMatrixPropertiesNV typedef VkResult(VKAPI_PTR* PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixPropertiesNV* pProperties); #endif // VK_HEADER_VERSION < 101 +#if VK_HEADER_VERSION < 121 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD (VkStructureType)1000229000 +#define VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD (VkMemoryPropertyFlagBits)0x00000040 +#define VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD (VkMemoryPropertyFlagBits)0x00000040 
+typedef struct VkPhysicalDeviceCoherentMemoryFeaturesAMD +{ + VkStructureType sType; + void* pNext; + VkBool32 deviceCoherentMemory; +} VkPhysicalDeviceCoherentMemoryFeaturesAMD; +#endif // VK_HEADER_VERSION < 121 + +#if VK_HEADER_VERSION < 129 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR (VkStructureType)1000257000 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR (VkStructureType)1000244001 +#define VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO_KHR (VkStructureType)1000257002 +#define VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO_KHR (VkStructureType)1000257003 +#define VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO_KHR (VkStructureType)1000257004 +#define VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR (VkBufferCreateFlagBits)0x00020000 +#define VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR (VkBufferUsageFlagBits)0x00020000 +#define VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR (VkMemoryAllocateFlagBits)0x00000002 +#define VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR (VkMemoryAllocateFlagBits)0x00000004 +typedef struct VkPhysicalDeviceBufferDeviceAddressFeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} VkPhysicalDeviceBufferDeviceAddressFeaturesKHR; +typedef struct VkBufferDeviceAddressInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferDeviceAddressInfoKHR; +typedef struct VkBufferOpaqueCaptureAddressCreateInfoKHR +{ + VkStructureType sType; + const void* pNext; + uint64_t opaqueCaptureAddress; +} VkBufferOpaqueCaptureAddressCreateInfoKHR; +typedef struct VkMemoryOpaqueCaptureAddressAllocateInfoKHR +{ + VkStructureType sType; + const void* pNext; + uint64_t opaqueCaptureAddress; +} VkMemoryOpaqueCaptureAddressAllocateInfoKHR; +typedef struct VkDeviceMemoryOpaqueCaptureAddressInfoKHR +{ + VkStructureType 
sType; + const void* pNext; + VkDeviceMemory memory; +} VkDeviceMemoryOpaqueCaptureAddressInfoKHR; +typedef VkDeviceAddress(VKAPI_PTR* PFN_vkGetBufferDeviceAddressKHR)(VkDevice device, const VkBufferDeviceAddressInfoKHR* pInfo); +typedef uint64_t(VKAPI_PTR* PFN_vkGetBufferOpaqueCaptureAddressKHR)(VkDevice device, const VkBufferDeviceAddressInfoKHR* pInfo); +typedef uint64_t(VKAPI_PTR* PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR)(VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfoKHR* pInfo); +#endif // VK_HEADER_VERSION < 129 + #if VK_HEADER_VERSION < 208 typedef enum VkInstanceCreateFlagBits { diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/cmake/glslang/glslang-config-version.cmake b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/cmake/glslang/glslang-config-version.cmake index 83b16db66..c906a2331 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/cmake/glslang/glslang-config-version.cmake +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/cmake/glslang/glslang-config-version.cmake @@ -52,11 +52,6 @@ else() endif() -# if the installed project requested no architecture check, don't perform the check -if("FALSE") - return() -endif() - # if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it: if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "4" STREQUAL "") return() diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/cmake/glslang/glslang-targets.cmake b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/cmake/glslang/glslang-targets.cmake index 2173b87cd..d346d4be2 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/cmake/glslang/glslang-targets.cmake +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/cmake/glslang/glslang-targets.cmake @@ -7,7 +7,7 @@ if(CMAKE_VERSION VERSION_LESS "2.8.3") message(FATAL_ERROR "CMake >= 2.8.3 required") endif() cmake_policy(PUSH) -cmake_policy(VERSION 2.8.3...3.23) +cmake_policy(VERSION 
2.8.3...3.24) #---------------------------------------------------------------- # Generated CMake target import file. #---------------------------------------------------------------- diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/cmake/ncnn/ncnn.cmake b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/cmake/ncnn/ncnn.cmake index f224c1b8e..2da90b1a1 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/cmake/ncnn/ncnn.cmake +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/cmake/ncnn/ncnn.cmake @@ -7,7 +7,7 @@ if(CMAKE_VERSION VERSION_LESS "2.8.3") message(FATAL_ERROR "CMake >= 2.8.3 required") endif() cmake_policy(PUSH) -cmake_policy(VERSION 2.8.3...3.23) +cmake_policy(VERSION 2.8.3...3.24) #---------------------------------------------------------------- # Generated CMake target import file. #---------------------------------------------------------------- diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/libncnn.a b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/libncnn.a index 63b776415..621fc67ef 100644 Binary files a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/libncnn.a and b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/libncnn.a differ diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/pkgconfig/ncnn.pc b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/pkgconfig/ncnn.pc index e683e4ccb..02c7d718b 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/pkgconfig/ncnn.pc +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86/lib/pkgconfig/ncnn.pc @@ -4,7 +4,7 @@ includedir=${prefix}/include Name: ncnn Description: high-performance neural network inference framework optimized for the mobile platform -Version: 1.0.20230223 +Version: 1.0.20230517 URL: https://github.com/Tencent/ncnn Libs: -L"${librarydir}" -lncnn Cflags: -I"${includedir}" diff --git 
a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/benchmark.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/benchmark.h index 3d5c0cda3..ed42c1acb 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/benchmark.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/benchmark.h @@ -24,6 +24,9 @@ namespace ncnn { // get now timestamp in ms NCNN_EXPORT double get_current_time(); +// sleep milliseconds +NCNN_EXPORT void sleep(unsigned long long int milliseconds = 1000); + #if NCNN_BENCHMARK NCNN_EXPORT void benchmark(const Layer* layer, double start, double end); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/cpu.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/cpu.h index d03e7e8b3..7d6bfce11 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/cpu.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/cpu.h @@ -60,6 +60,8 @@ NCNN_EXPORT int cpu_support_arm_neon(); NCNN_EXPORT int cpu_support_arm_vfpv4(); // asimdhp = aarch64 asimd half precision NCNN_EXPORT int cpu_support_arm_asimdhp(); +// cpuid = aarch64 cpuid info +NCNN_EXPORT int cpu_support_arm_cpuid(); // asimddp = aarch64 asimd dot product NCNN_EXPORT int cpu_support_arm_asimddp(); // asimdfhm = aarch64 asimd fhm @@ -147,6 +149,9 @@ NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave); // set explicit thread affinity NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask); +// runtime thread affinity info +NCNN_EXPORT int is_current_thread_running_on_a53_a55(); + // misc function wrapper for openmp routines NCNN_EXPORT int get_omp_num_threads(); NCNN_EXPORT void set_omp_num_threads(int num_threads); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/gpu.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/gpu.h index 
1cda18200..345329f7d 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/gpu.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/gpu.h @@ -28,7 +28,19 @@ namespace ncnn { // instance + +// Create VkInstance and initialize some objects that need to be calculated by GPU +// Creates a VkInstance object, Checks the extended attributes supported by the Vulkan instance concerned, +// Initializes, and creates Vulkan validation layers (if ENABLE_VALIDATION_LAYER is enabled), +// Iterates over all supported physical devices, etc. NCNN_EXPORT int create_gpu_instance(); + +// Get global VkInstance variable +// Must be called after create_gpu_instance() and before destroy_gpu_instance() +NCNN_EXPORT VkInstance get_gpu_instance(); + +// Destroy VkInstance object and free the memory of the associated object +// Usually called in the destructor of the main program exit NCNN_EXPORT void destroy_gpu_instance(); // instance extension capability @@ -37,6 +49,8 @@ extern int support_VK_KHR_get_physical_device_properties2; extern int support_VK_KHR_get_surface_capabilities2; extern int support_VK_KHR_surface; extern int support_VK_EXT_debug_utils; +extern int support_VK_EXT_validation_features; +extern int support_VK_EXT_validation_flags; #if __ANDROID_API__ >= 26 extern int support_VK_KHR_android_surface; #endif // __ANDROID_API__ >= 26 @@ -167,6 +181,7 @@ class NCNN_EXPORT GpuInfo int support_VK_KHR_8bit_storage() const; int support_VK_KHR_16bit_storage() const; int support_VK_KHR_bind_memory2() const; + int support_VK_KHR_buffer_device_address() const; int support_VK_KHR_create_renderpass2() const; int support_VK_KHR_dedicated_allocation() const; int support_VK_KHR_descriptor_update_template() const; @@ -183,9 +198,12 @@ class NCNN_EXPORT GpuInfo int support_VK_KHR_shader_float_controls() const; int support_VK_KHR_storage_buffer_storage_class() const; int support_VK_KHR_swapchain() const; + int 
support_VK_EXT_buffer_device_address() const; int support_VK_EXT_descriptor_indexing() const; int support_VK_EXT_memory_budget() const; + int support_VK_EXT_memory_priority() const; int support_VK_EXT_queue_family_foreign() const; + int support_VK_AMD_device_coherent_memory() const; #if __ANDROID_API__ >= 26 int support_VK_ANDROID_external_memory_android_hardware_buffer() const; #endif // __ANDROID_API__ >= 26 @@ -269,6 +287,11 @@ class NCNN_EXPORT VulkanDevice PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR; PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR; + // VK_KHR_buffer_device_address + PFN_vkGetBufferDeviceAddressKHR vkGetBufferDeviceAddressKHR; + PFN_vkGetBufferOpaqueCaptureAddressKHR vkGetBufferOpaqueCaptureAddressKHR; + PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR vkGetDeviceMemoryOpaqueCaptureAddressKHR; + // VK_KHR_create_renderpass2 PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR; PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR; @@ -306,6 +329,9 @@ class NCNN_EXPORT VulkanDevice PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; PFN_vkQueuePresentKHR vkQueuePresentKHR; + // VK_EXT_buffer_device_address + PFN_vkGetBufferDeviceAddressEXT vkGetBufferDeviceAddressEXT; + #if __ANDROID_API__ >= 26 // VK_ANDROID_external_memory_android_hardware_buffer PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/layer.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/layer.h index d02f65bbc..ae4a8430d 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/layer.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/layer.h @@ -188,6 +188,16 @@ struct custom_layer_registry_entry void* userdata; }; +struct overwrite_builtin_layer_registry_entry +{ + // layer type index + int typeindex; + // layer factory entry + layer_creator_func creator; + layer_destroyer_func destroyer; + 
void* userdata; +}; + #if NCNN_STRING // get layer type from type name NCNN_EXPORT int layer_to_index(const char* type); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/layer_shader_type_enum.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/layer_shader_type_enum.h index bad560545..916ceb200 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/layer_shader_type_enum.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/layer_shader_type_enum.h @@ -364,6 +364,15 @@ mish_pack8 = 358, swish = 359, swish_pack4 = 360, swish_pack8 = 361, -convert_ycbcr = 362, -vulkan_activation = 363, +gemm = 362, +multiheadattention_qk_cross = 363, +multiheadattention_qk_cross_pack1to4 = 364, +multiheadattention_qk_cross_pack4 = 365, +multiheadattention_qk_cross_pack4to1 = 366, +multiheadattention_qkv_cross = 367, +multiheadattention_qkv_cross_pack1to4 = 368, +multiheadattention_qkv_cross_pack4 = 369, +multiheadattention_qkv_cross_pack4to1 = 370, +convert_ycbcr = 371, +vulkan_activation = 372, diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/modelbin.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/modelbin.h index 15d2b9c0d..aada5f61c 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/modelbin.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/modelbin.h @@ -31,7 +31,7 @@ class NCNN_EXPORT ModelBin // 2 = float16 // 3 = int8 // load vec - virtual Mat load(int w, int type) const = 0; + virtual Mat load(int w, int type) const; // load image virtual Mat load(int w, int h, int type) const; // load dim diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/net.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/net.h index 94070422f..98e3ec335 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/net.h +++ 
b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/net.h @@ -58,12 +58,12 @@ class NCNN_EXPORT Net #endif // NCNN_VULKAN #if NCNN_STRING - // register custom layer by layer type name + // register custom layer or overwrite built-in layer by layer type name // return 0 if success int register_custom_layer(const char* type, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); virtual int custom_layer_to_index(const char* type); #endif // NCNN_STRING - // register custom layer by layer type + // register custom layer or overwrite built-in layer by layer type // return 0 if success int register_custom_layer(int index, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); @@ -149,8 +149,10 @@ class NCNN_EXPORT Net int find_blob_index_by_name(const char* name) const; int find_layer_index_by_name(const char* name) const; virtual Layer* create_custom_layer(const char* type); + virtual Layer* create_overwrite_builtin_layer(const char* type); #endif // NCNN_STRING virtual Layer* create_custom_layer(int index); + virtual Layer* create_overwrite_builtin_layer(int typeindex); private: Net(const Net&); diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/option.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/option.h index 3fda80893..7d0cc60ba 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/option.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/option.h @@ -140,7 +140,10 @@ class NCNN_EXPORT Option bool use_winograd43_convolution; bool use_winograd63_convolution; - bool use_reserved_6; + // this option is turned on for A53/A55 automatically + // but you can force this on/off if you wish + bool use_a53_a55_optimized_kernel; + bool use_reserved_7; bool use_reserved_8; bool use_reserved_9; diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/platform.h 
b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/platform.h index 295703694..f6634be8d 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/platform.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/platform.h @@ -43,7 +43,6 @@ #define NCNN_AVX512BF16 1 #define NCNN_AVX512FP16 1 #define NCNN_VFPV4 0 -#if __aarch64__ #define NCNN_ARM82 0 #define NCNN_ARM82DOT 0 #define NCNN_ARM82FP16FML 0 @@ -54,7 +53,6 @@ #define NCNN_ARM86SVEBF16 0 #define NCNN_ARM86SVEI8MM 0 #define NCNN_ARM86SVEF32MM 0 -#endif // __aarch64__ #define NCNN_MSA 0 #define NCNN_LSX 0 #define NCNN_MMI 0 @@ -63,7 +61,7 @@ #define NCNN_BF16 1 #define NCNN_FORCE_INLINE 1 -#define NCNN_VERSION_STRING "1.0.20230223" +#define NCNN_VERSION_STRING "1.0.20230517" #include "ncnn_export.h" diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/simpleocv.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/simpleocv.h index 55ede15b7..54b22d9f9 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/simpleocv.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/simpleocv.h @@ -448,6 +448,8 @@ enum ImreadModes NCNN_EXPORT Mat imread(const std::string& path, int flags = IMREAD_COLOR); +NCNN_EXPORT Mat imdecode(const std::vector& buf, int flags = IMREAD_COLOR); + enum ImwriteFlags { IMWRITE_JPEG_QUALITY = 1 diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/vulkan_header_fix.h b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/vulkan_header_fix.h index 103ac3e5e..cd1efed46 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/vulkan_header_fix.h +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/include/ncnn/vulkan_header_fix.h @@ -177,6 +177,15 @@ typedef struct VkPhysicalDeviceFloat16Int8FeaturesKHR #if VK_HEADER_VERSION < 97 #define 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT (VkStructureType)1000237000 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT (VkStructureType)1000238000 +#define VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT (VkStructureType)1000238001 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT (VkStructureType)1000244000 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT (VkStructureType)1000244001 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT (VkStructureType)1000244002 +#define VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT (VkStructureType)1000247000 +#define VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_EXT (VkBufferCreateFlagBits)0x00020000 +#define VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT (VkBufferUsageFlagBits)0x00020000 +typedef uint64_t VkDeviceAddress; typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT { VkStructureType sType; @@ -184,6 +193,71 @@ typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; } VkPhysicalDeviceMemoryBudgetPropertiesEXT; +typedef struct VkPhysicalDeviceMemoryPriorityFeaturesEXT +{ + VkStructureType sType; + void* pNext; + VkBool32 memoryPriority; +} VkPhysicalDeviceMemoryPriorityFeaturesEXT; +typedef struct VkMemoryPriorityAllocateInfoEXT +{ + VkStructureType sType; + const void* pNext; + float priority; +} VkMemoryPriorityAllocateInfoEXT; +typedef struct VkPhysicalDeviceBufferAddressFeaturesEXT +{ + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} VkPhysicalDeviceBufferAddressFeaturesEXT; +typedef struct VkBufferDeviceAddressInfoEXT +{ + VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferDeviceAddressInfoEXT; +typedef struct VkBufferDeviceAddressCreateInfoEXT +{ + VkStructureType sType; + const void* pNext; + 
VkDeviceSize deviceAddress; +} VkBufferDeviceAddressCreateInfoEXT; +typedef VkDeviceAddress(VKAPI_PTR* PFN_vkGetBufferDeviceAddressEXT)(VkDevice device, const VkBufferDeviceAddressInfoEXT* pInfo); +typedef enum VkValidationFeatureEnableEXT +{ + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT = 0, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT = 1, + VK_VALIDATION_FEATURE_ENABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, + VK_VALIDATION_FEATURE_ENABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, + VK_VALIDATION_FEATURE_ENABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT - VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT + 1), + VK_VALIDATION_FEATURE_ENABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureEnableEXT; +typedef enum VkValidationFeatureDisableEXT +{ + VK_VALIDATION_FEATURE_DISABLE_ALL_EXT = 0, + VK_VALIDATION_FEATURE_DISABLE_SHADERS_EXT = 1, + VK_VALIDATION_FEATURE_DISABLE_THREAD_SAFETY_EXT = 2, + VK_VALIDATION_FEATURE_DISABLE_API_PARAMETERS_EXT = 3, + VK_VALIDATION_FEATURE_DISABLE_OBJECT_LIFETIMES_EXT = 4, + VK_VALIDATION_FEATURE_DISABLE_CORE_CHECKS_EXT = 5, + VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT = 6, + VK_VALIDATION_FEATURE_DISABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_ALL_EXT, + VK_VALIDATION_FEATURE_DISABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT, + VK_VALIDATION_FEATURE_DISABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT - VK_VALIDATION_FEATURE_DISABLE_ALL_EXT + 1), + VK_VALIDATION_FEATURE_DISABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureDisableEXT; +typedef struct VkValidationFeaturesEXT +{ + VkStructureType sType; + const void* pNext; + uint32_t enabledValidationFeatureCount; + const VkValidationFeatureEnableEXT* pEnabledValidationFeatures; + uint32_t disabledValidationFeatureCount; + const VkValidationFeatureDisableEXT* pDisabledValidationFeatures; +} 
VkValidationFeaturesEXT; #endif // VK_HEADER_VERSION < 97 #if VK_HEADER_VERSION < 101 @@ -248,6 +322,65 @@ typedef struct VkPhysicalDeviceCooperativeMatrixPropertiesNV typedef VkResult(VKAPI_PTR* PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixPropertiesNV* pProperties); #endif // VK_HEADER_VERSION < 101 +#if VK_HEADER_VERSION < 121 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD (VkStructureType)1000229000 +#define VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD (VkMemoryPropertyFlagBits)0x00000040 +#define VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD (VkMemoryPropertyFlagBits)0x00000040 +typedef struct VkPhysicalDeviceCoherentMemoryFeaturesAMD +{ + VkStructureType sType; + void* pNext; + VkBool32 deviceCoherentMemory; +} VkPhysicalDeviceCoherentMemoryFeaturesAMD; +#endif // VK_HEADER_VERSION < 121 + +#if VK_HEADER_VERSION < 129 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR (VkStructureType)1000257000 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR (VkStructureType)1000244001 +#define VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO_KHR (VkStructureType)1000257002 +#define VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO_KHR (VkStructureType)1000257003 +#define VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO_KHR (VkStructureType)1000257004 +#define VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR (VkBufferCreateFlagBits)0x00020000 +#define VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR (VkBufferUsageFlagBits)0x00020000 +#define VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR (VkMemoryAllocateFlagBits)0x00000002 +#define VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR (VkMemoryAllocateFlagBits)0x00000004 +typedef struct VkPhysicalDeviceBufferDeviceAddressFeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 
bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} VkPhysicalDeviceBufferDeviceAddressFeaturesKHR; +typedef struct VkBufferDeviceAddressInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferDeviceAddressInfoKHR; +typedef struct VkBufferOpaqueCaptureAddressCreateInfoKHR +{ + VkStructureType sType; + const void* pNext; + uint64_t opaqueCaptureAddress; +} VkBufferOpaqueCaptureAddressCreateInfoKHR; +typedef struct VkMemoryOpaqueCaptureAddressAllocateInfoKHR +{ + VkStructureType sType; + const void* pNext; + uint64_t opaqueCaptureAddress; +} VkMemoryOpaqueCaptureAddressAllocateInfoKHR; +typedef struct VkDeviceMemoryOpaqueCaptureAddressInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkDeviceMemory memory; +} VkDeviceMemoryOpaqueCaptureAddressInfoKHR; +typedef VkDeviceAddress(VKAPI_PTR* PFN_vkGetBufferDeviceAddressKHR)(VkDevice device, const VkBufferDeviceAddressInfoKHR* pInfo); +typedef uint64_t(VKAPI_PTR* PFN_vkGetBufferOpaqueCaptureAddressKHR)(VkDevice device, const VkBufferDeviceAddressInfoKHR* pInfo); +typedef uint64_t(VKAPI_PTR* PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR)(VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfoKHR* pInfo); +#endif // VK_HEADER_VERSION < 129 + #if VK_HEADER_VERSION < 208 typedef enum VkInstanceCreateFlagBits { diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/cmake/glslang/glslang-config-version.cmake b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/cmake/glslang/glslang-config-version.cmake index c7acdbaf5..9119e1093 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/cmake/glslang/glslang-config-version.cmake +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/cmake/glslang/glslang-config-version.cmake @@ -52,11 +52,6 @@ else() endif() -# if the installed project requested no architecture check, don't perform the check -if("FALSE") - return() -endif() - # if the installed or the using 
project don't have CMAKE_SIZEOF_VOID_P set, ignore it: if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "8" STREQUAL "") return() diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/cmake/glslang/glslang-targets.cmake b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/cmake/glslang/glslang-targets.cmake index 2173b87cd..d346d4be2 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/cmake/glslang/glslang-targets.cmake +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/cmake/glslang/glslang-targets.cmake @@ -7,7 +7,7 @@ if(CMAKE_VERSION VERSION_LESS "2.8.3") message(FATAL_ERROR "CMake >= 2.8.3 required") endif() cmake_policy(PUSH) -cmake_policy(VERSION 2.8.3...3.23) +cmake_policy(VERSION 2.8.3...3.24) #---------------------------------------------------------------- # Generated CMake target import file. #---------------------------------------------------------------- diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/cmake/ncnn/ncnn.cmake b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/cmake/ncnn/ncnn.cmake index f224c1b8e..2da90b1a1 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/cmake/ncnn/ncnn.cmake +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/cmake/ncnn/ncnn.cmake @@ -7,7 +7,7 @@ if(CMAKE_VERSION VERSION_LESS "2.8.3") message(FATAL_ERROR "CMake >= 2.8.3 required") endif() cmake_policy(PUSH) -cmake_policy(VERSION 2.8.3...3.23) +cmake_policy(VERSION 2.8.3...3.24) #---------------------------------------------------------------- # Generated CMake target import file. 
#---------------------------------------------------------------- diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/libncnn.a b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/libncnn.a index feec7b47d..f444cb1ee 100644 Binary files a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/libncnn.a and b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/libncnn.a differ diff --git a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/pkgconfig/ncnn.pc b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/pkgconfig/ncnn.pc index e683e4ccb..02c7d718b 100644 --- a/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/pkgconfig/ncnn.pc +++ b/feature/camera/src/main/jni/ncnn_android_vulkan/x86_64/lib/pkgconfig/ncnn.pc @@ -4,7 +4,7 @@ includedir=${prefix}/include Name: ncnn Description: high-performance neural network inference framework optimized for the mobile platform -Version: 1.0.20230223 +Version: 1.0.20230517 URL: https://github.com/Tencent/ncnn Libs: -L"${librarydir}" -lncnn Cflags: -I"${includedir}" diff --git a/feature/camera/src/main/jni/ndkcamera.cpp b/feature/camera/src/main/jni/ndkcamera.cpp index 561e50ee7..80c3792d4 100644 --- a/feature/camera/src/main/jni/ndkcamera.cpp +++ b/feature/camera/src/main/jni/ndkcamera.cpp @@ -12,9 +12,6 @@ #include #include -static constexpr size_t num_tasks = 3; -static std::array, num_tasks> futures; - static void onDisconnected(void *context, ACameraDevice *device) { __android_log_print(ANDROID_LOG_WARN, "NdkCamera", "onDisconnected %p", device); } @@ -24,96 +21,87 @@ static void onError(void *context, ACameraDevice *device, int error) { } static void onImageAvailable(void *context, AImageReader *reader) { - for (size_t i = 0; i < num_tasks; ++i) { - if (futures[i].wait_for(std::chrono::seconds(0)) != std::future_status::ready) { - return; - } - - futures[i] = std::async(std::launch::async, [&reader, &context] { - AImage *image = 0; - media_status_t status = 
AImageReader_acquireLatestImage(reader, &image); - - if (status != AMEDIA_OK) - return; - - - int32_t format; - AImage_getFormat(image, &format); - - - int32_t width = 0; - int32_t height = 0; - AImage_getWidth(image, &width); - AImage_getHeight(image, &height); - - int32_t y_pixelStride = 0; - int32_t u_pixelStride = 0; - int32_t v_pixelStride = 0; - AImage_getPlanePixelStride(image, 0, &y_pixelStride); - AImage_getPlanePixelStride(image, 1, &u_pixelStride); - AImage_getPlanePixelStride(image, 2, &v_pixelStride); - - int32_t y_rowStride = 0; - int32_t u_rowStride = 0; - int32_t v_rowStride = 0; - AImage_getPlaneRowStride(image, 0, &y_rowStride); - AImage_getPlaneRowStride(image, 1, &u_rowStride); - AImage_getPlaneRowStride(image, 2, &v_rowStride); - - uint8_t *y_data = 0; - uint8_t *u_data = 0; - uint8_t *v_data = 0; - int y_len = 0; - int u_len = 0; - int v_len = 0; - AImage_getPlaneData(image, 0, &y_data, &y_len); - AImage_getPlaneData(image, 1, &u_data, &u_len); - AImage_getPlaneData(image, 2, &v_data, &v_len); - - if (u_data == v_data + 1 && v_data == y_data + width * height && y_pixelStride == 1 && u_pixelStride == 2 && - v_pixelStride == 2 && - y_rowStride == width && u_rowStride == width && v_rowStride == width) { - // already nv21 :) - ((NdkCamera *) context)->on_image((unsigned char *) y_data, (int) width, (int) height); - } else { - // construct nv21 - unsigned char *nv21 = new unsigned char[width * height + width * height / 2]; - { - // Y - unsigned char *yptr = nv21; - for (int y = 0; y < height; y++) { - const unsigned char *y_data_ptr = y_data + y_rowStride * y; - for (int x = 0; x < width; x++) { - yptr[0] = y_data_ptr[0]; - yptr++; - y_data_ptr += y_pixelStride; - } - } - - // UV - unsigned char *uvptr = nv21 + width * height; - for (int y = 0; y < height / 2; y++) { - const unsigned char *v_data_ptr = v_data + v_rowStride * y; - const unsigned char *u_data_ptr = u_data + u_rowStride * y; - for (int x = 0; x < width / 2; x++) { - uvptr[0] = 
v_data_ptr[0]; - uvptr[1] = u_data_ptr[0]; - uvptr += 2; - v_data_ptr += v_pixelStride; - u_data_ptr += u_pixelStride; - } - } + AImage *image = 0; + media_status_t status = AImageReader_acquireLatestImage(reader, &image); + + if (status != AMEDIA_OK) + return; + + + int32_t format; + AImage_getFormat(image, &format); + + + int32_t width = 0; + int32_t height = 0; + AImage_getWidth(image, &width); + AImage_getHeight(image, &height); + + int32_t y_pixelStride = 0; + int32_t u_pixelStride = 0; + int32_t v_pixelStride = 0; + AImage_getPlanePixelStride(image, 0, &y_pixelStride); + AImage_getPlanePixelStride(image, 1, &u_pixelStride); + AImage_getPlanePixelStride(image, 2, &v_pixelStride); + + int32_t y_rowStride = 0; + int32_t u_rowStride = 0; + int32_t v_rowStride = 0; + AImage_getPlaneRowStride(image, 0, &y_rowStride); + AImage_getPlaneRowStride(image, 1, &u_rowStride); + AImage_getPlaneRowStride(image, 2, &v_rowStride); + + uint8_t *y_data = 0; + uint8_t *u_data = 0; + uint8_t *v_data = 0; + int y_len = 0; + int u_len = 0; + int v_len = 0; + AImage_getPlaneData(image, 0, &y_data, &y_len); + AImage_getPlaneData(image, 1, &u_data, &u_len); + AImage_getPlaneData(image, 2, &v_data, &v_len); + + if (u_data == v_data + 1 && v_data == y_data + width * height && y_pixelStride == 1 && u_pixelStride == 2 && + v_pixelStride == 2 && + y_rowStride == width && u_rowStride == width && v_rowStride == width) { + // already nv21 :) + ((NdkCamera *) context)->on_image((unsigned char *) y_data, (int) width, (int) height); + } else { + // construct nv21 + unsigned char *nv21 = new unsigned char[width * height + width * height / 2]; + { + // Y + unsigned char *yptr = nv21; + for (int y = 0; y < height; y++) { + const unsigned char *y_data_ptr = y_data + y_rowStride * y; + for (int x = 0; x < width; x++) { + yptr[0] = y_data_ptr[0]; + yptr++; + y_data_ptr += y_pixelStride; } + } - ((NdkCamera *) context)->on_image((unsigned char *) nv21, (int) width, (int) height); - - delete[] nv21; + // 
UV + unsigned char *uvptr = nv21 + width * height; + for (int y = 0; y < height / 2; y++) { + const unsigned char *v_data_ptr = v_data + v_rowStride * y; + const unsigned char *u_data_ptr = u_data + u_rowStride * y; + for (int x = 0; x < width / 2; x++) { + uvptr[0] = v_data_ptr[0]; + uvptr[1] = u_data_ptr[0]; + uvptr += 2; + v_data_ptr += v_pixelStride; + u_data_ptr += u_pixelStride; + } } + } + + ((NdkCamera *) context)->on_image((unsigned char *) nv21, (int) width, (int) height); - AImage_delete(image); - }); + delete[] nv21; } + AImage_delete(image); } @@ -420,7 +408,12 @@ void NdkCameraWindow::set_window(ANativeWindow *_win) { void NdkCameraWindow::on_image_render(cv::Mat &rgb) const { } +static std::chrono::system_clock::time_point start; +static std::chrono::system_clock::time_point end; + void NdkCameraWindow::on_image(const unsigned char *nv21, int nv21_width, int nv21_height) const { + + start = std::chrono::system_clock::now(); // resolve orientation from camera_orientation and accelerometer_sensor { if (!sensor_event_queue) { @@ -575,6 +568,12 @@ void NdkCameraWindow::on_image(const unsigned char *nv21, int nv21_width, int nv cv::Mat rgb_render(render_h, render_w, CV_8UC3); ncnn::kanna_rotate_c3(rgb.data, roi_w, roi_h, rgb_render.data, render_w, render_h, render_rotate_type); + // 속도개선위해 일부 데이터 스킵 + + end = std::chrono::system_clock::now(); + if ((end - start).count() % 2 == 0)return; + + ANativeWindow_setBuffersGeometry(win, render_w, render_h, AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM); ANativeWindow_Buffer buf; diff --git a/feature/camera/src/main/jni/yolo.cpp b/feature/camera/src/main/jni/yolo.cpp index 88752a5ef..eae0187cd 100644 --- a/feature/camera/src/main/jni/yolo.cpp +++ b/feature/camera/src/main/jni/yolo.cpp @@ -8,7 +8,7 @@ #include "cpu.h" -static const float prob_threshold = 0.4f; +static const float prob_threshold = 0.45f; static const float nms_threshold = 0.45f; static float fast_exp(float x) { @@ -215,9 +215,10 @@ Yolo::load(AAssetManager 
*mgr, const char *modeltype, int _target_size, const fl ncnn::set_omp_num_threads(ncnn::get_cpu_count()); yolo.opt = ncnn::Option(); + yolo.opt.use_vulkan_compute = false; #if NCNN_VULKAN - yolo.opt.use_vulkan_compute = use_gpu; + #endif yolo.opt.num_threads = ncnn::get_cpu_count(); diff --git a/feature/camera/src/main/jni/yolov8ncnn.cpp b/feature/camera/src/main/jni/yolov8ncnn.cpp index be6b4c2d9..7c68ee733 100644 --- a/feature/camera/src/main/jni/yolov8ncnn.cpp +++ b/feature/camera/src/main/jni/yolov8ncnn.cpp @@ -159,17 +159,15 @@ static std::vector objs; void MyNdkCamera::on_image_render(cv::Mat &rgb) const { { - //ncnn::MutexLockGuard g(lock); + ncnn::MutexLockGuard g(lock); if (g_yolo) { objs.clear(); g_yolo->detect(rgb, objs); g_yolo->draw(rgb, objs); - if (!objs.empty()) { - currentRgb = &rgb; - detectedObjects = objs; - } + currentRgb = &rgb; + detectedObjects = objs; } } @@ -188,7 +186,6 @@ Java_com_android_mediproject_feature_camera_aimodel_Yolo_detectedObjects(JNIEnv env->FindClass("com/android/mediproject/feature/camera/aimodel/DetectedObject"), nullptr); - __android_log_print(ANDROID_LOG_DEBUG, "finalDetectedObjects", "finalDetectedObjects size: %d", finalDetectedObjects->size()); for (int i = 0; i < finalDetectedObjects->size(); i++) { cv::Mat croppedMat = finalRgb->operator()(finalDetectedObjects->at(i)); @@ -315,12 +312,12 @@ Java_com_android_mediproject_feature_camera_aimodel_Yolo_setOutputWindow(JNIEnv extern "C" JNIEXPORT jobject JNICALL Java_com_android_mediproject_feature_camera_aimodel_Yolo_getCurrentImage(JNIEnv *env, jobject thiz) { - if (currentRgb != nullptr) { + if (currentRgb != nullptr) delete finalRgb; - } - if (finalDetectedObjects != nullptr) { + + if (finalDetectedObjects != nullptr) delete finalDetectedObjects; - } + finalRgb = new cv::Mat(); currentRgb->copyTo(*finalRgb); @@ -329,12 +326,6 @@ Java_com_android_mediproject_feature_camera_aimodel_Yolo_getCurrentImage(JNIEnv for (int i = 0; i < detectedObjects.size(); i++) { 
finalDetectedObjects->push_back(detectedObjects[i].rect); } - - __android_log_print(ANDROID_LOG_DEBUG, "detectedObjects", "detectedObjects size: %d", detectedObjects.size()); - - __android_log_print(ANDROID_LOG_DEBUG, "finalDetectedObjects", "finalDetectedObjects size: %d", finalDetectedObjects->size()); - - cv::cvtColor(*finalRgb, *finalRgb, cv::COLOR_BGR2RGB); std::string base64String = mat2str(*finalRgb);