diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index ec86ce4c84670..6c579da52d917 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -151,6 +151,172 @@ Status ReadBinaryFromFile(const std::string& file_path, uint8_t* buffer, size_t return Status::OK(); } +bool QnnBackendManager::IsTimerThreadRunning() { + std::chrono::microseconds remainUs = std::chrono::microseconds::zero(); + unsigned long remaining_duration = 0; + if (timer_->TimerInUse() && timer_->RemainingDuration(remainUs)) { + remaining_duration = static_cast(remainUs.count()); + return remaining_duration > 0 && remaining_duration < timer_resource_.sustained_timer_duration_; + } + return false; +} + +Status QnnBackendManager::SetSustainedHighPerformance(uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode performance_mode) { + std::lock_guard lk(perf_mutex_); + Status status = Status::OK(); + + std::chrono::microseconds sustainedDurationMs(timer_resource_.sustained_timer_duration_); + + switch (graph_state_) { + case GraphState::RUN_DONE: + if (IsTimerThreadRunning()) { + timer_->AbortTimer(); + } + ORT_RETURN_IF_NOT(timer_->Launch(sustainedDurationMs), "Not able to launch timer thread."); + graph_state_ = GraphState::NONE; + timer_resource_.caller_busy_ = false; + break; + case GraphState::RUN_START: + if (IsTimerThreadRunning()) { + timer_->AbortTimer(); + } else { + status = SetHtpPowerConfig(htp_power_config_client_id, performance_mode); + } + graph_state_ = GraphState::NONE; + timer_resource_.caller_busy_ = true; + break; + case GraphState::INIT_DONE: + status = SetRelaxedPerfPowerConfig(htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT); + graph_state_ = GraphState::NONE; + timer_resource_.caller_busy_ = false; + break; + case GraphState::INIT_START: + if (IsTimerThreadRunning()) { + 
timer_->AbortTimer(); + } else { + status = SetHtpPowerConfig(htp_power_config_client_id, performance_mode); + } + graph_state_ = GraphState::NONE; + timer_resource_.caller_busy_ = true; + break; + case GraphState::TIMEOUT: + if (!timer_resource_.caller_busy_) { + status = SetRelaxedPerfPowerConfig(htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT); + graph_state_ = GraphState::NONE; + } + break; + default: + LOGS(*logger_, VERBOSE) << "Invalid graph state"; + break; + } + return status; +} + +Status QnnBackendManager::SetPerformance(uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode performance_mode) { + std::lock_guard lk(perf_mutex_); + Status status = Status::OK(); + switch (graph_state_) { + case GraphState::RUN_DONE: + case GraphState::INIT_DONE: + switch (performance_mode) { + case qnn::HtpPerformanceMode::kHtpLowBalanced: + case qnn::HtpPerformanceMode::kHtpBalanced: + case qnn::HtpPerformanceMode::kHtpHighPerformance: + status = SetRelaxedPerfPowerConfig(htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT); + break; + case qnn::HtpPerformanceMode::kHtpExtremePowerSaver: + status = SetExtremeLowPerfPowerConfig(htp_power_config_client_id); + break; + case qnn::HtpPerformanceMode::kHtpLowPowerSaver: + case qnn::HtpPerformanceMode::kHtpHighPowerSaver: + case qnn::HtpPerformanceMode::kHtpPowerSaver: + status = SetReleasedPerfPowerConfig(htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT); + break; + default: + LOGS(*logger_, VERBOSE) << "Invalid performance mode"; + break; + } + graph_state_ = GraphState::NONE; + break; + case GraphState::RUN_START: + case GraphState::INIT_START: + status = SetHtpPowerConfig(htp_power_config_client_id, performance_mode); + graph_state_ = GraphState::NONE; + break; + default: + LOGS(*logger_, VERBOSE) << "Invalid graph state"; + break; + } + return status; +} + +Status QnnBackendManager::SetState(GraphState state, uint32_t htp_power_config_client_id, 
qnn::HtpPerformanceMode perfMode) { + std::lock_guard lk(state_mutex_); + if (state != graph_state_) { + graph_state_ = state; + if (perfMode == qnn::HtpPerformanceMode::kHtpSustainedHighPerformance || perfMode == qnn::HtpPerformanceMode::kHtpBurst) { + ORT_RETURN_IF(timer_ == nullptr, "timer is not started"); + return SetSustainedHighPerformance(htp_power_config_client_id, perfMode); + } else if (perfMode == qnn::HtpPerformanceMode::kHtpDefault) { + return Status::OK(); + } else { + return SetPerformance(htp_power_config_client_id, perfMode); + } + } + return Status::OK(); +} + +void QnnBackendManager::TimerCallback(void* user_data) { + TimerCallbackArg* args = static_cast(user_data); + QnnBackendManager* instance = args->instance_; + auto rt = instance->SetState(GraphState::TIMEOUT, args->power_config_id_, qnn::HtpPerformanceMode::kHtpSustainedHighPerformance); + if (rt != Status::OK()) { + LOGS_DEFAULT(VERBOSE) << "State update failed"; + } +} + +void QnnBackendManager::CreateTimerThread(uint32_t htp_power_config_client_id) { + std::lock_guard lk(state_mutex_); + if (timer_ == nullptr) { + std::unique_ptr temp(new Timer()); + if (temp != nullptr) { + timer_ = std::move(temp); + timer_callback_arg_ = std::make_unique(htp_power_config_client_id, this); + if (timer_callback_arg_ == nullptr) { + LOGS(*logger_, VERBOSE) << "Failed to create Timer argument"; + timer_.reset(); + return; + } + if (!timer_->Initialize(TimerCallback, timer_callback_arg_.get())) { + LOGS(*logger_, VERBOSE) << "Failed to create timer to set performance"; + timer_callback_arg_.reset(); + timer_.reset(); + } + } else { + LOGS(*logger_, VERBOSE) << "Failed: Timer is nullptr"; + } + } else { + LOGS(*logger_, VERBOSE) << "Timer already created"; + } +} + +void QnnBackendManager::ReleaseTimerThread(uint32_t htp_power_config_client_id) { + std::lock_guard lk(state_mutex_); + if (timer_ != nullptr) { + timer_->DeInitialize(); + graph_state_ = GraphState::NONE; + timer_resource_.caller_busy_ = 
false; + } + + timer_callback_arg_.reset(); + timer_.reset(); + + auto status = SetReleasedPerfPowerConfig(htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT); + if (status != Status::OK()) { + LOGS_DEFAULT(VERBOSE) << "Not able to set Power config to relaxed"; + } +} + Status QnnBackendManager::ParseLoraConfig(std::string lora_config_path) { LOGS_DEFAULT(INFO) << "Acquiring the QnnInterface " << lora_config_path; @@ -899,7 +1065,6 @@ Status QnnBackendManager::CreateContext(bool enable_htp_weight_sharing) { LOGS_DEFAULT(INFO) << "Context created already."; return Status::OK(); } - QnnContext_Config_t context_config_weight_sharing = QNN_CONTEXT_CONFIG_INIT; QnnHtpContext_CustomConfig_t custom_config; custom_config.option = QNN_HTP_CONTEXT_CONFIG_OPTION_WEIGHT_SHARING_ENABLED; @@ -1336,10 +1501,148 @@ Status QnnBackendManager::SetupBackend(const logging::Logger& logger, LOGS_DEFAULT(WARNING) << "Failed to setup so cleaning up"; ReleaseResources(); } - return status; } +Status QnnBackendManager::SetExtremeLowPerfPowerConfig(uint32_t htp_power_config_client_id) { + QnnDevice_Infrastructure_t qnn_device_infra = nullptr; + auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra); + ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed."); + + auto* htp_infra = static_cast(qnn_device_infra); + ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType, + "HTP infra type = ", htp_infra->infraType, ", which is not perf infra type."); + QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra; + + constexpr const int kNumConfigs = 1; + std::vector power_configs( + kNumConfigs); + QnnHtpPerfInfrastructure_PowerConfig_t& dcvs_config = power_configs[0]; + dcvs_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3; + QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = dcvs_config.dcvsV3Config; + dcvs_v3.contextId = htp_power_config_client_id; + dcvs_v3.dcvsEnable = 1; + 
dcvs_v3.setDcvsEnable = 1; + dcvs_v3.sleepLatency = kSleepHigherLatency; + dcvs_v3.setSleepLatency = 1; + dcvs_v3.sleepDisable = 0; + dcvs_v3.setSleepDisable = 0; + dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE; + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.setBusParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.setCoreParams = 1; + + std::vector perf_power_configs_ptr = ObtainNullTermPtrVector(power_configs); + status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data()); + ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for HTP performance mode."); + return Status::OK(); +} + +Status QnnBackendManager::SetRelaxedPerfPowerConfig(uint32_t htp_power_config_client_id, DcvsState_t dcvsState) { + QnnDevice_Infrastructure_t qnn_device_infra = nullptr; + auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra); + ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed."); + + auto* htp_infra = static_cast(qnn_device_infra); + ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType, + "HTP infra type = ", htp_infra->infraType, ", which is not perf infra type."); + QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra; + + constexpr const int kNumConfigs = 1; + std::vector power_configs( + kNumConfigs); + QnnHtpPerfInfrastructure_PowerConfig_t& dcvs_config = power_configs[0]; + dcvs_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3; + QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = dcvs_config.dcvsV3Config; + dcvs_v3.contextId = htp_power_config_client_id; + dcvs_v3.dcvsEnable = 1; + 
dcvs_v3.setDcvsEnable = 1; + dcvs_v3.sleepLatency = kSleepHighLatency; + dcvs_v3.setSleepLatency = 1; + dcvs_v3.sleepDisable = 0; + dcvs_v3.setSleepDisable = 0; + if (dcvsState == DcvsState_t::DCVS_ENABLE) { + dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_ADJUST_UP_DOWN; + } else { + dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE; + } + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS; + dcvs_v3.setBusParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS; + dcvs_v3.setCoreParams = 1; + + std::vector perf_power_configs_ptr = ObtainNullTermPtrVector(power_configs); + status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data()); + ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for HTP performance mode."); + + return Status::OK(); +} + +Status QnnBackendManager::SetReleasedPerfPowerConfig(uint32_t htp_power_config_client_id, DcvsState_t dcvsState) { + QnnDevice_Infrastructure_t qnn_device_infra = nullptr; + auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra); + ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed."); + + auto* htp_infra = static_cast(qnn_device_infra); + ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType, + "HTP infra type = ", htp_infra->infraType, ", which is not perf infra type."); + QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra; + constexpr const int kNumConfigs = 1; + std::vector power_configs( + kNumConfigs); + QnnHtpPerfInfrastructure_PowerConfig_t& dcvs_config = power_configs[0]; + dcvs_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3; + QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = 
dcvs_config.dcvsV3Config; + dcvs_v3.contextId = htp_power_config_client_id; + dcvs_v3.dcvsEnable = 1; + dcvs_v3.setDcvsEnable = 1; + dcvs_v3.sleepLatency = kSleepHigherLatency; + dcvs_v3.setSleepLatency = 1; + dcvs_v3.sleepDisable = 0; + dcvs_v3.setSleepDisable = 0; + + if (dcvsState == DcvsState_t::DCVS_ENABLE) { + dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_ADJUST_UP_DOWN; + } else { + dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE; + } + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER; + dcvs_v3.setBusParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER; + dcvs_v3.setCoreParams = 1; + + std::vector perf_power_configs_ptr = ObtainNullTermPtrVector(power_configs); + status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data()); + ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for HTP performance mode."); + + return Status::OK(); +} + +Status QnnBackendManager::InitializePowerCfgId(uint32_t device_id, uint32_t core_id, uint32_t& htp_power_config_id) { + ORT_RETURN_IF_ERROR(CreateHtpPowerCfgId(device_id, core_id, htp_power_config_id)); + CreateTimerThread(htp_power_config_id); + return Status::OK(); +} + +Status QnnBackendManager::DeInitializePowerCfgId(uint32_t htp_power_config_id) { + ReleaseTimerThread(htp_power_config_id); + ORT_RETURN_IF_ERROR(DestroyHTPPowerConfigID(htp_power_config_id)); + return Status::OK(); +} + Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id, uint32_t core_id, uint32_t& htp_power_config_id) { // This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the 
model is assigned // to a different EP. Therefore, we have to check that backend setup actually completed before trying to diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h index 75ab01013bdfd..c990400b76f2c 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h @@ -29,6 +29,7 @@ #include "core/providers/qnn/builder/qnn_def.h" #include "core/providers/qnn/builder/qnn_profile_serializer.h" #include "core/providers/qnn/builder/qnn_node_group/qnn_node_group.h" +#include "core/providers/qnn/builder/timer.h" namespace onnxruntime { namespace qnn { @@ -119,6 +120,22 @@ struct QnnBackendManagerConfig { bool skip_qnn_version_check; }; +enum class DcvsState_t { + DCVS_DEFAULT = 0, + DCVS_DISABLE = 1, + DCVS_ENABLE = 2, + DCVS_NUM_STATES +}; + +enum class GraphState { + INIT_START, + INIT_DONE, + RUN_START, + RUN_DONE, + TIMEOUT, + NONE +}; + class QnnBackendManager : public std::enable_shared_from_this { private: // private tag to pass to constructor to ensure that constructor cannot be directly called externally @@ -163,7 +180,9 @@ class QnnBackendManager : public std::enable_shared_from_this bool enable_vtcm_backup_buffer_sharing, std::unordered_map>>& context_bin_map); - Status CreateHtpPowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id); + Status InitializePowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id); + + Status DeInitializePowerCfgId(uint32_t htp_power_config_id); Status SetHtpPowerConfig(uint32_t htp_power_config_client_id, HtpPerformanceMode htp_performance_mode); @@ -207,8 +226,6 @@ class QnnBackendManager : public std::enable_shared_from_this const std::string& GetSdkVersion() { return sdk_build_version_; } - Status DestroyHTPPowerConfigID(uint32_t htp_power_config_id); - Status GetMaxSpillFillBufferSize(unsigned char* buffer, uint64_t 
buffer_length, uint64_t& max_spill_fill_buffer_size); @@ -239,6 +256,8 @@ class QnnBackendManager : public std::enable_shared_from_this bool ProfilingEnabled() { return profiling_enabled_; } #endif + Status SetState(GraphState state, uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode perfMode); + private: Status LoadBackend(); @@ -283,6 +302,28 @@ class QnnBackendManager : public std::enable_shared_from_this void* LibFunction(void* handle, const char* symbol, std::string& error_msg); + bool IsTimerThreadRunning(); + + Status SetRelaxedPerfPowerConfig(uint32_t htp_power_config_client_id, DcvsState_t dcvsState); + + Status SetReleasedPerfPowerConfig(uint32_t htp_power_config_client_id, DcvsState_t dcvsState); + + Status SetExtremeLowPerfPowerConfig(uint32_t htp_power_config_client_id); + + Status SetSustainedHighPerformance(uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode performance_mode); + + Status SetPerformance(uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode performance_mode); + + static void TimerCallback(void* user_data); + + Status CreateHtpPowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id); + + Status DestroyHTPPowerConfigID(uint32_t htp_power_config_id); + + void CreateTimerThread(uint32_t htp_power_config_client_id); + + void ReleaseTimerThread(uint32_t htp_power_config_client_id); + template inline T ResolveSymbol(void* lib_handle, const char* sym, const logging::Logger& logger) { std::string error_msg = ""; @@ -463,6 +504,23 @@ class QnnBackendManager : public std::enable_shared_from_this QnnHtpDevice_Arch_t htp_arch_ = QNN_HTP_DEVICE_ARCH_NONE; uint32_t soc_model_ = QNN_SOC_MODEL_UNKNOWN; const std::vector op_packages_; + std::mutex perf_mutex_; + std::mutex state_mutex_; + std::unique_ptr timer_; + struct TimerResource { + static const unsigned long sustained_timer_duration_ = 300000; + std::atomic caller_busy_ = false; + }; + TimerResource timer_resource_; + std::atomic graph_state_ = 
GraphState::NONE; + struct TimerCallbackArg { + uint32_t power_config_id_; + QnnBackendManager* instance_; + + TimerCallbackArg(uint32_t id, QnnBackendManager* manager) + : power_config_id_(id), instance_(manager) {} + }; + std::unique_ptr timer_callback_arg_; bool skip_qnn_version_check_ = false; }; diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h index 42f4d7bb60f34..f7c398ff8dfba 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_def.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h @@ -109,6 +109,7 @@ constexpr const int kSleepMinLatency = 40; constexpr const int kSleepLowLatency = 100; constexpr const int kSleepMediumLatency = 1000; constexpr const int kSleepHighLatency = 2000; +constexpr const int kSleepHigherLatency = 65535; constexpr const int kDcvsDisable = 0; constexpr const int kDcvsEnable = 1; diff --git a/onnxruntime/core/providers/qnn/builder/timer.cc b/onnxruntime/core/providers/qnn/builder/timer.cc new file mode 100644 index 0000000000000..9d4ae84104c98 --- /dev/null +++ b/onnxruntime/core/providers/qnn/builder/timer.cc @@ -0,0 +1,79 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
// Licensed under the MIT License

// ---------------------------------------------------------------------------
// timer.h
// ---------------------------------------------------------------------------
#ifndef ONNXRUNTIME_CORE_PROVIDERS_QNN_BUILDER_TIMER_H_
#define ONNXRUNTIME_CORE_PROVIDERS_QNN_BUILDER_TIMER_H_

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <thread>
#include <utility>

/// One-shot, re-armable timer backed by a single background thread.
///
/// Life cycle: Initialize() starts the worker (state IDLE), Launch() arms a
/// countdown (LAUNCH), on expiry the callback runs on the worker thread
/// (CALLING) and the timer returns to IDLE. AbortTimer() cancels a pending
/// countdown; DeInitialize() stops and joins the worker (DEINIT).
///
/// All state transitions happen under `mtx_`. The callback itself is invoked
/// with `mtx_` released, so it may call Launch()/RemainingDuration(), but:
///  * AbortTimer() and DeInitialize() wait for a running callback to return.
///    Do not call them while holding a lock the callback also acquires, and
///    do not call them from inside the callback.
///  * Initialize()/DeInitialize() are not meant to be raced against each other
///    from different threads.
class Timer {
 public:
  enum class threadState {
    IDLE,     // Worker is running and waiting for Launch()
    LAUNCH,   // Countdown in progress
    CALLING,  // Callback is currently executing on the worker thread
    DEINIT    // Worker not started yet, or already shut down
  };

  Timer() = default;
  // Stops and joins the worker thread if it is running.
  ~Timer();

  // The worker thread captures `this`; the object must not be copied or moved.
  Timer(const Timer&) = delete;
  Timer& operator=(const Timer&) = delete;

  /// Reports the time left before the callback fires.
  ///
  /// @param duration  Receives the remaining time while counting down
  ///                  (clamped to zero, never negative); zero in every other
  ///                  state.
  /// @return true when the worker is alive (IDLE, LAUNCH or CALLING), false
  ///         when the timer is not initialized / already de-initialized.
  template <typename Rep, typename Period>
  bool RemainingDuration(std::chrono::duration<Rep, Period>& duration) {
    using Duration = std::chrono::duration<Rep, Period>;
    std::unique_lock<std::mutex> lk(mtx_);
    duration = Duration::zero();
    const threadState state = thread_status_.load();
    if (state == threadState::LAUNCH) {
      // The deadline may already have passed while the worker has not yet
      // switched to CALLING; report zero rather than a negative duration.
      const auto now = std::chrono::steady_clock::now();
      if (end_time_ > now) {
        duration = std::chrono::duration_cast<Duration>(end_time_ - now);
      }
      return true;
    }
    return state == threadState::CALLING || state == threadState::IDLE;
  }

  /// Arms the timer so that the callback fires once after `timeoutVal`.
  ///
  /// @return true if the countdown was started; false if the timer is not
  ///         IDLE (not initialized, already counting, or callback running).
  template <typename Rep, typename Period>
  bool Launch(const std::chrono::duration<Rep, Period>& timeoutVal) {
    std::unique_lock<std::mutex> lk(mtx_);
    if (thread_status_ != threadState::IDLE) {
      return false;
    }
    // Explicit cast so that floating-point / coarse durations are accepted.
    end_time_ = std::chrono::steady_clock::now() +
                std::chrono::duration_cast<std::chrono::steady_clock::duration>(timeoutVal);
    is_timer_stopped_ = false;  // a fresh countdown never starts out cancelled
    thread_status_ = threadState::LAUNCH;
    cv_.notify_all();
    return true;
  }

  /// Starts the worker thread and registers the expiry callback.
  ///
  /// @param callbackFn   Invoked on the worker thread as callbackFn(callbackArg).
  /// @param callbackArg  Opaque pointer handed to the callback; must stay valid
  ///                     until DeInitialize() returns.
  /// @return false if `callbackFn` is empty or the timer is already
  ///         initialized; true once the worker has reached IDLE.
  bool Initialize(std::function<void(void*)> callbackFn, void* callbackArg);

  /// Cancels any pending countdown, waits for a running callback, and joins
  /// the worker. Safe to call when the timer was never initialized, and safe
  /// to call repeatedly. The timer may be Initialize()d again afterwards.
  void DeInitialize();

  /// Cancels a pending countdown (the callback will not fire for it). If the
  /// callback is already running, blocks until it has returned. Returns
  /// immediately when the timer is idle or not initialized.
  void AbortTimer();

  /// @return true while a countdown is in progress (state LAUNCH).
  bool TimerInUse();

 private:
  // Worker thread body: waits for Launch(), counts down, invokes the callback.
  void BkgTimer();

  std::thread bkg_thread_;
  std::mutex mtx_;
  std::condition_variable cv_;
  std::function<void(void*)> timeout_fn_;
  void* timeout_arg_{nullptr};
  // Written only under mtx_; atomic so TimerInUse() can read it without locking.
  std::atomic<threadState> thread_status_{threadState::DEINIT};
  std::chrono::time_point<std::chrono::steady_clock> end_time_;
  // Both flags are guarded by mtx_.
  bool is_timer_stopped_ = false;  // abort request for the current countdown
  bool is_timer_deinit_ = false;   // shutdown request for the worker
};

#endif  // ONNXRUNTIME_CORE_PROVIDERS_QNN_BUILDER_TIMER_H_

// ---------------------------------------------------------------------------
// timer.cc  (includes "timer.h" when kept as a separate translation unit)
// ---------------------------------------------------------------------------

void Timer::DeInitialize() {
  {
    std::unique_lock<std::mutex> lk(mtx_);
    if (!bkg_thread_.joinable()) {
      // Never initialized or already joined: leave no stale shutdown request
      // behind, otherwise a later Initialize() would stop immediately.
      return;
    }
    is_timer_deinit_ = true;
    cv_.notify_all();
  }
  // Join without holding mtx_: the worker needs the mutex to observe the request.
  bkg_thread_.join();

  std::unique_lock<std::mutex> lk(mtx_);
  is_timer_deinit_ = false;
  is_timer_stopped_ = false;
}

Timer::~Timer() { this->DeInitialize(); }

void Timer::BkgTimer() {
  std::unique_lock<std::mutex> lk(mtx_);
  thread_status_ = threadState::IDLE;
  cv_.notify_all();  // wakes Initialize()

  while (true) {
    // Idle: sleep until a countdown is armed or shutdown is requested.
    cv_.wait(lk, [this]() {
      return is_timer_deinit_ || thread_status_ == threadState::LAUNCH;
    });
    if (is_timer_deinit_) {
      break;
    }

    // Counting down: wait_until returns true when interrupted by an abort or
    // shutdown request, false when the deadline was reached.
    const bool interrupted = cv_.wait_until(lk, end_time_, [this]() {
      return is_timer_stopped_ || is_timer_deinit_;
    });
    if (is_timer_deinit_) {
      break;
    }

    if (!interrupted) {
      thread_status_ = threadState::CALLING;
      // Run the user callback unlocked so it can query/re-arm the timer.
      lk.unlock();
      timeout_fn_(timeout_arg_);
      lk.lock();
    }

    // Clear the abort request *before* publishing IDLE so that a Launch()
    // issued right after AbortTimer() returns can never be cancelled by it.
    is_timer_stopped_ = false;
    thread_status_ = threadState::IDLE;
    cv_.notify_all();  // wakes AbortTimer()
  }

  thread_status_ = threadState::DEINIT;
  cv_.notify_all();  // releases any AbortTimer() still waiting
}

bool Timer::Initialize(std::function<void(void*)> callbackFn, void* callbackArg) {
  std::unique_lock<std::mutex> lk(mtx_);
  if (!callbackFn) {
    return false;  // an empty std::function would throw on the worker thread
  }
  if (bkg_thread_.joinable()) {
    return false;  // assigning to a joinable std::thread calls std::terminate
  }
  timeout_arg_ = callbackArg;
  timeout_fn_ = std::move(callbackFn);
  is_timer_deinit_ = false;
  is_timer_stopped_ = false;
  bkg_thread_ = std::thread(&Timer::BkgTimer, this);
  cv_.wait(lk, [this] { return thread_status_ == threadState::IDLE; });
  return true;
}

void Timer::AbortTimer() {
  std::unique_lock<std::mutex> lk(mtx_);
  if (thread_status_ == threadState::LAUNCH) {
    // Only a pending countdown can be cancelled; a running callback cannot.
    is_timer_stopped_ = true;
    cv_.notify_all();
  }
  // DEINIT is accepted too: with no worker there is nothing to wait for.
  cv_.wait(lk, [this] {
    return thread_status_ == threadState::IDLE || thread_status_ == threadState::DEINIT;
  });
}

bool Timer::TimerInUse() {
  return thread_status_ == threadState::LAUNCH;
}
+++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -1203,7 +1203,15 @@ Status QNNExecutionProvider::CompileFromOrtGraph(const std::vectorComposeGraph(graph_viewer, fused_node, model_settings_, logger, all_graph_configs_ptr, json_graph_filepath)); - ORT_RETURN_IF_ERROR(qnn_model->FinalizeGraphs(logger)); + + ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetState(onnxruntime::qnn::GraphState::INIT_START, GetPerThreadContext().GetHtpPowerConfigId(), default_htp_performance_mode_)); + + auto rtVal = qnn_model->FinalizeGraphs(logger); + + ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetState(onnxruntime::qnn::GraphState::INIT_DONE, GetPerThreadContext().GetHtpPowerConfigId(), default_htp_performance_mode_)); + + ORT_RETURN_IF_ERROR(rtVal); + ORT_RETURN_IF_ERROR(qnn_model->SetupQnnInputOutput(logger)); LOGS(logger, VERBOSE) << "fused node name: " << fused_node.Name(); @@ -1270,12 +1278,18 @@ Status QNNExecutionProvider::Compile(const std::vector& fused for (auto main_context_pos : main_context_pos_list) { const onnxruntime::GraphViewer& main_ctx_graph_viewer(fused_nodes_and_graphs[main_context_pos].filtered_graph); // Create QNN context from the cached binary, deserialize the QNN graph from the binary - ORT_RETURN_IF_ERROR(qnn::LoadQnnCtxFromOnnxGraph(main_ctx_graph_viewer, - context_model_path, - qnn_backend_manager_.get(), - qnn_models, - logger, - max_spill_fill_size)); + ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetState(onnxruntime::qnn::GraphState::INIT_START, GetPerThreadContext().GetHtpPowerConfigId(), default_htp_performance_mode_)); + + Status rtVal = qnn::LoadQnnCtxFromOnnxGraph(main_ctx_graph_viewer, + context_model_path, + qnn_backend_manager_.get(), + qnn_models, + logger, + max_spill_fill_size); + + ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetState(onnxruntime::qnn::GraphState::INIT_DONE, GetPerThreadContext().GetHtpPowerConfigId(), default_htp_performance_mode_)); + + ORT_RETURN_IF_ERROR(rtVal); } for (auto fused_node_and_graph : 
fused_nodes_and_graphs) { @@ -1363,19 +1377,14 @@ const InlinedVector QNNExecutionProvider::GetEpContextNodes() const QNNExecutionProvider::PerThreadContext::PerThreadContext(qnn::QnnBackendManager* qnn_backend_manager, uint32_t device_id, uint32_t core_id, - qnn::HtpPerformanceMode default_htp_performance_mode, uint32_t default_rpc_control_latency, uint32_t default_rpc_polling_time) : qnn_backend_manager_(qnn_backend_manager) { - Status rt = qnn_backend_manager_->CreateHtpPowerCfgId(device_id, core_id, htp_power_config_id_); + Status rt = qnn_backend_manager_->InitializePowerCfgId(device_id, core_id, htp_power_config_id_); is_htp_power_config_id_valid_ = rt.IsOK(); - // default_htp_performance_mode and default_rpc_control_latency are from QNN EP option. + // default_rpc_control_latency are from QNN EP option. // set it once only for each thread as default so user don't need to set it for every session run if (is_htp_power_config_id_valid_) { - if (qnn::HtpPerformanceMode::kHtpDefault != default_htp_performance_mode) { - ORT_IGNORE_RETURN_VALUE(qnn_backend_manager_->SetHtpPowerConfig(htp_power_config_id_, - default_htp_performance_mode)); - } if (default_rpc_control_latency > 0 || default_rpc_polling_time > 0) { ORT_IGNORE_RETURN_VALUE(qnn_backend_manager_->SetRpcPowerConfigs(htp_power_config_id_, default_rpc_control_latency, @@ -1386,7 +1395,7 @@ QNNExecutionProvider::PerThreadContext::PerThreadContext(qnn::QnnBackendManager* QNNExecutionProvider::PerThreadContext::~PerThreadContext() { if (is_htp_power_config_id_valid_) { - ORT_IGNORE_RETURN_VALUE(qnn_backend_manager_->DestroyHTPPowerConfigID(htp_power_config_id_)); + ORT_IGNORE_RETURN_VALUE(qnn_backend_manager_->DeInitializePowerCfgId(htp_power_config_id_)); } } @@ -1410,7 +1419,7 @@ QNNExecutionProvider::PerThreadContext& QNNExecutionProvider::GetPerThreadContex if (context_state_.retired_context_pool.empty()) { uint32_t core_id = 0; context = std::make_shared(qnn_backend_manager_.get(), device_id_, core_id, - 
default_htp_performance_mode_, default_rpc_control_latency_, + default_rpc_control_latency_, default_rpc_polling_time_); } else { context = context_state_.retired_context_pool.back(); @@ -1486,8 +1495,9 @@ Status QNNExecutionProvider::OnRunStart(const onnxruntime::RunOptions& run_optio if (GetPerThreadContext().IsHtpPowerConfigIdValid()) { if (qnn::HtpPerformanceMode::kHtpDefault != htp_performance_mode) { - ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetHtpPowerConfig(GetPerThreadContext().GetHtpPowerConfigId(), - htp_performance_mode)); + ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetState(onnxruntime::qnn::GraphState::RUN_START, GetPerThreadContext().GetHtpPowerConfigId(), htp_performance_mode)); + } else if (qnn::HtpPerformanceMode::kHtpDefault != default_htp_performance_mode_) { + ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetState(onnxruntime::qnn::GraphState::RUN_START, GetPerThreadContext().GetHtpPowerConfigId(), default_htp_performance_mode_)); } if (rpc_control_latency > 0 || rpc_polling_time > 0) { @@ -1502,7 +1512,6 @@ Status QNNExecutionProvider::OnRunStart(const onnxruntime::RunOptions& run_optio LOGS_DEFAULT(VERBOSE) << "lora_config: " << lora_config; ORT_RETURN_IF_ERROR(qnn_backend_manager_->ParseLoraConfig(lora_config)); } - return Status::OK(); } @@ -1521,12 +1530,12 @@ Status QNNExecutionProvider::OnRunEnd(bool /*sync_stream*/, const onnxruntime::R ParseHtpPerformanceMode(htp_perf_mode, htp_performance_mode); } - if (qnn::HtpPerformanceMode::kHtpDefault != htp_performance_mode) { - if (!GetPerThreadContext().IsHtpPowerConfigIdValid()) { - return Status::OK(); + if (GetPerThreadContext().IsHtpPowerConfigIdValid()) { + if (qnn::HtpPerformanceMode::kHtpDefault != htp_performance_mode) { + ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetState(onnxruntime::qnn::GraphState::RUN_DONE, GetPerThreadContext().GetHtpPowerConfigId(), htp_performance_mode)); + } else if (qnn::HtpPerformanceMode::kHtpDefault != default_htp_performance_mode_) { + 
ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetState(onnxruntime::qnn::GraphState::RUN_DONE, GetPerThreadContext().GetHtpPowerConfigId(), default_htp_performance_mode_)); } - ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetHtpPowerConfig(GetPerThreadContext().GetHtpPowerConfigId(), - htp_performance_mode)); } return Status::OK(); diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.h b/onnxruntime/core/providers/qnn/qnn_execution_provider.h index 6adf613932d66..000993c5e716c 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.h +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.h @@ -119,7 +119,6 @@ class QNNExecutionProvider : public IExecutionProvider { public: PerThreadContext(qnn::QnnBackendManager* qnn_backend_manager, uint32_t device_id, uint32_t core_id, - qnn::HtpPerformanceMode default_htp_performance_mode, uint32_t default_rpc_control_latency, uint32_t default_rpc_polling_time); ~PerThreadContext(); diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc index 87ca6e32c82f9..a86adb9abb007 100644 --- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc +++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc @@ -851,6 +851,60 @@ TEST_F(QnnHTPBackendTests, MultithreadDefaultHtpPowerCfgFromEpOption) { } } +// Tests running a single session in multiple threads on the HTP backend with EP option to set default power config to sustained high performance +TEST_F(QnnHTPBackendTests, MultithreadSustainedHighPowerCfgFromEpOption) { + std::unique_ptr model; + std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + std::vector shape = {1, 3, 2}; + std::vector> output_shapes = {shape}; + std::vector> output_values = {{3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}}; + + CreateModelInMemory(model, + QDQBuildAdd3Tensors(TestInputDef(shape, false, input_data), + TestInputDef(shape, false, input_data), + TestInputDef(shape, false, input_data)), + "add3.qdq"); + + SessionOptions 
session_opts; + session_opts.session_logid = "logger0"; + + RunOptions run_opts; + run_opts.run_tag = session_opts.session_logid; + + InferenceSession session_obj{session_opts, GetEnvironment()}; + onnxruntime::ProviderOptions options; + +#if defined(_WIN32) + options["backend_path"] = "QnnHtp.dll"; +#else + options["backend_path"] = "libQnnHtp.so"; +#endif + options["offload_graph_io_quantization"] = "0"; + options["htp_performance_mode"] = "sustained_high_performance"; + + auto qnn_ep = QnnExecutionProviderWithOptions(options, &session_opts); + EXPECT_TRUE(session_obj.RegisterExecutionProvider(std::move(qnn_ep)).IsOK()); + + auto status = session_obj.Load(model->model_data.data(), static_cast(model->model_data.size())); + ASSERT_TRUE(status.IsOK()); + status = session_obj.Initialize(); + ASSERT_TRUE(status.IsOK()); + + std::vector threads; + constexpr int num_threads = 5; + constexpr int loop_count = 10; + + for (int i = 0; i < num_threads; i++) { + threads.push_back(std::thread(RunSessionAndVerify, std::ref(session_obj), run_opts, + model->builder.feeds_, model->builder.output_names_, + output_shapes, output_values, loop_count)); + } + + for (auto& th : threads) { + th.join(); + } +} + // Tests running a single session in multiple threads on the HTP backend with // EP option to set default power config + run option to set power config for each run TEST_F(QnnHTPBackendTests, MultithreadHtpPowerCfgDefaultAndRunOption) {