Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
307 changes: 305 additions & 2 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,172 @@ Status ReadBinaryFromFile(const std::string& file_path, uint8_t* buffer, size_t
return Status::OK();
}

bool QnnBackendManager::IsTimerThreadRunning() {
std::chrono::microseconds remainUs = std::chrono::microseconds::zero();
unsigned long remaining_duration = 0;
if (timer_->TimerInUse() && timer_->RemainingDuration(remainUs)) {
remaining_duration = static_cast<unsigned long>(remainUs.count());
return remaining_duration > 0 && remaining_duration < timer_resource_.sustained_timer_duration_;
}
return false;
}

Status QnnBackendManager::SetSustainedHighPerformance(uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode performance_mode) {
std::lock_guard<std::mutex> lk(perf_mutex_);
Status status = Status::OK();

std::chrono::microseconds sustainedDurationMs(timer_resource_.sustained_timer_duration_);

switch (graph_state_) {
case GraphState::RUN_DONE:
if (IsTimerThreadRunning()) {
timer_->AbortTimer();
}
ORT_RETURN_IF_NOT(timer_->Launch(sustainedDurationMs), "Not able to launch timer thread.");
graph_state_ = GraphState::NONE;
timer_resource_.caller_busy_ = false;
break;
case GraphState::RUN_START:
if (IsTimerThreadRunning()) {
timer_->AbortTimer();
} else {
status = SetHtpPowerConfig(htp_power_config_client_id, performance_mode);
}
graph_state_ = GraphState::NONE;
timer_resource_.caller_busy_ = true;
break;
case GraphState::INIT_DONE:
status = SetRelaxedPerfPowerConfig(htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT);
graph_state_ = GraphState::NONE;
timer_resource_.caller_busy_ = false;
break;
case GraphState::INIT_START:
if (IsTimerThreadRunning()) {
timer_->AbortTimer();
} else {
status = SetHtpPowerConfig(htp_power_config_client_id, performance_mode);
}
graph_state_ = GraphState::NONE;
timer_resource_.caller_busy_ = true;
break;
case GraphState::TIMEOUT:
if (!timer_resource_.caller_busy_) {
status = SetRelaxedPerfPowerConfig(htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT);
graph_state_ = GraphState::NONE;
}
break;
default:
LOGS(*logger_, VERBOSE) << "Invalid graph state";
break;
}
return status;
}

Status QnnBackendManager::SetPerformance(uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode performance_mode) {
std::lock_guard<std::mutex> lk(perf_mutex_);
Status status = Status::OK();
switch (graph_state_) {
case GraphState::RUN_DONE:
case GraphState::INIT_DONE:
switch (performance_mode) {
case qnn::HtpPerformanceMode::kHtpLowBalanced:
case qnn::HtpPerformanceMode::kHtpBalanced:
case qnn::HtpPerformanceMode::kHtpHighPerformance:
status = SetRelaxedPerfPowerConfig(htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT);
break;
case qnn::HtpPerformanceMode::kHtpExtremePowerSaver:
status = SetExtremeLowPerfPowerConfig(htp_power_config_client_id);
break;
case qnn::HtpPerformanceMode::kHtpLowPowerSaver:
case qnn::HtpPerformanceMode::kHtpHighPowerSaver:
case qnn::HtpPerformanceMode::kHtpPowerSaver:
status = SetReleasedPerfPowerConfig(htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT);
break;
default:
LOGS(*logger_, VERBOSE) << "Invalid performance mode";
break;
}
graph_state_ = GraphState::NONE;
break;
case GraphState::RUN_START:
case GraphState::INIT_START:
status = SetHtpPowerConfig(htp_power_config_client_id, performance_mode);
graph_state_ = GraphState::NONE;
break;
default:
LOGS(*logger_, VERBOSE) << "Invalid graph state";
break;
}
return status;
}

Status QnnBackendManager::SetState(GraphState state, uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode perfMode) {
std::lock_guard<std::mutex> lk(state_mutex_);
if (state != graph_state_) {
graph_state_ = state;
if (perfMode == qnn::HtpPerformanceMode::kHtpSustainedHighPerformance || perfMode == qnn::HtpPerformanceMode::kHtpBurst) {
ORT_RETURN_IF(timer_ == nullptr, "timer is not started");
return SetSustainedHighPerformance(htp_power_config_client_id, perfMode);
} else if (perfMode == qnn::HtpPerformanceMode::kHtpDefault) {
return Status::OK();
} else {
return SetPerformance(htp_power_config_client_id, perfMode);
}
}
return Status::OK();
}

void QnnBackendManager::TimerCallback(void* user_data) {
TimerCallbackArg* args = static_cast<TimerCallbackArg*>(user_data);
QnnBackendManager* instance = args->instance_;
auto rt = instance->SetState(GraphState::TIMEOUT, args->power_config_id_, qnn::HtpPerformanceMode::kHtpSustainedHighPerformance);
if (rt != Status::OK()) {
LOGS_DEFAULT(VERBOSE) << "State update failed";
}
}

void QnnBackendManager::CreateTimerThread(uint32_t htp_power_config_client_id) {
std::lock_guard<std::mutex> lk(state_mutex_);
if (timer_ == nullptr) {
std::unique_ptr<Timer> temp(new Timer());
if (temp != nullptr) {
timer_ = std::move(temp);
timer_callback_arg_ = std::make_unique<TimerCallbackArg>(htp_power_config_client_id, this);
if (timer_callback_arg_ == nullptr) {
LOGS(*logger_, VERBOSE) << "Failed to create Timer argument";
timer_.reset();
return;
}
if (!timer_->Initialize(TimerCallback, timer_callback_arg_.get())) {
LOGS(*logger_, VERBOSE) << "Failed to create timer to set performance";
timer_callback_arg_.reset();
timer_.reset();
}
} else {
LOGS(*logger_, VERBOSE) << "Failed: Timer is nullptr";
}
} else {
LOGS(*logger_, VERBOSE) << "Timer already created";
}
}

void QnnBackendManager::ReleaseTimerThread(uint32_t htp_power_config_client_id) {
std::lock_guard<std::mutex> lk(state_mutex_);
if (timer_ != nullptr) {
timer_->DeInitialize();
graph_state_ = GraphState::NONE;
timer_resource_.caller_busy_ = false;
}

timer_callback_arg_.reset();
timer_.reset();

auto status = SetReleasedPerfPowerConfig(htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT);
if (status != Status::OK()) {
LOGS_DEFAULT(VERBOSE) << "Not able to set Power config to relaxed";
}
}

Status QnnBackendManager::ParseLoraConfig(std::string lora_config_path) {
LOGS_DEFAULT(INFO) << "Acquiring the QnnInterface " << lora_config_path;

Expand Down Expand Up @@ -899,7 +1065,6 @@ Status QnnBackendManager::CreateContext(bool enable_htp_weight_sharing) {
LOGS_DEFAULT(INFO) << "Context created already.";
return Status::OK();
}

QnnContext_Config_t context_config_weight_sharing = QNN_CONTEXT_CONFIG_INIT;
QnnHtpContext_CustomConfig_t custom_config;
custom_config.option = QNN_HTP_CONTEXT_CONFIG_OPTION_WEIGHT_SHARING_ENABLED;
Expand Down Expand Up @@ -1336,10 +1501,148 @@ Status QnnBackendManager::SetupBackend(const logging::Logger& logger,
LOGS_DEFAULT(WARNING) << "Failed to setup so cleaning up";
ReleaseResources();
}

return status;
}

Status QnnBackendManager::SetExtremeLowPerfPowerConfig(uint32_t htp_power_config_client_id) {
QnnDevice_Infrastructure_t qnn_device_infra = nullptr;
auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra);
ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed.");

auto* htp_infra = static_cast<QnnHtpDevice_Infrastructure_t*>(qnn_device_infra);
ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType,
"HTP infra type = ", htp_infra->infraType, ", which is not perf infra type.");
QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra;

constexpr const int kNumConfigs = 1;
std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> power_configs(
kNumConfigs);
QnnHtpPerfInfrastructure_PowerConfig_t& dcvs_config = power_configs[0];
dcvs_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3;
QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = dcvs_config.dcvsV3Config;
dcvs_v3.contextId = htp_power_config_client_id;
dcvs_v3.dcvsEnable = 1;
dcvs_v3.setDcvsEnable = 1;
dcvs_v3.sleepLatency = kSleepHigherLatency;
dcvs_v3.setSleepLatency = 1;
dcvs_v3.sleepDisable = 0;
dcvs_v3.setSleepDisable = 0;
dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE;
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.setBusParams = 1;
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.setCoreParams = 1;

std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr = ObtainNullTermPtrVector(power_configs);
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data());
ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for HTP performance mode.");
return Status::OK();
}

Status QnnBackendManager::SetRelaxedPerfPowerConfig(uint32_t htp_power_config_client_id, DcvsState_t dcvsState) {
QnnDevice_Infrastructure_t qnn_device_infra = nullptr;
auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra);
ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed.");

auto* htp_infra = static_cast<QnnHtpDevice_Infrastructure_t*>(qnn_device_infra);
ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType,
"HTP infra type = ", htp_infra->infraType, ", which is not perf infra type.");
QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra;

constexpr const int kNumConfigs = 1;
std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> power_configs(
kNumConfigs);
QnnHtpPerfInfrastructure_PowerConfig_t& dcvs_config = power_configs[0];
dcvs_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3;
QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = dcvs_config.dcvsV3Config;
dcvs_v3.contextId = htp_power_config_client_id;
dcvs_v3.dcvsEnable = 1;
dcvs_v3.setDcvsEnable = 1;
dcvs_v3.sleepLatency = kSleepHighLatency;
dcvs_v3.setSleepLatency = 1;
dcvs_v3.sleepDisable = 0;
dcvs_v3.setSleepDisable = 0;
if (dcvsState == DcvsState_t::DCVS_ENABLE) {
dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_ADJUST_UP_DOWN;
} else {
dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE;
}
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2;
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS;
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS;
dcvs_v3.setBusParams = 1;
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2;
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS;
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS;
dcvs_v3.setCoreParams = 1;

std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr = ObtainNullTermPtrVector(power_configs);
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data());
ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for HTP performance mode.");

return Status::OK();
}

Status QnnBackendManager::SetReleasedPerfPowerConfig(uint32_t htp_power_config_client_id, DcvsState_t dcvsState) {
QnnDevice_Infrastructure_t qnn_device_infra = nullptr;
auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra);
ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed.");

auto* htp_infra = static_cast<QnnHtpDevice_Infrastructure_t*>(qnn_device_infra);
ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType,
"HTP infra type = ", htp_infra->infraType, ", which is not perf infra type.");
QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra;
constexpr const int kNumConfigs = 1;
std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> power_configs(
kNumConfigs);
QnnHtpPerfInfrastructure_PowerConfig_t& dcvs_config = power_configs[0];
dcvs_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3;
QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = dcvs_config.dcvsV3Config;
dcvs_v3.contextId = htp_power_config_client_id;
dcvs_v3.dcvsEnable = 1;
dcvs_v3.setDcvsEnable = 1;
dcvs_v3.sleepLatency = kSleepHigherLatency;
dcvs_v3.setSleepLatency = 1;
dcvs_v3.sleepDisable = 0;
dcvs_v3.setSleepDisable = 0;

if (dcvsState == DcvsState_t::DCVS_ENABLE) {
dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_ADJUST_UP_DOWN;
} else {
dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE;
}
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER;
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER;
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER;
dcvs_v3.setBusParams = 1;
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER;
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER;
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER;
dcvs_v3.setCoreParams = 1;

std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr = ObtainNullTermPtrVector(power_configs);
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data());
ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for HTP performance mode.");

return Status::OK();
}

Status QnnBackendManager::InitializePowerCfgId(uint32_t device_id, uint32_t core_id, uint32_t& htp_power_config_id) {
ORT_RETURN_IF_ERROR(CreateHtpPowerCfgId(device_id, core_id, htp_power_config_id));
CreateTimerThread(htp_power_config_id);
return Status::OK();
}

Status QnnBackendManager::DeInitializePowerCfgId(uint32_t htp_power_config_id) {
ReleaseTimerThread(htp_power_config_id);
ORT_RETURN_IF_ERROR(DestroyHTPPowerConfigID(htp_power_config_id));
return Status::OK();
}

Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id, uint32_t core_id, uint32_t& htp_power_config_id) {
// This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned
// to a different EP. Therefore, we have to check that backend setup actually completed before trying to
Expand Down
Loading