diff --git a/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h b/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h
index d035fd34bd072..351eafc2a4675 100644
--- a/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h
+++ b/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h
@@ -61,6 +61,22 @@ static const char* const kCoremlProviderOption_SpecializationStrategy = "Special
 static const char* const kCoremlProviderOption_ProfileComputePlan = "ProfileComputePlan";
 // please refer to https://developer.apple.com/documentation/coreml/mlmodelconfiguration/allowlowprecisionaccumulationongpu
 static const char* const kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU = "AllowLowPrecisionAccumulationOnGPU";
+// Specify the directory in which to cache any CoreML models created from the ONNX model.
+// The CoreML EP converts an ONNX subgraph to a CoreML model and saves it to disk.
+// If this path is not specified, the model is saved to a temporary directory and deleted when the session is closed;
+// otherwise, the model is saved to the specified path and the user is responsible for deleting it.
+
+// We do NOT detect whether the ONNX model has changed and no longer matches the cached model,
+// so the user should manage the cache carefully when modifying or replacing a model.
+// The cache key is generated from:
+// 1. the user-provided key in metadata_props, if found (preferred)
+// 2. a hash of the model URL the inference session was created with
+// 3. a hash of the input/output names of the model
+// See the onnxruntime documentation for how to set metadata_props: https://onnxruntime.ai/docs/execution-providers/CoreML-ExecutionProvider.html#configuration-options
+static const char* const kCoremlProviderOption_ModelCacheDirectory = "ModelCacheDirectory";
+
+// User-provided cache key in metadata_props.
+static const char* const kCOREML_CACHE_KEY = "COREML_CACHE_KEY";

 #ifdef __cplusplus
 extern "C" {
diff --git a/onnxruntime/core/platform/env.h b/onnxruntime/core/platform/env.h
index c42b31e64d129..7dbc3fe82db47 100644
--- a/onnxruntime/core/platform/env.h
+++ b/onnxruntime/core/platform/env.h
@@ -197,6 +197,7 @@ class Env {
 #ifdef _WIN32
   /// \brief Returns true if the directory exists.
   virtual bool FolderExists(const std::wstring& path) const = 0;
+  virtual bool FileExists(const std::wstring& path) const = 0;
   /// \brief Recursively creates the directory, if it doesn't exist.
   virtual common::Status CreateFolder(const std::wstring& path) const = 0;
   // Mainly for use with protobuf library
@@ -206,6 +207,7 @@ class Env {
 #endif
   /// \brief Returns true if the directory exists.
   virtual bool FolderExists(const std::string& path) const = 0;
+  virtual bool FileExists(const std::string& path) const = 0;
   /// \brief Recursively creates the directory, if it doesn't exist.
   virtual common::Status CreateFolder(const std::string& path) const = 0;
   // Recursively deletes the directory and its contents.
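
For reference, a minimal usage sketch of the new ModelCacheDirectory option (assuming the C++ API's named-options AppendExecutionProvider overload; the cache path and model path here are hypothetical):

    #include <onnxruntime_cxx_api.h>
    #include <string>
    #include <unordered_map>

    int main() {
      Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "coreml-cache-demo");
      Ort::SessionOptions so;
      std::unordered_map<std::string, std::string> options{
          {"ModelFormat", "MLProgram"},
          // kCoremlProviderOption_ModelCacheDirectory: cached CoreML models survive
          // the session; the user owns cleanup of this directory.
          {"ModelCacheDirectory", "/tmp/coreml_cache"},  // hypothetical path
      };
      so.AppendExecutionProvider("CoreML", options);
      Ort::Session session(env, "model.onnx", so);  // hypothetical model path
      return 0;
    }
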
diff --git a/onnxruntime/core/platform/posix/env.cc b/onnxruntime/core/platform/posix/env.cc
index 04cf5ff6a3329..94aadf3df4d7e 100644
--- a/onnxruntime/core/platform/posix/env.cc
+++ b/onnxruntime/core/platform/posix/env.cc
@@ -471,6 +471,14 @@ class PosixEnv : public Env {
     return S_ISDIR(sb.st_mode);
   }

+  bool FileExists(const std::string& path) const override {
+    struct stat sb;
+    if (stat(path.c_str(), &sb)) {
+      return false;
+    }
+    return S_ISREG(sb.st_mode);
+  }
+
   common::Status CreateFolder(const std::string& path) const override {
     size_t pos = 0;
     do {
diff --git a/onnxruntime/core/platform/windows/env.cc b/onnxruntime/core/platform/windows/env.cc
index 73319cd9c9b1c..4fccad6dfeb37 100644
--- a/onnxruntime/core/platform/windows/env.cc
+++ b/onnxruntime/core/platform/windows/env.cc
@@ -483,6 +483,16 @@ bool WindowsEnv::FolderExists(const std::string& path) const {
   return (attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY);
 }

+bool WindowsEnv::FileExists(const std::wstring& path) const {
+  DWORD attributes = GetFileAttributesW(path.c_str());
+  return (attributes != INVALID_FILE_ATTRIBUTES) && !(attributes & FILE_ATTRIBUTE_DIRECTORY);
+}
+
+bool WindowsEnv::FileExists(const std::string& path) const {
+  DWORD attributes = GetFileAttributesA(path.c_str());
+  return (attributes != INVALID_FILE_ATTRIBUTES) && !(attributes & FILE_ATTRIBUTE_DIRECTORY);
+}
+
 common::Status WindowsEnv::CreateFolder(const std::wstring& path) const {
   size_t pos = 0;
   do {
diff --git a/onnxruntime/core/platform/windows/env.h b/onnxruntime/core/platform/windows/env.h
index 395aface1d809..05b92bb6a21eb 100644
--- a/onnxruntime/core/platform/windows/env.h
+++ b/onnxruntime/core/platform/windows/env.h
@@ -68,6 +68,8 @@ class WindowsEnv : public Env {
                                    MappedMemoryPtr& mapped_memory) const override;
   bool FolderExists(const std::wstring& path) const override;
   bool FolderExists(const std::string& path) const override;
+  bool FileExists(const std::wstring& path) const override;
+  bool FileExists(const std::string& path) const override;
   common::Status CreateFolder(const std::wstring& path) const override;
   common::Status CreateFolder(const std::string& path) const override;
   common::Status DeleteFolder(const PathString& path) const override;
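
A note on the Win32 implementation above: GetFileAttributes reports FILE_ATTRIBUTE_NORMAL only when a file has no other attributes set, so most ordinary files (which typically carry FILE_ATTRIBUTE_ARCHIVE) would fail a FILE_ATTRIBUTE_NORMAL test; the check therefore has to be "exists and is not a directory". A portable sketch of the intended semantics:

    #include <filesystem>
    #include <string>

    // Equivalent of Env::FileExists: true only for an existing regular file.
    bool FileExistsPortable(const std::string& path) {
      std::error_code ec;
      return std::filesystem::is_regular_file(path, ec);  // false on error or non-file path
    }
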
diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.cc b/onnxruntime/core/providers/coreml/builders/model_builder.cc
index 6486942199df7..f8952301d59a9 100644
--- a/onnxruntime/core/providers/coreml/builders/model_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/model_builder.cc
@@ -390,13 +390,66 @@ void CreateEmptyFile(const std::string& filename) {

 #endif  // defined(COREML_ENABLE_MLPROGRAM)

-std::string GetModelOutputPath(bool create_ml_program) {
-  // path is used to create the ML Package directory for ML Program, and for the model directly otherwise.
-  auto path = util::GetTemporaryFilePath();
-  if (!create_ml_program) {
-    path += ".model.mlmodel";
-  }
+std::string GetModelOutputPath(const CoreMLOptions& coreml_options,
+                               const GraphViewer& graph_viewer,
+                               const logging::Logger& logger) {
+  const std::string& subgraph_name = graph_viewer.Name();
+  std::string path;
+  if (coreml_options.ModelCacheDirectory().empty()) {
+    // path is used to create the ML Package directory for ML Program, and for the model directly otherwise.
+    path = util::GetTemporaryFilePath();
+    if (!coreml_options.CreateMLProgram()) {
+      path += ".model.mlmodel";
+    }
+  } else {
+    // subgraph_name is uniquely generated by
+    // onnxruntime/core/providers/coreml/coreml_execution_provider.cc::gen_metadef_name:
+    //   int metadef_id = metadef_id_generator_.GenerateId(graph_viewer, model_hash);
+    //   MakeString(user_provided_key, "_", COREML, "_", model_hash, "_", metadef_id);
+    std::string_view cache_key = std::string_view(subgraph_name)
+                                     .substr(0, subgraph_name.find_first_of("_"));
+    // subgraph_short_name is metadef_id
+    std::string_view subgraph_short_name = std::string_view(subgraph_name)
+                                               .substr(subgraph_name.find_last_of("_") + 1);
+    path = MakeString(std::string(coreml_options.ModelCacheDirectory()), "/", cache_key);
+
+    std::string model_file_path = path + "/model.txt";
+
+    path = MakeString(path, "/", subgraph_short_name);
+    // Qualify the cache path with the RequireStaticShape and ModelFormat settings.
+    if (coreml_options.RequireStaticShape()) {
+      path += "_static";
+    } else {
+      path += "_dynamic";
+    }
+    if (coreml_options.CreateMLProgram()) {
+      path += "_mlprogram";
+    } else {
+      path += "_nn";
+    }
+    if (!Env::Default().CreateFolder(path).IsOK()) {
+      LOGS(logger, ERROR) << "Failed to create cache directory `" << path << "`. Model caching is disabled.";
+      coreml_options.DisableModelCache();
+      return GetModelOutputPath(coreml_options, graph_viewer, logger);
+    }
+    path += "/model";
+    // Write the source model's path to a file in the cache directory so developers can tell
+    // which model a cache entry belongs to, as the directory name is a hash.
+    if (!Env::Default().FileExists(ToPathString(model_file_path))) {
+      const Graph* main_graph = &graph_viewer.GetGraph();
+      while (main_graph->IsSubgraph()) {
+        main_graph = main_graph->ParentGraph();
+      }
+      std::ofstream file(model_file_path);
+      if (!file.is_open()) {
+        LOGS(logger, ERROR) << "Failed to open file " << model_file_path;
+      } else {
+        file << main_graph->ModelPath().string();
+        file.close();
+      }
+    }
+  }
   return path;
 }
 }  // namespace
@@ -410,10 +463,21 @@ ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const logging::Logge
       coreml_version_(coreml_version),
       coreml_options_(coreml_options),
       create_ml_program_(coreml_options.CreateMLProgram()),
-      model_output_path_(GetModelOutputPath(create_ml_program_)),
+      model_output_path_(GetModelOutputPath(coreml_options_, graph_viewer, logger)),  // coreml_options_ must be set before this
       onnx_input_names_(std::move(onnx_input_names)),
       onnx_output_names_(std::move(onnx_output_names)),
      coreml_model_(std::make_unique<CoreML::Specification::Model>()) {
+  // GetTemporaryFilePath() always produces a unique path that does not yet exist;
+  // an ML Program is created as a folder while a NeuralNetwork model is a file, so check both.
+  if (Env::Default().FolderExists(ToPathString(model_output_path_)) ||
+      Env::Default().FileExists(ToPathString(model_output_path_))) {
+    is_model_cached_ = true;
+    LOGS(logger, INFO) << "Model is already cached in " << model_output_path_
+                       << " and will be reused. If you want to update the model or hit other issues, "
+                       << "please clear the cache and retry.";
+    return;
+  }
+
   if (create_ml_program_) {
 #if defined(COREML_ENABLE_MLPROGRAM)
     coreml_model_->set_specificationversion(CoreMLSpecVersion());
@@ -847,6 +911,10 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i

   input_output_info_.emplace(name, OnnxTensorInfo{data_type, shape});

+  if (IsModelCached()) {
+    return Status::OK();
+  }
+
 #if defined(COREML_ENABLE_MLPROGRAM)
   if (create_ml_program_) {
     if (is_input) {
@@ -1056,8 +1124,14 @@ Status ModelBuilder::Build(const GraphViewer& graph_viewer, const logging::Logge
   ModelBuilder builder(graph_viewer, logger, coreml_version, coreml_options,
                       std::move(onnx_input_names), std::move(onnx_output_names));

-  ORT_RETURN_IF_ERROR(builder.CreateModel());
-  ORT_RETURN_IF_ERROR(builder.SaveModel());
+  if (!builder.IsModelCached()) {
+    ORT_RETURN_IF_ERROR(builder.CreateModel());
+    ORT_RETURN_IF_ERROR(builder.SaveModel());
+  } else {
+    // the runtime requires the input/output names to be passed
+    ORT_RETURN_IF_ERROR(builder.RegisterModelInputs());
+    ORT_RETURN_IF_ERROR(builder.RegisterModelOutputs());
+  }

   return builder.LoadModel(model);
 }
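
Putting the pieces of GetModelOutputPath together, a cache entry ends up laid out as sketched below (the helper is illustrative only; the real code works on CoreMLOptions and the GraphViewer):

    #include <string>

    // <ModelCacheDirectory>/<cache_key>/model.txt  -> source model path, for developers
    // <ModelCacheDirectory>/<cache_key>/<metadef_id>_<static|dynamic>_<mlprogram|nn>/model...
    std::string CacheEntryPath(const std::string& cache_dir, const std::string& subgraph_name,
                               bool require_static_shape, bool create_mlprogram) {
      // subgraph_name has the form "<cache_key>_COREML_<model_hash>_<metadef_id>"
      const std::string cache_key = subgraph_name.substr(0, subgraph_name.find_first_of('_'));
      const std::string metadef_id = subgraph_name.substr(subgraph_name.find_last_of('_') + 1);
      return cache_dir + "/" + cache_key + "/" + metadef_id +
             (require_static_shape ? "_static" : "_dynamic") +
             (create_mlprogram ? "_mlprogram" : "_nn") + "/model";
    }
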
diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.h b/onnxruntime/core/providers/coreml/builders/model_builder.h
index e19597cf0dc2e..28c7dc42da581 100644
--- a/onnxruntime/core/providers/coreml/builders/model_builder.h
+++ b/onnxruntime/core/providers/coreml/builders/model_builder.h
@@ -54,6 +54,7 @@ class ModelBuilder {
   // We only support CoreML 3 and later so the spec version is always version + 1.
   int32_t CoreMLVersion() const { return coreml_version_; }
   int32_t CoreMLSpecVersion() const { return coreml_version_ + 1; }
+  bool IsModelCached() const { return is_model_cached_; }

   // Returns true if we are creating an ML Program
   bool CreateMLProgram() const {
@@ -218,8 +219,9 @@ class ModelBuilder {
   const logging::Logger& logger_;
   const int32_t coreml_version_;
   CoreMLOptions coreml_options_;
-  const bool create_ml_program_;         // ML Program (CoreML5, iOS 15+, macOS 12+) or NeuralNetwork (old)
-  const std::string model_output_path_;  // create_ml_program_ ? dir for mlpackage : filename for mlmodel
+  const bool create_ml_program_;   // ML Program (CoreML5, iOS 15+, macOS 12+) or NeuralNetwork (old)
+  std::string model_output_path_;  // create_ml_program_ ? dir for mlpackage : filename for mlmodel
+  bool is_model_cached_{false};

   std::vector<std::string> onnx_input_names_;
   std::vector<std::string> onnx_output_names_;
diff --git a/onnxruntime/core/providers/coreml/coreml_execution_provider.cc b/onnxruntime/core/providers/coreml/coreml_execution_provider.cc
index 5a2867e5524e4..b6bb4f2c1d66a 100644
--- a/onnxruntime/core/providers/coreml/coreml_execution_provider.cc
+++ b/onnxruntime/core/providers/coreml/coreml_execution_provider.cc
@@ -18,6 +18,7 @@
 #include "core/providers/coreml/model/host_utils.h"
 #include "core/providers/coreml/model/model.h"
 #include "core/providers/coreml/shape_utils.h"
+#include "core/graph/model.h"

 namespace onnxruntime {

@@ -52,12 +53,37 @@ CoreMLExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_vie
   const auto builder_params = coreml::MakeOpBuilderParams(graph_viewer, coreml_version_,
                                                           coreml_options_.RequireStaticShape(),
                                                           coreml_options_.CreateMLProgram());
   const auto supported_nodes = coreml::GetSupportedNodes(graph_viewer, builder_params, logger);
-
+  const Graph* main_graph = &graph_viewer.GetGraph();
+  while (main_graph->IsSubgraph()) {
+    main_graph = main_graph->ParentGraph();
+  }
+  const auto& metadata = main_graph->GetModel().MetaData();
+
+  std::string user_provided_key = metadata.count(kCOREML_CACHE_KEY) > 0
+                                      ? metadata.at(kCOREML_CACHE_KEY)
+                                      : "";
+  if (user_provided_key.size() > 64 ||
+      std::any_of(user_provided_key.begin(), user_provided_key.end(),
+                  [](unsigned char c) { return !std::isalnum(c); })) {
+    LOGS(logger, ERROR) << "[" << kCOREML_CACHE_KEY << ":" << user_provided_key << "] is not a valid cache key."
+                        << " It must be alphanumeric and at most 64 characters.";
+    user_provided_key = "";
+  }
   const auto gen_metadef_name = [&]() {
     HashValue model_hash;
     int metadef_id = metadef_id_generator_.GenerateId(graph_viewer, model_hash);
-    return MakeString(COREML, "_", model_hash, "_", metadef_id);
+    // use model_hash as the key if the user doesn't provide one
+    if (user_provided_key.empty()) {
+      // the user passed an empty string;
+      // model_hash is a 64-bit hash of the model path if the path is not empty,
+      // otherwise it hashes the graph input names and all the node output names.
+      // Uniqueness of the key can't be guaranteed, so the user should manage the key carefully.
+      user_provided_key = std::to_string(model_hash);
+    }
+    // The string format is consumed by onnxruntime/core/providers/coreml/builders/model_builder.cc::GetModelOutputPath.
+    // If the format changes, that function must be updated accordingly.
+    return MakeString(user_provided_key, "_", COREML, "_", model_hash, "_", metadef_id);
   };

   result = utils::CreateSupportedPartitions(graph_viewer, supported_nodes, {},
diff --git a/onnxruntime/core/providers/coreml/coreml_options.cc b/onnxruntime/core/providers/coreml/coreml_options.cc
index 4ec780208e528..14ae55de9266b 100644
--- a/onnxruntime/core/providers/coreml/coreml_options.cc
+++ b/onnxruntime/core/providers/coreml/coreml_options.cc
@@ -5,6 +5,7 @@
 #include "core/providers/coreml/coreml_provider_factory.h"  // defines flags
 #include "core/providers/coreml/model/host_utils.h"
 #include "core/providers/coreml/builders/helper.h"
+#include "core/platform/env.h"

 namespace onnxruntime {

@@ -71,6 +72,7 @@ void CoreMLOptions::ValidateAndParseProviderOption(const ProviderOptions& option
       kCoremlProviderOption_SpecializationStrategy,
       kCoremlProviderOption_ProfileComputePlan,
       kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU,
+      kCoremlProviderOption_ModelCacheDirectory,
   };
   // Validate the options
   for (const auto& option : options) {
@@ -103,6 +105,8 @@ void CoreMLOptions::ValidateAndParseProviderOption(const ProviderOptions& option
       profile_compute_plan_ = option.second == "1";
     } else if (kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU == option.first) {
       allow_low_precision_accumulation_on_gpu_ = option.second == "1";
+    } else if (kCoremlProviderOption_ModelCacheDirectory == option.first) {
+      model_cache_directory_ = option.second;
     }
   }
 }
diff --git a/onnxruntime/core/providers/coreml/coreml_options.h b/onnxruntime/core/providers/coreml/coreml_options.h
index fd05c96927bd1..d7ee04b3f8a79 100644
--- a/onnxruntime/core/providers/coreml/coreml_options.h
+++ b/onnxruntime/core/providers/coreml/coreml_options.h
@@ -17,6 +17,9 @@ class CoreMLOptions {
   std::string strategy_;
   bool profile_compute_plan_{false};
   bool allow_low_precision_accumulation_on_gpu_{false};
+  // path where the converted CoreML model is stored;
+  // DisableModelCache() may clear it to turn model caching off
+  mutable std::string model_cache_directory_;

  public:
   explicit CoreMLOptions(uint32_t coreml_flags);
@@ -32,6 +35,11 @@ class CoreMLOptions {
   bool UseStrategy(std::string_view strategy) const { return strategy_ == strategy; }
   bool ProfileComputePlan() const { return profile_compute_plan_ && create_mlprogram_; }

+  std::string_view ModelCacheDirectory() const { return model_cache_directory_; }
+  // The options specified by the user are const, but if there's an error setting up caching we disable it
+  // so that the EP can still be used. The error is logged for the user to investigate.
+  void DisableModelCache() const { model_cache_directory_.clear(); }
+
  private:
   void ValidateAndParseProviderOption(const ProviderOptions& options);
 };
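
The cache key read by GetCapability above must be alphanumeric and at most 64 characters, otherwise the EP logs an error and falls back to the model hash. A sketch of attaching the key to a model through metadata_props with the onnx protobuf API (the key value is hypothetical; compare the test at the end of this patch):

    #include <string>
    #include "onnx/onnx_pb.h"

    void SetCoreMLCacheKey(onnx::ModelProto& model, const std::string& key) {
      auto* prop = model.add_metadata_props();  // StringStringEntryProto
      prop->set_key("COREML_CACHE_KEY");        // i.e. kCOREML_CACHE_KEY
      prop->set_value(key);                     // e.g. "mymodelv1"; alphanumeric, <= 64 chars
    }
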
diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 755dbfbd6e68c..5211b89ec17c6 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -301,53 +301,144 @@ Status GetMLMultiArrayCopyInfo(const MLMultiArray* _Nonnull array,
   return Status::OK();
 }

-// since __clang_major__ >= 15, MLComputePlan is introduced in <CoreML/MLComputePlan.h>
-// We are actually ensure the MacOS/IOS version and Xcode version is greater than `macOS 14.4, iOS 17.4`.
-// The macro API_AVAILABLE should also be fine.
+// MLComputePlan is introduced in <CoreML/MLComputePlan.h> as of macos(14.4), ios(17.4).
 // Otherwise, the compiler will complain `MLComputePlan` is not defined.
-// we define __clang_analyzer__ here is for bypass static analysis
+#if __has_include(<CoreML/MLComputePlan.h>)
+#define HAS_COREMLPLAN 1
+#else
+#define HAS_COREMLPLAN 0
+#endif
+
+#if HAS_COREMLPLAN
+API_AVAILABLE(macos(14.4), ios(17.4), tvos(17.4), watchos(10.4))
+void ProfileBlock(MLComputePlan* _Nullable computePlan, MLModelStructureProgramBlock* block) {
+  for (MLModelStructureProgramOperation* operation in block.operations) {
+    for (size_t i = 0; i < operation.blocks.count; ++i) {
+      ProfileBlock(computePlan, operation.blocks[i]);
+    }
+    // Get the compute device usage for the operation.
+    MLComputePlanDeviceUsage* computeDeviceUsage = [computePlan computeDeviceUsageForMLProgramOperation:operation];
+    id<MLComputeDeviceProtocol> preferredDevice = computeDeviceUsage.preferredComputeDevice;
+    // Get the estimated cost of executing the operation.
+    MLComputePlanCost* estimatedCost = [computePlan estimatedCostOfMLProgramOperation:operation];
+    if (![operation.operatorName isEqualToString:@"const"]) {
+      NSLog(@"Operation: %@, Device Usage: %@, Estimated Cost: %f", operation.operatorName, preferredDevice, estimatedCost.weight);
+    }
+  }
+}
+#endif
+
+// MLComputePlan is introduced in <CoreML/MLComputePlan.h> as of macos(14.4), ios(17.4).
+// Otherwise, the compiler will complain `MLComputePlan` is not defined.
+API_AVAILABLE(macos(14.4), ios(17.4), tvos(17.4), watchos(10.4))
 void ProfileComputePlan(NSURL* compileUrl, MLModelConfiguration* config) {
-#if defined(__APPLE__) && defined(__clang__) && __clang_major__ >= 15 && !defined(__clang_analyzer__)
-  if (@available(macOS 14.4, iOS 17.4, *)) {
-    [MLComputePlan loadContentsOfURL:compileUrl
-                       configuration:config
-                   completionHandler:^(MLComputePlan* _Nullable computePlan, NSError* _Nullable error) {
-                     if (!computePlan) {
-                       NSLog(@"Error loading compute plan: %@", error);
-                       // Handle error.
-                       return;
-                     }
-                     MLModelStructureProgram* program = computePlan.modelStructure.program;
-                     if (!program) {
-                       NSLog(@"Error loading program from compute plan., this is not a mlprogram model");
-                       return;
-                     }
-
-                     MLModelStructureProgramFunction* mainFunction = program.functions[@"main"];
-                     if (!mainFunction) {
-                       NSLog(@"Error loading main function from program");
-                       return;
-                     }
-
-                     NSArray<MLModelStructureProgramOperation*>* operations = mainFunction.block.operations;
-                     NSLog(@"Number of operations, 'const' node is included. : %lu", operations.count);
-                     for (MLModelStructureProgramOperation* operation in operations) {
-                       // Get the compute device usage for the operation.
-                       MLComputePlanDeviceUsage* computeDeviceUsage = [computePlan computeDeviceUsageForMLProgramOperation:operation];
-                       id<MLComputeDeviceProtocol> preferredDevice = computeDeviceUsage.preferredComputeDevice;
-                       // Get the estimated cost of executing the operation.
-                       MLComputePlanCost* estimatedCost = [computePlan estimatedCostOfMLProgramOperation:operation];
-                       if (![operation.operatorName isEqualToString:@"const"]) {
-                         NSLog(@"Operation: %@, Device Usage: %@, Estimated Cost: %f", operation.operatorName, preferredDevice, estimatedCost.weight);
-                       }
-                     }
+#if HAS_COREMLPLAN
+  dispatch_semaphore_t fd_sema = dispatch_semaphore_create(0);
+  [MLComputePlan loadContentsOfURL:compileUrl
+                     configuration:config
+                 completionHandler:^(MLComputePlan* _Nullable computePlan, NSError* _Nullable error) {
+                   if (!computePlan) {
+                     NSLog(@"Error loading compute plan: %@", error);
+                     // Handle error.
+                     return;
+                   }
+                   MLModelStructureProgram* program = computePlan.modelStructure.program;
+                   if (!program) {
+                     NSLog(@"Error loading program from compute plan; this is not an mlprogram model");
+                     return;
+                   }
+
+                   [computePlan.modelStructure.program.functions enumerateKeysAndObjectsUsingBlock:^(NSString* function_name,
+                                                                                                     MLModelStructureProgramFunction* function,
+                                                                                                     BOOL* _Nonnull __unused stop) {
+                     NSLog(@"profile function : %@", function_name);
+                     ProfileBlock(computePlan, function.block);
+                     dispatch_semaphore_signal(fd_sema);
                    }];
+                 }];
+  long status = dispatch_semaphore_wait(fd_sema, dispatch_time(DISPATCH_TIME_NOW, (int64_t)(5 * 60 * NSEC_PER_SEC)));
+  if (status != 0) {
+    NSLog(@"profile function : timeout");
+  }
+#endif
+}
+
+#if __has_include(<CoreML/MLOptimizationHints.h>)
+#define HAS_COREMLOPTIMIZATIONHINT 1
+#else
+#define HAS_COREMLOPTIMIZATIONHINT 0
+#endif
+
+API_AVAILABLE_COREML8
+void ConfigureOptimizationHints(MLModelConfiguration* config, const CoreMLOptions& coreml_options) {
+#if HAS_COREMLOPTIMIZATIONHINT
+  MLOptimizationHints* optimizationHints = [[MLOptimizationHints alloc] init];
+  if (coreml_options.UseStrategy("FastPrediction")) {
+    optimizationHints.specializationStrategy = MLSpecializationStrategyFastPrediction;
+    config.optimizationHints = optimizationHints;
+  } else if (coreml_options.UseStrategy("Default")) {
+    optimizationHints.specializationStrategy = MLSpecializationStrategyDefault;
+    config.optimizationHints = optimizationHints;
+  } else {
-    NSLog(@"iOS 17.4+/macOS 14.4+ or later is required to use the compute plan API");
+    // not set
   }
 #endif
 }
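
ProfileComputePlan above now blocks on a dispatch semaphore because -[MLComputePlan loadContentsOfURL:configuration:completionHandler:] completes asynchronously; the five-minute timeout keeps a missing callback from hanging the session. A minimal sketch of that bounded-wait pattern with libdispatch, compiled as Objective-C++ like model.mm (the async work here is a stand-in for the completion handler):

    #include <dispatch/dispatch.h>
    #include <cstdio>

    void BoundedWaitDemo() {
      dispatch_semaphore_t sema = dispatch_semaphore_create(0);
      dispatch_async(dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{
        // ... async work; in model.mm this is the MLComputePlan completion handler ...
        dispatch_semaphore_signal(sema);
      });
      // Wake up after at most 5 minutes even if the handler never fires.
      if (dispatch_semaphore_wait(sema, dispatch_time(DISPATCH_TIME_NOW, (int64_t)(5 * 60 * NSEC_PER_SEC))) != 0) {
        std::puts("profile timed out");
      }
    }
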
+Status CompileOrReadCachedModel(NSURL* modelUrl, const CoreMLOptions& coreml_options,
+                                NSMutableString* compiled_model_path) {
+  NSURL* cached_model_base_url = modelUrl;
+  if (!coreml_options.CreateMLProgram()) {
+    cached_model_base_url = [cached_model_base_url URLByDeletingLastPathComponent];
+  }
+
+  NSURL* cached_model_url = [cached_model_base_url URLByAppendingPathComponent:@"compiled_model.mlmodelc"];
+  // if the cached model already exists, just return it
+  NSError* error = nil;
+  NSString* cached_model_path = [cached_model_url path];
+  // to pass the clang-tidy static analyzer
+  if (compiled_model_path == nil || cached_model_path == nil) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error creating cached model URL");
+  }
+  if ([[NSFileManager defaultManager] fileExistsAtPath:cached_model_path]) {
+    [compiled_model_path appendString:cached_model_path];
+    return Status::OK();
+  }
+
+  // TODO: Update this to the version with a callback handler as the API used here is deprecated.
+  // https://developer.apple.com/documentation/coreml/mlmodel/3929553-compilemodelaturl
+  // As we call loadModel during EP Compile there shouldn't be an issue letting the actual compile run in the
+  // background. We will have to check for completion in `predict` and block until it is done.
+  NSURL* compiled_model_url = [MLModel compileModelAtURL:modelUrl error:&error];
+  if (error != nil) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error compiling model: ",
+                           [[error localizedDescription] UTF8String]);
+  }
+
+  // to pass the clang-tidy static analyzer
+  NSString* compiled_model_path_from_url = [compiled_model_url path];
+  if (compiled_model_url == nil || cached_model_url == nil || compiled_model_path_from_url == nil) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "compiled_model_url or cached_model_url is nil");
+  }
+  if (coreml_options.ModelCacheDirectory().empty()) {
+    [compiled_model_path appendString:compiled_model_path_from_url];
+    return Status::OK();
+  }
+
+  // save the compiled model if the user has set a cache path
+  if (![[NSFileManager defaultManager] moveItemAtURL:compiled_model_url toURL:cached_model_url error:&error]) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error moving compiled model to cache path: ",
+                           [[cached_model_url path] UTF8String], ", reason: ", [[error localizedDescription] UTF8String]);
+  }
+  // clang-tidy
+  NSString* cached_model_path_from_url = [cached_model_url path];
+  if (cached_model_path_from_url == nil) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "cached_model_path_from_url is nil");
+  }
+  [compiled_model_path appendString:cached_model_path_from_url];
+  return Status::OK();
+}
+
 // Internal Execution class
 // This class is part of the model class and handles the calls into CoreML. Specifically, it performs
 // 1. Compile the model by given path for execution
@@ -366,7 +457,7 @@ Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
  private:
   void cleanup();

   NSString* coreml_model_path_{nil};
-  NSString* compiled_model_path_{nil};
+  NSURL* compiled_model_url_{nil};
   const logging::Logger& logger_;
   CoreMLOptions coreml_options_;
   MLModel* model_{nil};
@@ -387,14 +478,18 @@ Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
 }

 void Execution::cleanup() {
+  // we keep the compiled model if the user has set a cache path
+  if (coreml_options_.ModelCacheDirectory().size()) {
+    return;
+  }
+  NSString* compiled_model_path = [compiled_model_url_ path];
   NSError* error = nil;
-  if (compiled_model_path_ != nil) {
-    [[NSFileManager defaultManager] removeItemAtPath:compiled_model_path_ error:&error];
+  if (compiled_model_path != nil) {
+    [[NSFileManager defaultManager] removeItemAtPath:compiled_model_path error:&error];
     if (error != nil) {
-      LOGS(logger_, ERROR) << "Failed cleaning up the compiled model: " << [compiled_model_path_ UTF8String]
+      LOGS(logger_, ERROR) << "Failed cleaning up the compiled model: " << [compiled_model_path UTF8String]
                            << ", error message: " << [[error localizedDescription] UTF8String];
     }
-    compiled_model_path_ = nil;
   }

 #if !defined(NDEBUG)
@@ -430,17 +525,10 @@ Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create model URL from path");
   }

-  // TODO: Update this to version with callback handler as the API used here is deprecated.
-  // https://developer.apple.com/documentation/coreml/mlmodel/3929553-compilemodelaturl
-  // As we call loadModel during EP Compile there shouldn't be an issue letting the actual compile run in the
-  // background. We will have to check for completion in `predict` and block until it is done.
-  NSURL* compileUrl = [MLModel compileModelAtURL:modelUrl error:&error];
-  if (error != nil) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error compiling model: ",
-                           [[error localizedDescription] UTF8String]);
-  }
-
-  compiled_model_path_ = [compileUrl path];
+  NSMutableString* compiled_model_path = [[NSMutableString alloc] init];
+  ORT_RETURN_IF_ERROR(CompileOrReadCachedModel(
+      [NSURL fileURLWithPath:coreml_model_path_], coreml_options_, compiled_model_path));
+  compiled_model_url_ = [NSURL fileURLWithPath:compiled_model_path];

   MLModelConfiguration* config = [[MLModelConfiguration alloc] init];
   uint32_t coreml_compute_unit = coreml_options_.ComputeUnits();
@@ -458,27 +546,22 @@ Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
     config.allowLowPrecisionAccumulationOnGPU = YES;
   }

-// Set the specialization strategy to FastPrediction for macOS 10.15+
-// since __clang_major__ >= 15, optimizationHints is introduced in <CoreML/MLOptimizationHints.h>
-// Same as above comments for why we are checking __clang_major__.
-// we define __clang_analyzer__ here is for bypass static analysis
-#if defined(__APPLE__) && defined(__clang__) && __clang_major__ >= 15 && !defined(__clang_analyzer__)
+  // Set the specialization strategy to FastPrediction for macOS 10.15+
   if (HAS_COREML8_OR_LATER) {
-    MLOptimizationHints* optimizationHints = [[MLOptimizationHints alloc] init];
-    if (coreml_options_.UseStrategy("FastPrediction")) {
-      optimizationHints.specializationStrategy = MLSpecializationStrategyFastPrediction;
-      config.optimizationHints = optimizationHints;
-    } else if (coreml_options_.UseStrategy("Default")) {
-      optimizationHints.specializationStrategy = MLSpecializationStrategyDefault;
-      config.optimizationHints = optimizationHints;
-    }
+    ConfigureOptimizationHints(config, coreml_options_);
+  } else {
+    LOGS(logger_, WARNING) << "iOS 17.4+/macOS 14.4+ or later is required to configure optimization hints";
   }
-#endif
+
   if (coreml_options_.ProfileComputePlan()) {
-    ProfileComputePlan(compileUrl, config);
+    if (@available(macOS 14.4, iOS 17.4, *)) {
+      ProfileComputePlan(compiled_model_url_, config);
+    } else {
+      LOGS(logger_, WARNING) << "iOS 17.4+/macOS 14.4+ or later is required to use the compute plan API";
+    }
   }

-  model_ = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error];
+  model_ = [MLModel modelWithContentsOfURL:compiled_model_url_ configuration:config error:&error];

   if (error != nil || model_ == nil) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create MLModel",
diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc
index 23c3812ebd025..0b1b2bae6c972 100644
--- a/onnxruntime/test/perftest/command_args_parser.cc
+++ b/onnxruntime/test/perftest/command_args_parser.cc
@@ -138,6 +138,7 @@ namespace perftest {
       "\t    [CoreML only] [SpecializationStrategy]:[Default FastPrediction].\n"
       "\t    [CoreML only] [ProfileComputePlan]:[0 1].\n"
       "\t    [CoreML only] [AllowLowPrecisionAccumulationOnGPU]:[0 1].\n"
+      "\t    [CoreML only] [ModelCacheDirectory]:[path/to/cache/dir].\n"
       "\t    [Example] [For CoreML EP] -e coreml -i \"ModelFormat|MLProgram MLComputeUnits|CPUAndGPU\"\n"
       "\n"
      "\t    [SNPE only] [runtime]: SNPE runtime, options: 'CPU', 'GPU', 'GPU_FLOAT16', 'DSP', 'AIP_FIXED_TF'. \n"
\n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index a96028ed3903e..08c2cff8058c2 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -349,7 +349,8 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); kCoremlProviderOption_EnableOnSubgraphs, kCoremlProviderOption_SpecializationStrategy, kCoremlProviderOption_ProfileComputePlan, - kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU}; + kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU, + kCoremlProviderOption_ModelCacheDirectory}; ParseSessionConfigs(ov_string, provider_options, available_keys); std::unordered_map available_options = { @@ -373,6 +374,7 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); (provider_option.second == "0" || provider_option.second == "1")) { } else if (provider_option.first == kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU && (provider_option.second == "0" || provider_option.second == "1")) { + } else if (provider_option.first == kCoremlProviderOption_ModelCacheDirectory) { } else { ORT_THROW("Invalid value for option ", provider_option.first, ": ", provider_option.second); } diff --git a/onnxruntime/test/providers/coreml/coreml_basic_test.cc b/onnxruntime/test/providers/coreml/coreml_basic_test.cc index a8480e7416de5..302ad57fb88c5 100644 --- a/onnxruntime/test/providers/coreml/coreml_basic_test.cc +++ b/onnxruntime/test/providers/coreml/coreml_basic_test.cc @@ -15,6 +15,7 @@ #include "test/util/include/inference_session_wrapper.h" #include "test/util/include/test_environment.h" #include "test/util/include/test_utils.h" +#include "onnx/onnx_pb.h" #if !defined(ORT_MINIMAL_BUILD) // if this is a full build we need the provider test utils @@ -31,9 +32,10 @@ namespace onnxruntime { namespace test { static std::unique_ptr MakeCoreMLExecutionProvider( - std::string ModelFormat = "NeuralNetwork", std::string ComputeUnits = "CPUOnly") { + std::string ModelFormat = "NeuralNetwork", std::string ComputeUnits = "CPUOnly", std::string ModelCacheDirectory = "") { std::unordered_map provider_options = {{kCoremlProviderOption_MLComputeUnits, ComputeUnits}, - {kCoremlProviderOption_ModelFormat, ModelFormat}}; + {kCoremlProviderOption_ModelFormat, ModelFormat}, + {kCoremlProviderOption_ModelCacheDirectory, ModelCacheDirectory}}; return CoreMLProviderFactoryCreator::Create(provider_options)->CreateProvider(); } @@ -268,5 +270,112 @@ TEST(CoreMLExecutionProviderTest, TestNameSanitization) { } #endif +TEST(CoreMLExecutionProviderTest, TestModelCache) { + const ORTCHAR_T* model_file_name = ORT_TSTR("testdata/coreml_argmax_cast_test.onnx"); + + onnx::ModelProto model; + { + std::ifstream in(model_file_name, std::ios_base::binary); + model.ParseFromIstream(&in); + in.close(); + } + + std::string out_string; +#if defined(__APPLE__) + std::vector dims_mul_x = {3, 2, 2}; + std::vector values_mul_x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; + OrtValue ml_value_x; + AllocatorPtr allocator = std::make_shared(); + CreateMLValue(allocator, dims_mul_x, values_mul_x, &ml_value_x); + + NameMLValMap feeds; + feeds.insert(std::make_pair("X", ml_value_x)); + std::string subgraph_name; + const std::function graph_verifier = [&subgraph_name](const Graph& graph) { + GraphViewer graph_viewer{graph}; + const auto& node_indices_in_order = graph_viewer.GetNodesInTopologicalOrder(); + const auto* node = graph.GetNode(node_indices_in_order[0]); + 
+    auto _first = node->Name().find('_') + 1;
+    auto _second = node->Name().find('_', _first);
+    subgraph_name = node->Name().substr(_first, _second - _first);
+  };
+  EPVerificationParams verification_params{.graph_verifier = &graph_verifier};
+
+  auto* metadata_props = model.add_metadata_props();
+  metadata_props->set_key(kCOREML_CACHE_KEY);
+  {  // test with a valid cache key
+    metadata_props->set_value("legalhash123");
+    model.SerializeToString(&out_string);
+    gsl::span<const std::byte> model_data{reinterpret_cast<const std::byte*>(out_string.data()), out_string.size()};
+    RunAndVerifyOutputsWithEP(model_data, CurrentTestName(),
+                              MakeCoreMLExecutionProvider("MLProgram", "CPUOnly", ORT_TSTR("./tmp/")),
+                              feeds,
+                              verification_params);
+    ASSERT_EQ(std::filesystem::exists("./tmp/legalhash123"), true);
+  }
+  {
+    // test with an invalid cache key: only alphanumeric characters are allowed
+    out_string.clear();
+    metadata_props->set_key(kCOREML_CACHE_KEY);
+    metadata_props->set_value("illegalhash__123");
+    model.SerializeToString(&out_string);
+    gsl::span<const std::byte> model_data{reinterpret_cast<const std::byte*>(out_string.data()), out_string.size()};
+    RunAndVerifyOutputsWithEP(model_data, CurrentTestName(),
+                              MakeCoreMLExecutionProvider("MLProgram", "CPUOnly", ORT_TSTR("./tmp")),
+                              feeds,
+                              verification_params);
+    ASSERT_EQ(std::filesystem::exists("./tmp/illegalhash__123"), false);
+    // the cache folder name should be the first part of the subgraph name
+    ASSERT_EQ(std::filesystem::exists("./tmp/" + subgraph_name), true);
+  }
+  {
+    // test with an invalid cache key: more than 64 characters
+    out_string.clear();
+    metadata_props->set_key(kCOREML_CACHE_KEY);
+    metadata_props->set_value("modelhashwithmorethan64charactersmodelhashwithmorethan64charactersmodelhashwithmorethan64characters");
+    model.SerializeToString(&out_string);
+    gsl::span<const std::byte> model_data{reinterpret_cast<const std::byte*>(out_string.data()), out_string.size()};
+    RunAndVerifyOutputsWithEP(model_data, CurrentTestName(),
+                              MakeCoreMLExecutionProvider("MLProgram", "CPUOnly", ORT_TSTR("./tmp")),
+                              feeds,
+                              verification_params);
+    ASSERT_EQ(std::filesystem::exists("./tmp/modelhashwithmorethan64charactersmodelhashwithmorethan64charactersmodelhashwithmorethan64characters"), false);
+    // the cache folder name should be the first part of the subgraph name
+    ASSERT_EQ(std::filesystem::exists("./tmp/" + subgraph_name), true);
+  }
+  {
+    // test with an empty cache key; the EP falls back to the model hash
+    out_string.clear();
+    metadata_props->set_key(kCOREML_CACHE_KEY);
+    metadata_props->set_value("");
+    model.SerializeToString(&out_string);
+    gsl::span<const std::byte> model_data{reinterpret_cast<const std::byte*>(out_string.data()), out_string.size()};
+    RunAndVerifyOutputsWithEP(model_data, CurrentTestName(),
+                              MakeCoreMLExecutionProvider("MLProgram", "CPUOnly", ORT_TSTR("./tmp")),
+                              feeds,
+                              verification_params);
+    // the cache folder name should be the first part of the subgraph name
+    ASSERT_EQ(std::filesystem::exists("./tmp/" + subgraph_name), true);
+  }
+  {
+    // test with an invalid model cache directory; caching shall be disabled
+    out_string.clear();
+    metadata_props->set_key(kCOREML_CACHE_KEY);
+    metadata_props->set_value("");
+    model.SerializeToString(&out_string);
+    gsl::span<const std::byte> model_data{reinterpret_cast<const std::byte*>(out_string.data()), out_string.size()};
+    RunAndVerifyOutputsWithEP(model_data, CurrentTestName(),
+                              MakeCoreMLExecutionProvider("MLProgram", "CPUOnly", ORT_TSTR("/")),
+                              feeds,
+                              verification_params);
+    // this folder can't be created
+    ASSERT_EQ(std::filesystem::exists("/" + subgraph_name), false);
+  }
+#else
+  model.SerializeToString(&out_string);
+  gsl::span<const std::byte> model_data{reinterpret_cast<const std::byte*>(out_string.data()), out_string.size()};
+  TestModelLoad(model_data, MakeCoreMLExecutionProvider(), ExpectedEPNodeAssignment::All);
+#endif
+}
+
 }  // namespace test
 }  // namespace onnxruntime