[CVS-164415] WS Implement Model Exportation #29526

Draft · wants to merge 73 commits into base: master

Commits (73)
788dfcb
Adding weights I/O prefixes
razvanapetroaie Sep 23, 2024
b3c3574
Parsing I/O weights in the plugin-driver adapter
razvanapetroaie Sep 23, 2024
111aedd
Some more adjustments to acommodate the new I/O prefixes
razvanapetroaie Sep 23, 2024
73c7f1c
Adding the compileWS method
razvanapetroaie Sep 23, 2024
283bb45
Implemented the compileWS method for the plugin-driver adapter
razvanapetroaie Sep 23, 2024
35729af
Using the compileWS method instead of compile one
razvanapetroaie Sep 23, 2024
f5900d5
Enabling the WS path via a new config option
razvanapetroaie Sep 24, 2024
59074ef
Skipping the weights inputs inside the L0 infer request
razvanapetroaie Sep 24, 2024
f345d87
Adding the signatures for the "runInit" calls
razvanapetroaie Sep 24, 2024
eec036d
Switching to reusing the Executor ctor
razvanapetroaie Sep 24, 2024
f5bad76
Rewritten the "runInit" signatures
razvanapetroaie Sep 24, 2024
671de43
Extracting weights pointers
razvanapetroaie Sep 24, 2024
d3452cc
Starting the pipeline construction
razvanapetroaie Sep 27, 2024
e061e3f
Creating remote tensors for init input
razvanapetroaie Sep 30, 2024
0634e56
Setting init outputs as main inputs upon inference request creation
razvanapetroaie Sep 30, 2024
9e5dc3d
First version which is runnable by bapp
razvanapetroaie Oct 3, 2024
f329c21
Small adjustments required for acommodating the most recent compiled …
razvanapetroaie Oct 4, 2024
382893b
Some adjustments
razvanapetroaie Oct 14, 2024
a5b8ed2
Using topological sorting for weights identification
razvanapetroaie Oct 15, 2024
0bd457b
SIT accurate solution on resnet 50, but it's not a nice one
razvanapetroaie Oct 16, 2024
54df505
Adding time checks
razvanapetroaie Oct 21, 2024
9fc8f8b
Use modified model for Init
nikita-kud Oct 23, 2024
04ad63f
Using the "initModel" as a local variable
razvanapetroaie Oct 24, 2024
4fd3039
Moving the compiler object within the CompiledModel class
razvanapetroaie Oct 24, 2024
8a4a7b4
Solving build issues related to batching
razvanapetroaie Oct 28, 2024
f0ab437
Fixing stateful models
razvanapetroaie Oct 29, 2024
a47c4b2
Removing one redundant(?) runNgraphPasses call
razvanapetroaie Oct 29, 2024
32648fc
Moving "run_init" in the "create_infer_request" method
razvanapetroaie Oct 29, 2024
02aac22
Enabling model import
razvanapetroaie Oct 29, 2024
8f7b2a9
Merge commit 'caa1e6af13139692a34cf37787c9c79f949bcaaa' into EISW-138…
razvanapetroaie Nov 5, 2024
f08e886
Solving segfaults on linux
razvanapetroaie Nov 6, 2024
3552618
Forcing the CiP path even if the compiler type was set to "DRIVER"
razvanapetroaie Nov 6, 2024
5ea2cb7
Adding the BENCHMARK_INIT config option in order to allow running the…
razvanapetroaie Nov 6, 2024
dc54291
Merge commit '6c715998b36012c26d59e73fe86fca00fee761d3' into EISW-138…
razvanapetroaie Nov 7, 2024
8cc6098
Using plugin's metadata structures for the "runInit" call instead of …
razvanapetroaie Nov 13, 2024
3315ac0
Moving runInit inside the "CompiledModel" ctor by default. The behvai…
razvanapetroaie Nov 13, 2024
4c85b78
Adding time measurements for the memory copy operations performed on …
razvanapetroaie Nov 20, 2024
d31dac5
Changing "SEPARATE_WEIGHTS": bool -> "SEPARATE_WEIGHTS_VERSION": uint…
razvanapetroaie Nov 20, 2024
6c3f1fc
Using a single remote tensor for all init inputs
razvanapetroaie Nov 25, 2024
42cb2da
Using a single remote tensor for all init outputs
razvanapetroaie Nov 25, 2024
66387ad
Revert "Forcing the CiP path even if the compiler type was set to "DR…
razvanapetroaie Nov 25, 2024
bbcbcb2
Starting work on a CiD solution
razvanapetroaie Nov 25, 2024
b847a4b
Change the unit of several time measurements to microseconds
razvanapetroaie Nov 25, 2024
6d2b567
Merge commit '91a5518347a5e4e55f940b6e3c9b8f00626f09ff' into EISW-138…
razvanapetroaie Nov 26, 2024
b8bbd39
Add method compileWS_v3
nikita-kud Nov 28, 2024
e2a0246
Just replaced some assertions
razvanapetroaie Dec 4, 2024
8833006
Cloning a model for each compileWS_v3 call
razvanapetroaie Dec 4, 2024
8eb2ece
Import/export - changing the data type of several fields
razvanapetroaie Dec 6, 2024
984c3df
Adding a new config option used for notifying the compiler of the "co…
razvanapetroaie Dec 6, 2024
b1d1e25
Implementing WS v3 within the CiD path
razvanapetroaie Dec 6, 2024
9b688b6
Copying the OV passes in the WS CiD path
razvanapetroaie Dec 6, 2024
44f7a0d
Implementing the export path for the CiD branch
razvanapetroaie Dec 6, 2024
9edbe10
Solving some compilation warnings
razvanapetroaie Dec 10, 2024
6403ebd
Changing the default value of the separate weights config entry to 3
razvanapetroaie Dec 11, 2024
4f865c6
Merge commit '99d7cd4bc4492b81a99bc41e2d2469da1a929491' into EISW-138…
razvanapetroaie Dec 12, 2024
8a8ef57
Merge commit 'b982e19525eeecb0cf385591bd1ec182a781cb05' into EISW-138…
razvanapetroaie Jan 15, 2025
7d24ee5
Adding a time measurement for compiler->parse()
razvanapetroaie Jan 15, 2025
b066a03
Fixing the Windows build
razvanapetroaie Jan 20, 2025
147e2db
Add more logs
nikita-kud Dec 17, 2024
f2244fa
Merge commit '8d5f583bc7e56152440192806b3acda619a997fe' into EISW-138…
razvanapetroaie Jan 28, 2025
f8ae920
Change weights prefix
nikita-kud Jan 29, 2025
015dcb4
Merge commit '5aefcd9' into EISW-138991-weights-separation-poc
razvanapetroaie Mar 18, 2025
98f400a
Adding support for the MODEL_PTR property
razvanapetroaie Mar 18, 2025
282d62b
Implementing the import part
razvanapetroaie Mar 18, 2025
ab99a0c
Implementing the export path
razvanapetroaie Mar 18, 2025
4860fd9
Adding OV common passes on model importation
razvanapetroaie Mar 18, 2025
f98c858
Fixing the implementation
razvanapetroaie Mar 25, 2025
da8597a
Adding a workaround for the bapp tool
razvanapetroaie Mar 25, 2025
fd008d5
Adding the same workaround, but for the single image test tool
razvanapetroaie Mar 25, 2025
bade648
Removing a deprecated function
razvanapetroaie Mar 25, 2025
18ececd
Freeing the serialized IR before initializing the L0 graph
razvanapetroaie Mar 25, 2025
6ce7364
Adding reminder
razvanapetroaie Mar 25, 2025
7cecd0f
Changin the type of the MODEL_PTR config option
razvanapetroaie Mar 25, 2025
50 changes: 50 additions & 0 deletions samples/cpp/benchmark_app/main.cpp
@@ -51,6 +51,41 @@

// clang-format on

#if defined _WIN32

# include <psapi.h>
# include <windows.h>

int64_t getPeakMemoryUsage() {
    PROCESS_MEMORY_COUNTERS memCounters;
    GetProcessMemoryInfo(GetCurrentProcess(), &memCounters, sizeof(memCounters));
    // PeakWorkingSetSize is reported in bytes; convert to kB to match the Linux branch
    return memCounters.PeakWorkingSetSize / 1000;
}

#else

# include <fstream>
# include <regex>
# include <sstream>

int64_t getPeakMemoryUsage() {
size_t peakMemUsageKB = 0;

std::ifstream statusFile("/proc/self/status");
std::string line;
std::regex vmPeakRegex("VmPeak:");
std::smatch vmMatch;
while (std::getline(statusFile, line)) {
if (std::regex_search(line, vmMatch, vmPeakRegex)) {
std::istringstream iss(vmMatch.suffix());
iss >> peakMemUsageKB;
}
}
return static_cast<int64_t>(peakMemUsageKB);
}

#endif

namespace {

#if defined(_WIN32)
@@ -120,6 +155,9 @@ int64_t get_peak_memory_usage() {

#endif

constexpr std::string_view WEIGHTS_EXTENSION = ".bin";
constexpr std::string_view BLOB_EXTENSION = ".blob";

bool parse_and_check_command_line(int argc, char* argv[]) {
// ---------------------------Parsing and validating input
// arguments--------------------------------------
@@ -862,10 +900,22 @@ int main(int argc, char* argv[]) {
auto startTime = Time::now();

std::ifstream modelStream(FLAGS_m, std::ios_base::binary | std::ios_base::in);
auto importModelMemStart = getPeakMemoryUsage();
if (!modelStream.is_open()) {
throw std::runtime_error("Cannot open model file " + FLAGS_m);
}

device_config.insert(ov::hint::allow_auto_batching(false));

if (!device_config.count(ov::weights_path.name())) {
// Temporary solution: build the path to the weights by leveraging the path to the binary object
std::string weightsPath = FLAGS_m;
weightsPath.replace(weightsPath.size() - BLOB_EXTENSION.length(),
BLOB_EXTENSION.length(),
WEIGHTS_EXTENSION);
device_config.insert(ov::weights_path(weightsPath));
}

compiledModel = core.import_model(modelStream, device_name, device_config);
modelStream.close();

9 changes: 5 additions & 4 deletions samples/cpp/benchmark_app/utils.cpp
@@ -287,11 +287,12 @@ size_t get_batch_size(const benchmark_app::InputsInfo& inputs_info) {
size_t batch_size = 0;
for (auto& info : inputs_info) {
if (ov::layout::has_batch(info.second.layout)) {
if (batch_size == 0) {
    batch_size = info.second.batch();
} else if (batch_size != info.second.batch()) {
    batch_size = 0;
    break;
}
}
}
if (batch_size == 0) {
@@ -265,6 +265,72 @@ struct BATCH_MODE final : OptionBase<BATCH_MODE, ov::intel_npu::BatchMode> {
static std::string toString(const ov::intel_npu::BatchMode& val);
};

struct SEPARATE_WEIGHTS_VERSION final : OptionBase<SEPARATE_WEIGHTS_VERSION, uint32_t> {
static std::string_view key() {
return ov::intel_npu::separate_weights_version.name();
}

static uint32_t defaultValue() {
return 3;
}

static uint32_t parse(std::string_view val) {
    try {
        // Copy into a std::string: a string_view is not guaranteed to be null-terminated.
        const int val_i = std::stoi(std::string(val));
        if (val_i >= 0) {
            return static_cast<uint32_t>(val_i);
        }
        throw std::logic_error("negative value");
    } catch (const std::exception&) {
        OPENVINO_THROW("Wrong value of ",
                       val,
                       " for property key ",
                       ov::intel_npu::separate_weights_version.name(),
                       ". Expected a non-negative integer");
    }
}
};

struct BENCHMARK_INIT final : OptionBase<BENCHMARK_INIT, bool> {
static std::string_view key() {
return ov::intel_npu::benchmark_init.name();
}

static bool defaultValue() {
return false;
}
};

struct WS_COMPILE_CALL_NUMBER final : OptionBase<WS_COMPILE_CALL_NUMBER, uint32_t> {
static std::string_view key() {
return ov::intel_npu::ws_compile_call_number.name();
}

static uint32_t defaultValue() {
return 0;
}

static uint32_t parse(std::string_view val) {
    try {
        // Copy into a std::string: a string_view is not guaranteed to be null-terminated.
        const int val_i = std::stoi(std::string(val));
        if (val_i >= 0) {
            return static_cast<uint32_t>(val_i);
        }
        throw std::logic_error("negative value");
    } catch (const std::exception&) {
        OPENVINO_THROW("Wrong value of ",
                       val,
                       " for property key ",
                       ov::intel_npu::ws_compile_call_number.name(),
                       ". Expected a non-negative integer");
    }
}
};

} // namespace intel_npu

namespace ov {
@@ -219,6 +219,34 @@ struct WEIGHTS_PATH final : OptionBase<WEIGHTS_PATH, std::string> {
}
};

//
// MODEL_PTR
//
struct MODEL_PTR final : OptionBase<MODEL_PTR, std::shared_ptr<ov::Model>> {
static std::string_view key() {
return ov::hint::model.name();
}

static constexpr std::string_view getTypeName() {
return "std::shared_ptr<ov::Model>";
}

static std::shared_ptr<ov::Model> defaultValue() {
return nullptr;
}

static std::shared_ptr<ov::Model> parse(std::string_view) {
    return nullptr;
}

static std::string toString(const std::shared_ptr<ov::Model>&) {
    return "";
}

static OptionMode mode() {
return OptionMode::RunTime;
}
};

//
// ENABLE_CPU_PINNING
//
36 changes: 36 additions & 0 deletions src/plugins/intel_npu/src/al/include/intel_npu/icompiler.hpp
@@ -56,6 +56,42 @@ class ICompiler : public std::enable_shared_from_this<ICompiler> {
*/
virtual NetworkDescription compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;

/**
 * @brief TODO
 *
 * @param model
 * @param config
 * @return One NetworkDescription per compiled schedule
 */
virtual std::vector<std::shared_ptr<NetworkDescription>> compileWS_v1(const std::shared_ptr<ov::Model>& model,
const Config& config) const = 0;

/**
 * @brief Sequential compilation of Init(s) and Main
 *
 * "Stateful compiler" approach
 */
virtual std::shared_ptr<NetworkDescription> compileWS_v2(const std::shared_ptr<ov::Model>& model,
const Config& config) = 0;

/**
 * @brief Sequential compilation of Init(s) and Main
 *
 * "Stateless compiler" approach.
 * We want to get multiple Inits in the case of a large number of weights.
 * This allows us to build a pipeline:
 * Allocate W1 -> Init1
 * Allocate W2 -> Init2
 * Allocate W3 -> Init3
 *
 * This is why there is an additional parameter, callNumber:
 * the compiler should somehow understand which Init (or Main) to return,
 * since the plugin does not know the total number of Init schedules.
 */
virtual std::shared_ptr<NetworkDescription> compileWS_v3(const std::shared_ptr<ov::Model>& model,
const Config& config,
size_t callNumber) const = 0;

/**
* @brief Returns information about supported layers of the network passed
* @param model The model to be queried
@@ -64,6 +64,21 @@ struct IODescriptor {
*/
bool isShapeTensor = false;

/**
 * @brief Set if the descriptor corresponds to a weights input of an "Init" schedule (weights separation).
 */
bool isInitInputWeights = false;

/**
 * @brief Set if the descriptor corresponds to a weights output of an "Init" schedule (weights separation).
 */
bool isInitOutputWeights = false;

/**
 * @brief Set if the descriptor corresponds to a weights input of the "Main" schedule (weights separation).
 */
bool isMainInputWeights = false;

/**
* @brief Points towards a related descriptor.
* @details The related descriptors are defined by (state input, state output) or (dynamic tensor, shape tensor)
@@ -298,6 +298,15 @@ static constexpr ov::Property<ProfilingType> profiling_type{"NPU_PROFILING_TYPE"
*/
static constexpr ov::Property<BatchMode> batch_mode{"NPU_BATCH_MODE"};

/**
 * @brief Selects the version of the weights separation algorithm used during compilation.
 */
static constexpr ov::Property<uint32_t> separate_weights_version{"NPU_SEPARATE_WEIGHTS_VERSION"};

/**
 * @brief If enabled, allows benchmarking the "Init" schedule(s).
 */
static constexpr ov::Property<bool> benchmark_init{"NPU_BENCHMARK_INIT"};

/**
 * @brief Notifies the compiler which "compileWS" call is currently being made (stateless weights separation).
 */
static constexpr ov::Property<uint32_t> ws_compile_call_number{"WS_COMPILE_CALL_NUMBER"};

/**
* @brief [Only for NPU Plugin]
* Type: integer, default is 1
32 changes: 25 additions & 7 deletions src/plugins/intel_npu/src/al/include/intel_npu/prefix.hpp
@@ -5,24 +5,42 @@
#pragma once

#include <string>
#include <string_view>

namespace intel_npu {

//
// Prefixes used by the compiler to mark special I/O: ReadValue/Assign
// states, shape tensors, and weights-separation inputs/outputs.
//
constexpr std::string_view READVALUE_PREFIX = "vpux_ie_read_value_";
constexpr std::string_view ASSIGN_PREFIX = "vpux_ie_assign_";
constexpr std::string_view SHAPE_TENSOR_PREFIX = "vpux_ie_shape_";
constexpr std::string_view INIT_INPUT_WEIGHTS_PREFIX = "in_ov_";
constexpr std::string_view INIT_OUTPUT_WEIGHTS_PREFIX = "out_ov_";
constexpr std::string_view MAIN_INPUT_WEIGHTS_PREFIX = "out_ov_";

inline bool nameHasPrefix(std::string_view name, std::string_view prefix) {
    return !name.compare(0, prefix.length(), prefix);
}

inline bool isStateInputName(std::string_view name) {
return nameHasPrefix(name, READVALUE_PREFIX);
}
inline bool isStateOutputName(std::string_view name) {
return nameHasPrefix(name, ASSIGN_PREFIX);
}
inline bool isShapeTensorName(std::string_view name) {
return nameHasPrefix(name, SHAPE_TENSOR_PREFIX);
}

inline bool isInitInputWeightsName(std::string_view name) {
return nameHasPrefix(name, INIT_INPUT_WEIGHTS_PREFIX);
}
inline bool isInitOutputWeightsName(std::string_view name) {
return nameHasPrefix(name, INIT_OUTPUT_WEIGHTS_PREFIX);
}
inline bool isMainInputWeightsName(std::string_view name) {
    return nameHasPrefix(name, MAIN_INPUT_WEIGHTS_PREFIX);
}

} // namespace intel_npu
3 changes: 3 additions & 0 deletions src/plugins/intel_npu/src/al/src/config/common.cpp
@@ -22,6 +22,9 @@ void intel_npu::registerCommonOptions(OptionsDesc& desc) {
desc.add<CACHE_DIR>();
desc.add<LOADED_FROM_CACHE>();
desc.add<BATCH_MODE>();
desc.add<SEPARATE_WEIGHTS_VERSION>();
desc.add<BENCHMARK_INIT>();
desc.add<WS_COMPILE_CALL_NUMBER>();
}

//
1 change: 1 addition & 0 deletions src/plugins/intel_npu/src/al/src/config/runtime.cpp
@@ -27,6 +27,7 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) {
desc.add<WORKLOAD_TYPE>();
desc.add<TURBO>();
desc.add<WEIGHTS_PATH>();
desc.add<MODEL_PTR>();
desc.add<BYPASS_UMD_CACHING>();
desc.add<RUN_INFERENCES_SEQUENTIALLY>();
desc.add<DISABLE_VERSION_CHECK>();
11 changes: 10 additions & 1 deletion src/plugins/intel_npu/src/backend/include/zero_device.hpp
@@ -20,6 +20,15 @@ class ZeroDevice : public IDevice {
public:
ZeroDevice(const std::shared_ptr<ZeroInitStructsHolder>& initStructs);

/**
 * @brief Runs the given "Init" schedule in order to produce the processed weights required by the "Main" schedule.
 */
std::pair<std::unordered_map<std::string, std::shared_ptr<ov::ITensor>>, ov::SoPtr<ov::ITensor>> runInit(
const std::shared_ptr<IGraph>& initGraph,
const std::shared_ptr<const ov::Model>& model,
const ov::SoPtr<ov::IRemoteContext>& context,
const Config& config) override;

std::string getName() const override;
std::string getFullDeviceName() const override;
Uuid getUuid() const override;
@@ -45,7 +54,7 @@ class ZeroDevice : public IDevice {
const Config& config,
ov::intel_npu::TensorType tensor_type = ov::intel_npu::TensorType::BINDED,
ov::intel_npu::MemType mem_type = ov::intel_npu::MemType::L0_INTERNAL_BUF,
void* mem = nullptr) override;
const void* mem = nullptr) override;

ov::SoPtr<ov::ITensor> createHostTensor(
std::shared_ptr<ov::IRemoteContext> context,
@@ -35,6 +35,9 @@ class ZeroInferRequest final : public SyncInferRequest {

void get_result() override;

void set_weights_inputs(
const std::unordered_map<std::string, std::shared_ptr<ov::ITensor>>& weightsInputs) override;

private:
std::vector<ov::ProfilingInfo> get_profiling_info() const override;
std::vector<uint8_t> get_raw_profiling_data() const;