openvinotoolkit
diff --git a/‎src/core/xml_util/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion b/‎src/core/xml_util/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp‎
Lines changed: 14 additions & 0 deletions b/‎src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp‎
Lines changed: 12 additions & 3 deletions b/‎src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp‎
Lines changed: 12 additions & 3 deletions
diff --git a/‎src/plugins/intel_npu/src/al/include/intel_npu/weights_pointer_attribute.hpp‎
Lines changed: 46 additions & 0 deletions b/‎src/plugins/intel_npu/src/al/include/intel_npu/weights_pointer_attribute.hpp‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp‎
Lines changed: 7 additions & 4 deletions b/‎src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp‎
Lines changed: 7 additions & 4 deletions
diff --git a/‎src/plugins/intel_npu/src/compiler_adapter/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎src/plugins/intel_npu/src/compiler_adapter/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/plugins/intel_npu/src/compiler_adapter/include/custom_stream_buffer.hpp‎
Lines changed: 5 additions & 0 deletions b/‎src/plugins/intel_npu/src/compiler_adapter/include/custom_stream_buffer.hpp‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp‎
Lines changed: 8 additions & 4 deletions b/‎src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp‎
Lines changed: 8 additions & 4 deletions
diff --git a/‎src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp‎
Lines changed: 0 additions & 82 deletions b/‎src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp‎
Lines changed: 0 additions & 82 deletions
diff --git a/‎src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp‎
Lines changed: 7 additions & 4 deletions b/‎src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp‎
Lines changed: 7 additions & 4 deletions
@@ -19,7 +19,7 @@ source_group("include" FILES ${PUBLIC_HEADERS})
 add_library(${TARGET_NAME} STATIC ${LIBRARY_SRC} ${PUBLIC_HEADERS})
 
 add_library(openvino::xml_util ALIAS ${TARGET_NAME})
-set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME openvino_xml_util)
+set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME xml_util)
 
 target_include_directories(${TARGET_NAME} PUBLIC
     $<BUILD_INTERFACE:${TARGET_INCLUDE_DIR}>
 
@@ -1426,4 +1426,18 @@ struct USE_BASE_MODEL_SERIALIZER final : OptionBase<USE_BASE_MODEL_SERIALIZER, b
     }
 };
 
+struct SERIALIZATION_WEIGHTS_SIZE_THRESHOLD final : OptionBase<SERIALIZATION_WEIGHTS_SIZE_THRESHOLD, size_t> {
+    static std::string_view key() {  // key is the name of the "serialization_weights_size_threshold" property
+        return ov::intel_npu::serialization_weights_size_threshold.name();
+    }
+
+    static size_t defaultValue() {  // the threshold is a size expressed in bytes
+        return 0;  // 0 bytes: no "ov::Constant" buffer can be smaller, so none falls under the threshold
+    }
+
+    static OptionMode mode() {  // the option is declared as a run-time one
+        return OptionMode::RunTime;
+    }
+};
+
 }  // namespace intel_npu
@@ -357,12 +357,21 @@ static constexpr ov::Property<bool> weightless_blob{"NPU_WEIGHTLESS_BLOB"};
  *
  * The base serializer is the OV implementation of the "XmlSerializer" without any extensions. All weights are copied in
  * a separate buffer. By turning this off, the NPU extension of the serializer is enabled. This allows optimizing the
- * process by avoiding copies into a separate weights buffer. However, this solution may be less reliable.
- *
- * @note This option doesn't actually do anything right now, it has been registered in advance.
+ * process by reducing the amount of weights that are copied into a separate buffer. However, this solution may be
+ * less reliable.
  */
 static constexpr ov::Property<bool> use_base_model_serializer{"NPU_USE_BASE_MODEL_SERIALIZER"};
 
+/**
+ * @brief [Only for NPU Plugin]
+ * Type: size_t. Default is 0.
+ *
+ * Effective only if "use_base_model_serializer" is set to false. All "ov::Constant" buffers whose size in bytes is
+ * smaller than this value are copied into a separate buffer. The rest of the weights are reconstructed at
+ * deserialization time using buffer pointers. With the default value of 0 no buffer is below the threshold.
+ */
+static constexpr ov::Property<size_t> serialization_weights_size_threshold{"NPU_SERIALIZATION_WEIGHTS_SIZE_THRESHOLD"};
+
 /**
  * @brief [Experimental, only for NPU Plugin]
  * Type: integer.
 
@@ -0,0 +1,46 @@
+// Copyright (C) 2025 Intel Corporation.
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string_view>
+
+#include "openvino/core/runtime_attribute.hpp"
+
+namespace intel_npu {
+
+/**
+ * @brief Attribute containing the memory address of a weights buffer and the size of the buffer in bytes.
+ * @details Used as part of the serialization/deserialization algorithms in order to allow processing models without
+ * copying weights.
+ * @note Only the numeric value of the address is stored. Nothing in this class copies or releases the buffer, so
+ * the buffer presumably has to stay alive for as long as the attribute is used (TODO: confirm against the callers).
+ */
+class WeightsPointerAttribute : public ov::RuntimeAttribute {
+public:
+    OPENVINO_RTTI("WeightsPointerAttribute", "0", RuntimeAttribute);
+
+    WeightsPointerAttribute() = delete;  // an attribute without an address and a size cannot be created
+
+    WeightsPointerAttribute(const void* pointer, const size_t size)  // "size" is the buffer size in bytes
+        : memory_pointer(reinterpret_cast<size_t>(pointer)),  // the address is kept as a plain integer
+          byte_size(size) {}
+
+    /**
+     * @brief Keys under which the address and the byte size are exposed by "visit_attributes".
+     * @note The names of the attributes have been kept short in order to save some memory (there may be a lot of
+     * "ov::Constant" nodes in a model). While deserializing, the name of the attribute ("WeightsPointerAttribute") is
+     * also used as part of identification in order to avoid collision.
+     * @note Both keys view string literals, therefore "data()" yields a null-terminated string.
+     */
+    static constexpr const std::string_view POINTER_KEY = "mp";
+    static constexpr const std::string_view BYTE_SIZE_KEY = "ms";
+
+    bool visit_attributes(ov::AttributeVisitor& visitor) override {  // hands both fields to the visitor
+        visitor.on_attribute(POINTER_KEY.data(), memory_pointer);
+        visitor.on_attribute(BYTE_SIZE_KEY.data(), byte_size);
+        return true;  // always reports success
+    }
+
+    size_t memory_pointer;  // address of the weights buffer, stored as an integer
+    size_t byte_size;       // size of the weights buffer, in bytes
+};
+
+}  // namespace intel_npu
@@ -4,14 +4,15 @@
 
 #pragma once
 
+#include "intel_npu/common/filtered_config.hpp"
 #include "intel_npu/common/igraph.hpp"
 
 namespace intel_npu {
 
 class ICompilerAdapter {
 public:
     virtual std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
-                                            const Config& config) const = 0;
+                                            const FilteredConfig& config) const = 0;
 
     /**
      * @brief Compiles the model, weights separation enabled.
@@ -27,7 +28,8 @@ class ICompilerAdapter {
      * "icompiler.hpp".
      * @return A "WeightlessGraph" type of object.
      */
-    virtual std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model, const Config& config) const = 0;
+    virtual std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model,
+                                              const FilteredConfig& config) const = 0;
 
     /**
      * @brief Parses the provided binary objects and returns a wrapper over the resulted L0 handles. The model may also
@@ -44,11 +46,12 @@ class ICompilerAdapter {
      */
     virtual std::shared_ptr<IGraph> parse(
         ov::Tensor mainBlob,
-        const Config& config,
+        const FilteredConfig& config,
         std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
         const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const = 0;
 
-    virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;
+    virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model,
+                                      const FilteredConfig& config) const = 0;
     virtual uint32_t get_version() const = 0;
     virtual std::vector<std::string> get_supported_options() const = 0;
     virtual bool is_option_supported(std::string optname) const = 0;
 
@@ -25,6 +25,7 @@ target_link_libraries(${TARGET_NAME}
     PRIVATE
         openvino::npu_al
         openvino::npu_common
+        openvino::xml_util
 )
 
 #
 
@@ -75,6 +75,11 @@ class writer_streambuf final : public std::streambuf {
         }
     }
 
+    /**
+     * @brief Moves the write cursor to the absolute position "pos", counted from "startIt".
+     * @param pos Offset of the new write position relative to the beginning of the output.
+     * @param which Sequence to reposition. This buffer only writes, so the request must include
+     * "std::ios_base::out".
+     * @return "pos" on success. "pos_type(off_type(-1))" if the output sequence was not requested or if "pos" is
+     * negative (this includes the failure value returned by a previous unsuccessful positioning call).
+     * @note No upper bound is checked here.
+     */
+    pos_type seekpos(pos_type pos, std::ios_base::openmode which) override {
+        const off_type offset = static_cast<off_type>(pos);
+        // Refuse requests that would leave the cursor before "startIt" or that do not target the output sequence.
+        if (!(which & std::ios_base::out) || offset < 0) {
+            return pos_type(off_type(-1));
+        }
+        writeIt = startIt + offset;
+        return pos;
+    }
+
     OutputIt startIt;
     OutputIt writeIt;
 };
 
@@ -10,6 +10,7 @@
 #include "intel_npu/config/config.hpp"
 #include "intel_npu/utils/logger/logger.hpp"
 #include "intel_npu/utils/zero/zero_init.hpp"
+#include "vcl_serializer.hpp"
 #include "ze_graph_ext_wrappers.hpp"
 
 namespace intel_npu {
@@ -18,17 +19,20 @@ class DriverCompilerAdapter final : public ICompilerAdapter {
 public:
     DriverCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct);
 
-    std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
+    std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
+                                    const FilteredConfig& config) const override;
 
-    std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model, const Config& config) const override;
+    std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model,
+                                      const FilteredConfig& config) const override;
 
     std::shared_ptr<IGraph> parse(
         ov::Tensor mainBlob,
-        const Config& config,
+        const FilteredConfig& config,
         std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
         const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const override;
 
-    ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
+    ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model,
+                              const FilteredConfig& config) const override;
 
     std::vector<std::string> get_supported_options() const override;
 
 
@@ -19,17 +19,20 @@ class PluginCompilerAdapter final : public ICompilerAdapter {
 public:
     PluginCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct);
 
-    std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
+    std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
+                                    const FilteredConfig& config) const override;
 
-    std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model, const Config& config) const override;
+    std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model,
+                                      const FilteredConfig& config) const override;
 
     std::shared_ptr<IGraph> parse(
         ov::Tensor mainBlob,
-        const Config& config,
+        const FilteredConfig& config,
         std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
         const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const override;
 
-    ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
+    ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model,
+                              const FilteredConfig& config) const override;
 
     std::vector<std::string> get_supported_options() const override;
Original file line number	Diff line number	Diff line change
`@@ -25,6 +25,7 @@ target_link_libraries(${TARGET_NAME}`
`25`	`25`	`PRIVATE`
`26`	`26`	`openvino::npu_al`
`27`	`27`	`openvino::npu_common`
	`28`	`+ openvino::xml_util`
`28`	`29`	`)`
`29`	`30`
`30`	`31`	`#`
Original file line number	Diff line number	Diff line change
`@@ -75,6 +75,11 @@ class writer_streambuf final : public std::streambuf {`
`75`	`75`	`}`
`76`	`76`	`}`
`77`	`77`
	`78`	`+ pos_type seekpos(pos_type pos, std::ios_base::openmode which) override {`
	`79`	`+ writeIt = startIt + pos;`
	`80`	`+ return pos;`
	`81`	`+ }`
	`82`	`+`
`78`	`83`	`OutputIt startIt;`
`79`	`84`	`OutputIt writeIt;`
`80`	`85`	`};`