diff --git a/CUDADataFormats/Common/interface/HeterogeneousSoA.h b/CUDADataFormats/Common/interface/HeterogeneousSoA.h
new file mode 100644
index 0000000000000..3f2a551bc320f
--- /dev/null
+++ b/CUDADataFormats/Common/interface/HeterogeneousSoA.h
@@ -0,0 +1,189 @@
+#ifndef CUDADataFormatsCommonHeterogeneousSoA_H
+#define CUDADataFormatsCommonHeterogeneousSoA_H
+
+#include <cassert>
+
+#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
+
+// a heterogeneous unique pointer...
+template <typename T>
+class HeterogeneousSoA {
+public:
+  using Product = T;
+
+  HeterogeneousSoA() = default;  // make root happy
+  ~HeterogeneousSoA() = default;
+  HeterogeneousSoA(HeterogeneousSoA &&) = default;
+  HeterogeneousSoA &operator=(HeterogeneousSoA &&) = default;
+
+  explicit HeterogeneousSoA(cms::cuda::device::unique_ptr<T> &&p) : dm_ptr(std::move(p)) {}
+  explicit HeterogeneousSoA(cms::cuda::host::unique_ptr<T> &&p) : hm_ptr(std::move(p)) {}
+  explicit HeterogeneousSoA(std::unique_ptr<T> &&p) : std_ptr(std::move(p)) {}
+
+  auto const *get() const { return dm_ptr ? dm_ptr.get() : (hm_ptr ? hm_ptr.get() : std_ptr.get()); }
+
+  auto const &operator*() const { return *get(); }
+
+  auto const *operator->() const { return get(); }
+
+  auto *get() { return dm_ptr ? dm_ptr.get() : (hm_ptr ? hm_ptr.get() : std_ptr.get()); }
+
+  auto &operator*() { return *get(); }
+
+  auto *operator->() { return get(); }
+
+  // in reality valid only for GPU version...
+  cms::cuda::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const {
+    assert(dm_ptr);
+    auto ret = cms::cuda::make_host_unique<T>(stream);
+    cudaCheck(cudaMemcpyAsync(ret.get(), dm_ptr.get(), sizeof(T), cudaMemcpyDefault, stream));
+    return ret;
+  }
+
+private:
+  // a union wan't do it, a variant will not be more efficienct
+  cms::cuda::device::unique_ptr<T> dm_ptr;  //!
+  cms::cuda::host::unique_ptr<T> hm_ptr;    //!
+  std::unique_ptr<T> std_ptr;               //!
+};
+
+namespace cms {
+  namespace cudacompat {
+
+    struct GPUTraits {
+      template <typename T>
+      using unique_ptr = cms::cuda::device::unique_ptr<T>;
+
+      template <typename T>
+      static auto make_unique(cudaStream_t stream) {
+        return cms::cuda::make_device_unique<T>(stream);
+      }
+
+      template <typename T>
+      static auto make_unique(size_t size, cudaStream_t stream) {
+        return cms::cuda::make_device_unique<T>(size, stream);
+      }
+
+      template <typename T>
+      static auto make_host_unique(cudaStream_t stream) {
+        return cms::cuda::make_host_unique<T>(stream);
+      }
+
+      template <typename T>
+      static auto make_device_unique(cudaStream_t stream) {
+        return cms::cuda::make_device_unique<T>(stream);
+      }
+
+      template <typename T>
+      static auto make_device_unique(size_t size, cudaStream_t stream) {
+        return cms::cuda::make_device_unique<T>(size, stream);
+      }
+    };
+
+    struct HostTraits {
+      template <typename T>
+      using unique_ptr = cms::cuda::host::unique_ptr<T>;
+
+      template <typename T>
+      static auto make_unique(cudaStream_t stream) {
+        return cms::cuda::make_host_unique<T>(stream);
+      }
+
+      template <typename T>
+      static auto make_host_unique(cudaStream_t stream) {
+        return cms::cuda::make_host_unique<T>(stream);
+      }
+
+      template <typename T>
+      static auto make_device_unique(cudaStream_t stream) {
+        return cms::cuda::make_device_unique<T>(stream);
+      }
+
+      template <typename T>
+      static auto make_device_unique(size_t size, cudaStream_t stream) {
+        return cms::cuda::make_device_unique<T>(size, stream);
+      }
+    };
+
+    struct CPUTraits {
+      template <typename T>
+      using unique_ptr = std::unique_ptr<T>;
+
+      template <typename T>
+      static auto make_unique(cudaStream_t) {
+        return std::make_unique<T>();
+      }
+
+      template <typename T>
+      static auto make_unique(size_t size, cudaStream_t) {
+        return std::make_unique<T>(size);
+      }
+
+      template <typename T>
+      static auto make_host_unique(cudaStream_t) {
+        return std::make_unique<T>();
+      }
+
+      template <typename T>
+      static auto make_device_unique(cudaStream_t) {
+        return std::make_unique<T>();
+      }
+
+      template <typename T>
+      static auto make_device_unique(size_t size, cudaStream_t) {
+        return std::make_unique<T>(size);
+      }
+    };
+
+  }  // namespace cudacompat
+}  // namespace cms
+
+// a heterogeneous unique pointer (of a different sort) ...
+template <typename T, typename Traits>
+class HeterogeneousSoAImpl {
+public:
+  template <typename V>
+  using unique_ptr = typename Traits::template unique_ptr<V>;
+
+  HeterogeneousSoAImpl() = default;  // make root happy
+  ~HeterogeneousSoAImpl() = default;
+  HeterogeneousSoAImpl(HeterogeneousSoAImpl &&) = default;
+  HeterogeneousSoAImpl &operator=(HeterogeneousSoAImpl &&) = default;
+
+  explicit HeterogeneousSoAImpl(unique_ptr<T> &&p) : m_ptr(std::move(p)) {}
+  explicit HeterogeneousSoAImpl(cudaStream_t stream);
+
+  T const *get() const { return m_ptr.get(); }
+
+  T *get() { return m_ptr.get(); }
+
+  cms::cuda::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const;
+
+private:
+  unique_ptr<T> m_ptr;  //!
+};
+
+template <typename T, typename Traits>
+HeterogeneousSoAImpl<T, Traits>::HeterogeneousSoAImpl(cudaStream_t stream) {
+  m_ptr = Traits::template make_unique<T>(stream);
+}
+
+// in reality valid only for GPU version...
+template <typename T, typename Traits>
+cms::cuda::host::unique_ptr<T> HeterogeneousSoAImpl<T, Traits>::toHostAsync(cudaStream_t stream) const {
+  auto ret = cms::cuda::make_host_unique<T>(stream);
+  cudaCheck(cudaMemcpyAsync(ret.get(), get(), sizeof(T), cudaMemcpyDefault, stream));
+  return ret;
+}
+
+template <typename T>
+using HeterogeneousSoAGPU = HeterogeneousSoAImpl<T, cms::cudacompat::GPUTraits>;
+template <typename T>
+using HeterogeneousSoACPU = HeterogeneousSoAImpl<T, cms::cudacompat::CPUTraits>;
+template <typename T>
+using HeterogeneousSoAHost = HeterogeneousSoAImpl<T, cms::cudacompat::HostTraits>;
+
+#endif
diff --git a/CUDADataFormats/Common/interface/HostProduct.h b/CUDADataFormats/Common/interface/HostProduct.h
new file mode 100644
index 0000000000000..63a152298e42b
--- /dev/null
+++ b/CUDADataFormats/Common/interface/HostProduct.h
@@ -0,0 +1,29 @@
+#ifndef CUDADataFormatsCommonHostProduct_H
+#define CUDADataFormatsCommonHostProduct_H
+
+#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
+
+// a heterogeneous unique pointer...
+template <typename T>
+class HostProduct {
+public:
+  HostProduct() = default;  // make root happy
+  ~HostProduct() = default;
+  HostProduct(HostProduct&&) = default;
+  HostProduct& operator=(HostProduct&&) = default;
+
+  explicit HostProduct(cms::cuda::host::unique_ptr<T>&& p) : hm_ptr(std::move(p)) {}
+  explicit HostProduct(std::unique_ptr<T>&& p) : std_ptr(std::move(p)) {}
+
+  auto const* get() const { return hm_ptr ? hm_ptr.get() : std_ptr.get(); }
+
+  auto const& operator*() const { return *get(); }
+
+  auto const* operator->() const { return get(); }
+
+private:
+  cms::cuda::host::unique_ptr<T> hm_ptr;  //!
+  std::unique_ptr<T> std_ptr;             //!
+};
+
+#endif
diff --git a/CUDADataFormats/StdDictionaries/BuildFile.xml b/CUDADataFormats/StdDictionaries/BuildFile.xml
new file mode 100644
index 0000000000000..0a1542b3b05c6
--- /dev/null
+++ b/CUDADataFormats/StdDictionaries/BuildFile.xml
@@ -0,0 +1,5 @@
+<use name="rootcore"/>
+<use name="HeterogeneousCore/CUDAUtilities"/>
+<export>
+  <lib name="1"/>
+</export>
diff --git a/CUDADataFormats/StdDictionaries/src/classes.h b/CUDADataFormats/StdDictionaries/src/classes.h
new file mode 100644
index 0000000000000..9c4e1d810c3c2
--- /dev/null
+++ b/CUDADataFormats/StdDictionaries/src/classes.h
@@ -0,0 +1,4 @@
+#include <cstddef>
+#include <vector>
+
+#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h"
diff --git a/CUDADataFormats/StdDictionaries/src/classes_def.xml b/CUDADataFormats/StdDictionaries/src/classes_def.xml
new file mode 100644
index 0000000000000..7060ec91efa1a
--- /dev/null
+++ b/CUDADataFormats/StdDictionaries/src/classes_def.xml
@@ -0,0 +1,14 @@
+<lcgdict>
+    <class name="std::vector<std::byte, cms::cuda::HostAllocator<std::byte, 0>>" />
+
+    <class name="std::vector<uint8_t, cms::cuda::HostAllocator<uint8_t, 0>>" />
+    <class name="std::vector<uint16_t, cms::cuda::HostAllocator<uint16_t, 0>>" />
+    <class name="std::vector<uint32_t, cms::cuda::HostAllocator<uint32_t, 0>>" />
+
+    <class name="std::vector<int8_t, cms::cuda::HostAllocator<int8_t, 0>>" />
+    <class name="std::vector<int16_t, cms::cuda::HostAllocator<int16_t, 0>>" />
+    <class name="std::vector<int32_t, cms::cuda::HostAllocator<int32_t, 0>>" />
+
+    <class name="std::vector<float, cms::cuda::HostAllocator<float, 0>>" />
+    <class name="std::vector<double, cms::cuda::HostAllocator<double, 0>>" />
+</lcgdict>