diff --git a/src/alpaka/AlpakaCore/CachingDeviceAllocator.h b/src/alpaka/AlpakaCore/CachingDeviceAllocator.h
new file mode 100644
index 000000000..9d1407ac9
--- /dev/null
+++ b/src/alpaka/AlpakaCore/CachingDeviceAllocator.h
@@ -0,0 +1,444 @@
+#ifndef HeterogenousCore_AlpakaUtilities_src_CachingDeviceAllocator_h
+#define HeterogenousCore_AlpakaUtilities_src_CachingDeviceAllocator_h
+
+/******************************************************************************
+ * Simple caching allocator for device memory allocations. The allocator is
+ * thread-safe and capable of managing device allocations on multiple devices.
+ ******************************************************************************/
+
+#include <cmath>
+#include <unordered_set>
+#include <memory>
+#include <mutex>
+
+#include "AlpakaCore/alpakaMemoryHelper.h"
+#include "AlpakaCore/deviceAllocatorStatus.h"
+
+/// cms::alpakatools::allocator namespace
+namespace cms::alpakatools::allocator {
+
+  /**
+ * \addtogroup UtilMgmt
+ * @{
+ */
+
+  /******************************************************************************
+ * CachingDeviceAllocator (host use)
+ ******************************************************************************/
+
+  /**
+ * \brief A simple caching allocator for device memory allocations.
+ *
+ * \par Overview
+ * The allocator is thread-safe and is capable of managing cached
+ * device allocations on multiple devices. It behaves as follows:
+ *
+ * \par
+ * - Allocations are categorized and cached by bin size.  A new allocation request of
+ *   a given size will only consider cached allocations within the corresponding bin.
+ * - Bin limits progress geometrically in accordance with the growth factor
+ *   \p bin_growth provided during construction.  Unused device allocations within
+ *   a larger bin cache are not reused for allocation requests that categorize to
+ *   smaller bin sizes.
+ * - Allocation requests below (\p bin_growth ^ \p min_bin) are rounded up to
+ *   (\p bin_growth ^ \p min_bin).
+ * - Allocations above (\p bin_growth ^ \p max_bin) are not rounded up to the nearest
+ *   bin and are simply freed when they are deallocated instead of being returned
+ *   to a bin-cache.
+ * - %If the total storage of cached allocations on a given device will exceed
+ *   \p max_cached_bytes, allocations for that device are simply freed when they are
+ *   deallocated instead of being returned to their bin-cache.
+ *
+ * \par
+ * For example, the default-constructed CachingDeviceAllocator is configured with:
+ * - \p bin_growth          = 8
+ * - \p min_bin             = 3
+ * - \p max_bin             = 7
+ * - \p max_cached_bytes    = 6MB - 1B
+ *
+ * \par
+ * which delineates five bin-sizes: 512B, 4KB, 32KB, 256KB, and 2MB
+ * and sets a maximum of 6,291,455 cached bytes per device
+ *
+ */
+  struct CachingDeviceAllocator {
+    //---------------------------------------------------------------------
+    // Constants
+    //---------------------------------------------------------------------
+
+    /// Out-of-bounds bin
+    static const unsigned int INVALID_BIN = (unsigned int)-1;
+
+    /// Invalid size
+    static const size_t INVALID_SIZE = (size_t)-1;
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS  // Do not document
+    //---------------------------------------------------------------------
+    // Type definitions and helper types
+    //---------------------------------------------------------------------
+
+    /**
+     * Descriptor for device memory allocations
+     */
+    struct BlockDescriptor {
+      ::ALPAKA_ACCELERATOR_NAMESPACE::AlpakaDeviceBuf<std::byte> buf;  // Device buffer
+      size_t bytes;                                                    // Size of allocation in bytes
+      size_t bytesRequested;  // CMS: requested allocation size (for monitoring only)
+      unsigned int bin;       // Bin enumeration
+
+      // Constructor (suitable for searching maps for a block, given a device and bytes)
+      BlockDescriptor(unsigned int block_bin,
+                      size_t block_bytes,
+                      size_t bytes_requested,
+                      const ::ALPAKA_ACCELERATOR_NAMESPACE::Device& device)
+          : buf{cms::alpakatools::allocDeviceBuf<std::byte>(device, 0u)},
+            bytes{block_bytes},
+            bytesRequested{bytes_requested},  // CMS
+            bin{block_bin} {}
+
+      // Constructor (suitable for searching maps for a specific block, given a device buffer)
+      BlockDescriptor(::ALPAKA_ACCELERATOR_NAMESPACE::AlpakaDeviceBuf<std::byte> buffer)
+          : buf{std::move(buffer)},
+            bytes{0},
+            bytesRequested{0},  // CMS
+            bin{INVALID_BIN} {}
+    };
+
+    struct BlockHashByBytes {
+      size_t operator()(const BlockDescriptor& descriptor) const {
+        size_t h1 = std::hash<int>{}(getIdxOfDev(alpaka::getDev(descriptor.buf)));
+        size_t h2 = std::hash<size_t>{}(descriptor.bytes);
+        return h1 ^ (h2 << 1);
+      }
+    };
+
+    struct BlockEqualByBytes {
+      bool operator()(const BlockDescriptor& a, const BlockDescriptor& b) const {
+        return (getIdxOfDev(alpaka::getDev(a.buf)) == getIdxOfDev(alpaka::getDev(b.buf)) && a.bytes == b.bytes);
+      }
+    };
+
+    struct BlockHashByPtr {
+      size_t operator()(const BlockDescriptor& descriptor) const {
+        size_t h1 = std::hash<int>{}(getIdxOfDev(alpaka::getDev(descriptor.buf)));
+        size_t h2 = std::hash<const std::byte*>{}(alpaka::getPtrNative(descriptor.buf));
+        return h1 ^ (h2 << 1);
+      }
+    };
+
+    struct BlockEqualByPtr {
+      bool operator()(const BlockDescriptor& a, const BlockDescriptor& b) const {
+        return (getIdxOfDev(alpaka::getDev(a.buf)) == getIdxOfDev(alpaka::getDev(b.buf)) &&
+                alpaka::getPtrNative(a.buf) == alpaka::getPtrNative(b.buf));
+      }
+    };
+
+    // CMS: Moved TotalBytes to deviceAllocatorStatus.h
+
+    /// Set type for cached blocks (hashed by size)
+    using CachedBlocks = std::unordered_multiset<BlockDescriptor, BlockHashByBytes, BlockEqualByBytes>;
+
+    /// Set type for live blocks (hashed by ptr)
+    using BusyBlocks = std::unordered_multiset<BlockDescriptor, BlockHashByPtr, BlockEqualByPtr>;
+
+    // CMS: Moved DeviceCachedBytes to deviceAllocatorStatus.h
+
+    //---------------------------------------------------------------------
+    // Utility functions
+    //---------------------------------------------------------------------
+
+    /**
+     * Integer pow function for unsigned base and exponent
+     */
+    static constexpr unsigned int IntPow(unsigned int base, unsigned int exp) {
+      unsigned int retval = 1;
+      while (exp > 0) {
+        if (exp & 1) {
+          retval = retval * base;  // multiply the result by the current base
+        }
+        base = base * base;  // square the base
+        exp = exp >> 1;      // divide the exponent in half
+      }
+      return retval;
+    }
+
+    /**
+     * Round up to the nearest power-of
+     */
+    std::pair<unsigned int, size_t> NearestPowerOf(unsigned int base, size_t value) {
+      unsigned int power = 0;
+      size_t rounded_bytes = 1;
+
+      if (value * base < value) {
+        // Overflow
+        power = sizeof(size_t) * 8;
+        rounded_bytes = size_t(0) - 1;
+      } else {
+        while (rounded_bytes < value) {
+          rounded_bytes *= base;
+          power++;
+        }
+      }
+
+      return {power, rounded_bytes};
+    }
+
+    //---------------------------------------------------------------------
+    // Fields
+    //---------------------------------------------------------------------
+
+    // CMS: use std::mutex instead of cub::Mutex, declare mutable
+    mutable std::mutex mutex;  /// Mutex for thread-safety
+
+    unsigned int bin_growth;  /// Geometric growth factor for bin-sizes
+    unsigned int min_bin;     /// Minimum bin enumeration
+    unsigned int max_bin;     /// Maximum bin enumeration
+
+    size_t min_bin_bytes;     /// Minimum bin size
+    size_t max_bin_bytes;     /// Maximum bin size
+    size_t max_cached_bytes;  /// Maximum aggregate cached bytes per device
+
+    bool debug;  /// Whether or not to print (de)allocation events to stdout
+
+    DeviceCachedBytes cached_bytes;  /// Map of device to aggregate cached bytes on that device
+    CachedBlocks cached_blocks;      /// Set of cached device allocations available for reuse
+    BusyBlocks live_blocks;          /// Set of live device allocations currently in use
+
+#endif  // DOXYGEN_SHOULD_SKIP_THIS
+
+    //---------------------------------------------------------------------
+    // Methods
+    //---------------------------------------------------------------------
+
+    /**
+     * \brief Constructor.
+     */
+    CachingDeviceAllocator(
+        unsigned int bin_growth,                 ///< Geometric growth factor for bin-sizes
+        unsigned int min_bin = 1,                ///< Minimum bin (default is bin_growth ^ 1)
+        unsigned int max_bin = INVALID_BIN,      ///< Maximum bin (default is no max bin)
+        size_t max_cached_bytes = INVALID_SIZE,  ///< Maximum aggregate cached bytes per device (default is no limit)
+        bool debug = false)  ///< Whether or not to print (de)allocation events to stdout (default is no stderr output)
+        : bin_growth(bin_growth),
+          min_bin(min_bin),
+          max_bin(max_bin),
+          min_bin_bytes(IntPow(bin_growth, min_bin)),
+          max_bin_bytes(IntPow(bin_growth, max_bin)),
+          max_cached_bytes(max_cached_bytes),
+          debug(debug) {}
+
+    /**
+     * \brief Default constructor.
+     *
+     * Configured with:
+     * \par
+     * - \p bin_growth          = 8
+     * - \p min_bin             = 3
+     * - \p max_bin             = 7
+     * - \p max_cached_bytes    = (\p bin_growth ^ \p max_bin) * 3) - 1 = 6,291,455 bytes
+     *
+     * which delineates five bin-sizes: 512B, 4KB, 32KB, 256KB, and 2MB and
+     * sets a maximum of 6,291,455 cached bytes per device
+     */
+    CachingDeviceAllocator(/*bool skip_cleanup = false, */ bool debug = false)
+        : bin_growth(8),
+          min_bin(3),
+          max_bin(7),
+          min_bin_bytes(IntPow(bin_growth, min_bin)),
+          max_bin_bytes(IntPow(bin_growth, max_bin)),
+          max_cached_bytes((max_bin_bytes * 3) - 1),
+          debug(debug) {}
+
+    /**
+     * \brief Sets the limit on the number bytes this allocator is allowed to cache per device.
+     *
+     * Changing the ceiling of cached bytes does not cause any allocations (in-use or
+     * cached-in-reserve) to be freed.  See \p FreeAllCached().
+     */
+    void SetMaxCachedBytes(size_t max_cached_bytes) {
+      // Lock
+      std::unique_lock mutex_locker(mutex);
+
+      if (debug) {
+        // CMS: use raw printf
+        printf("Changing max_cached_bytes (%lld -> %lld)\n",
+               (long long)this->max_cached_bytes,
+               (long long)max_cached_bytes);
+      }
+
+      this->max_cached_bytes = max_cached_bytes;
+
+      mutex_locker.unlock();
+    }
+
+    /**
+     * \brief Provides a suitable allocation of device memory for the given size on the specified device.
+     *
+     * Once freed, the allocation becomes available immediately for reuse.
+     */
+    auto DeviceAllocate(size_t bytes,  ///< [in] Minimum no. of bytes for the allocation
+                        const ::ALPAKA_ACCELERATOR_NAMESPACE::Device&
+                            device)  ///< [in] The device to be associated with this allocation
+    {
+      std::unique_lock<std::mutex> mutex_locker(mutex, std::defer_lock);
+      int device_idx = getIdxOfDev(device);
+
+      // Create a block descriptor for the requested allocation
+      bool found = false;
+      auto [bin, bin_bytes] = NearestPowerOf(bin_growth, bytes);
+      BlockDescriptor search_key{bin, bin_bytes, bytes, device};
+
+      if (search_key.bin > max_bin) {
+        // Bin is greater than our maximum bin: allocate the request
+        // exactly and give out-of-bounds bin.  It will not be cached
+        // for reuse when returned.
+        search_key.bin = INVALID_BIN;
+        search_key.bytes = bytes;
+      } else {
+        // Search for a suitable cached allocation: lock
+        mutex_locker.lock();
+
+        if (search_key.bin < min_bin) {
+          // Bin is less than minimum bin: round up
+          search_key.bin = min_bin;
+          search_key.bytes = min_bin_bytes;
+        }
+
+        // Find a cached block on the same device in the same bin
+        auto block_itr = cached_blocks.find(search_key);
+        if (block_itr != cached_blocks.end()) {
+          // Reuse existing cache block.  Insert into live blocks.
+          found = true;
+          search_key = *block_itr;
+          live_blocks.insert(search_key);
+
+          // Remove from free blocks
+          cached_bytes[device_idx].free -= search_key.bytes;
+          cached_bytes[device_idx].live += search_key.bytes;
+          cached_bytes[device_idx].liveRequested += search_key.bytesRequested;  // CMS
+
+          if (debug) {
+            // CMS: improved debug message
+            // CMS: use raw printf
+            printf("\tDevice %d reused cached block at %p (%lld bytes).\n",
+                   device_idx,
+                   alpaka::getPtrNative(search_key.buf),
+                   (long long)search_key.bytes);
+          }
+
+          cached_blocks.erase(block_itr);
+        }
+        // Done searching: unlock
+        mutex_locker.unlock();
+      }
+
+      // Allocate the block if necessary
+      if (!found) {
+        search_key.buf = alpaka::allocBuf<std::byte, alpaka_common::Idx>(
+            device, static_cast<alpaka_common::Extent>(search_key.bytes));
+#if CUDA_VERSION >= 11020
+        alpaka::prepareForAsyncCopy(search_key.buf);
+#endif
+
+        // Insert into live blocks
+        mutex_locker.lock();
+        live_blocks.insert(search_key);
+        cached_bytes[device_idx].live += search_key.bytes;
+        cached_bytes[device_idx].liveRequested += search_key.bytesRequested;  // CMS
+        mutex_locker.unlock();
+
+        if (debug) {
+          // CMS: improved debug message
+          // CMS: use raw printf
+          printf("\tDevice %d allocated new device block at %p (%lld bytes).\n",
+                 device_idx,
+                 alpaka::getPtrNative(search_key.buf),
+                 (long long)search_key.bytes);
+        }
+      }
+
+      if (debug) {
+        // CMS: use raw printf
+        printf("\t\t%lld available blocks cached (%lld bytes), %lld live blocks outstanding(%lld bytes).\n",
+               (long long)cached_blocks.size(),
+               (long long)cached_bytes[device_idx].free,
+               (long long)live_blocks.size(),
+               (long long)cached_bytes[device_idx].live);
+      }
+
+      return search_key.buf;
+    }
+
+    /**
+     * \brief Frees a live allocation of device memory on the specified device, returning it to the allocator.
+     */
+    void DeviceFree(const ::ALPAKA_ACCELERATOR_NAMESPACE::AlpakaDeviceBuf<std::byte>& buf) {
+      // Lock
+      std::unique_lock<std::mutex> mutex_locker(mutex);
+
+      bool recached = false;
+      int device_idx = getIdxOfDev(alpaka::getDev(buf));
+      // Find corresponding block descriptor
+      BlockDescriptor search_key{buf};
+      auto block_itr = live_blocks.find(search_key);
+      if (block_itr != live_blocks.end()) {
+        // Remove from live blocks
+        search_key = *block_itr;
+        live_blocks.erase(block_itr);
+        cached_bytes[device_idx].live -= search_key.bytes;
+        cached_bytes[device_idx].liveRequested -= search_key.bytesRequested;  // CMS
+
+        // Keep the returned allocation if bin is valid and we won't exceed the max cached threshold
+        if ((search_key.bin != INVALID_BIN) && (cached_bytes[device_idx].free + search_key.bytes <= max_cached_bytes)) {
+          recached = true;
+          // Insert returned allocation into free blocks
+          cached_blocks.insert(search_key);
+          cached_bytes[device_idx].free += search_key.bytes;
+
+          if (debug) {
+            // CMS: improved debug message
+            // CMS: use raw printf
+            printf(
+                "\tDevice %d returned %lld bytes at %p.\n\t\t %lld available "
+                "blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n",
+                device_idx,
+                (long long)search_key.bytes,
+                alpaka::getPtrNative(search_key.buf),
+                (long long)cached_blocks.size(),
+                (long long)cached_bytes[device_idx].free,
+                (long long)live_blocks.size(),
+                (long long)cached_bytes[device_idx].live);
+          }
+        }
+      }
+
+      // Unlock
+      mutex_locker.unlock();
+
+      if (!recached and debug) {
+        // CMS: improved debug message
+        printf(
+            "\tDevice %d freed %lld bytes at %p.\n\t\t  %lld available "
+            "blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n",
+            device_idx,
+            (long long)search_key.bytes,
+            alpaka::getPtrNative(search_key.buf),
+            (long long)cached_blocks.size(),
+            (long long)cached_bytes[device_idx].free,
+            (long long)live_blocks.size(),
+            (long long)cached_bytes[device_idx].live);
+      }
+    }
+
+    // CMS: give access to cache allocation status
+    DeviceCachedBytes CacheStatus() const {
+      std::unique_lock mutex_locker(mutex);
+      return cached_bytes;
+    }
+  };
+
+  /** @} */  // end group UtilMgmt
+
+}  // namespace cms::alpakatools::allocator
+
+#endif
diff --git a/src/alpaka/AlpakaCore/CachingHostAllocator.h b/src/alpaka/AlpakaCore/CachingHostAllocator.h
new file mode 100644
index 000000000..17d63f3a5
--- /dev/null
+++ b/src/alpaka/AlpakaCore/CachingHostAllocator.h
@@ -0,0 +1,398 @@
+#ifndef HeterogenousCore_AlpakaUtilities_src_CachingHostAllocator_h
+#define HeterogenousCore_AlpakaUtilities_src_CachingHostAllocator_h
+
+/******************************************************************************
+ * Simple caching allocator for pinned host memory allocations. The allocator is
+ * thread-safe.
+ ******************************************************************************/
+
+#include <cmath>
+#include <memory>
+#include <unordered_set>
+#include <mutex>
+
+#include "AlpakaCore/alpakaMemoryHelper.h"
+#include "AlpakaCore/deviceAllocatorStatus.h"
+
+/// cms::alpaka::allocator namespace
+namespace cms::alpakatools::allocator {
+
+  /**
+ * \addtogroup UtilMgmt
+ * @{
+ */
+
+  /******************************************************************************
+ * CachingHostAllocator (host use)
+ ******************************************************************************/
+
+  /**
+ * \brief A simple caching allocator pinned host memory allocations.
+ *
+ * \par Overview
+ * The allocator is thread-safe.  It behaves as follows:
+ *
+ * \par
+ * - Allocations are categorized and cached by bin size.  A new allocation request of
+ *   a given size will only consider cached allocations within the corresponding bin.
+ * - Bin limits progress geometrically in accordance with the growth factor
+ *   \p bin_growth provided during construction.  Unused host allocations within
+ *   a larger bin cache are not reused for allocation requests that categorize to
+ *   smaller bin sizes.
+ * - Allocation requests below (\p bin_growth ^ \p min_bin) are rounded up to
+ *   (\p bin_growth ^ \p min_bin).
+ * - Allocations above (\p bin_growth ^ \p max_bin) are not rounded up to the nearest
+ *   bin and are simply freed when they are deallocated instead of being returned
+ *   to a bin-cache.
+ * - %If the total storage of cached allocations  will exceed
+ *   \p max_cached_bytes, allocations are simply freed when they are
+ *   deallocated instead of being returned to their bin-cache.
+ *
+ * \par
+ * For example, the default-constructed CachingHostAllocator is configured with:
+ * - \p bin_growth          = 8
+ * - \p min_bin             = 3
+ * - \p max_bin             = 7
+ * - \p max_cached_bytes    = 6MB - 1B
+ *
+ * \par
+ * which delineates five bin-sizes: 512B, 4KB, 32KB, 256KB, and 2MB
+ * and sets a maximum of 6,291,455 cached bytes
+ *
+ */
+
+  struct CachingHostAllocator {
+    //---------------------------------------------------------------------
+    // Constants
+    //---------------------------------------------------------------------
+
+    /// Out-of-bounds bin
+    static const unsigned int INVALID_BIN = (unsigned int)-1;
+
+    /// Invalid size
+    static const size_t INVALID_SIZE = (size_t)-1;
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS  // Do not document
+
+    //---------------------------------------------------------------------
+    // Type definitions and helper types
+    //---------------------------------------------------------------------
+
+    /**
+     * Descriptor for pinned host memory allocations
+     */
+    struct BlockDescriptor {
+      alpaka_common::AlpakaHostBuf<std::byte> buf;  // Host buffer
+      size_t bytes;                                 // Size of allocation in bytes
+      unsigned int bin;                             // Bin enumeration
+
+      // Constructor (suitable for searching maps for a specific block, given a host buffer)
+      BlockDescriptor(alpaka_common::AlpakaHostBuf<std::byte> buffer)
+          : buf{std::move(buffer)}, bytes{0}, bin{INVALID_BIN} {}
+
+      // Constructor (suitable for searching maps for a block, given the bytes)
+      BlockDescriptor(unsigned int block_bin, size_t block_bytes)
+          : buf{allocHostBuf<std::byte>(0u)}, bytes{block_bytes}, bin{block_bin} {}
+    };
+
+    struct BlockHashByBytes {
+      size_t operator()(const BlockDescriptor& descriptor) const { return std::hash<size_t>{}(descriptor.bytes); }
+    };
+
+    struct BlockEqualByBytes {
+      bool operator()(const BlockDescriptor& a, const BlockDescriptor& b) const { return (a.bytes == b.bytes); }
+    };
+
+    struct BlockHashByPtr {
+      size_t operator()(const BlockDescriptor& descriptor) const {
+        return std::hash<const std::byte*>{}(alpaka::getPtrNative(descriptor.buf));
+      }
+    };
+
+    struct BlockEqualByPtr {
+      bool operator()(const BlockDescriptor& a, const BlockDescriptor& b) const {
+        return (alpaka::getPtrNative(a.buf) == alpaka::getPtrNative(b.buf));
+      }
+    };
+
+    /// Set type for cached blocks (hashed by size)
+    using CachedBlocks = std::unordered_multiset<BlockDescriptor, BlockHashByBytes, BlockEqualByBytes>;
+
+    /// Set type for live blocks (hashed by ptr)
+    using BusyBlocks = std::unordered_multiset<BlockDescriptor, BlockHashByPtr, BlockEqualByPtr>;
+
+    //---------------------------------------------------------------------
+    // Utility functions
+    //---------------------------------------------------------------------
+
+    /**
+     * Integer pow function for unsigned base and exponent
+     */
+    static unsigned int IntPow(unsigned int base, unsigned int exp) {
+      unsigned int retval = 1;
+      while (exp > 0) {
+        if (exp & 1) {
+          retval = retval * base;  // multiply the result by the current base
+        }
+        base = base * base;  // square the base
+        exp = exp >> 1;      // divide the exponent in half
+      }
+      return retval;
+    }
+
+    /**
+     * Round up to the nearest power-of
+     */
+    std::pair<unsigned int, size_t> NearestPowerOf(unsigned int base, size_t value) {
+      unsigned int power = 0;
+      size_t rounded_bytes = 1;
+
+      if (value * base < value) {
+        // Overflow
+        power = sizeof(size_t) * 8;
+        rounded_bytes = size_t(0) - 1;
+      } else {
+        while (rounded_bytes < value) {
+          rounded_bytes *= base;
+          power++;
+        }
+      }
+
+      return {power, rounded_bytes};
+    }
+
+    //---------------------------------------------------------------------
+    // Fields
+    //---------------------------------------------------------------------
+
+    std::mutex mutex;  /// Mutex for thread-safety
+
+    unsigned int bin_growth;  /// Geometric growth factor for bin-sizes
+    unsigned int min_bin;     /// Minimum bin enumeration
+    unsigned int max_bin;     /// Maximum bin enumeration
+
+    size_t min_bin_bytes;     /// Minimum bin size
+    size_t max_bin_bytes;     /// Maximum bin size
+    size_t max_cached_bytes;  /// Maximum aggregate cached bytes
+
+    bool debug;  /// Whether or not to print (de)allocation events to stdout
+
+    TotalBytes cached_bytes;     /// Aggregate cached bytes
+    CachedBlocks cached_blocks;  /// Set of cached pinned host allocations available for reuse
+    BusyBlocks live_blocks;      /// Set of live pinned host allocations currently in use
+
+#endif  // DOXYGEN_SHOULD_SKIP_THIS
+
+    //---------------------------------------------------------------------
+    // Methods
+    //---------------------------------------------------------------------
+
+    /**
+     * \brief Constructor.
+     */
+    CachingHostAllocator(
+        unsigned int bin_growth,                 ///< Geometric growth factor for bin-sizes
+        unsigned int min_bin = 1,                ///< Minimum bin (default is bin_growth ^ 1)
+        unsigned int max_bin = INVALID_BIN,      ///< Maximum bin (default is no max bin)
+        size_t max_cached_bytes = INVALID_SIZE,  ///< Maximum aggregate cached bytes (default is no limit)
+        bool debug = false)  ///< Whether or not to print (de)allocation events to stdout (default is no stderr output)
+        : bin_growth(bin_growth),
+          min_bin(min_bin),
+          max_bin(max_bin),
+          min_bin_bytes(IntPow(bin_growth, min_bin)),
+          max_bin_bytes(IntPow(bin_growth, max_bin)),
+          max_cached_bytes(max_cached_bytes),
+          debug(debug) {}
+
+    /**
+     * \brief Default constructor.
+     *
+     * Configured with:
+     * \par
+     * - \p bin_growth          = 8
+     * - \p min_bin             = 3
+     * - \p max_bin             = 7
+     * - \p max_cached_bytes    = (\p bin_growth ^ \p max_bin) * 3) - 1 = 6,291,455 bytes
+     *
+     * which delineates five bin-sizes: 512B, 4KB, 32KB, 256KB, and 2MB and
+     * sets a maximum of 6,291,455 cached bytes
+     */
+    CachingHostAllocator(bool debug = false)
+        : bin_growth(8),
+          min_bin(3),
+          max_bin(7),
+          min_bin_bytes(IntPow(bin_growth, min_bin)),
+          max_bin_bytes(IntPow(bin_growth, max_bin)),
+          max_cached_bytes((max_bin_bytes * 3) - 1),
+          debug(debug) {}
+
+    /**
+     * \brief Sets the limit on the number bytes this allocator is allowed to cache
+     *
+     * Changing the ceiling of cached bytes does not cause any allocations (in-use or
+     * cached-in-reserve) to be freed.  See \p FreeAllCached().
+     */
+    void SetMaxCachedBytes(size_t max_cached_bytes) {
+      // Lock
+      std::unique_lock mutex_locker(mutex);
+
+      if (debug) {
+        printf("Changing max_cached_bytes (%lld -> %lld)\n",
+               (long long)this->max_cached_bytes,
+               (long long)max_cached_bytes);
+      }
+
+      this->max_cached_bytes = max_cached_bytes;
+
+      // Unlock (redundant, kept for style uniformity)
+      mutex_locker.unlock();
+    }
+
+    /**
+     * \brief Provides a suitable allocation of pinned host memory for the given size.
+     *
+     * Once freed, the allocation becomes available immediately for reuse.
+     */
+    auto HostAllocate(size_t bytes  ///< [in] Minimum no. of bytes for the allocation
+    ) {
+      std::unique_lock<std::mutex> mutex_locker(mutex, std::defer_lock);
+
+      // Create a block descriptor for the requested allocation
+      bool found = false;
+      auto [bin, bin_bytes] = NearestPowerOf(bin_growth, bytes);
+      BlockDescriptor search_key{bin, bin_bytes};
+
+      if (search_key.bin > max_bin) {
+        // Bin is greater than our maximum bin: allocate the request
+        // exactly and give out-of-bounds bin.  It will not be cached
+        // for reuse when returned.
+        search_key.bin = INVALID_BIN;
+        search_key.bytes = bytes;
+      } else {
+        // Search for a suitable cached allocation: lock
+        mutex_locker.lock();
+
+        if (search_key.bin < min_bin) {
+          // Bin is less than minimum bin: round up
+          search_key.bin = min_bin;
+          search_key.bytes = min_bin_bytes;
+        }
+
+        // Find a cached block in the same bin
+        auto block_itr = cached_blocks.find(search_key);
+        if (block_itr != cached_blocks.end()) {
+          // Reuse existing cache block.  Insert into live blocks.
+          found = true;
+          search_key = *block_itr;
+
+          live_blocks.insert(search_key);
+
+          // Remove from free blocks
+          cached_bytes.free -= search_key.bytes;
+          cached_bytes.live += search_key.bytes;
+
+          if (debug) {
+            printf("\tHost reused cached block at %p (%lld bytes).\n",
+                   alpaka::getPtrNative(search_key.buf),
+                   (long long)search_key.bytes);
+          }
+
+          cached_blocks.erase(block_itr);
+        }
+
+        // Done searching: unlock
+        mutex_locker.unlock();
+      }
+
+      // Allocate the block if necessary
+      if (!found) {
+        // TODO: eventually support allocation flags
+        search_key.buf = allocHostBuf<std::byte>(static_cast<alpaka_common::Extent>(search_key.bytes));
+#if CUDA_VERSION >= 11020
+        alpaka::prepareForAsyncCopy(search_key.buf);
+#endif
+
+        // Insert into live blocks
+        mutex_locker.lock();
+        live_blocks.insert(search_key);
+        cached_bytes.live += search_key.bytes;
+        mutex_locker.unlock();
+
+        if (debug) {
+          printf("\tHost allocated new host block at %p (%lld bytes).\n",
+                 alpaka::getPtrNative(search_key.buf),
+                 (long long)search_key.bytes);
+        }
+      }
+
+      if (debug) {
+        printf("\t\t%lld available blocks cached (%lld bytes), %lld live blocks outstanding(%lld bytes).\n",
+               (long long)cached_blocks.size(),
+               (long long)cached_bytes.free,
+               (long long)live_blocks.size(),
+               (long long)cached_bytes.live);
+      }
+
+      return search_key.buf;
+    }
+
+    /**
+     * \brief Frees a live allocation of pinned host memory, returning it to the allocator.
+     *
+     * Once freed, the allocation becomes available immediately for reuse.
+     */
+    void HostFree(const alpaka_common::AlpakaHostBuf<std::byte>& buf) {
+      // Lock
+      std::unique_lock<std::mutex> mutex_locker(mutex);
+
+      bool recached = false;
+      // Find corresponding block descriptor
+      BlockDescriptor search_key{buf};
+      auto block_itr = live_blocks.find(search_key);
+      if (block_itr != live_blocks.end()) {
+        // Remove from live blocks
+        search_key = *block_itr;
+        live_blocks.erase(block_itr);
+        cached_bytes.live -= search_key.bytes;
+
+        // Keep the returned allocation if bin is valid and we won't exceed the max cached threshold
+        if ((search_key.bin != INVALID_BIN) && (cached_bytes.free + search_key.bytes <= max_cached_bytes)) {
+          recached = true;
+          // Insert returned allocation into free blocks
+          cached_blocks.insert(search_key);
+          cached_bytes.free += search_key.bytes;
+
+          if (debug) {
+            printf(
+                "\tHost returned %lld bytes.\n\t\t %lld "
+                "available blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n",
+                (long long)search_key.bytes,
+                (long long)cached_blocks.size(),
+                (long long)cached_bytes.free,
+                (long long)live_blocks.size(),
+                (long long)cached_bytes.live);
+          }
+        }
+      }
+
+      // Unlock
+      mutex_locker.unlock();
+
+      if (!recached and debug) {
+        printf(
+            "\tHost freed %lld bytes.\n\t\t  %lld available "
+            "blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n",
+            (long long)search_key.bytes,
+            (long long)cached_blocks.size(),
+            (long long)cached_bytes.free,
+            (long long)live_blocks.size(),
+            (long long)cached_bytes.live);
+      }
+    }
+  };
+
+  /** @} */  // end group UtilMgmt
+
+}  // namespace cms::alpakatools::allocator
+
+#endif
diff --git a/src/alpaka/AlpakaCore/ESProduct.h b/src/alpaka/AlpakaCore/ESProduct.h
index c478cabbb..af50fdc5b 100644
--- a/src/alpaka/AlpakaCore/ESProduct.h
+++ b/src/alpaka/AlpakaCore/ESProduct.h
@@ -20,7 +20,8 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
 
     ESProduct() : gpuDataPerDevice_(::ALPAKA_ACCELERATOR_NAMESPACE::devices.size()) {
       for (size_t i = 0; i < gpuDataPerDevice_.size(); ++i) {
-        gpuDataPerDevice_[i].m_event = ::cms::alpakatools::getEventCache<Event>().get(::ALPAKA_ACCELERATOR_NAMESPACE::devices[i]);
+        gpuDataPerDevice_[i].m_event =
+            ::cms::alpakatools::getEventCache<Event>().get(::ALPAKA_ACCELERATOR_NAMESPACE::devices[i]);
       }
     }
 
diff --git a/src/alpaka/AlpakaCore/alpaka/alpakaDevAcc.cc b/src/alpaka/AlpakaCore/alpaka/alpakaDevAcc.cc
index 626761a8d..1de7da53e 100644
--- a/src/alpaka/AlpakaCore/alpaka/alpakaDevAcc.cc
+++ b/src/alpaka/AlpakaCore/alpaka/alpakaDevAcc.cc
@@ -2,8 +2,7 @@
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
-  static
-  std::vector<ALPAKA_ACCELERATOR_NAMESPACE::Device> enumerate() {
+  static std::vector<ALPAKA_ACCELERATOR_NAMESPACE::Device> enumerate() {
     std::vector<ALPAKA_ACCELERATOR_NAMESPACE::Device> devices;
     uint32_t n = alpaka::getDevCount<ALPAKA_ACCELERATOR_NAMESPACE::Platform>();
     devices.reserve(n);
@@ -14,4 +13,4 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
   }
 
   const std::vector<Device> devices = enumerate();
-}
+}  // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/AlpakaCore/alpakaMemoryHelper.h b/src/alpaka/AlpakaCore/alpakaMemoryHelper.h
index 9219b11a8..069146b78 100644
--- a/src/alpaka/AlpakaCore/alpakaMemoryHelper.h
+++ b/src/alpaka/AlpakaCore/alpakaMemoryHelper.h
@@ -34,6 +34,11 @@ namespace cms::alpakatools {
     return alpaka::ViewPlainPtr<TDevice, TData, Dim1D, Idx>(data, device, extent);
   }
 
+  template <typename TData>
+  inline size_t nbytesFromExtent(const Extent& extent) {
+    return (sizeof(TData) * extent);
+  }
+
 }  // namespace cms::alpakatools
 
 #endif  // AlpakaCore_alpakaMemoryHelper_h
diff --git a/src/alpaka/AlpakaCore/deviceAllocatorStatus.h b/src/alpaka/AlpakaCore/deviceAllocatorStatus.h
new file mode 100644
index 000000000..41b82b524
--- /dev/null
+++ b/src/alpaka/AlpakaCore/deviceAllocatorStatus.h
@@ -0,0 +1,29 @@
+#ifndef HeterogeneousCore_AlpakaUtilities_deviceAllocatorStatus_h
+#define HeterogeneousCore_AlpakaUtilities_deviceAllocatorStatus_h
+
+#include <map>
+
+#include "AlpakaCore/alpakaConfig.h"
+
+namespace cms {
+  namespace alpakatools {
+    namespace allocator {
+      struct TotalBytes {
+        size_t free;
+        size_t live;
+        size_t liveRequested;  // CMS: monitor also requested amount
+        TotalBytes() { free = live = liveRequested = 0; }
+      };
+
+      inline int getIdxOfDev(const ::ALPAKA_ACCELERATOR_NAMESPACE::Device& device) {
+        static const auto devices{alpaka::getDevs<::ALPAKA_ACCELERATOR_NAMESPACE::Platform>()};
+        return (std::find(devices.begin(), devices.end(), device) - devices.begin());
+      }
+
+      // Map device index to the number of bytes cached by it
+      using DeviceCachedBytes = std::map<int, TotalBytes>;
+    }  // namespace allocator
+  }    // namespace alpakatools
+}  // namespace cms
+
+#endif
diff --git a/src/alpaka/AlpakaCore/device_unique_ptr.h b/src/alpaka/AlpakaCore/device_unique_ptr.h
new file mode 100644
index 000000000..6c7501db9
--- /dev/null
+++ b/src/alpaka/AlpakaCore/device_unique_ptr.h
@@ -0,0 +1,72 @@
+#ifndef HeterogeneousCore_AlpakaUtilities_interface_device_unique_ptr_h
+#define HeterogeneousCore_AlpakaUtilities_interface_device_unique_ptr_h
+
+#include <memory>
+#include <type_traits>
+
+#include "AlpakaCore/host_unique_ptr.h"
+
+namespace cms {
+  namespace alpakatools {
+    namespace device {
+      namespace impl {
+        template <typename TData>
+        class DeviceDeleter {
+        public:
+          DeviceDeleter(::ALPAKA_ACCELERATOR_NAMESPACE::AlpakaDeviceBuf<TData> buffer) : buf{std::move(buffer)} {}
+
+          void operator()(void* d_ptr) {
+            if constexpr (allocator::policy == allocator::Policy::Caching) {
+              if (d_ptr) {
+                allocator::getCachingDeviceAllocator().DeviceFree(buf);
+              }
+            }
+          }
+
+        private:
+          ::ALPAKA_ACCELERATOR_NAMESPACE::AlpakaDeviceBuf<TData> buf;
+        };
+      }  // namespace impl
+      template <typename TData>
+      using unique_ptr =
+#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
+          std::unique_ptr<TData,
+                          impl::DeviceDeleter<
+                              std::conditional_t<allocator::policy == allocator::Policy::Caching, std::byte, TData>>>;
+#else
+          host::unique_ptr<TData>;
+#endif
+    }  // namespace device
+
+    template <typename TData>
+    auto make_device_unique(const alpaka_common::Extent& extent) {
+      const auto& device = ::ALPAKA_ACCELERATOR_NAMESPACE::devices[0];
+#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
+      if constexpr (allocator::policy == allocator::Policy::Caching) {
+        const alpaka_common::Extent nbytes = alpakatools::nbytesFromExtent<TData>(extent);
+        if (nbytes > maxAllocationSize) {
+          throw std::runtime_error("Tried to allocate " + std::to_string(nbytes) +
+                                   " bytes, but the allocator maximum is " + std::to_string(maxAllocationSize));
+        }
+        auto buf = allocator::getCachingDeviceAllocator().DeviceAllocate(nbytes, device);
+        void* d_ptr = alpaka::getPtrNative(buf);
+        return typename device::unique_ptr<TData>{reinterpret_cast<TData*>(d_ptr),
+                                                  device::impl::DeviceDeleter<std::byte>{buf}};
+      } else {
+        auto buf = alpaka::allocBuf<TData, alpaka_common::Idx>(device, extent);
+#if CUDA_VERSION >= 11020
+        if constexpr (allocator::policy == allocator::Policy::Asynchronous) {
+          alpaka::prepareForAsyncCopy(buf);
+        }
+#endif
+        TData* d_ptr = alpaka::getPtrNative(buf);
+        return typename device::unique_ptr<TData>{d_ptr, device::impl::DeviceDeleter<TData>{buf}};
+      }
+#else
+      return make_host_unique<TData>(extent);
+#endif
+    }
+  }  // namespace alpakatools
+}  // namespace cms
+
+#endif
diff --git a/src/alpaka/AlpakaCore/getCachingDeviceAllocator.h b/src/alpaka/AlpakaCore/getCachingDeviceAllocator.h
new file mode 100644
index 000000000..f19acc269
--- /dev/null
+++ b/src/alpaka/AlpakaCore/getCachingDeviceAllocator.h
@@ -0,0 +1,72 @@
+#ifndef HeterogeneousCore_AlpakaCore_src_getCachingDeviceAllocator
+#define HeterogeneousCore_AlpakaCore_src_getCachingDeviceAllocator
+
+#include <iomanip>
+#include <iostream>
+
+#include "AlpakaCore/CachingDeviceAllocator.h"
+
+namespace cms::alpakatools::allocator {
+  // Use caching or not
+  enum class Policy { Synchronous = 0, Asynchronous = 1, Caching = 2 };
+#ifndef ALPAKA_DISABLE_CACHING_ALLOCATOR
+  constexpr Policy policy = Policy::Caching;
+#elif CUDA_VERSION >= 11020 && !defined ALPAKA_DISABLE_ASYNC_ALLOCATOR
+  constexpr Policy policy = Policy::Asynchronous;
+#else
+  constexpr Policy policy = Policy::Synchronous;
+#endif
+  // Growth factor (bin_growth in CachingDeviceAllocator
+  constexpr unsigned int binGrowth = 2;
+  // Smallest bin, corresponds to binGrowth^minBin bytes (min_bin in CacingDeviceAllocator
+  constexpr unsigned int minBin = 8;
+  // Largest bin, corresponds to binGrowth^maxBin bytes (max_bin in CachingDeviceAllocator). Note that unlike in allocator, allocations larger than binGrowth^maxBin are set to fail.
+  constexpr unsigned int maxBin = 30;
+  // Total storage for the allocator. 0 means no limit.
+  constexpr size_t maxCachedBytes = 0;
+  // Fraction of total device memory taken for the allocator. In case there are multiple devices with different amounts of memory, the smallest of them is taken. If maxCachedBytes is non-zero, the smallest of them is taken.
+  constexpr double maxCachedFraction = 0.8;
+  constexpr bool debug = false;
+
+  inline size_t minCachedBytes() {
+    size_t ret = std::numeric_limits<size_t>::max();
+    const auto devices{alpaka::getDevs<::ALPAKA_ACCELERATOR_NAMESPACE::Platform>()};
+    for (const auto& device : devices) {
+      const size_t freeMemory{alpaka::getFreeMemBytes(device)};
+      ret = std::min(ret, static_cast<size_t>(maxCachedFraction * freeMemory));
+    }
+    if (maxCachedBytes > 0) {
+      ret = std::min(ret, maxCachedBytes);
+    }
+    return ret;
+  }
+
+  inline CachingDeviceAllocator& getCachingDeviceAllocator() {
+    if (debug) {
+      std::cout << "CachingDeviceAllocator settings\n"
+                << "  bin growth " << binGrowth << "\n"
+                << "  min bin    " << minBin << "\n"
+                << "  max bin    " << maxBin << "\n"
+                << "  resulting bins:\n";
+      for (auto bin = minBin; bin <= maxBin; ++bin) {
+        auto binSize = CachingDeviceAllocator::IntPow(binGrowth, bin);
+        if (binSize >= (1 << 30) and binSize % (1 << 30) == 0) {
+          std::cout << "    " << std::setw(8) << (binSize >> 30) << " GB\n";
+        } else if (binSize >= (1 << 20) and binSize % (1 << 20) == 0) {
+          std::cout << "    " << std::setw(8) << (binSize >> 20) << " MB\n";
+        } else if (binSize >= (1 << 10) and binSize % (1 << 10) == 0) {
+          std::cout << "    " << std::setw(8) << (binSize >> 10) << " kB\n";
+        } else {
+          std::cout << "    " << std::setw(9) << binSize << " B\n";
+        }
+      }
+      std::cout << "  maximum amount of cached memory: " << (minCachedBytes() >> 20) << " MB\n";
+    }
+
+    // the public interface is thread safe
+    static CachingDeviceAllocator allocator{binGrowth, minBin, maxBin, minCachedBytes(), debug};
+    return allocator;
+  }
+}  // namespace cms::alpakatools::allocator
+
+#endif
diff --git a/src/alpaka/AlpakaCore/getCachingHostAllocator.h b/src/alpaka/AlpakaCore/getCachingHostAllocator.h
new file mode 100644
index 000000000..db075b4d6
--- /dev/null
+++ b/src/alpaka/AlpakaCore/getCachingHostAllocator.h
@@ -0,0 +1,39 @@
+#ifndef HeterogeneousCore_AlpakaCore_src_getCachingHostAllocator
+#define HeterogeneousCore_AlpakaCore_src_getCachingHostAllocator
+
+#include <iomanip>
+#include <iostream>
+
+#include "CachingHostAllocator.h"
+#include "getCachingDeviceAllocator.h"
+
+namespace cms::alpakatools::allocator {
+  inline CachingHostAllocator& getCachingHostAllocator() {
+    if (debug) {
+      std::cout << "CachingHostAllocator settings\n"
+                << "  bin growth " << binGrowth << "\n"
+                << "  min bin    " << minBin << "\n"
+                << "  max bin    " << maxBin << "\n"
+                << "  resulting bins:\n";
+      for (auto bin = minBin; bin <= maxBin; ++bin) {
+        auto binSize = CachingDeviceAllocator::IntPow(binGrowth, bin);
+        if (binSize >= (1 << 30) and binSize % (1 << 30) == 0) {
+          std::cout << "    " << std::setw(8) << (binSize >> 30) << " GB\n";
+        } else if (binSize >= (1 << 20) and binSize % (1 << 20) == 0) {
+          std::cout << "    " << std::setw(8) << (binSize >> 20) << " MB\n";
+        } else if (binSize >= (1 << 10) and binSize % (1 << 10) == 0) {
+          std::cout << "    " << std::setw(8) << (binSize >> 10) << " kB\n";
+        } else {
+          std::cout << "    " << std::setw(9) << binSize << " B\n";
+        }
+      }
+      std::cout << "  maximum amount of cached memory: " << (minCachedBytes() >> 20) << " MB\n";
+    }
+
+    // the public interface is thread safe
+    static CachingHostAllocator allocator{binGrowth, minBin, maxBin, minCachedBytes(), debug};
+    return allocator;
+  }
+}  // namespace cms::alpakatools::allocator
+
+#endif
diff --git a/src/alpaka/AlpakaCore/host_unique_ptr.h b/src/alpaka/AlpakaCore/host_unique_ptr.h
new file mode 100644
index 000000000..ec7030dfc
--- /dev/null
+++ b/src/alpaka/AlpakaCore/host_unique_ptr.h
@@ -0,0 +1,67 @@
+#ifndef HeterogeneousCore_AlpakaUtilities_interface_host_unique_ptr_h
+#define HeterogeneousCore_AlpakaUtilities_interface_host_unique_ptr_h
+
+#include <memory>
+#include <type_traits>
+
+#include "AlpakaCore/getCachingHostAllocator.h"
+
+namespace cms {
+  namespace alpakatools {
+    namespace host {
+      namespace impl {
+        template <typename TData>
+        class HostDeleter {
+        public:
+          HostDeleter(alpaka_common::AlpakaHostBuf<TData> buffer) : buf{std::move(buffer)} {}
+
+          void operator()(void* d_ptr) {
+            if constexpr (allocator::policy == allocator::Policy::Caching) {
+              if (d_ptr) {
+                allocator::getCachingHostAllocator().HostFree(buf);
+              }
+            }
+          }
+
+        private:
+          alpaka_common::AlpakaHostBuf<TData> buf;
+        };
+      }  // namespace impl
+
+      template <typename TData>
+      using unique_ptr = std::unique_ptr<
+          TData,
+          impl::HostDeleter<std::conditional_t<allocator::policy == allocator::Policy::Caching, std::byte, TData>>>;
+    }  // namespace host
+
+    inline constexpr size_t maxAllocationSize =
+        allocator::CachingDeviceAllocator::IntPow(allocator::binGrowth, allocator::maxBin);
+
+    // Allocate pinned host memory
+    template <typename TData>
+    typename host::unique_ptr<TData> make_host_unique(const alpaka_common::Extent& extent) {
+      if constexpr (allocator::policy == allocator::Policy::Caching) {
+        const alpaka_common::Extent nbytes = alpakatools::nbytesFromExtent<TData>(extent);
+        if (nbytes > maxAllocationSize) {
+          throw std::runtime_error("Tried to allocate " + std::to_string(nbytes) +
+                                   " bytes, but the allocator maximum is " + std::to_string(maxAllocationSize));
+        }
+        auto buf = allocator::getCachingHostAllocator().HostAllocate(nbytes);
+        void* d_ptr = alpaka::getPtrNative(buf);
+        return
+            typename host::unique_ptr<TData>{reinterpret_cast<TData*>(d_ptr), host::impl::HostDeleter<std::byte>{buf}};
+      } else {
+        auto buf = allocHostBuf<TData>(extent);
+#if CUDA_VERSION >= 11020
+        if constexpr (allocator::policy == allocator::Policy::Asynchronous) {
+          alpaka::prepareForAsyncCopy(buf);
+        }
+#endif
+        TData* d_ptr = alpaka::getPtrNative(buf);
+        return typename host::unique_ptr<TData>{d_ptr, host::impl::HostDeleter<TData>{buf}};
+      }
+    }
+  }  // namespace alpakatools
+}  // namespace cms
+
+#endif
diff --git a/src/alpaka/AlpakaDataFormats/BeamSpotAlpaka.h b/src/alpaka/AlpakaDataFormats/BeamSpotAlpaka.h
index 3012f4399..c8960b003 100644
--- a/src/alpaka/AlpakaDataFormats/BeamSpotAlpaka.h
+++ b/src/alpaka/AlpakaDataFormats/BeamSpotAlpaka.h
@@ -1,7 +1,7 @@
 #ifndef AlpakaDataFormats_BeamSpot_interface_BeamSpotAlpaka_h
 #define AlpakaDataFormats_BeamSpot_interface_BeamSpotAlpaka_h
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
 #include "DataFormats/BeamSpotPOD.h"
 
 #include <cstring>
@@ -13,19 +13,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     BeamSpotAlpaka() = default;
 
     BeamSpotAlpaka(BeamSpotPOD const* data, Queue& queue)
-        : data_d{cms::alpakatools::allocDeviceBuf<BeamSpotPOD>(alpaka::getDev(queue), 1u)} {
+        : data_d{::cms::alpakatools::make_device_unique<BeamSpotPOD>(1u)} {
       auto data_h{cms::alpakatools::createHostView<const BeamSpotPOD>(data, 1u)};
+      auto data_d_view{cms::alpakatools::createDeviceView<BeamSpotPOD>(alpaka::getDev(queue), data_d.get(), 1u)};
 
-      alpaka::memcpy(queue, data_d, data_h, 1u);
-      // alpaka::wait(queue);
+      alpaka::memcpy(queue, data_d_view, data_h, 1u);
     }
 
-    //TODO ANTONIO
-
-    const BeamSpotPOD* data() const { return alpaka::getPtrNative(data_d); }
+    const BeamSpotPOD* data() const { return data_d.get(); }
 
   private:
-    AlpakaDeviceBuf<BeamSpotPOD> data_d;
+    ::cms::alpakatools::device::unique_ptr<BeamSpotPOD> data_d;
   };
 
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/AlpakaDataFormats/PixelTrackAlpaka.h b/src/alpaka/AlpakaDataFormats/PixelTrackAlpaka.h
index c6e0f36f0..64ff4e05c 100644
--- a/src/alpaka/AlpakaDataFormats/PixelTrackAlpaka.h
+++ b/src/alpaka/AlpakaDataFormats/PixelTrackAlpaka.h
@@ -1,10 +1,12 @@
-#ifndef CUDADataFormatsTrackTrackHeterogeneous_H
-#define CUDADataFormatsTrackTrackHeterogeneous_H
+#ifndef AlpakaDataFormatsTrackTrackHeterogeneous_H
+#define AlpakaDataFormatsTrackTrackHeterogeneous_H
 
 #include "AlpakaCore/HistoContainer.h"
 #include "AlpakaCore/alpakaCommon.h"
 #include "AlpakaDataFormats/TrajectoryStateSoA.h"
 
+#include "AlpakaCore/device_unique_ptr.h"
+
 namespace trackQuality {
   enum Quality : uint8_t { bad = 0, dup, loose, strict, tight, highPurity };
 }
@@ -69,8 +71,8 @@ namespace pixelTrack {
 }  // namespace pixelTrack
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
-  using PixelTrackAlpaka = AlpakaDeviceBuf<pixelTrack::TrackSoA>;
-  using PixelTrackHost = AlpakaHostBuf<pixelTrack::TrackSoA>;
+  using PixelTrackAlpaka = cms::alpakatools::device::unique_ptr<pixelTrack::TrackSoA>;
+  using PixelTrackHost = cms::alpakatools::host::unique_ptr<pixelTrack::TrackSoA>;
 
   // NB: ANOTHER OPTION IS TO CREATE A HeterogeneousSoA class,
   // with a AlpakaDeviceBuf<pixelTrack::TrackSoA> as a data member
diff --git a/src/alpaka/AlpakaDataFormats/SiPixelClustersAlpaka.h b/src/alpaka/AlpakaDataFormats/SiPixelClustersAlpaka.h
index 9c13818ca..97c14a3e9 100644
--- a/src/alpaka/AlpakaDataFormats/SiPixelClustersAlpaka.h
+++ b/src/alpaka/AlpakaDataFormats/SiPixelClustersAlpaka.h
@@ -1,18 +1,18 @@
-#ifndef CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h
-#define CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h
+#ifndef AlpakaDataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h
+#define AlpakaDataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
   class SiPixelClustersAlpaka {
   public:
     SiPixelClustersAlpaka() = default;
-    explicit SiPixelClustersAlpaka(Device const &device, size_t maxClusters)
-        : moduleStart_d{cms::alpakatools::allocDeviceBuf<uint32_t>(device, maxClusters + 1)},
-          clusInModule_d{cms::alpakatools::allocDeviceBuf<uint32_t>(device, maxClusters)},
-          moduleId_d{cms::alpakatools::allocDeviceBuf<uint32_t>(device, maxClusters)},
-          clusModuleStart_d{cms::alpakatools::allocDeviceBuf<uint32_t>(device, maxClusters + 1)} {}
+    explicit SiPixelClustersAlpaka(size_t maxClusters)
+        : moduleStart_d{cms::alpakatools::make_device_unique<uint32_t>(maxClusters + 1)},
+          clusInModule_d{cms::alpakatools::make_device_unique<uint32_t>(maxClusters)},
+          moduleId_d{cms::alpakatools::make_device_unique<uint32_t>(maxClusters)},
+          clusModuleStart_d{cms::alpakatools::make_device_unique<uint32_t>(maxClusters + 1)} {}
     ~SiPixelClustersAlpaka() = default;
 
     SiPixelClustersAlpaka(const SiPixelClustersAlpaka &) = delete;
@@ -24,20 +24,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
     uint32_t nClusters() const { return nClusters_h; }
 
-    uint32_t *moduleStart() { return alpaka::getPtrNative(moduleStart_d); }
-    uint32_t *clusInModule() { return alpaka::getPtrNative(clusInModule_d); }
-    uint32_t *moduleId() { return alpaka::getPtrNative(moduleId_d); }
-    uint32_t *clusModuleStart() { return alpaka::getPtrNative(clusModuleStart_d); }
+    uint32_t *moduleStart() { return moduleStart_d.get(); }
+    uint32_t *clusInModule() { return clusInModule_d.get(); }
+    uint32_t *moduleId() { return moduleId_d.get(); }
+    uint32_t *clusModuleStart() { return clusModuleStart_d.get(); }
 
-    uint32_t const *moduleStart() const { return alpaka::getPtrNative(moduleStart_d); }
-    uint32_t const *clusInModule() const { return alpaka::getPtrNative(clusInModule_d); }
-    uint32_t const *moduleId() const { return alpaka::getPtrNative(moduleId_d); }
-    uint32_t const *clusModuleStart() const { return alpaka::getPtrNative(clusModuleStart_d); }
+    uint32_t const *moduleStart() const { return moduleStart_d.get(); }
+    uint32_t const *clusInModule() const { return clusInModule_d.get(); }
+    uint32_t const *moduleId() const { return moduleId_d.get(); }
+    uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); }
 
-    uint32_t const *c_moduleStart() const { return alpaka::getPtrNative(moduleStart_d); }
-    uint32_t const *c_clusInModule() const { return alpaka::getPtrNative(clusInModule_d); }
-    uint32_t const *c_moduleId() const { return alpaka::getPtrNative(moduleId_d); }
-    uint32_t const *c_clusModuleStart() const { return alpaka::getPtrNative(clusModuleStart_d); }
+    uint32_t const *c_moduleStart() const { return moduleStart_d.get(); }
+    uint32_t const *c_clusInModule() const { return clusInModule_d.get(); }
+    uint32_t const *c_moduleId() const { return moduleId_d.get(); }
+    uint32_t const *c_clusModuleStart() const { return clusModuleStart_d.get(); }
 
     class DeviceConstView {
     public:
@@ -61,12 +61,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     }
 
   private:
-    AlpakaDeviceBuf<uint32_t> moduleStart_d;   // index of the first pixel of each module
-    AlpakaDeviceBuf<uint32_t> clusInModule_d;  // number of clusters found in each module
-    AlpakaDeviceBuf<uint32_t> moduleId_d;      // module id of each module
+    cms::alpakatools::device::unique_ptr<uint32_t> moduleStart_d;   // index of the first pixel of each module
+    cms::alpakatools::device::unique_ptr<uint32_t> clusInModule_d;  // number of clusters found in each module
+    cms::alpakatools::device::unique_ptr<uint32_t> moduleId_d;      // module id of each module
 
     // originally from rechits
-    AlpakaDeviceBuf<uint32_t> clusModuleStart_d;  // index of the first cluster of each module
+    cms::alpakatools::device::unique_ptr<uint32_t> clusModuleStart_d;  // index of the first cluster of each module
 
     uint32_t nClusters_h = 0;
   };
diff --git a/src/alpaka/AlpakaDataFormats/SiPixelDigiErrorsAlpaka.h b/src/alpaka/AlpakaDataFormats/SiPixelDigiErrorsAlpaka.h
index bba2cd2a5..fa1b4da31 100644
--- a/src/alpaka/AlpakaDataFormats/SiPixelDigiErrorsAlpaka.h
+++ b/src/alpaka/AlpakaDataFormats/SiPixelDigiErrorsAlpaka.h
@@ -1,27 +1,33 @@
-#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h
-#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h
+#ifndef AlpakaDataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h
+#define AlpakaDataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h
 
 #include "DataFormats/PixelErrors.h"
 #include "AlpakaCore/SimpleVector.h"
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
+#include "AlpakaCore/host_unique_ptr.h"
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
   class SiPixelDigiErrorsAlpaka {
   public:
     SiPixelDigiErrorsAlpaka() = default;
-    explicit SiPixelDigiErrorsAlpaka(Device const& device, size_t maxFedWords, PixelFormatterErrors errors, Queue& queue)
-        : data_d{cms::alpakatools::allocDeviceBuf<PixelErrorCompact>(device, maxFedWords)},
-          error_d{cms::alpakatools::allocDeviceBuf<::cms::alpakatools::SimpleVector<PixelErrorCompact>>(device, 1u)},
-          error_h{::cms::alpakatools::allocHostBuf<::cms::alpakatools::SimpleVector<PixelErrorCompact>>(1u)},
+    explicit SiPixelDigiErrorsAlpaka(size_t maxFedWords, PixelFormatterErrors errors, Queue& queue)
+        : data_d{cms::alpakatools::make_device_unique<PixelErrorCompact>(maxFedWords)},
+          error_d{cms::alpakatools::make_device_unique<cms::alpakatools::SimpleVector<PixelErrorCompact>>(1u)},
+          error_h{cms::alpakatools::make_host_unique<cms::alpakatools::SimpleVector<PixelErrorCompact>>(1u)},
           formatterErrors_h{std::move(errors)} {
-      auto perror_h = alpaka::getPtrNative(error_h);
-      perror_h->construct(maxFedWords, alpaka::getPtrNative(data_d));
+      auto perror_h = error_h.get();
+      perror_h->construct(maxFedWords, data_d.get());
       ALPAKA_ASSERT_OFFLOAD(perror_h->empty());
       ALPAKA_ASSERT_OFFLOAD(perror_h->capacity() == static_cast<int>(maxFedWords));
 
-      alpaka::memcpy(queue, error_d, error_h, 1u);
+      // TO DO: nothing really async in here for now... Pass the queue in constructor argument instead, and don't wait anymore!
+      auto error_h_view =
+          cms::alpakatools::createHostView<cms::alpakatools::SimpleVector<PixelErrorCompact>>(error_h.get(), 1u);
+      auto error_d_view = cms::alpakatools::createDeviceView<cms::alpakatools::SimpleVector<PixelErrorCompact>>(
+          alpaka::getDev(queue), error_d.get(), 1u);
+      alpaka::memcpy(queue, error_d_view, error_h_view, 1u);
     }
     ~SiPixelDigiErrorsAlpaka() = default;
 
@@ -32,9 +38,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
     const PixelFormatterErrors& formatterErrors() const { return formatterErrors_h; }
 
-    ::cms::alpakatools::SimpleVector<PixelErrorCompact>* error() { return alpaka::getPtrNative(error_d); }
-    ::cms::alpakatools::SimpleVector<PixelErrorCompact> const* error() const { return alpaka::getPtrNative(error_d); }
-    ::cms::alpakatools::SimpleVector<PixelErrorCompact> const* c_error() const { return alpaka::getPtrNative(error_d); }
+    cms::alpakatools::SimpleVector<PixelErrorCompact>* error() { return error_d.get(); }
+    cms::alpakatools::SimpleVector<PixelErrorCompact> const* error() const { return error_d.get(); }
+    cms::alpakatools::SimpleVector<PixelErrorCompact> const* c_error() const { return error_d.get(); }
 
 #ifdef TODO
     using HostDataError =
@@ -45,9 +51,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 #endif
 
   private:
-    AlpakaDeviceBuf<PixelErrorCompact> data_d;
-    AlpakaDeviceBuf<::cms::alpakatools::SimpleVector<PixelErrorCompact>> error_d;
-    AlpakaHostBuf<::cms::alpakatools::SimpleVector<PixelErrorCompact>> error_h;
+    cms::alpakatools::device::unique_ptr<PixelErrorCompact> data_d;
+    cms::alpakatools::device::unique_ptr<cms::alpakatools::SimpleVector<PixelErrorCompact>> error_d;
+    cms::alpakatools::host::unique_ptr<cms::alpakatools::SimpleVector<PixelErrorCompact>> error_h;
     PixelFormatterErrors formatterErrors_h;
   };
 
diff --git a/src/alpaka/AlpakaDataFormats/SiPixelDigisAlpaka.h b/src/alpaka/AlpakaDataFormats/SiPixelDigisAlpaka.h
index bc8c2192c..8450885f0 100644
--- a/src/alpaka/AlpakaDataFormats/SiPixelDigisAlpaka.h
+++ b/src/alpaka/AlpakaDataFormats/SiPixelDigisAlpaka.h
@@ -1,21 +1,23 @@
-#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h
-#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h
+#ifndef AlpakaDataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h
+#define AlpakaDataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
+#include "AlpakaCore/host_unique_ptr.h"
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
   class SiPixelDigisAlpaka {
   public:
     SiPixelDigisAlpaka() = default;
-    explicit SiPixelDigisAlpaka(Device const &device, size_t maxFedWords)
-        : xx_d{cms::alpakatools::allocDeviceBuf<uint16_t>(device, maxFedWords)},
-          yy_d{cms::alpakatools::allocDeviceBuf<uint16_t>(device, maxFedWords)},
-          adc_d{cms::alpakatools::allocDeviceBuf<uint16_t>(device, maxFedWords)},
-          moduleInd_d{cms::alpakatools::allocDeviceBuf<uint16_t>(device, maxFedWords)},
-          clus_d{cms::alpakatools::allocDeviceBuf<int32_t>(device, maxFedWords)},
-          pdigi_d{cms::alpakatools::allocDeviceBuf<uint32_t>(device, maxFedWords)},
-          rawIdArr_d{cms::alpakatools::allocDeviceBuf<uint32_t>(device, maxFedWords)} {}
+    explicit SiPixelDigisAlpaka(size_t maxFedWords)
+        : maxFedWords_{maxFedWords},
+          xx_d{cms::alpakatools::make_device_unique<uint16_t>(maxFedWords)},
+          yy_d{cms::alpakatools::make_device_unique<uint16_t>(maxFedWords)},
+          adc_d{cms::alpakatools::make_device_unique<uint16_t>(maxFedWords)},
+          moduleInd_d{cms::alpakatools::make_device_unique<uint16_t>(maxFedWords)},
+          clus_d{cms::alpakatools::make_device_unique<int32_t>(maxFedWords)},
+          pdigi_d{cms::alpakatools::make_device_unique<uint32_t>(maxFedWords)},
+          rawIdArr_d{cms::alpakatools::make_device_unique<uint32_t>(maxFedWords)} {}
     ~SiPixelDigisAlpaka() = default;
 
     SiPixelDigisAlpaka(const SiPixelDigisAlpaka &) = delete;
@@ -31,34 +33,36 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     uint32_t nModules() const { return nModules_h; }
     uint32_t nDigis() const { return nDigis_h; }
 
-    uint16_t *xx() { return alpaka::getPtrNative(xx_d); }
-    uint16_t *yy() { return alpaka::getPtrNative(yy_d); }
-    uint16_t *adc() { return alpaka::getPtrNative(adc_d); }
-    uint16_t *moduleInd() { return alpaka::getPtrNative(moduleInd_d); }
-    int32_t *clus() { return alpaka::getPtrNative(clus_d); }
-    uint32_t *pdigi() { return alpaka::getPtrNative(pdigi_d); }
-    uint32_t *rawIdArr() { return alpaka::getPtrNative(rawIdArr_d); }
-
-    uint16_t const *xx() const { return alpaka::getPtrNative(xx_d); }
-    uint16_t const *yy() const { return alpaka::getPtrNative(yy_d); }
-    uint16_t const *adc() const { return alpaka::getPtrNative(adc_d); }
-    uint16_t const *moduleInd() const { return alpaka::getPtrNative(moduleInd_d); }
-    int32_t const *clus() const { return alpaka::getPtrNative(clus_d); }
-    uint32_t const *pdigi() const { return alpaka::getPtrNative(pdigi_d); }
-    uint32_t const *rawIdArr() const { return alpaka::getPtrNative(rawIdArr_d); }
-
-    uint16_t const *c_xx() const { return alpaka::getPtrNative(xx_d); }
-    uint16_t const *c_yy() const { return alpaka::getPtrNative(yy_d); }
-    uint16_t const *c_adc() const { return alpaka::getPtrNative(adc_d); }
-    uint16_t const *c_moduleInd() const { return alpaka::getPtrNative(moduleInd_d); }
-    int32_t const *c_clus() const { return alpaka::getPtrNative(clus_d); }
-    uint32_t const *c_pdigi() const { return alpaka::getPtrNative(pdigi_d); }
-    uint32_t const *c_rawIdArr() const { return alpaka::getPtrNative(rawIdArr_d); }
+    uint16_t *xx() { return xx_d.get(); }
+    uint16_t *yy() { return yy_d.get(); }
+    uint16_t *adc() { return adc_d.get(); }
+    uint16_t *moduleInd() { return moduleInd_d.get(); }
+    int32_t *clus() { return clus_d.get(); }
+    uint32_t *pdigi() { return pdigi_d.get(); }
+    uint32_t *rawIdArr() { return rawIdArr_d.get(); }
+
+    uint16_t const *xx() const { return xx_d.get(); }
+    uint16_t const *yy() const { return yy_d.get(); }
+    uint16_t const *adc() const { return adc_d.get(); }
+    uint16_t const *moduleInd() const { return moduleInd_d.get(); }
+    int32_t const *clus() const { return clus_d.get(); }
+    uint32_t const *pdigi() const { return pdigi_d.get(); }
+    uint32_t const *rawIdArr() const { return rawIdArr_d.get(); }
+
+    uint16_t const *c_xx() const { return xx_d.get(); }
+    uint16_t const *c_yy() const { return yy_d.get(); }
+    uint16_t const *c_adc() const { return adc_d.get(); }
+    uint16_t const *c_moduleInd() const { return moduleInd_d.get(); }
+    int32_t const *c_clus() const { return clus_d.get(); }
+    uint32_t const *c_pdigi() const { return pdigi_d.get(); }
+    uint32_t const *c_rawIdArr() const { return rawIdArr_d.get(); }
 
     // TO DO: nothing async in here for now... Pass the queue as argument instead, and don't wait anymore!
     auto adcToHostAsync(Queue &queue) const {
-      auto ret = ::cms::alpakatools::allocHostBuf<uint16_t>(nDigis());
-      alpaka::memcpy(queue, ret, adc_d, nDigis());
+      auto ret = cms::alpakatools::make_host_unique<uint16_t>(nDigis());
+      auto ret_view = cms::alpakatools::createHostView<uint16_t>(ret.get(), nDigis());
+      auto adc_d_view = cms::alpakatools::createDeviceView<uint16_t>(alpaka::getDev(queue), adc_d.get(), maxFedWords_);
+      alpaka::memcpy(queue, ret_view, adc_d_view, nDigis());
       return ret;
     }
 
@@ -90,17 +94,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     const DeviceConstView view() const { return DeviceConstView{c_xx(), c_yy(), c_adc(), c_moduleInd(), c_clus()}; }
 
   private:
+    size_t maxFedWords_;
     // These are consumed by downstream device code
-    AlpakaDeviceBuf<uint16_t> xx_d;         // local coordinates of each pixel
-    AlpakaDeviceBuf<uint16_t> yy_d;         //
-    AlpakaDeviceBuf<uint16_t> adc_d;        // ADC of each pixel
-    AlpakaDeviceBuf<uint16_t> moduleInd_d;  // module id of each pixel
-    AlpakaDeviceBuf<int32_t> clus_d;        // cluster id of each pixel
+    cms::alpakatools::device::unique_ptr<uint16_t> xx_d;         // local coordinates of each pixel
+    cms::alpakatools::device::unique_ptr<uint16_t> yy_d;         //
+    cms::alpakatools::device::unique_ptr<uint16_t> adc_d;        // ADC of each pixel
+    cms::alpakatools::device::unique_ptr<uint16_t> moduleInd_d;  // module id of each pixel
+    cms::alpakatools::device::unique_ptr<int32_t> clus_d;        // cluster id of each pixel
 
     // These are for CPU output; should we (eventually) place them to a
     // separate product?
-    AlpakaDeviceBuf<uint32_t> pdigi_d;
-    AlpakaDeviceBuf<uint32_t> rawIdArr_d;
+    cms::alpakatools::device::unique_ptr<uint32_t> pdigi_d;
+    cms::alpakatools::device::unique_ptr<uint32_t> rawIdArr_d;
 
     uint32_t nModules_h = 0;
     uint32_t nDigis_h = 0;
diff --git a/src/alpaka/AlpakaDataFormats/TrackingRecHit2DAlpaka.h b/src/alpaka/AlpakaDataFormats/TrackingRecHit2DAlpaka.h
index c9b372c96..d5d2f89f4 100644
--- a/src/alpaka/AlpakaDataFormats/TrackingRecHit2DAlpaka.h
+++ b/src/alpaka/AlpakaDataFormats/TrackingRecHit2DAlpaka.h
@@ -1,8 +1,9 @@
-#ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
-#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
+#ifndef AlpakaDataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
+#define AlpakaDataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
 
 #include "AlpakaDataFormats/TrackingRecHit2DSOAView.h"
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
+#include "AlpakaCore/host_unique_ptr.h"
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
@@ -20,25 +21,24 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
           // NON-OWNING DEVICE POINTERS:
           m_hitsModuleStart(hitsModuleStart),
           // OWNING DEVICE POINTERS:
-          m_xl{cms::alpakatools::allocDeviceBuf<float>(alpaka::getDev(queue), nHits)},
-          m_yl{cms::alpakatools::allocDeviceBuf<float>(alpaka::getDev(queue), nHits)},
-          m_xerr{cms::alpakatools::allocDeviceBuf<float>(alpaka::getDev(queue), nHits)},
-          m_yerr{cms::alpakatools::allocDeviceBuf<float>(alpaka::getDev(queue), nHits)},
-          m_xg{cms::alpakatools::allocDeviceBuf<float>(alpaka::getDev(queue), nHits)},
-          m_yg{cms::alpakatools::allocDeviceBuf<float>(alpaka::getDev(queue), nHits)},
-          m_zg{cms::alpakatools::allocDeviceBuf<float>(alpaka::getDev(queue), nHits)},
-          m_rg{cms::alpakatools::allocDeviceBuf<float>(alpaka::getDev(queue), nHits)},
-          m_iphi{cms::alpakatools::allocDeviceBuf<int16_t>(alpaka::getDev(queue), nHits)},
-          m_charge{cms::alpakatools::allocDeviceBuf<int32_t>(alpaka::getDev(queue), nHits)},
-          m_xsize{cms::alpakatools::allocDeviceBuf<int16_t>(alpaka::getDev(queue), nHits)},
-          m_ysize{cms::alpakatools::allocDeviceBuf<int16_t>(alpaka::getDev(queue), nHits)},
-          m_detInd{cms::alpakatools::allocDeviceBuf<uint16_t>(alpaka::getDev(queue), nHits)},
-          m_averageGeometry{
-              cms::alpakatools::allocDeviceBuf<TrackingRecHit2DSOAView::AverageGeometry>(alpaka::getDev(queue), 1u)},
-          m_hitsLayerStart{cms::alpakatools::allocDeviceBuf<uint32_t>(alpaka::getDev(queue), nHits)},
-          m_hist{cms::alpakatools::allocDeviceBuf<Hist>(alpaka::getDev(queue), 1u)},
-          // SoA view:
-          m_view{cms::alpakatools::allocDeviceBuf<TrackingRecHit2DSOAView>(alpaka::getDev(queue), 1u)},
+          m_xl{cms::alpakatools::make_device_unique<float>(nHits)},
+          m_yl{cms::alpakatools::make_device_unique<float>(nHits)},
+          m_xerr{cms::alpakatools::make_device_unique<float>(nHits)},
+          m_yerr{cms::alpakatools::make_device_unique<float>(nHits)},
+          m_xg{cms::alpakatools::make_device_unique<float>(nHits)},
+          m_yg{cms::alpakatools::make_device_unique<float>(nHits)},
+          m_zg{cms::alpakatools::make_device_unique<float>(nHits)},
+          m_rg{cms::alpakatools::make_device_unique<float>(nHits)},
+          m_iphi{cms::alpakatools::make_device_unique<int16_t>(nHits)},
+          m_charge{cms::alpakatools::make_device_unique<int32_t>(nHits)},
+          m_xsize{cms::alpakatools::make_device_unique<int16_t>(nHits)},
+          m_ysize{cms::alpakatools::make_device_unique<int16_t>(nHits)},
+          m_detInd{cms::alpakatools::make_device_unique<uint16_t>(nHits)},
+          m_averageGeometry{cms::alpakatools::make_device_unique<TrackingRecHit2DSOAView::AverageGeometry>(1u)},
+          m_hitsLayerStart{cms::alpakatools::make_device_unique<uint32_t>(nHits)},
+          m_hist{cms::alpakatools::make_device_unique<Hist>(1u)},
+          // SOA view:
+          m_view{cms::alpakatools::make_device_unique<TrackingRecHit2DSOAView>(1u)},
           m_view_h{::cms::alpakatools::allocHostBuf<TrackingRecHit2DSOAView>(1u)} {
       // the hits are actually accessed in order only in building
       // if ordering is relevant they may have to be stored phi-ordered by layer or so
@@ -54,24 +54,30 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
       // Raw pointer to data already owned in the eventSetup by PixelCPEFast object:
       view.m_cpeParams = cpeParams;
       // Raw pointers to data owned here in TrackingRecHit2DAlpaka object:
-      view.m_xl = alpaka::getPtrNative(m_xl);
-      view.m_yl = alpaka::getPtrNative(m_yl);
-      view.m_xerr = alpaka::getPtrNative(m_xerr);
-      view.m_yerr = alpaka::getPtrNative(m_yerr);
-      view.m_xg = alpaka::getPtrNative(m_xg);
-      view.m_yg = alpaka::getPtrNative(m_yg);
-      view.m_zg = alpaka::getPtrNative(m_zg);
-      view.m_rg = alpaka::getPtrNative(m_rg);
-      view.m_iphi = alpaka::getPtrNative(m_iphi);
-      view.m_charge = alpaka::getPtrNative(m_charge);
-      view.m_xsize = alpaka::getPtrNative(m_xsize);
-      view.m_ysize = alpaka::getPtrNative(m_ysize);
-      view.m_detInd = alpaka::getPtrNative(m_detInd);
-      view.m_averageGeometry = alpaka::getPtrNative(m_averageGeometry);
-      view.m_hitsLayerStart = alpaka::getPtrNative(m_hitsLayerStart);
-      view.m_hist = alpaka::getPtrNative(m_hist);
-      // Copy the SoA view to the device
-      alpaka::memcpy(queue, m_view, m_view_h, 1u);
+#define SET(name) view.name = name.get()
+      SET(m_xl);
+      SET(m_yl);
+      SET(m_xerr);
+      SET(m_yerr);
+      SET(m_xg);
+      SET(m_yg);
+      SET(m_zg);
+      SET(m_rg);
+      SET(m_iphi);
+      SET(m_charge);
+      SET(m_xsize);
+      SET(m_ysize);
+      SET(m_detInd);
+      SET(m_averageGeometry);
+      SET(m_hitsLayerStart);
+      SET(m_hist);
+#undef SET
+
+      // SoA view on device:
+      auto view_h{cms::alpakatools::createHostView<TrackingRecHit2DSOAView>(&view, 1u)};
+      auto view_m_view{
+          cms::alpakatools::createDeviceView<TrackingRecHit2DSOAView>(alpaka::getDev(queue), m_view.get(), 1u)};
+      alpaka::memcpy(queue, view_m_view, m_view_h, 1u);
     }
 
     ~TrackingRecHit2DAlpaka() = default;
@@ -81,71 +87,93 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     TrackingRecHit2DAlpaka(TrackingRecHit2DAlpaka&&) = default;
     TrackingRecHit2DAlpaka& operator=(TrackingRecHit2DAlpaka&&) = default;
 
-    TrackingRecHit2DSOAView* view() { return alpaka::getPtrNative(m_view); }
-    TrackingRecHit2DSOAView const* view() const { return alpaka::getPtrNative(m_view); }
+    TrackingRecHit2DSOAView* view() { return m_view.get(); }
+    TrackingRecHit2DSOAView const* view() const { return m_view.get(); }
 
     auto nHits() const { return m_nHits; }
     auto hitsModuleStart() const { return m_hitsModuleStart; }
 
-    auto hitsLayerStart() { return alpaka::getPtrNative(m_hitsLayerStart); }
-    auto const* c_hitsLayerStart() const { return alpaka::getPtrNative(m_hitsLayerStart); }
-    auto phiBinner() { return alpaka::getPtrNative(m_hist); }
-    auto iphi() { return alpaka::getPtrNative(m_iphi); }
-    auto const* c_iphi() const { return alpaka::getPtrNative(m_iphi); }
+    auto hitsLayerStart() { return m_hitsLayerStart.get(); }
+    auto const* c_hitsLayerStart() const { return m_hitsLayerStart.get(); }
+    auto phiBinner() { return m_hist.get(); }
+    auto iphi() { return m_iphi.get(); }
+    auto const* c_iphi() const { return m_iphi.get(); }
 
     auto xlToHostAsync(Queue& queue) const {
-      auto ret = ::cms::alpakatools::allocHostBuf<float>(nHits());
-      alpaka::memcpy(queue, ret, m_xl, nHits());
+      auto ret = cms::alpakatools::make_host_unique<float>(nHits());
+      auto view_ret = cms::alpakatools::createHostView<float>(ret.get(), nHits());
+      auto view_m_xl = cms::alpakatools::createDeviceView<float>(alpaka::getDev(queue), m_xl.get(), nHits());
+      alpaka::memcpy(queue, view_ret, view_m_xl, nHits());
       return ret;
     }
     auto ylToHostAsync(Queue& queue) const {
-      auto ret = ::cms::alpakatools::allocHostBuf<float>(nHits());
-      alpaka::memcpy(queue, ret, m_yl, nHits());
+      auto ret = cms::alpakatools::make_host_unique<float>(nHits());
+      auto view_ret = cms::alpakatools::createHostView<float>(ret.get(), nHits());
+      auto view_m_yl = cms::alpakatools::createDeviceView<float>(alpaka::getDev(queue), m_yl.get(), nHits());
+      alpaka::memcpy(queue, view_ret, view_m_yl, nHits());
       return ret;
     }
     auto xerrToHostAsync(Queue& queue) const {
-      auto ret = ::cms::alpakatools::allocHostBuf<float>(nHits());
-      alpaka::memcpy(queue, ret, m_xerr, nHits());
+      auto ret = cms::alpakatools::make_host_unique<float>(nHits());
+      auto view_ret = cms::alpakatools::createHostView<float>(ret.get(), nHits());
+      auto view_m_xerr = cms::alpakatools::createDeviceView<float>(alpaka::getDev(queue), m_xerr.get(), nHits());
+      alpaka::memcpy(queue, view_ret, view_m_xerr, nHits());
       return ret;
     }
     auto yerrToHostAsync(Queue& queue) const {
-      auto ret = ::cms::alpakatools::allocHostBuf<float>(nHits());
-      alpaka::memcpy(queue, ret, m_yerr, nHits());
+      auto ret = cms::alpakatools::make_host_unique<float>(nHits());
+      auto view_ret = cms::alpakatools::createHostView<float>(ret.get(), nHits());
+      auto view_m_yerr = cms::alpakatools::createDeviceView<float>(alpaka::getDev(queue), m_yerr.get(), nHits());
+      alpaka::memcpy(queue, view_ret, view_m_yerr, nHits());
       return ret;
     }
     auto xgToHostAsync(Queue& queue) const {
-      auto ret = ::cms::alpakatools::allocHostBuf<float>(nHits());
-      alpaka::memcpy(queue, ret, m_xg, nHits());
+      auto ret = cms::alpakatools::make_host_unique<float>(nHits());
+      auto view_ret = cms::alpakatools::createHostView<float>(ret.get(), nHits());
+      auto view_m_xg = cms::alpakatools::createDeviceView<float>(alpaka::getDev(queue), m_xg.get(), nHits());
+      alpaka::memcpy(queue, view_ret, view_m_xg, nHits());
       return ret;
     }
     auto ygToHostAsync(Queue& queue) const {
-      auto ret = ::cms::alpakatools::allocHostBuf<float>(nHits());
-      alpaka::memcpy(queue, ret, m_yg, nHits());
+      auto ret = cms::alpakatools::make_host_unique<float>(nHits());
+      auto view_ret = cms::alpakatools::createHostView<float>(ret.get(), nHits());
+      auto view_m_yg = cms::alpakatools::createDeviceView<float>(alpaka::getDev(queue), m_yg.get(), nHits());
+      alpaka::memcpy(queue, view_ret, view_m_yg, nHits());
       return ret;
     }
     auto zgToHostAsync(Queue& queue) const {
-      auto ret = ::cms::alpakatools::allocHostBuf<float>(nHits());
-      alpaka::memcpy(queue, ret, m_zg, nHits());
+      auto ret = cms::alpakatools::make_host_unique<float>(nHits());
+      auto view_ret = cms::alpakatools::createHostView<float>(ret.get(), nHits());
+      auto view_m_zg = cms::alpakatools::createDeviceView<float>(alpaka::getDev(queue), m_zg.get(), nHits());
+      alpaka::memcpy(queue, view_ret, view_m_zg, nHits());
       return ret;
     }
     auto rgToHostAsync(Queue& queue) const {
-      auto ret = ::cms::alpakatools::allocHostBuf<float>(nHits());
-      alpaka::memcpy(queue, ret, m_rg, nHits());
+      auto ret = cms::alpakatools::make_host_unique<float>(nHits());
+      auto view_ret = cms::alpakatools::createHostView<float>(ret.get(), nHits());
+      auto view_m_rg = cms::alpakatools::createDeviceView<float>(alpaka::getDev(queue), m_rg.get(), nHits());
+      alpaka::memcpy(queue, view_ret, view_m_rg, nHits());
       return ret;
     }
     auto chargeToHostAsync(Queue& queue) const {
-      auto ret = ::cms::alpakatools::allocHostBuf<int32_t>(nHits());
-      alpaka::memcpy(queue, ret, m_charge, nHits());
+      auto ret = cms::alpakatools::make_host_unique<int32_t>(nHits());
+      auto view_ret = cms::alpakatools::createHostView<int32_t>(ret.get(), nHits());
+      auto view_m_charge = cms::alpakatools::createDeviceView<int32_t>(alpaka::getDev(queue), m_charge.get(), nHits());
+      alpaka::memcpy(queue, view_ret, view_m_charge, nHits());
       return ret;
     }
     auto xsizeToHostAsync(Queue& queue) const {
-      auto ret = ::cms::alpakatools::allocHostBuf<int16_t>(nHits());
-      alpaka::memcpy(queue, ret, m_xsize, nHits());
+      auto ret = cms::alpakatools::make_host_unique<int16_t>(nHits());
+      auto view_ret = cms::alpakatools::createHostView<int16_t>(ret.get(), nHits());
+      auto view_m_xsize = cms::alpakatools::createDeviceView<int16_t>(alpaka::getDev(queue), m_xsize.get(), nHits());
+      alpaka::memcpy(queue, view_ret, view_m_xsize, nHits());
       return ret;
     }
     auto ysizeToHostAsync(Queue& queue) const {
-      auto ret = ::cms::alpakatools::allocHostBuf<int16_t>(nHits());
-      alpaka::memcpy(queue, ret, m_ysize, nHits());
+      auto ret = cms::alpakatools::make_host_unique<int16_t>(nHits());
+      auto view_ret = cms::alpakatools::createHostView<int16_t>(ret.get(), nHits());
+      auto view_m_ysize = cms::alpakatools::createDeviceView<int16_t>(alpaka::getDev(queue), m_ysize.get(), nHits());
+      alpaka::memcpy(queue, view_ret, view_m_ysize, nHits());
       return ret;
     }
 #ifdef TODO
@@ -153,17 +181,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     cms::cuda::host::unique_ptr<uint16_t[]> detIndexToHostAsync(cudaStream_t stream) const;
     cms::cuda::host::unique_ptr<uint32_t[]> hitsModuleStartToHostAsync(cudaStream_t stream) const;
 #endif
-    auto const* xl() const { return alpaka::getPtrNative(m_xl); }
-    auto const* yl() const { return alpaka::getPtrNative(m_yl); }
-    auto const* xerr() const { return alpaka::getPtrNative(m_xerr); }
-    auto const* yerr() const { return alpaka::getPtrNative(m_yerr); }
-    auto const* xg() const { return alpaka::getPtrNative(m_xg); }
-    auto const* yg() const { return alpaka::getPtrNative(m_yg); }
-    auto const* zg() const { return alpaka::getPtrNative(m_zg); }
-    auto const* rg() const { return alpaka::getPtrNative(m_rg); }
-    auto const* charge() const { return alpaka::getPtrNative(m_charge); }
-    auto const* xsize() const { return alpaka::getPtrNative(m_xsize); }
-    auto const* ysize() const { return alpaka::getPtrNative(m_ysize); }
+    auto const* xl() const { return m_xl.get(); }
+    auto const* yl() const { return m_yl.get(); }
+    auto const* xerr() const { return m_xerr.get(); }
+    auto const* yerr() const { return m_yerr.get(); }
+    auto const* xg() const { return m_xg.get(); }
+    auto const* yg() const { return m_yg.get(); }
+    auto const* zg() const { return m_zg.get(); }
+    auto const* rg() const { return m_rg.get(); }
+    auto const* charge() const { return m_charge.get(); }
+    auto const* xsize() const { return m_xsize.get(); }
+    auto const* ysize() const { return m_ysize.get(); }
 
   private:
     uint32_t m_nHits;
@@ -174,35 +202,34 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
     // OWNING DEVICE POINTERS
     // local coord
-    AlpakaDeviceBuf<float> m_xl;
-    AlpakaDeviceBuf<float> m_yl;
-    AlpakaDeviceBuf<float> m_xerr;
-    AlpakaDeviceBuf<float> m_yerr;
+    ::cms::alpakatools::device::unique_ptr<float> m_xl;
+    ::cms::alpakatools::device::unique_ptr<float> m_yl;
+    ::cms::alpakatools::device::unique_ptr<float> m_xerr;
+    ::cms::alpakatools::device::unique_ptr<float> m_yerr;
 
     // global coord
-    AlpakaDeviceBuf<float> m_xg;
-    AlpakaDeviceBuf<float> m_yg;
-    AlpakaDeviceBuf<float> m_zg;
-    AlpakaDeviceBuf<float> m_rg;
-    AlpakaDeviceBuf<int16_t> m_iphi;
+    ::cms::alpakatools::device::unique_ptr<float> m_xg;
+    ::cms::alpakatools::device::unique_ptr<float> m_yg;
+    ::cms::alpakatools::device::unique_ptr<float> m_zg;
+    ::cms::alpakatools::device::unique_ptr<float> m_rg;
+    ::cms::alpakatools::device::unique_ptr<int16_t> m_iphi;
 
     // cluster properties
-    AlpakaDeviceBuf<int32_t> m_charge;
-    AlpakaDeviceBuf<int16_t> m_xsize;
-    AlpakaDeviceBuf<int16_t> m_ysize;
-    AlpakaDeviceBuf<uint16_t> m_detInd;
+    ::cms::alpakatools::device::unique_ptr<int32_t> m_charge;
+    ::cms::alpakatools::device::unique_ptr<int16_t> m_xsize;
+    ::cms::alpakatools::device::unique_ptr<int16_t> m_ysize;
+    ::cms::alpakatools::device::unique_ptr<uint16_t> m_detInd;
 
-    AlpakaDeviceBuf<TrackingRecHit2DSOAView::AverageGeometry> m_averageGeometry;
+    ::cms::alpakatools::device::unique_ptr<TrackingRecHit2DSOAView::AverageGeometry> m_averageGeometry;
 
     // needed as kernel params...
-    AlpakaDeviceBuf<uint32_t> m_hitsLayerStart;
-    AlpakaDeviceBuf<Hist> m_hist;
+    ::cms::alpakatools::device::unique_ptr<uint32_t> m_hitsLayerStart;
+    ::cms::alpakatools::device::unique_ptr<Hist> m_hist;
 
     // This is a SoA view which itself gathers non-owning pointers to the data owned above (in TrackingRecHit2DAlpaka instance).
     // This is used to access and modify data on GPU in a SoA format (TrackingRecHit2DSOAView),
     // while the data itself is owned here in the TrackingRecHit2DAlpaka instance.
-    AlpakaDeviceBuf<TrackingRecHit2DSOAView> m_view;
-    // Keep a host copy of the device view alive during the asynchronous copy
+    ::cms::alpakatools::device::unique_ptr<TrackingRecHit2DSOAView> m_view;
     AlpakaHostBuf<TrackingRecHit2DSOAView> m_view_h;
   };
 
diff --git a/src/alpaka/AlpakaDataFormats/ZVertexAlpaka.h b/src/alpaka/AlpakaDataFormats/ZVertexAlpaka.h
index d9cb43762..22c17a9f2 100644
--- a/src/alpaka/AlpakaDataFormats/ZVertexAlpaka.h
+++ b/src/alpaka/AlpakaDataFormats/ZVertexAlpaka.h
@@ -1,13 +1,13 @@
-#ifndef CUDADataFormatsVertexZVertexHeterogeneous_H
-#define CUDADataFormatsVertexZVertexHeterogeneous_H
+#ifndef AlpakaDataFormatsVertexZVertexHeterogeneous_H
+#define AlpakaDataFormatsVertexZVertexHeterogeneous_H
 
 #include "AlpakaDataFormats/ZVertexSoA.h"
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
-  using ZVertexAlpaka = AlpakaDeviceBuf<ZVertexSoA>;
-  using ZVertexHost = AlpakaHostBuf<ZVertexSoA>;
+  using ZVertexAlpaka = ::cms::alpakatools::device::unique_ptr<ZVertexSoA>;
+  using ZVertexHost = ::cms::alpakatools::host::unique_ptr<ZVertexSoA>;
 
   // NB: ANOTHER OPTION IS TO CREATE A HeterogeneousSoA class,
   // with a AlpakaDeviceBuf<ZVertexSoA> as a data member
diff --git a/src/alpaka/CondFormats/PixelCPEFast.h b/src/alpaka/CondFormats/PixelCPEFast.h
index 4b910f1b1..4cf7250b3 100644
--- a/src/alpaka/CondFormats/PixelCPEFast.h
+++ b/src/alpaka/CondFormats/PixelCPEFast.h
@@ -8,16 +8,17 @@
 #include "AlpakaCore/alpakaCommon.h"
 #include "AlpakaCore/ESProduct.h"
 #include "AlpakaCore/alpakaMemoryHelper.h"
+#include "AlpakaCore/device_unique_ptr.h"
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
   class PixelCPEFast {
   public:
-    PixelCPEFast(AlpakaDeviceBuf<pixelCPEforGPU::CommonParams> commonParams,
-                 AlpakaDeviceBuf<pixelCPEforGPU::DetParams> detParams,
-                 AlpakaDeviceBuf<pixelCPEforGPU::LayerGeometry> layerGeometry,
-                 AlpakaDeviceBuf<pixelCPEforGPU::AverageGeometry> averageGeometry,
-                 AlpakaDeviceBuf<pixelCPEforGPU::ParamsOnGPU> params)
+    PixelCPEFast(cms::alpakatools::device::unique_ptr<pixelCPEforGPU::CommonParams> commonParams,
+                 cms::alpakatools::device::unique_ptr<pixelCPEforGPU::DetParams> detParams,
+                 cms::alpakatools::device::unique_ptr<pixelCPEforGPU::LayerGeometry> layerGeometry,
+                 cms::alpakatools::device::unique_ptr<pixelCPEforGPU::AverageGeometry> averageGeometry,
+                 cms::alpakatools::device::unique_ptr<pixelCPEforGPU::ParamsOnGPU> params)
         : m_commonParams(std::move(commonParams)),
           m_detParams(std::move(detParams)),
           m_layerGeometry(std::move(layerGeometry)),
@@ -26,80 +27,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
     ~PixelCPEFast() = default;
 
-    pixelCPEforGPU::ParamsOnGPU const *params() const { return alpaka::getPtrNative(m_params); }
-
-#ifdef TODO
-    template <typename Data>
-    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ESProduct<Data> getGPUData() const {
-      ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ESProduct<Data> gpuData;
-      return gpuData;
-    }
-
-    // The return value can only be used safely in kernels launched on
-    // the same cudaStream, or after cudaStreamSynchronize.
-    const pixelCPEforGPU::ParamsOnGPU getGPUProductAsync(Queue queue) const {
-      auto gpuData = getGPUData<GPUData>();
-
-      auto const& data = gpuData_.dataForDeviceAsync(queue, [this](GPUData &data, Queue queue) {
-        using namespace ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE;
-        // and now copy to device...
-        auto cParams = allocDeviceBuf<pixelCPEforGPU::CommonParams>(1u);
-        data.h_paramsOnGPU.m_commonParams = alpaka::getPtrNative(cParams);
-
-        uint32_t size_detParams = alpaka::extent::getExtentVec(this->m_detParams)[0u];
-        auto detParams = allocDeviceBuf<pixelCPEforGPU::DetParams>(size_detParams);
-        data.h_paramsOnGPU.m_detParams = alpaka::getPtrNative(detParams);
-
-        auto avgGeom = allocDeviceBuf<pixelCPEforGPU::AverageGeometry>(1u);
-        data.h_paramsOnGPU.m_averageGeometry = alpaka::getPtrNative(avgGeom);
-
-        auto layerGeom = allocDeviceBuf<pixelCPEforGPU::LayerGeometry>(1u);
-        data.h_paramsOnGPU.m_layerGeometry = alpaka::getPtrNative(layerGeom);
-
-        auto parGPU = allocDeviceBuf<pixelCPEforGPU::ParamsOnGPU>(1u);
-        data.d_paramsOnGPU = alpaka::getPtrNative(parGPU);
-
-        alpaka::prepareForAsyncCopy(cParams);
-        alpaka::prepareForAsyncCopy(detParams);
-        alpaka::prepareForAsyncCopy(avgGeom);
-        alpaka::prepareForAsyncCopy(layerGeom);
-        alpaka::prepareForAsyncCopy(parGPU);
-
-        alpaka::memcpy(queue, parGPU, data.h_paramsOnGPU, 1u);
-        alpaka::memcpy(queue, data.h_paramsOnGPU.m_commonParams, this->m_commonParams, 1u);
-        alpaka::memcpy(queue, data.h_paramsOnGPU.m_averageGeometry, this->m_averageGeometry, 1u);
-        alpaka::memcpy(queue, data.h_paramsOnGPU.m_layerGeometry, this->m_layerGeometry, 1u);
-        alpaka::memcpy(queue, data.h_paramsOnGPU.m_detParams, alpaka::getPtrNative(this->m_detParams), size_detParams);
-      });
-#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-      return *data.d_paramsOnGPU;
-#else
-      return data.h_paramsOnGPU;
-#endif
-    }
-#endif  // TODO
+    pixelCPEforGPU::ParamsOnGPU const* params() const { return m_params.get(); }
 
   private:
-    AlpakaDeviceBuf<pixelCPEforGPU::CommonParams> m_commonParams;
-    AlpakaDeviceBuf<pixelCPEforGPU::DetParams> m_detParams;
-    AlpakaDeviceBuf<pixelCPEforGPU::LayerGeometry> m_layerGeometry;
-    AlpakaDeviceBuf<pixelCPEforGPU::AverageGeometry> m_averageGeometry;
-    AlpakaDeviceBuf<pixelCPEforGPU::ParamsOnGPU> m_params;
-
-#ifdef TODO
-    struct GPUData {
-      // not needed if not used on CPU...
-      pixelCPEforGPU::ParamsOnGPU h_paramsOnGPU;
-      pixelCPEforGPU::ParamsOnGPU *d_paramsOnGPU = nullptr;  // copy of the above on the Device
-      ~GPUData() {
-        if (d_paramsOnGPU != nullptr) {
-          //cudafree
-        }
-      }
-    };
-
-    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ESProduct<GPUData> gpuData_;
-#endif  // TODO
+    cms::alpakatools::device::unique_ptr<pixelCPEforGPU::CommonParams> m_commonParams;
+    cms::alpakatools::device::unique_ptr<pixelCPEforGPU::DetParams> m_detParams;
+    cms::alpakatools::device::unique_ptr<pixelCPEforGPU::LayerGeometry> m_layerGeometry;
+    cms::alpakatools::device::unique_ptr<pixelCPEforGPU::AverageGeometry> m_averageGeometry;
+    cms::alpakatools::device::unique_ptr<pixelCPEforGPU::ParamsOnGPU> m_params;
   };
 
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/CondFormats/SiPixelFedCablingMapGPUWrapper.h b/src/alpaka/CondFormats/SiPixelFedCablingMapGPUWrapper.h
index 4033ef5a9..4a840cd46 100644
--- a/src/alpaka/CondFormats/SiPixelFedCablingMapGPUWrapper.h
+++ b/src/alpaka/CondFormats/SiPixelFedCablingMapGPUWrapper.h
@@ -3,24 +3,24 @@
 
 #include "CondFormats/SiPixelFedCablingMapGPU.h"
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
   class SiPixelFedCablingMapGPUWrapper {
   public:
-    using CablingMapDeviceBuf = AlpakaDeviceBuf<SiPixelFedCablingMapGPU>;
+    using CablingMapDevicePtr = cms::alpakatools::device::unique_ptr<SiPixelFedCablingMapGPU>;
 
-    explicit SiPixelFedCablingMapGPUWrapper(CablingMapDeviceBuf cablingMap, bool quality)
+    explicit SiPixelFedCablingMapGPUWrapper(CablingMapDevicePtr cablingMap, bool quality)
         : cablingMapDevice_{std::move(cablingMap)}, hasQuality_{quality} {}
     ~SiPixelFedCablingMapGPUWrapper() = default;
 
     bool hasQuality() const { return hasQuality_; }
 
-    const SiPixelFedCablingMapGPU* cablingMap() const { return alpaka::getPtrNative(cablingMapDevice_); }
+    const SiPixelFedCablingMapGPU* cablingMap() const { return cablingMapDevice_.get(); }
 
   private:
-    CablingMapDeviceBuf cablingMapDevice_;
+    CablingMapDevicePtr cablingMapDevice_;
     bool hasQuality_;
   };
 
diff --git a/src/alpaka/CondFormats/SiPixelGainForHLTonGPU.h b/src/alpaka/CondFormats/SiPixelGainForHLTonGPU.h
index afd3f4482..bf205185f 100644
--- a/src/alpaka/CondFormats/SiPixelGainForHLTonGPU.h
+++ b/src/alpaka/CondFormats/SiPixelGainForHLTonGPU.h
@@ -1,7 +1,7 @@
 #ifndef CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h
 #define CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
@@ -29,9 +29,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     using Range = std::pair<uint32_t, uint32_t>;
     using RangeAndCols = std::pair<Range, int>;
 
-    SiPixelGainForHLTonGPU(AlpakaDeviceBuf<DecodingStructure> ped,
-                           AlpakaDeviceBuf<RangeAndCols> rc,
-                           AlpakaDeviceBuf<Fields> f)
+    SiPixelGainForHLTonGPU(cms::alpakatools::device::unique_ptr<DecodingStructure> ped,
+                           cms::alpakatools::device::unique_ptr<RangeAndCols> rc,
+                           cms::alpakatools::device::unique_ptr<Fields> f)
         : v_pedestals_(std::move(ped)), rangeAndCols_(std::move(rc)), fields_(std::move(f)){};
 
     ALPAKA_FN_INLINE ALPAKA_FN_ACC static std::pair<float, float> getPedAndGain(const DecodingStructure* v_pedestals,
@@ -72,14 +72,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
       return ped * fields->pedPrecision + fields->minPed_;
     }
 
-    ALPAKA_FN_HOST const DecodingStructure* getVpedestals() const { return alpaka::getPtrNative(v_pedestals_); }
-    ALPAKA_FN_HOST const RangeAndCols* getRangeAndCols() const { return alpaka::getPtrNative(rangeAndCols_); }
-    ALPAKA_FN_HOST const Fields* getFields() const { return alpaka::getPtrNative(fields_); }
+    ALPAKA_FN_HOST const DecodingStructure* getVpedestals() const { return v_pedestals_.get(); }
+    ALPAKA_FN_HOST const RangeAndCols* getRangeAndCols() const { return rangeAndCols_.get(); }
+    ALPAKA_FN_HOST const Fields* getFields() const { return fields_.get(); }
 
   private:
-    AlpakaDeviceBuf<DecodingStructure> v_pedestals_;
-    AlpakaDeviceBuf<RangeAndCols> rangeAndCols_;
-    AlpakaDeviceBuf<Fields> fields_;
+    cms::alpakatools::device::unique_ptr<DecodingStructure> v_pedestals_;
+    cms::alpakatools::device::unique_ptr<RangeAndCols> rangeAndCols_;
+    cms::alpakatools::device::unique_ptr<Fields> fields_;
   };
 
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/plugin-PixelTriplets/alpaka/BrokenLineFitOnGPU.cc b/src/alpaka/plugin-PixelTriplets/alpaka/BrokenLineFitOnGPU.cc
index d3fada0b0..e5d7f0f55 100644
--- a/src/alpaka/plugin-PixelTriplets/alpaka/BrokenLineFitOnGPU.cc
+++ b/src/alpaka/plugin-PixelTriplets/alpaka/BrokenLineFitOnGPU.cc
@@ -1,6 +1,6 @@
 #include "BrokenLineFitOnGPU.h"
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
@@ -18,14 +18,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
         Vec1D::all(numberOfBlocks / 4), Vec1D::all(blockSize));
 
     //  Fit internals
-    auto hitsGPU_ = cms::alpakatools::allocDeviceBuf<double>(
-        alpaka::getDev(queue), maxNumberOfConcurrentFits_ * sizeof(Rfit::Matrix3xNd<4>) / sizeof(double));
+    auto hitsGPU_ = cms::alpakatools::make_device_unique<double>(maxNumberOfConcurrentFits_ *
+                                                                 sizeof(Rfit::Matrix3xNd<4>) / sizeof(double));
 
-    auto hits_geGPU_ = cms::alpakatools::allocDeviceBuf<float>(
-        alpaka::getDev(queue), maxNumberOfConcurrentFits_ * sizeof(Rfit::Matrix6x4f) / sizeof(float));
+    auto hits_geGPU_ = cms::alpakatools::make_device_unique<float>(maxNumberOfConcurrentFits_ *
+                                                                   sizeof(Rfit::Matrix6x4f) / sizeof(float));
 
-    auto fast_fit_resultsGPU_ = cms::alpakatools::allocDeviceBuf<double>(
-        alpaka::getDev(queue), maxNumberOfConcurrentFits_ * sizeof(Rfit::Vector4d) / sizeof(double));
+    auto fast_fit_resultsGPU_ = cms::alpakatools::make_device_unique<double>(maxNumberOfConcurrentFits_ *
+                                                                             sizeof(Rfit::Vector4d) / sizeof(double));
 
     for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
       // fit triplets
@@ -35,9 +35,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                       tuples_d,
                                                       tupleMultiplicity_d,
                                                       hv,
-                                                      alpaka::getPtrNative(hitsGPU_),
-                                                      alpaka::getPtrNative(hits_geGPU_),
-                                                      alpaka::getPtrNative(fast_fit_resultsGPU_),
+                                                      hitsGPU_.get(),
+                                                      hits_geGPU_.get(),
+                                                      fast_fit_resultsGPU_.get(),
                                                       3,
                                                       offset));
 
@@ -47,9 +47,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                       tupleMultiplicity_d,
                                                       bField_,
                                                       outputSoa_d,
-                                                      alpaka::getPtrNative(hitsGPU_),
-                                                      alpaka::getPtrNative(hits_geGPU_),
-                                                      alpaka::getPtrNative(fast_fit_resultsGPU_),
+                                                      hitsGPU_.get(),
+                                                      hits_geGPU_.get(),
+                                                      fast_fit_resultsGPU_.get(),
                                                       3,
                                                       offset));
 
@@ -60,9 +60,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                       tuples_d,
                                                       tupleMultiplicity_d,
                                                       hv,
-                                                      alpaka::getPtrNative(hitsGPU_),
-                                                      alpaka::getPtrNative(hits_geGPU_),
-                                                      alpaka::getPtrNative(fast_fit_resultsGPU_),
+                                                      hitsGPU_.get(),
+                                                      hits_geGPU_.get(),
+                                                      fast_fit_resultsGPU_.get(),
                                                       4,
                                                       offset));
 
@@ -72,9 +72,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                       tupleMultiplicity_d,
                                                       bField_,
                                                       outputSoa_d,
-                                                      alpaka::getPtrNative(hitsGPU_),
-                                                      alpaka::getPtrNative(hits_geGPU_),
-                                                      alpaka::getPtrNative(fast_fit_resultsGPU_),
+                                                      hitsGPU_.get(),
+                                                      hits_geGPU_.get(),
+                                                      fast_fit_resultsGPU_.get(),
                                                       4,
                                                       offset));
 
@@ -86,9 +86,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                         tuples_d,
                                                         tupleMultiplicity_d,
                                                         hv,
-                                                        alpaka::getPtrNative(hitsGPU_),
-                                                        alpaka::getPtrNative(hits_geGPU_),
-                                                        alpaka::getPtrNative(fast_fit_resultsGPU_),
+                                                        hitsGPU_.get(),
+                                                        hits_geGPU_.get(),
+                                                        fast_fit_resultsGPU_.get(),
                                                         5,
                                                         offset));
 
@@ -98,9 +98,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                         tupleMultiplicity_d,
                                                         bField_,
                                                         outputSoa_d,
-                                                        alpaka::getPtrNative(hitsGPU_),
-                                                        alpaka::getPtrNative(hits_geGPU_),
-                                                        alpaka::getPtrNative(fast_fit_resultsGPU_),
+                                                        hitsGPU_.get(),
+                                                        hits_geGPU_.get(),
+                                                        fast_fit_resultsGPU_.get(),
                                                         5,
                                                         offset));
         alpaka::wait(queue);
@@ -112,9 +112,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                         tuples_d,
                                                         tupleMultiplicity_d,
                                                         hv,
-                                                        alpaka::getPtrNative(hitsGPU_),
-                                                        alpaka::getPtrNative(hits_geGPU_),
-                                                        alpaka::getPtrNative(fast_fit_resultsGPU_),
+                                                        hitsGPU_.get(),
+                                                        hits_geGPU_.get(),
+                                                        fast_fit_resultsGPU_.get(),
                                                         5,
                                                         offset));
 
@@ -124,9 +124,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                         tupleMultiplicity_d,
                                                         bField_,
                                                         outputSoa_d,
-                                                        alpaka::getPtrNative(hitsGPU_),
-                                                        alpaka::getPtrNative(hits_geGPU_),
-                                                        alpaka::getPtrNative(fast_fit_resultsGPU_),
+                                                        hitsGPU_.get(),
+                                                        hits_geGPU_.get(),
+                                                        fast_fit_resultsGPU_.get(),
                                                         5,
                                                         offset));
         alpaka::wait(queue);
diff --git a/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorKernels.cc b/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorKernels.cc
index f542cf940..2968ecc93 100644
--- a/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorKernels.cc
+++ b/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorKernels.cc
@@ -48,23 +48,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     const Vec2D blks(numberOfBlocks, 1u);
     const Vec2D thrs(blockSize, stride);
     const WorkDiv2D kernelConnectWorkDiv = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(blks, thrs);
-    alpaka::enqueue(queue,
-                    alpaka::createTaskKernel<Acc2D>(
-                        kernelConnectWorkDiv,
-                        kernel_connect(),
-                        alpaka::getPtrNative(device_hitTuple_apc_),
-                        alpaka::getPtrNative(device_hitToTuple_apc_),  // needed only to be reset, ready for next kernel
-                        hh.view(),
-                        alpaka::getPtrNative(device_theCells_),
-                        alpaka::getPtrNative(device_nCells_),
-                        alpaka::getPtrNative(device_theCellNeighbors_),
-                        alpaka::getPtrNative(device_isOuterHitOfCell_),
-                        m_params.hardCurvCut_,
-                        m_params.ptmin_,
-                        m_params.CAThetaCutBarrel_,
-                        m_params.CAThetaCutForward_,
-                        m_params.dcaCutInnerTriplet_,
-                        m_params.dcaCutOuterTriplet_));
+    alpaka::enqueue(
+        queue,
+        alpaka::createTaskKernel<Acc2D>(kernelConnectWorkDiv,
+                                        kernel_connect(),
+                                        device_hitTuple_apc_.get(),
+                                        device_hitToTuple_apc_.get(),  // needed only to be reset, ready for next kernel
+                                        hh.view(),
+                                        device_theCells_.get(),
+                                        device_nCells_.get(),
+                                        device_theCellNeighbors_.get(),
+                                        device_isOuterHitOfCell_.get(),
+                                        m_params.hardCurvCut_,
+                                        m_params.ptmin_,
+                                        m_params.CAThetaCutBarrel_,
+                                        m_params.CAThetaCutForward_,
+                                        m_params.dcaCutInnerTriplet_,
+                                        m_params.dcaCutOuterTriplet_));
 
     if (nhits > 1 && m_params.earlyFishbone_) {
       const uint32_t nthTot = 128;
@@ -79,9 +79,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                       alpaka::createTaskKernel<Acc2D>(fishboneWorkDiv,
                                                       gpuPixelDoublets::fishbone(),
                                                       hh.view(),
-                                                      alpaka::getPtrNative(device_theCells_),
-                                                      alpaka::getPtrNative(device_nCells_),
-                                                      alpaka::getPtrNative(device_isOuterHitOfCell_),
+                                                      device_theCells_.get(),
+                                                      device_nCells_.get(),
+                                                      device_isOuterHitOfCell_.get(),
                                                       nhits,
                                                       false));
       alpaka::wait(queue);
@@ -95,21 +95,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                     alpaka::createTaskKernel<Acc1D>(workDiv1D,
                                                     kernel_find_ntuplets(),
                                                     hh.view(),
-                                                    alpaka::getPtrNative(device_theCells_),
-                                                    alpaka::getPtrNative(device_nCells_),
-                                                    alpaka::getPtrNative(device_theCellTracks_),
+                                                    device_theCells_.get(),
+                                                    device_nCells_.get(),
+                                                    device_theCellTracks_.get(),
                                                     tuples_d,
-                                                    alpaka::getPtrNative(device_hitTuple_apc_),
+                                                    device_hitTuple_apc_.get(),
                                                     quality_d,
                                                     m_params.minHitsPerNtuplet_));
 
     if (m_params.doStats_) {
       alpaka::enqueue(queue,
-                      alpaka::createTaskKernel<Acc1D>(workDiv1D,
-                                                      kernel_mark_used(),
-                                                      hh.view(),
-                                                      alpaka::getPtrNative(device_theCells_),
-                                                      alpaka::getPtrNative(device_nCells_)));
+                      alpaka::createTaskKernel<Acc1D>(
+                          workDiv1D, kernel_mark_used(), hh.view(), device_theCells_.get(), device_nCells_.get()));
     }
 
 #ifdef GPU_DEBUG
@@ -120,10 +117,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     numberOfBlocks = (HitContainer::totbins() + blockSize - 1) / blockSize;
     workDiv1D = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(Vec1D::all(numberOfBlocks),
                                                                                Vec1D::all(blockSize));
-    alpaka::enqueue(
-        queue,
-        alpaka::createTaskKernel<Acc1D>(
-            workDiv1D, ::cms::alpakatools::finalizeBulk(), alpaka::getPtrNative(device_hitTuple_apc_), tuples_d));
+    alpaka::enqueue(queue,
+                    alpaka::createTaskKernel<Acc1D>(
+                        workDiv1D, cms::alpakatools::finalizeBulk(), device_hitTuple_apc_.get(), tuples_d));
 
     // remove duplicates (tracks that share a doublet)
     numberOfBlocks = (3 * m_params.maxNumberOfDoublets_ / 4 + blockSize - 1) / blockSize;
@@ -132,8 +128,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     alpaka::enqueue(queue,
                     alpaka::createTaskKernel<Acc1D>(workDiv1D,
                                                     kernel_earlyDuplicateRemover(),
-                                                    alpaka::getPtrNative(device_theCells_),
-                                                    alpaka::getPtrNative(device_nCells_),
+                                                    device_theCells_.get(),
+                                                    device_nCells_.get(),
                                                     tuples_d,
                                                     quality_d));
 
@@ -142,20 +138,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     workDiv1D = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(Vec1D::all(numberOfBlocks),
                                                                                Vec1D::all(blockSize));
     alpaka::enqueue(queue,
-                    alpaka::createTaskKernel<Acc1D>(workDiv1D,
-                                                    kernel_countMultiplicity(),
-                                                    tuples_d,
-                                                    quality_d,
-                                                    alpaka::getPtrNative(device_tupleMultiplicity_)));
+                    alpaka::createTaskKernel<Acc1D>(
+                        workDiv1D, kernel_countMultiplicity(), tuples_d, quality_d, device_tupleMultiplicity_.get()));
 
-    ::cms::alpakatools::launchFinalize(alpaka::getPtrNative(device_tupleMultiplicity_), queue);
+    cms::alpakatools::launchFinalize(device_tupleMultiplicity_.get(), queue);
 
     workDiv1D = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(Vec1D::all(numberOfBlocks),
                                                                                Vec1D::all(blockSize));
-    alpaka::enqueue(
-        queue,
-        alpaka::createTaskKernel<Acc1D>(
-            workDiv1D, kernel_fillMultiplicity(), tuples_d, quality_d, alpaka::getPtrNative(device_tupleMultiplicity_)));
+    alpaka::enqueue(queue,
+                    alpaka::createTaskKernel<Acc1D>(
+                        workDiv1D, kernel_fillMultiplicity(), tuples_d, quality_d, device_tupleMultiplicity_.get()));
 
     if (nhits > 1 && m_params.lateFishbone_) {
       const uint32_t nthTot = 128;
@@ -170,9 +162,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                       alpaka::createTaskKernel<Acc2D>(workDiv2D,
                                                       gpuPixelDoublets::fishbone(),
                                                       hh.view(),
-                                                      alpaka::getPtrNative(device_theCells_),
-                                                      alpaka::getPtrNative(device_nCells_),
-                                                      alpaka::getPtrNative(device_isOuterHitOfCell_),
+                                                      device_theCells_.get(),
+                                                      device_nCells_.get(),
+                                                      device_isOuterHitOfCell_.get(),
                                                       nhits,
                                                       true));
       alpaka::wait(queue);
@@ -186,16 +178,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                       alpaka::createTaskKernel<Acc1D>(workDiv1D,
                                                       kernel_checkOverflows(),
                                                       tuples_d,
-                                                      alpaka::getPtrNative(device_tupleMultiplicity_),
-                                                      alpaka::getPtrNative(device_hitTuple_apc_),
-                                                      alpaka::getPtrNative(device_theCells_),
-                                                      alpaka::getPtrNative(device_nCells_),
-                                                      alpaka::getPtrNative(device_theCellNeighbors_),
-                                                      alpaka::getPtrNative(device_theCellTracks_),
-                                                      alpaka::getPtrNative(device_isOuterHitOfCell_),
+                                                      device_tupleMultiplicity_.get(),
+                                                      device_hitTuple_apc_.get(),
+                                                      device_theCells_.get(),
+                                                      device_nCells_.get(),
+                                                      device_theCellNeighbors_.get(),
+                                                      device_theCellTracks_.get(),
+                                                      device_isOuterHitOfCell_.get(),
                                                       nhits,
                                                       m_params.maxNumberOfDoublets_,
-                                                      alpaka::getPtrNative(counters_)));
+                                                      counters_.get()));
       alpaka::wait(queue);
     }
 #ifdef GPU_DEBUG
@@ -217,23 +209,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     alpaka::wait(queue);
 #endif
 
-    ALPAKA_ASSERT_OFFLOAD(alpaka::getPtrNative(device_isOuterHitOfCell_));
+    ALPAKA_ASSERT_OFFLOAD(device_isOuterHitOfCell_.get());
 
     {
       int threadsPerBlock = 128;
-      // at least one block!
+      // at least one block!device_nCells_
       int blocks = (std::max(1U, nhits) + threadsPerBlock - 1) / threadsPerBlock;
       const WorkDiv1D workDiv1D = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(
           Vec1D::all(blocks), Vec1D::all(threadsPerBlock));
       alpaka::enqueue(queue,
                       alpaka::createTaskKernel<Acc1D>(workDiv1D,
                                                       gpuPixelDoublets::initDoublets(),
-                                                      alpaka::getPtrNative(device_isOuterHitOfCell_),
+                                                      device_isOuterHitOfCell_.get(),
                                                       nhits,
-                                                      alpaka::getPtrNative(device_theCellNeighbors_),
-                                                      alpaka::getPtrNative(device_theCellNeighborsContainer_),
-                                                      alpaka::getPtrNative(device_theCellTracks_),
-                                                      alpaka::getPtrNative(device_theCellTracksContainer_)));
+                                                      device_theCellNeighbors_.get(),
+                                                      device_theCellNeighborsContainer_.get(),
+                                                      device_theCellTracks_.get(),
+                                                      device_theCellTracksContainer_.get()));
       alpaka::wait(queue);
     }
 
@@ -262,12 +254,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     alpaka::enqueue(queue,
                     alpaka::createTaskKernel<Acc2D>(workDiv2D,
                                                     gpuPixelDoublets::getDoubletsFromHisto(),
-                                                    alpaka::getPtrNative(device_theCells_),
-                                                    alpaka::getPtrNative(device_nCells_),
-                                                    alpaka::getPtrNative(device_theCellNeighbors_),
-                                                    alpaka::getPtrNative(device_theCellTracks_),
+                                                    device_theCells_.get(),
+                                                    device_nCells_.get(),
+                                                    device_theCellNeighbors_.get(),
+                                                    device_theCellTracks_.get(),
                                                     hh.view(),
-                                                    alpaka::getPtrNative(device_isOuterHitOfCell_),
+                                                    device_isOuterHitOfCell_.get(),
                                                     nActualPairs,
                                                     m_params.idealConditions_,
                                                     m_params.doClusterCut_,
@@ -301,12 +293,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
       numberOfBlocks = (3 * m_params.maxNumberOfDoublets_ / 4 + blockSize - 1) / blockSize;
       workDiv1D = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(Vec1D::all(numberOfBlocks),
                                                                                  Vec1D::all(blockSize));
-      alpaka::enqueue(queue,
-                      alpaka::createTaskKernel<Acc1D>(workDiv1D,
-                                                      kernel_fishboneCleaner(),
-                                                      alpaka::getPtrNative(device_theCells_),
-                                                      alpaka::getPtrNative(device_nCells_),
-                                                      quality_d));
+      alpaka::enqueue(
+          queue,
+          alpaka::createTaskKernel<Acc1D>(
+              workDiv1D, kernel_fishboneCleaner(), device_theCells_.get(), device_nCells_.get(), quality_d));
       alpaka::wait(queue);
     }
 
@@ -314,32 +304,27 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     numberOfBlocks = (3 * m_params.maxNumberOfDoublets_ / 4 + blockSize - 1) / blockSize;
     workDiv1D = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(Vec1D::all(numberOfBlocks),
                                                                                Vec1D::all(blockSize));
-    alpaka::enqueue(queue,
-                    alpaka::createTaskKernel<Acc1D>(workDiv1D,
-                                                    kernel_fastDuplicateRemover(),
-                                                    alpaka::getPtrNative(device_theCells_),
-                                                    alpaka::getPtrNative(device_nCells_),
-                                                    tuples_d,
-                                                    tracks_d));
+    alpaka::enqueue(
+        queue,
+        alpaka::createTaskKernel<Acc1D>(
+            workDiv1D, kernel_fastDuplicateRemover(), device_theCells_.get(), device_nCells_.get(), tuples_d, tracks_d));
 
     if (m_params.minHitsPerNtuplet_ < 4 || m_params.doStats_) {
       // fill hit->track "map"
       numberOfBlocks = (3 * CAConstants::maxNumberOfQuadruplets() / 4 + blockSize - 1) / blockSize;
       workDiv1D = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(Vec1D::all(numberOfBlocks),
                                                                                  Vec1D::all(blockSize));
-      alpaka::enqueue(
-          queue,
-          alpaka::createTaskKernel<Acc1D>(
-              workDiv1D, kernel_countHitInTracks(), tuples_d, quality_d, alpaka::getPtrNative(device_hitToTuple_)));
+      alpaka::enqueue(queue,
+                      alpaka::createTaskKernel<Acc1D>(
+                          workDiv1D, kernel_countHitInTracks(), tuples_d, quality_d, device_hitToTuple_.get()));
 
-      ::cms::alpakatools::launchFinalize(alpaka::getPtrNative(device_hitToTuple_), queue);
+      cms::alpakatools::launchFinalize(device_hitToTuple_.get(), queue);
 
       workDiv1D = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(Vec1D::all(numberOfBlocks),
                                                                                  Vec1D::all(blockSize));
-      alpaka::enqueue(
-          queue,
-          alpaka::createTaskKernel<Acc1D>(
-              workDiv1D, kernel_fillHitInTracks(), tuples_d, quality_d, alpaka::getPtrNative(device_hitToTuple_)));
+      alpaka::enqueue(queue,
+                      alpaka::createTaskKernel<Acc1D>(
+                          workDiv1D, kernel_fillHitInTracks(), tuples_d, quality_d, device_hitToTuple_.get()));
       alpaka::wait(queue);
     }
     if (m_params.minHitsPerNtuplet_ < 4) {
@@ -347,14 +332,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
       numberOfBlocks = (HitToTuple::capacity() + blockSize - 1) / blockSize;
       workDiv1D = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(Vec1D::all(numberOfBlocks),
                                                                                  Vec1D::all(blockSize));
-      alpaka::enqueue(queue,
-                      alpaka::createTaskKernel<Acc1D>(workDiv1D,
-                                                      kernel_tripletCleaner(),
-                                                      hh.view(),
-                                                      tuples_d,
-                                                      tracks_d,
-                                                      quality_d,
-                                                      alpaka::getPtrNative(device_hitToTuple_)));
+      alpaka::enqueue(
+          queue,
+          alpaka::createTaskKernel<Acc1D>(
+              workDiv1D, kernel_tripletCleaner(), hh.view(), tuples_d, tracks_d, quality_d, device_hitToTuple_.get()));
       alpaka::wait(queue);
     }
 
@@ -364,17 +345,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
       workDiv1D = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(Vec1D::all(numberOfBlocks),
                                                                                  Vec1D::all(blockSize));
       alpaka::enqueue(queue,
-                      alpaka::createTaskKernel<Acc1D>(workDiv1D,
-                                                      kernel_doStatsForHitInTracks(),
-                                                      alpaka::getPtrNative(device_hitToTuple_),
-                                                      alpaka::getPtrNative(counters_)));
+                      alpaka::createTaskKernel<Acc1D>(
+                          workDiv1D, kernel_doStatsForHitInTracks(), device_hitToTuple_.get(), counters_.get()));
 
       numberOfBlocks = (3 * CAConstants::maxNumberOfQuadruplets() / 4 + blockSize - 1) / blockSize;
       workDiv1D = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(Vec1D::all(numberOfBlocks),
                                                                                  Vec1D::all(blockSize));
-      alpaka::enqueue(queue,
-                      alpaka::createTaskKernel<Acc1D>(
-                          workDiv1D, kernel_doStatsForTracks(), tuples_d, quality_d, alpaka::getPtrNative(counters_)));
+      alpaka::enqueue(
+          queue,
+          alpaka::createTaskKernel<Acc1D>(workDiv1D, kernel_doStatsForTracks(), tuples_d, quality_d, counters_.get()));
       alpaka::wait(queue);
     }
 #ifdef GPU_DEBUG
@@ -392,18 +371,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                     tuples_d,
                                                     tracks_d,
                                                     quality_d,
-                                                    alpaka::getPtrNative(device_hitToTuple_),
+                                                    device_hitToTuple_.get(),
                                                     100,
                                                     iev));
 #endif
   }
 
   void CAHitNtupletGeneratorKernels::printCounters(Queue &queue) {
-    const WorkDiv1D workDiv1D =
-        ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(Vec1D::all(1u), Vec1D::all(1u));
-    alpaka::enqueue(
-        queue, alpaka::createTaskKernel<Acc1D>(workDiv1D, kernel_printCounters(), alpaka::getPtrNative(counters_)));
-    alpaka::wait(queue);
+    // const WorkDiv1D workDiv1D = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(Vec1D::all(1u), Vec1D::all(1u));
+    // alpaka::enqueue(queue, alpaka::createTaskKernel<Acc1D>(workDiv1D, kernel_printCounters(), counters_.get()));
+    // alpaka::wait(queue);
   }
 
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorKernels.h b/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorKernels.h
index b23dac1c5..59fb0db7b 100644
--- a/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorKernels.h
+++ b/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorKernels.h
@@ -1,6 +1,7 @@
 #ifndef RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h
 #define RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h
 
+#include "AlpakaCore/device_unique_ptr.h"
 #include "AlpakaDataFormats/PixelTrackAlpaka.h"
 #include "GPUCACell.h"
 
@@ -160,43 +161,41 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
           //////////////////////////////////////////////////////////
           // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER)
           //////////////////////////////////////////////////////////
-          counters_{cms::alpakatools::allocDeviceBuf<Counters>(alpaka::getDev(queue), 1u)},
+          counters_{cms::alpakatools::make_device_unique<Counters>(1u)},
 
-          device_hitToTuple_{cms::alpakatools::allocDeviceBuf<HitToTuple>(alpaka::getDev(queue), 1u)},
-          device_tupleMultiplicity_{cms::alpakatools::allocDeviceBuf<TupleMultiplicity>(alpaka::getDev(queue), 1u)},
+          device_hitToTuple_{cms::alpakatools::make_device_unique<HitToTuple>(1u)},
+          device_tupleMultiplicity_{cms::alpakatools::make_device_unique<TupleMultiplicity>(1u)},
 
-          device_theCells_{
-              cms::alpakatools::allocDeviceBuf<GPUCACell>(alpaka::getDev(queue), params.maxNumberOfDoublets_)},
+          device_theCells_{cms::alpakatools::make_device_unique<GPUCACell>(params.maxNumberOfDoublets_)},
           // in principle we can use "nhits" to heuristically dimension the workspace...
           device_isOuterHitOfCell_{
-              cms::alpakatools::allocDeviceBuf<GPUCACell::OuterHitOfCell>(alpaka::getDev(queue), std::max(1U, nhits))},
-
-          device_theCellNeighbors_{
-              cms::alpakatools::allocDeviceBuf<CAConstants::CellNeighborsVector>(alpaka::getDev(queue), 1u)},
-          device_theCellTracks_{
-              cms::alpakatools::allocDeviceBuf<CAConstants::CellTracksVector>(alpaka::getDev(queue), 1u)},
-
-          //cellStorage_{cms::alpakatools::allocDeviceBuf<unsigned char>(alpaka::getDev(queue), CAConstants::maxNumOfActiveDoublets() * sizeof(GPUCACell::CellNeighbors) + CAConstants::maxNumOfActiveDoublets() * sizeof(GPUCACell::CellTracks))},
-          device_theCellNeighborsContainer_{cms::alpakatools::allocDeviceBuf<CAConstants::CellNeighbors>(
-              alpaka::getDev(queue), CAConstants::maxNumOfActiveDoublets())},
-          device_theCellTracksContainer_{cms::alpakatools::allocDeviceBuf<CAConstants::CellTracks>(
-              alpaka::getDev(queue), CAConstants::maxNumOfActiveDoublets())},
-
-          //device_storage_{cms::alpakatools::allocDeviceBuf<::ALPAKA_ACCELERATOR_NAMESPACE::cmscuda::AtomicPairCounter::c_type>(alpaka::getDev(queue), 3u)},
-          //device_hitTuple_apc_ = (::cms::alpakatools::AtomicPairCounter*)device_storage_.get()},
-          //device_hitToTuple_apc_ = (::cms::alpakatools::AtomicPairCounter*)device_storage_.get() + 1;
+              cms::alpakatools::make_device_unique<GPUCACell::OuterHitOfCell>(std::max(1U, nhits))},
+
+          device_theCellNeighbors_{cms::alpakatools::make_device_unique<CAConstants::CellNeighborsVector>(1u)},
+          device_theCellTracks_{cms::alpakatools::make_device_unique<CAConstants::CellTracksVector>(1u)},
+
+          //cellStorage_{cms::alpakatools::allocDeviceBuf<unsigned char>(CAConstants::maxNumOfActiveDoublets() * sizeof(GPUCACell::CellNeighbors) + CAConstants::maxNumOfActiveDoublets() * sizeof(GPUCACell::CellTracks))},
+          device_theCellNeighborsContainer_{
+              cms::alpakatools::make_device_unique<CAConstants::CellNeighbors>(CAConstants::maxNumOfActiveDoublets())},
+          device_theCellTracksContainer_{
+              cms::alpakatools::make_device_unique<CAConstants::CellTracks>(CAConstants::maxNumOfActiveDoublets())},
+
+          //device_storage_{cms::alpakatools::allocDeviceBuf<cms::cuda::AtomicPairCounter::c_type>(3u)},
+          //device_hitTuple_apc_ = (cms::cuda::AtomicPairCounter*)device_storage_.get()},
+          //device_hitToTuple_apc_ = (cms::cuda::AtomicPairCounter*)device_storage_.get() + 1;
           //device_nCells_ = (uint32_t*)(device_storage_.get() + 2)},
-          device_hitTuple_apc_{
-              cms::alpakatools::allocDeviceBuf<::cms::alpakatools::AtomicPairCounter>(alpaka::getDev(queue), 1u)},
-          device_hitToTuple_apc_{
-              cms::alpakatools::allocDeviceBuf<::cms::alpakatools::AtomicPairCounter>(alpaka::getDev(queue), 1u)},
-          device_nCells_{cms::alpakatools::allocDeviceBuf<uint32_t>(alpaka::getDev(queue), 1u)} {
-      alpaka::memset(queue, counters_, 0, 1u);
+          device_hitTuple_apc_{cms::alpakatools::make_device_unique<cms::alpakatools::AtomicPairCounter>(1u)},
+          device_hitToTuple_apc_{cms::alpakatools::make_device_unique<cms::alpakatools::AtomicPairCounter>(1u)},
+          device_nCells_{cms::alpakatools::make_device_unique<uint32_t>(1u)} {
+      auto counters_view = cms::alpakatools::createDeviceView<Counters>(alpaka::getDev(queue), counters_.get(), 1u);
+      alpaka::memset(queue, counters_view, 0, 1u);
 
-      alpaka::memset(queue, device_nCells_, 0, 1u);
+      auto device_nCells_view =
+          cms::alpakatools::createDeviceView<uint32_t>(alpaka::getDev(queue), device_nCells_.get(), 1u);
+      alpaka::memset(queue, device_nCells_view, 0, 1u);
 
-      launchZero(alpaka::getPtrNative(device_tupleMultiplicity_), queue);
-      launchZero(alpaka::getPtrNative(device_hitToTuple_), queue);
+      launchZero(device_tupleMultiplicity_.get(), queue);
+      launchZero(device_hitToTuple_.get(), queue);
 
       // we may wish to keep it in the edm...
       alpaka::wait(queue);
@@ -204,7 +203,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
     ~CAHitNtupletGeneratorKernels() = default;
 
-    TupleMultiplicity const* tupleMultiplicity() const { return alpaka::getPtrNative(device_tupleMultiplicity_); }
+    TupleMultiplicity const* tupleMultiplicity() const { return device_tupleMultiplicity_.get(); }
 
     void launchKernels(HitsOnCPU const& hh, TkSoA* tuples_d, Queue& queue);
 
@@ -222,33 +221,38 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     // params
     Params const& m_params;
 
-    AlpakaDeviceBuf<Counters> counters_;  // NB: Counters: In legacy, sum of the stats of all events.
+    cms::alpakatools::device::unique_ptr<Counters> counters_;  // NB: Counters: In legacy, sum of the stats of all events.
     // Here instead, these stats are per event.
     // Does not matter much, as the stats are desactivated by default anyway, and are for debug only
     // (stats are not stored eventually, no interference with any result).
     // For debug, better to be able to see info per event that just a sum.
 
     // workspace
-    AlpakaDeviceBuf<HitToTuple> device_hitToTuple_;
-    AlpakaDeviceBuf<TupleMultiplicity> device_tupleMultiplicity_;
+    cms::alpakatools::device::unique_ptr<HitToTuple> device_hitToTuple_;
+    cms::alpakatools::device::unique_ptr<TupleMultiplicity> device_tupleMultiplicity_;
 
-    AlpakaDeviceBuf<GPUCACell> device_theCells_;  // NB: In legacy, was allocated inside buildDoublets.
-    AlpakaDeviceBuf<GPUCACell::OuterHitOfCell>
+    cms::alpakatools::device::unique_ptr<GPUCACell>
+        device_theCells_;  // NB: In legacy, was allocated inside buildDoublets.
+    cms::alpakatools::device::unique_ptr<GPUCACell::OuterHitOfCell>
         device_isOuterHitOfCell_;  // NB: In legacy, was allocated inside buildDoublets.
 
-    AlpakaDeviceBuf<CAConstants::CellNeighborsVector> device_theCellNeighbors_;
-    AlpakaDeviceBuf<CAConstants::CellTracksVector> device_theCellTracks_;
+    cms::alpakatools::device::unique_ptr<CAConstants::CellNeighborsVector> device_theCellNeighbors_;
+    cms::alpakatools::device::unique_ptr<CAConstants::CellTracksVector> device_theCellTracks_;
 
     // AlpakaDeviceBuf<unsigned char> cellStorage_; // NB: In legacy, was allocated inside buildDoublets.
     // NB: Here, data from cellstorage_ (legacy) directly owned by the following:
-    AlpakaDeviceBuf<CAConstants::CellNeighbors> device_theCellNeighborsContainer_;  // Was non-owning in legacy!
-    AlpakaDeviceBuf<CAConstants::CellTracks> device_theCellTracksContainer_;        // Was non-owning in legacy!
+    cms::alpakatools::device::unique_ptr<CAConstants::CellNeighbors>
+        device_theCellNeighborsContainer_;  // Was non-owning in legacy!
+    cms::alpakatools::device::unique_ptr<CAConstants::CellTracks>
+        device_theCellTracksContainer_;  // Was non-owning in legacy!
 
     // AlpakaDeviceBuf<::cms::alpakatools::AtomicPairCounter::c_type> device_storage_; // NB: In legacy
     // NB: Here, data from device_storage_ (legacy) directly owned by the following:
-    AlpakaDeviceBuf<::cms::alpakatools::AtomicPairCounter> device_hitTuple_apc_;    // Was non-owning in legacy!
-    AlpakaDeviceBuf<::cms::alpakatools::AtomicPairCounter> device_hitToTuple_apc_;  // Was non-owning in legacy!
-    AlpakaDeviceBuf<uint32_t> device_nCells_;                                       // Was non-owning in legacy!
+    cms::alpakatools::device::unique_ptr<cms::alpakatools::AtomicPairCounter>
+        device_hitTuple_apc_;  // Was non-owning in legacy!
+    cms::alpakatools::device::unique_ptr<cms::alpakatools::AtomicPairCounter>
+        device_hitToTuple_apc_;                                     // Was non-owning in legacy!
+    cms::alpakatools::device::unique_ptr<uint32_t> device_nCells_;  // Was non-owning in legacy!
   };
 
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorOnGPU.cc b/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorOnGPU.cc
index fdf2f4c8d..8cb499b84 100644
--- a/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorOnGPU.cc
+++ b/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorOnGPU.cc
@@ -7,6 +7,8 @@
 #include <functional>
 #include <vector>
 
+#include "AlpakaCore/device_unique_ptr.h"
+
 #include "Framework/Event.h"
 
 #include "CAHitNtupletGeneratorOnGPU.h"
@@ -91,8 +93,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
   PixelTrackAlpaka CAHitNtupletGeneratorOnGPU::makeTuplesAsync(TrackingRecHit2DAlpaka const& hits_d,
                                                                float bfield,
                                                                Queue& queue) const {
-    PixelTrackAlpaka tracks{cms::alpakatools::allocDeviceBuf<pixelTrack::TrackSoA>(alpaka::getDev(queue), 1u)};
-    auto* soa = alpaka::getPtrNative(tracks);
+    PixelTrackAlpaka tracks{cms::alpakatools::make_device_unique<pixelTrack::TrackSoA>(1u)};
+    auto* soa = tracks.get();
 
     CAHitNtupletGeneratorKernels kernels(m_params, hits_d.nHits(), queue);
     kernels.buildDoublets(hits_d, queue);
diff --git a/src/alpaka/plugin-PixelTriplets/alpaka/PixelTrackSoAFromAlpaka.cc b/src/alpaka/plugin-PixelTriplets/alpaka/PixelTrackSoAFromAlpaka.cc
index dc5208a0b..e142caa49 100644
--- a/src/alpaka/plugin-PixelTriplets/alpaka/PixelTrackSoAFromAlpaka.cc
+++ b/src/alpaka/plugin-PixelTriplets/alpaka/PixelTrackSoAFromAlpaka.cc
@@ -1,4 +1,4 @@
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/host_unique_ptr.h"
 #include "AlpakaDataFormats/PixelTrackAlpaka.h"
 
 #include "Framework/EventSetup.h"
@@ -75,9 +75,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
     auto const& inputData = iEvent.get(tokenAlpaka_);
-    auto outputData = ::cms::alpakatools::allocHostBuf<pixelTrack::TrackSoA>(1u);
+    auto outputData = ::cms::alpakatools::make_host_unique<pixelTrack::TrackSoA>(1u);
     ::cms::alpakatools::ScopedContextProduce<Queue> ctx{iEvent.streamID()};
-    alpaka::memcpy(ctx.stream(), outputData, inputData, 1u);
+    auto const inputDataView =
+        ::cms::alpakatools::createDeviceView<pixelTrack::TrackSoA>(alpaka::getDev(ctx.stream()), inputData.get(), 1u);
+    auto outputDataView = ::cms::alpakatools::createHostView<pixelTrack::TrackSoA>(outputData.get(), 1u);
+    alpaka::memcpy(ctx.stream(), outputDataView, inputDataView, 1u);
 
     // DO NOT  make a copy  (actually TWO....)
     ctx.emplace(iEvent, tokenSOA_, std::move(outputData));
diff --git a/src/alpaka/plugin-PixelTriplets/alpaka/RiemannFitOnGPU.cc b/src/alpaka/plugin-PixelTriplets/alpaka/RiemannFitOnGPU.cc
index 34b39b2b8..5492e127b 100644
--- a/src/alpaka/plugin-PixelTriplets/alpaka/RiemannFitOnGPU.cc
+++ b/src/alpaka/plugin-PixelTriplets/alpaka/RiemannFitOnGPU.cc
@@ -1,6 +1,6 @@
 #include "RiemannFitOnGPU.h"
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
@@ -18,21 +18,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
         Vec1D::all(numberOfBlocks / 4), Vec1D::all(blockSize));
 
     //  Fit internals
-    auto hitsGPU_ = cms::alpakatools::allocDeviceBuf<double>(
-        alpaka::getDev(queue), maxNumberOfConcurrentFits_ * sizeof(Rfit::Matrix3xNd<4>) / sizeof(double));
+    auto hitsGPU_ = cms::alpakatools::make_device_unique<double>(maxNumberOfConcurrentFits_ *
+                                                                 sizeof(Rfit::Matrix3xNd<4>) / sizeof(double));
 
-    auto hits_geGPU_ = cms::alpakatools::allocDeviceBuf<float>(
-        alpaka::getDev(queue), maxNumberOfConcurrentFits_ * sizeof(Rfit::Matrix6x4f) / sizeof(float));
+    auto hits_geGPU_ = cms::alpakatools::make_device_unique<float>(maxNumberOfConcurrentFits_ *
+                                                                   sizeof(Rfit::Matrix6x4f) / sizeof(float));
 
-    auto fast_fit_resultsGPU_ = cms::alpakatools::allocDeviceBuf<double>(
-        alpaka::getDev(queue), maxNumberOfConcurrentFits_ * sizeof(Rfit::Vector4d) / sizeof(double));
+    auto fast_fit_resultsGPU_ = cms::alpakatools::make_device_unique<double>(maxNumberOfConcurrentFits_ *
+                                                                             sizeof(Rfit::Vector4d) / sizeof(double));
 
     //auto circle_fit_resultsGPU_holder =
     //cms::cuda::make_device_unique<char[]>(maxNumberOfConcurrentFits_ * sizeof(Rfit::circle_fit), stream);
     //Rfit::circle_fit *circle_fit_resultsGPU_ = (Rfit::circle_fit *)(circle_fit_resultsGPU_holder.get());
-    //auto circle_fit_resultsGPU_holder = cms::alpakatools::allocDeviceBuf<char>(alpaka::getDev(queue), maxNumberOfConcurrentFits_ * sizeof(Rfit::circle_fit));
-    auto circle_fit_resultsGPU_ =
-        cms::alpakatools::allocDeviceBuf<Rfit::circle_fit>(alpaka::getDev(queue), maxNumberOfConcurrentFits_);
+    //auto circle_fit_resultsGPU_holder = cms::alpakatools::allocDeviceBuf<char>(maxNumberOfConcurrentFits_ * sizeof(Rfit::circle_fit));
+    auto circle_fit_resultsGPU_ = cms::alpakatools::make_device_unique<Rfit::circle_fit>(maxNumberOfConcurrentFits_);
 
     for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
       // triplets
@@ -43,9 +42,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                       tupleMultiplicity_d,
                                                       3,
                                                       hv,
-                                                      alpaka::getPtrNative(hitsGPU_),
-                                                      alpaka::getPtrNative(hits_geGPU_),
-                                                      alpaka::getPtrNative(fast_fit_resultsGPU_),
+                                                      hitsGPU_.get(),
+                                                      hits_geGPU_.get(),
+                                                      fast_fit_resultsGPU_.get(),
                                                       offset));
 
       alpaka::enqueue(queue,
@@ -54,10 +53,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                       tupleMultiplicity_d,
                                                       3,
                                                       bField_,
-                                                      alpaka::getPtrNative(hitsGPU_),
-                                                      alpaka::getPtrNative(hits_geGPU_),
-                                                      alpaka::getPtrNative(fast_fit_resultsGPU_),
-                                                      alpaka::getPtrNative(circle_fit_resultsGPU_),
+                                                      hitsGPU_.get(),
+                                                      hits_geGPU_.get(),
+                                                      fast_fit_resultsGPU_.get(),
+                                                      circle_fit_resultsGPU_.get(),
                                                       offset));
 
       alpaka::enqueue(queue,
@@ -67,10 +66,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                       3,
                                                       bField_,
                                                       outputSoa_d,
-                                                      alpaka::getPtrNative(hitsGPU_),
-                                                      alpaka::getPtrNative(hits_geGPU_),
-                                                      alpaka::getPtrNative(fast_fit_resultsGPU_),
-                                                      alpaka::getPtrNative(circle_fit_resultsGPU_),
+                                                      hitsGPU_.get(),
+                                                      hits_geGPU_.get(),
+                                                      fast_fit_resultsGPU_.get(),
+                                                      circle_fit_resultsGPU_.get(),
                                                       offset));
 
       // quads
@@ -81,9 +80,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                       tupleMultiplicity_d,
                                                       4,
                                                       hv,
-                                                      alpaka::getPtrNative(hitsGPU_),
-                                                      alpaka::getPtrNative(hits_geGPU_),
-                                                      alpaka::getPtrNative(fast_fit_resultsGPU_),
+                                                      hitsGPU_.get(),
+                                                      hits_geGPU_.get(),
+                                                      fast_fit_resultsGPU_.get(),
                                                       offset));
 
       alpaka::enqueue(queue,
@@ -92,10 +91,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                       tupleMultiplicity_d,
                                                       4,
                                                       bField_,
-                                                      alpaka::getPtrNative(hitsGPU_),
-                                                      alpaka::getPtrNative(hits_geGPU_),
-                                                      alpaka::getPtrNative(fast_fit_resultsGPU_),
-                                                      alpaka::getPtrNative(circle_fit_resultsGPU_),
+                                                      hitsGPU_.get(),
+                                                      hits_geGPU_.get(),
+                                                      fast_fit_resultsGPU_.get(),
+                                                      circle_fit_resultsGPU_.get(),
                                                       offset));
 
       alpaka::enqueue(queue,
@@ -105,10 +104,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                       4,
                                                       bField_,
                                                       outputSoa_d,
-                                                      alpaka::getPtrNative(hitsGPU_),
-                                                      alpaka::getPtrNative(hits_geGPU_),
-                                                      alpaka::getPtrNative(fast_fit_resultsGPU_),
-                                                      alpaka::getPtrNative(circle_fit_resultsGPU_),
+                                                      hitsGPU_.get(),
+                                                      hits_geGPU_.get(),
+                                                      fast_fit_resultsGPU_.get(),
+                                                      circle_fit_resultsGPU_.get(),
                                                       offset));
 
       if (fit5as4_) {
@@ -120,9 +119,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                         tupleMultiplicity_d,
                                                         5,
                                                         hv,
-                                                        alpaka::getPtrNative(hitsGPU_),
-                                                        alpaka::getPtrNative(hits_geGPU_),
-                                                        alpaka::getPtrNative(fast_fit_resultsGPU_),
+                                                        hitsGPU_.get(),
+                                                        hits_geGPU_.get(),
+                                                        fast_fit_resultsGPU_.get(),
                                                         offset));
 
         alpaka::enqueue(queue,
@@ -131,10 +130,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                         tupleMultiplicity_d,
                                                         5,
                                                         bField_,
-                                                        alpaka::getPtrNative(hitsGPU_),
-                                                        alpaka::getPtrNative(hits_geGPU_),
-                                                        alpaka::getPtrNative(fast_fit_resultsGPU_),
-                                                        alpaka::getPtrNative(circle_fit_resultsGPU_),
+                                                        hitsGPU_.get(),
+                                                        hits_geGPU_.get(),
+                                                        fast_fit_resultsGPU_.get(),
+                                                        circle_fit_resultsGPU_.get(),
                                                         offset));
 
         alpaka::enqueue(queue,
@@ -144,10 +143,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                         5,
                                                         bField_,
                                                         outputSoa_d,
-                                                        alpaka::getPtrNative(hitsGPU_),
-                                                        alpaka::getPtrNative(hits_geGPU_),
-                                                        alpaka::getPtrNative(fast_fit_resultsGPU_),
-                                                        alpaka::getPtrNative(circle_fit_resultsGPU_),
+                                                        hitsGPU_.get(),
+                                                        hits_geGPU_.get(),
+                                                        fast_fit_resultsGPU_.get(),
+                                                        circle_fit_resultsGPU_.get(),
                                                         offset));
         alpaka::wait(queue);
       } else {
@@ -159,9 +158,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                         tupleMultiplicity_d,
                                                         5,
                                                         hv,
-                                                        alpaka::getPtrNative(hitsGPU_),
-                                                        alpaka::getPtrNative(hits_geGPU_),
-                                                        alpaka::getPtrNative(fast_fit_resultsGPU_),
+                                                        hitsGPU_.get(),
+                                                        hits_geGPU_.get(),
+                                                        fast_fit_resultsGPU_.get(),
                                                         offset));
 
         alpaka::enqueue(queue,
@@ -170,10 +169,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                         tupleMultiplicity_d,
                                                         5,
                                                         bField_,
-                                                        alpaka::getPtrNative(hitsGPU_),
-                                                        alpaka::getPtrNative(hits_geGPU_),
-                                                        alpaka::getPtrNative(fast_fit_resultsGPU_),
-                                                        alpaka::getPtrNative(circle_fit_resultsGPU_),
+                                                        hitsGPU_.get(),
+                                                        hits_geGPU_.get(),
+                                                        fast_fit_resultsGPU_.get(),
+                                                        circle_fit_resultsGPU_.get(),
                                                         offset));
 
         alpaka::enqueue(queue,
@@ -183,10 +182,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                         5,
                                                         bField_,
                                                         outputSoa_d,
-                                                        alpaka::getPtrNative(hitsGPU_),
-                                                        alpaka::getPtrNative(hits_geGPU_),
-                                                        alpaka::getPtrNative(fast_fit_resultsGPU_),
-                                                        alpaka::getPtrNative(circle_fit_resultsGPU_),
+                                                        hitsGPU_.get(),
+                                                        hits_geGPU_.get(),
+                                                        fast_fit_resultsGPU_.get(),
+                                                        circle_fit_resultsGPU_.get(),
                                                         offset));
         alpaka::wait(queue);
       }
diff --git a/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexProducerAlpaka.cc b/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexProducerAlpaka.cc
index 0f77fbc6f..02b571b7b 100644
--- a/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexProducerAlpaka.cc
+++ b/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexProducerAlpaka.cc
@@ -48,8 +48,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
   {}
 
   void PixelVertexProducerAlpaka::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
-    auto const& tracksBuf = iEvent.get(tokenTrack_);
-    auto const tracks = alpaka::getPtrNative(tracksBuf);
+    auto const& tracksPtr = iEvent.get(tokenTrack_);
+    auto const tracks = tracksPtr.get();
 
     ::cms::alpakatools::ScopedContextProduce<Queue> ctx{iEvent.streamID()};
     ctx.emplace(iEvent, tokenVertex_, m_gpuAlgo.makeAsync(tracks, m_ptMin, ctx.stream()));
diff --git a/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexSoAFromAlpaka.cc b/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexSoAFromAlpaka.cc
index fe6cd2b17..4e7255f9b 100644
--- a/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexSoAFromAlpaka.cc
+++ b/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexSoAFromAlpaka.cc
@@ -1,6 +1,7 @@
 #include "AlpakaCore/ScopedContext.h"
 #include "AlpakaCore/alpakaCommon.h"
 #include "AlpakaCore/alpakaMemoryHelper.h"
+#include "AlpakaCore/host_unique_ptr.h"
 #include "AlpakaDataFormats/ZVertexAlpaka.h"
 #include "Framework/EDProducer.h"
 #include "Framework/Event.h"
@@ -57,9 +58,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
   void PixelVertexSoAFromAlpaka::produce(edm::Event& iEvent, edm::EventSetup const& iSetup) {
 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
     auto const& inputData = iEvent.get(tokenAlpaka_);
-    auto outputData = ::cms::alpakatools::allocHostBuf<ZVertexSoA>(1u);
+    auto outputData = ::cms::alpakatools::make_host_unique<ZVertexSoA>(1u);
     ::cms::alpakatools::ScopedContextProduce<Queue> ctx{iEvent.streamID()};
-    alpaka::memcpy(ctx.stream(), outputData, inputData, 1u);
+    auto const inputDataView =
+        ::cms::alpakatools::createDeviceView<ZVertexSoA>(alpaka::getDev(ctx.stream()), inputData.get(), 1u);
+    auto outputDataView = ::cms::alpakatools::createHostView<ZVertexSoA>(outputData.get(), 1u);
+    alpaka::memcpy(ctx.stream(), outputDataView, inputDataView, 1u);
 
     // No copies....
     ctx.emplace(iEvent, tokenSOA_, std::move(outputData));
diff --git a/src/alpaka/plugin-PixelVertexFinding/alpaka/gpuVertexFinder.cc b/src/alpaka/plugin-PixelVertexFinding/alpaka/gpuVertexFinder.cc
index 600756040..0ab97ec62 100644
--- a/src/alpaka/plugin-PixelVertexFinding/alpaka/gpuVertexFinder.cc
+++ b/src/alpaka/plugin-PixelVertexFinding/alpaka/gpuVertexFinder.cc
@@ -1,4 +1,4 @@
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
 
 #include "gpuVertexFinder.h"
 #include "gpuClusterTracksByDensity.h"
@@ -107,12 +107,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
       // std::cout << "producing Vertices on GPU" << std::endl;
       ALPAKA_ASSERT_OFFLOAD(tksoa);
 
-      ZVertexAlpaka vertices{cms::alpakatools::allocDeviceBuf<ZVertexSoA>(alpaka::getDev(queue), 1u)};
-      auto* soa = alpaka::getPtrNative(vertices);
+      ZVertexAlpaka vertices{::cms::alpakatools::make_device_unique<ZVertexSoA>(1u)};
+      auto* soa = vertices.get();
       ALPAKA_ASSERT_OFFLOAD(soa);
 
-      auto ws_dBuf{cms::alpakatools::allocDeviceBuf<WorkSpace>(alpaka::getDev(queue), 1u)};
-      auto ws_d = alpaka::getPtrNative(ws_dBuf);
+      auto ws_d = ws_dPtr.get();
 
       auto nvFinalVerticesView = cms::alpakatools::createDeviceView<uint32_t>(alpaka::getDev(queue), &soa->nvFinal, 1u);
       alpaka::memset(queue, nvFinalVerticesView, 0, 1u);
diff --git a/src/alpaka/plugin-PixelVertexFinding/alpaka/gpuVertexFinder.h b/src/alpaka/plugin-PixelVertexFinding/alpaka/gpuVertexFinder.h
index 909b2bf49..b877b26ef 100644
--- a/src/alpaka/plugin-PixelVertexFinding/alpaka/gpuVertexFinder.h
+++ b/src/alpaka/plugin-PixelVertexFinding/alpaka/gpuVertexFinder.h
@@ -2,7 +2,7 @@
 #ifndef RecoPixelVertexing_PixelVertexFinding_src_gpuVertexFinder_h
 #define RecoPixelVertexing_PixelVertexFinding_src_gpuVertexFinder_h
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
 #include "AlpakaDataFormats/ZVertexAlpaka.h"
 #include "AlpakaDataFormats/PixelTrackAlpaka.h"
 
@@ -43,7 +43,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                float ierrmax,  // max error to be "seed"
                float ichi2max  // max normalized distance to cluster
                )
-          : oneKernel_(oneKernel && !(useDBSCAN || useIterative)),
+          : ws_dPtr(::cms::alpakatools::make_device_unique<WorkSpace>(1u)),
+            oneKernel_(oneKernel && !(useDBSCAN || useIterative)),
             useDensity_(useDensity),
             useDBSCAN_(useDBSCAN),
             useIterative_(useIterative),
@@ -57,6 +58,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
       ZVertexAlpaka makeAsync(TkSoA const* tksoa, float ptMin, Queue& queue) const;
 
     private:
+      ::cms::alpakatools::device::unique_ptr<WorkSpace> ws_dPtr;
+
       const bool oneKernel_;
       const bool useDensity_;
       const bool useDBSCAN_;
diff --git a/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelFedCablingMapESProducer.cc b/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelFedCablingMapESProducer.cc
index 4c1bb9c6e..2eecbe749 100644
--- a/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelFedCablingMapESProducer.cc
+++ b/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelFedCablingMapESProducer.cc
@@ -4,7 +4,7 @@
 #include "Framework/EventSetup.h"
 #include "Framework/ESPluginFactory.h"
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
 
 #include <fstream>
 #include <memory>
@@ -33,19 +33,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     Queue queue(devices[0]);
 
     auto cablingMap_h{cms::alpakatools::createHostView<SiPixelFedCablingMapGPU>(&obj, 1u)};
-    auto cablingMap_d{cms::alpakatools::allocDeviceBuf<SiPixelFedCablingMapGPU>(alpaka::getDev(queue), 1u)};
-    alpaka::prepareForAsyncCopy(cablingMap_d);
-    alpaka::memcpy(queue, cablingMap_d, cablingMap_h, 1u);
+    auto cablingMap_d{cms::alpakatools::make_device_unique<SiPixelFedCablingMapGPU>(1u)};
+    auto cablingMap_d_view =
+        cms::alpakatools::createDeviceView<SiPixelFedCablingMapGPU>(alpaka::getDev(queue), cablingMap_d.get(), 1u);
+    alpaka::memcpy(queue, cablingMap_d_view, cablingMap_h, 1u);
     eventSetup.put(std::make_unique<SiPixelFedCablingMapGPUWrapper>(std::move(cablingMap_d), true));
 
     auto modToUnp_h{cms::alpakatools::createHostView<unsigned char>(modToUnpDefault.data(), modToUnpDefSize)};
-    auto modToUnp_d{cms::alpakatools::allocDeviceBuf<unsigned char>(alpaka::getDev(queue), modToUnpDefSize)};
-    alpaka::prepareForAsyncCopy(modToUnp_d);
-    alpaka::memcpy(queue, modToUnp_d, modToUnp_h, modToUnpDefSize);
+    auto modToUnp_d{cms::alpakatools::make_device_unique<unsigned char>(modToUnpDefSize)};
+    auto modToUnp_d_view =
+        cms::alpakatools::createDeviceView<unsigned char>(alpaka::getDev(queue), modToUnp_d.get(), modToUnpDefSize);
+    alpaka::memcpy(queue, modToUnp_d_view, modToUnp_h, modToUnpDefSize);
 
     alpaka::wait(queue);
 
-    eventSetup.put(std::make_unique<AlpakaDeviceBuf<unsigned char>>(std::move(modToUnp_d)));
+    eventSetup.put(std::make_unique<cms::alpakatools::device::unique_ptr<unsigned char>>(std::move(modToUnp_d)));
   }
 
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelGainCalibrationForHLTESProducer.cc b/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelGainCalibrationForHLTESProducer.cc
index dd12bca26..31cd934c1 100644
--- a/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelGainCalibrationForHLTESProducer.cc
+++ b/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelGainCalibrationForHLTESProducer.cc
@@ -3,7 +3,7 @@
 #include "Framework/EventSetup.h"
 #include "Framework/ESPluginFactory.h"
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
 
 #include <fstream>
 #include <memory>
@@ -48,22 +48,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     const uint32_t numDecodingStructures = gainData.size() / sizeof(SiPixelGainForHLTonGPU_DecodingStructure);
     auto ped_h{cms::alpakatools::createHostView<SiPixelGainForHLTonGPU::DecodingStructure>(
         reinterpret_cast<SiPixelGainForHLTonGPU::DecodingStructure*>(gainData.data()), numDecodingStructures)};
-    auto ped_d{cms::alpakatools::allocDeviceBuf<SiPixelGainForHLTonGPU::DecodingStructure>(alpaka::getDev(queue),
-                                                                                           numDecodingStructures)};
-    alpaka::prepareForAsyncCopy(ped_d);
-    alpaka::memcpy(queue, ped_d, ped_h, numDecodingStructures);
+    auto ped_d{cms::alpakatools::make_device_unique<SiPixelGainForHLTonGPU::DecodingStructure>(numDecodingStructures)};
+    auto ped_d_view = cms::alpakatools::createDeviceView<SiPixelGainForHLTonGPU::DecodingStructure>(
+        alpaka::getDev(queue), ped_d.get(), numDecodingStructures);
+    alpaka::memcpy(queue, ped_d_view, ped_h, numDecodingStructures);
 
     auto rangeAndCols_h{
         cms::alpakatools::createHostView<SiPixelGainForHLTonGPU::RangeAndCols>(gain.rangeAndCols, 2000u)};
-    auto rangeAndCols_d{
-        cms::alpakatools::allocDeviceBuf<SiPixelGainForHLTonGPU::RangeAndCols>(alpaka::getDev(queue), 2000u)};
-    alpaka::prepareForAsyncCopy(rangeAndCols_d);
-    alpaka::memcpy(queue, rangeAndCols_d, rangeAndCols_h, 2000u);
+    auto rangeAndCols_d{cms::alpakatools::make_device_unique<SiPixelGainForHLTonGPU::RangeAndCols>(2000u)};
+    auto rangeAndCols_d_view = cms::alpakatools::createDeviceView<SiPixelGainForHLTonGPU::RangeAndCols>(
+        alpaka::getDev(queue), rangeAndCols_d.get(), 2000u);
+    alpaka::memcpy(queue, rangeAndCols_d_view, rangeAndCols_h, 2000u);
 
     auto fields_h{cms::alpakatools::createHostView<SiPixelGainForHLTonGPU::Fields>(&gain.fields_, 1u)};
-    auto fields_d{cms::alpakatools::allocDeviceBuf<SiPixelGainForHLTonGPU::Fields>(alpaka::getDev(queue), 1u)};
-    alpaka::prepareForAsyncCopy(fields_d);
-    alpaka::memcpy(queue, fields_d, fields_h, 1u);
+    auto fields_d{cms::alpakatools::make_device_unique<SiPixelGainForHLTonGPU::Fields>(1u)};
+    auto fields_d_view =
+        cms::alpakatools::createDeviceView<SiPixelGainForHLTonGPU::Fields>(alpaka::getDev(queue), fields_d.get(), 1u);
+    alpaka::memcpy(queue, fields_d_view, fields_h, 1u);
 
     alpaka::wait(queue);
 
diff --git a/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToCluster.cc b/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToCluster.cc
index b6fb2c74f..b8ec96847 100644
--- a/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToCluster.cc
+++ b/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToCluster.cc
@@ -17,7 +17,7 @@
 #include "../ErrorChecker.h"
 #include "SiPixelRawToClusterGPUKernel.h"
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
 
 #include <memory>
 #include <string>
@@ -71,7 +71,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     }
     // get the GPU product already here so that the async transfer can begin
     const auto* gpuMap = hgpuMap.cablingMap();
-    const unsigned char* gpuModulesToUnpack = alpaka::getPtrNative(iSetup.get<AlpakaDeviceBuf<unsigned char>>());
+    const unsigned char* gpuModulesToUnpack = iSetup.get<::cms::alpakatools::device::unique_ptr<unsigned char>>().get();
     const auto* gpuGains = &(iSetup.get<SiPixelGainForHLTonGPU>());
 
     auto const& fedIds_ = iSetup.get<SiPixelFedIds>().fedIds();
diff --git a/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToClusterGPUKernel.cc b/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToClusterGPUKernel.cc
index 36b6b5884..f2623873d 100644
--- a/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToClusterGPUKernel.cc
+++ b/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToClusterGPUKernel.cc
@@ -18,6 +18,8 @@
 #include <string>
 
 // Alpaka includes
+#include "AlpakaCore/host_unique_ptr.h"
+#include "AlpakaCore/device_unique_ptr.h"
 #include "AlpakaCore/prefixScan.h"
 
 // CMSSW includes
@@ -32,15 +34,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
   namespace pixelgpudetails {
 
     SiPixelRawToClusterGPUKernel::WordFedAppender::WordFedAppender()
-        : word_{::cms::alpakatools::allocHostBuf<unsigned int>(MAX_FED_WORDS)},
-          fedId_{::cms::alpakatools::allocHostBuf<unsigned char>(MAX_FED_WORDS)} {}
+        : word_{cms::alpakatools::make_host_unique<unsigned int>(MAX_FED_WORDS)},
+          fedId_{cms::alpakatools::make_host_unique<unsigned char>(MAX_FED_WORDS)} {}
 
     void SiPixelRawToClusterGPUKernel::WordFedAppender::initializeWordFed(int fedId,
                                                                           unsigned int wordCounterGPU,
                                                                           const uint32_t *src,
                                                                           unsigned int length) {
-      std::memcpy(alpaka::getPtrNative(word_) + wordCounterGPU, src, sizeof(uint32_t) * length);
-      std::memset(alpaka::getPtrNative(fedId_) + wordCounterGPU / 2, fedId - 1200, length / 2);
+      std::memcpy(word() + wordCounterGPU, src, sizeof(uint32_t) * length);
+      std::memset(this->fedId() + wordCounterGPU / 2, fedId - 1200, length / 2);
     }
 
     ////////////////////
@@ -563,12 +565,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
       std::cout << "decoding " << wordCounter << " digis. Max is " << pixelgpudetails::MAX_FED_WORDS << std::endl;
 #endif
 
-      digis_d = SiPixelDigisAlpaka(alpaka::getDev(queue), pixelgpudetails::MAX_FED_WORDS);
+      digis_d = SiPixelDigisAlpaka(pixelgpudetails::MAX_FED_WORDS);
       if (includeErrors) {
-        digiErrors_d =
-            SiPixelDigiErrorsAlpaka(alpaka::getDev(queue), pixelgpudetails::MAX_FED_WORDS, std::move(errors), queue);
+        digiErrors_d = SiPixelDigiErrorsAlpaka(pixelgpudetails::MAX_FED_WORDS, std::move(errors), queue);
       }
-      clusters_d = SiPixelClustersAlpaka(alpaka::getDev(queue), gpuClustering::MaxNumModules);
+      clusters_d = SiPixelClustersAlpaka(gpuClustering::MaxNumModules);
 
       if (wordCounter)  // protect in case of empty event....
       {
@@ -585,14 +586,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
         ALPAKA_ASSERT_OFFLOAD(0 == wordCounter % 2);
         // wordCounter is the total no of words in each event to be trasfered on device
-        auto word_d = cms::alpakatools::allocDeviceBuf<uint32_t>(alpaka::getDev(queue), wordCounter);
+        auto word_d = cms::alpakatools::make_device_unique<uint32_t>(wordCounter);
         // NB: IMPORTANT: fedId_d: In legacy, wordCounter elements are allocated.
         // However, only the first half of elements end up eventually used:
         // hence, here, only wordCounter/2 elements are allocated.
-        auto fedId_d = cms::alpakatools::allocDeviceBuf<uint8_t>(alpaka::getDev(queue), wordCounter / 2);
+        auto fedId_d = cms::alpakatools::make_device_unique<uint8_t>(wordCounter / 2);
 
-        alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter);
-        alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2);
+        auto word_d_view =
+            cms::alpakatools::createDeviceView<uint32_t>(alpaka::getDev(queue), word_d.get(), wordCounter);
+        auto fedId_d_view =
+            cms::alpakatools::createDeviceView<uint8_t>(alpaka::getDev(queue), fedId_d.get(), wordCounter / 2);
+        auto word_view = cms::alpakatools::createHostView<unsigned int>(wordFed.word(), MAX_FED_WORDS);
+        auto fedId_view = cms::alpakatools::createHostView<unsigned char>(wordFed.fedId(), MAX_FED_WORDS);
+        alpaka::memcpy(queue, word_d_view, word_view, wordCounter);
+        alpaka::memcpy(queue, fedId_d_view, fedId_view, wordCounter / 2);
 
         // Launch rawToDigi kernel
         alpaka::enqueue(queue,
@@ -601,8 +608,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                         cablingMap,
                                                         modToUnp,
                                                         wordCounter,
-                                                        alpaka::getPtrNative(word_d),
-                                                        alpaka::getPtrNative(fedId_d),
+                                                        word_d.get(),
+                                                        fedId_d.get(),
                                                         digis_d->xx(),
                                                         digis_d->yy(),
                                                         digis_d->adc(),
@@ -675,7 +682,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
         auto moduleStartFirstElement =
             cms::alpakatools::createDeviceView<uint32_t>(alpaka::getDev(queue), clusters_d->moduleStart(), 1u);
 
-        alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement, 1u);
+        auto nModules_Clusters_h_view = cms::alpakatools::createHostView<uint32_t>(nModules_Clusters_h.get(), 2u);
+        alpaka::memcpy(queue, nModules_Clusters_h_view, moduleStartFirstElement, 1u);
 
         const WorkDiv1D &workDivMaxNumModules =
             ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::make_workdiv(Vec1D::all(MaxNumModules), Vec1D::all(256));
@@ -737,13 +745,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
         const auto clusModuleStartLastElement =
             AlpakaDeviceSubView<uint32_t>(clusModuleStartView, 1u, gpuClustering::MaxNumModules);
         // slice on host
-        auto nModules_Clusters_1_h{::cms::alpakatools::allocHostBuf<uint32_t>(1u)};
-        auto p_nModules_Clusters_1_h = alpaka::getPtrNative(nModules_Clusters_1_h);
+        auto nModules_Clusters_1_h{cms::alpakatools::make_host_unique<uint32_t>(1u)};
+        auto p_nModules_Clusters_1_h = nModules_Clusters_1_h.get();
+        auto nModules_Clusters_1_h_view = cms::alpakatools::createHostView<uint32_t>(p_nModules_Clusters_1_h, 1u);
 
-        alpaka::memcpy(queue, nModules_Clusters_1_h, clusModuleStartLastElement, 1u);
+        alpaka::memcpy(queue, nModules_Clusters_1_h_view, clusModuleStartLastElement, 1u);
         // Wait for memory transfer to host to complete before looking at host data!
         alpaka::wait(queue);
-        auto p_nModules_Clusters_h = alpaka::getPtrNative(nModules_Clusters_h);
+        auto p_nModules_Clusters_h = nModules_Clusters_h.get();
         p_nModules_Clusters_h[1] = p_nModules_Clusters_1_h[0];
       }  // end clusterizer scope
     }
diff --git a/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToClusterGPUKernel.h b/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToClusterGPUKernel.h
index 04130fe91..ff4faede9 100644
--- a/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToClusterGPUKernel.h
+++ b/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToClusterGPUKernel.h
@@ -7,6 +7,8 @@
 #include "AlpakaDataFormats/SiPixelClustersAlpaka.h"
 #include "AlpakaDataFormats/SiPixelDigiErrorsAlpaka.h"
 #include "AlpakaDataFormats/SiPixelDigisAlpaka.h"
+#include "AlpakaCore/host_unique_ptr.h"
+
 #include "AlpakaDataFormats/gpuClusteringConstants.h"
 #include "CondFormats/SiPixelFedCablingMapGPU.h"
 #include "CondFormats/SiPixelGainForHLTonGPU.h"
@@ -158,16 +160,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
         void initializeWordFed(int fedId, unsigned int wordCounterGPU, const uint32_t* src, unsigned int length);
 
-        auto word() const { return word_; }
-        auto fedId() const { return fedId_; }
+        auto word() const { return word_.get(); }
+        auto fedId() const { return fedId_.get(); }
 
       private:
-        AlpakaHostBuf<unsigned int> word_;
-        AlpakaHostBuf<unsigned char> fedId_;
+        ::cms::alpakatools::host::unique_ptr<unsigned int> word_;
+        ::cms::alpakatools::host::unique_ptr<unsigned char> fedId_;
       };
 
-      SiPixelRawToClusterGPUKernel() : nModules_Clusters_h{::cms::alpakatools::allocHostBuf<uint32_t>(2u)} {}
-
+      SiPixelRawToClusterGPUKernel() : nModules_Clusters_h{::cms::alpakatools::make_host_unique<uint32_t>(2u)} {};
+      // digis_d{SiPixelDigisAlpaka(0u)},
+      // clusters_d{SiPixelClustersAlpaka(0u)},
+      // digiErrors_d{SiPixelDigiErrorsAlpaka(0u, PixelFormatterErrors())} {};
       ~SiPixelRawToClusterGPUKernel() = default;
 
       SiPixelRawToClusterGPUKernel(const SiPixelRawToClusterGPUKernel&) = delete;
@@ -189,7 +193,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                              Queue& queue);
 
       std::pair<SiPixelDigisAlpaka, SiPixelClustersAlpaka> getResults() {
-        auto pnModules_Clusters_h = alpaka::getPtrNative(nModules_Clusters_h);
+        auto pnModules_Clusters_h = nModules_Clusters_h.get();
         digis_d->setNModulesDigis(pnModules_Clusters_h[0], nDigis);
         clusters_d->setNClusters(pnModules_Clusters_h[1]);
         return std::make_pair(std::move(*digis_d), std::move(*clusters_d));
@@ -201,7 +205,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
       uint32_t nDigis = 0;
 
       // Data to be put in the event
-      AlpakaHostBuf<uint32_t> nModules_Clusters_h;
+      ::cms::alpakatools::host::unique_ptr<uint32_t> nModules_Clusters_h;
       std::optional<SiPixelDigisAlpaka> digis_d;
       std::optional<SiPixelClustersAlpaka> clusters_d;
       std::optional<SiPixelDigiErrorsAlpaka> digiErrors_d;
diff --git a/src/alpaka/plugin-SiPixelRecHits/alpaka/PixelCPEFastESProducer.cc b/src/alpaka/plugin-SiPixelRecHits/alpaka/PixelCPEFastESProducer.cc
index 67d407eae..0b620a6a9 100644
--- a/src/alpaka/plugin-SiPixelRecHits/alpaka/PixelCPEFastESProducer.cc
+++ b/src/alpaka/plugin-SiPixelRecHits/alpaka/PixelCPEFastESProducer.cc
@@ -3,7 +3,7 @@
 #include "Framework/EventSetup.h"
 #include "Framework/ESPluginFactory.h"
 
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/device_unique_ptr.h"
 
 #include <fstream>
 #include <iostream>
@@ -23,15 +23,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     std::ifstream in((data_ + "/cpefast.bin").c_str(), std::ios::binary);
     in.exceptions(std::ifstream::badbit | std::ifstream::failbit | std::ifstream::eofbit);
 
-    // TODO FIXME use the correct device
-    Queue queue(devices[0]);
+    Queue queue(::ALPAKA_ACCELERATOR_NAMESPACE::devices[0]);
 
     pixelCPEforGPU::CommonParams commonParams;
     in.read(reinterpret_cast<char *>(&commonParams), sizeof(pixelCPEforGPU::CommonParams));
     auto commonParams_h{cms::alpakatools::createHostView<pixelCPEforGPU::CommonParams>(&commonParams, 1u)};
-    auto commonParams_d{cms::alpakatools::allocDeviceBuf<pixelCPEforGPU::CommonParams>(alpaka::getDev(queue), 1u)};
-    alpaka::prepareForAsyncCopy(commonParams_d);
-    alpaka::memcpy(queue, commonParams_d, commonParams_h, 1u);
+    auto commonParams_d{cms::alpakatools::make_device_unique<pixelCPEforGPU::CommonParams>(1u)};
+    auto commonParams_d_view{cms::alpakatools::createDeviceView<pixelCPEforGPU::CommonParams>(
+        alpaka::getDev(queue), commonParams_d.get(), 1u)};
+    alpaka::memcpy(queue, commonParams_d_view, commonParams_h, 1u);
 
     unsigned int ndetParams;
     in.read(reinterpret_cast<char *>(&ndetParams), sizeof(unsigned int));
@@ -40,34 +40,37 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     std::vector<pixelCPEforGPU::DetParams> detParams(ndetParams);
     in.read(reinterpret_cast<char *>(detParams.data()), ndetParams * sizeof(pixelCPEforGPU::DetParams));
     auto detParams_h{cms::alpakatools::createHostView<pixelCPEforGPU::DetParams>(detParams.data(), ndetParams)};
-    auto detParams_d{cms::alpakatools::allocDeviceBuf<pixelCPEforGPU::DetParams>(alpaka::getDev(queue), ndetParams)};
-    alpaka::prepareForAsyncCopy(detParams_d);
-    alpaka::memcpy(queue, detParams_d, detParams_h, ndetParams);
+    auto detParams_d{cms::alpakatools::make_device_unique<pixelCPEforGPU::DetParams>(ndetParams)};
+    auto detParams_d_view{cms::alpakatools::createDeviceView<pixelCPEforGPU::DetParams>(
+        alpaka::getDev(queue), detParams_d.get(), ndetParams)};
+    alpaka::memcpy(queue, detParams_d_view, detParams_h, ndetParams);
 
     pixelCPEforGPU::AverageGeometry averageGeometry;
     in.read(reinterpret_cast<char *>(&averageGeometry), sizeof(pixelCPEforGPU::AverageGeometry));
     auto averageGeometry_h{cms::alpakatools::createHostView<pixelCPEforGPU::AverageGeometry>(&averageGeometry, 1u)};
-    auto averageGeometry_d{
-        cms::alpakatools::allocDeviceBuf<pixelCPEforGPU::AverageGeometry>(alpaka::getDev(queue), 1u)};
-    alpaka::prepareForAsyncCopy(averageGeometry_d);
-    alpaka::memcpy(queue, averageGeometry_d, averageGeometry_h, 1u);
+    auto averageGeometry_d{cms::alpakatools::make_device_unique<pixelCPEforGPU::AverageGeometry>(1u)};
+    auto averageGeometry_d_view{cms::alpakatools::createDeviceView<pixelCPEforGPU::AverageGeometry>(
+        alpaka::getDev(queue), averageGeometry_d.get(), 1u)};
+    alpaka::memcpy(queue, averageGeometry_d_view, averageGeometry_h, 1u);
 
     pixelCPEforGPU::LayerGeometry layerGeometry;
     in.read(reinterpret_cast<char *>(&layerGeometry), sizeof(pixelCPEforGPU::LayerGeometry));
     auto layerGeometry_h{cms::alpakatools::createHostView<pixelCPEforGPU::LayerGeometry>(&layerGeometry, 1u)};
-    auto layerGeometry_d{cms::alpakatools::allocDeviceBuf<pixelCPEforGPU::LayerGeometry>(alpaka::getDev(queue), 1u)};
-    alpaka::prepareForAsyncCopy(layerGeometry_d);
-    alpaka::memcpy(queue, layerGeometry_d, layerGeometry_h, 1u);
+    auto layerGeometry_d{cms::alpakatools::make_device_unique<pixelCPEforGPU::LayerGeometry>(1u)};
+    auto layerGeometry_d_view{cms::alpakatools::createDeviceView<pixelCPEforGPU::LayerGeometry>(
+        alpaka::getDev(queue), layerGeometry_d.get(), 1u)};
+    alpaka::memcpy(queue, layerGeometry_d_view, layerGeometry_h, 1u);
 
     pixelCPEforGPU::ParamsOnGPU params;
-    params.m_commonParams = alpaka::getPtrNative(commonParams_d);
-    params.m_detParams = alpaka::getPtrNative(detParams_d);
-    params.m_layerGeometry = alpaka::getPtrNative(layerGeometry_d);
-    params.m_averageGeometry = alpaka::getPtrNative(averageGeometry_d);
+    params.m_commonParams = commonParams_d.get();
+    params.m_detParams = detParams_d.get();
+    params.m_layerGeometry = layerGeometry_d.get();
+    params.m_averageGeometry = averageGeometry_d.get();
     auto params_h{cms::alpakatools::createHostView<pixelCPEforGPU::ParamsOnGPU>(&params, 1u)};
-    auto params_d{cms::alpakatools::allocDeviceBuf<pixelCPEforGPU::ParamsOnGPU>(alpaka::getDev(queue), 1u)};
-    alpaka::prepareForAsyncCopy(params_d);
-    alpaka::memcpy(queue, params_d, params_h, 1u);
+    auto params_d{cms::alpakatools::make_device_unique<pixelCPEforGPU::ParamsOnGPU>(1u)};
+    auto params_d_view{
+        cms::alpakatools::createDeviceView<pixelCPEforGPU::ParamsOnGPU>(alpaka::getDev(queue), params_d.get(), 1u)};
+    alpaka::memcpy(queue, params_d_view, params_h, 1u);
 
     alpaka::wait(queue);
 
diff --git a/src/alpaka/plugin-Validation/alpaka/CountValidator.cc b/src/alpaka/plugin-Validation/alpaka/CountValidator.cc
index 7d715061f..c0be191c3 100644
--- a/src/alpaka/plugin-Validation/alpaka/CountValidator.cc
+++ b/src/alpaka/plugin-Validation/alpaka/CountValidator.cc
@@ -92,8 +92,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
     {
       auto const& count = iEvent.get(trackCountToken_);
-      auto const& tracksBuf = iEvent.get(trackToken_);
-      auto const tracks = alpaka::getPtrNative(tracksBuf);
+      auto const& tracksPtr = iEvent.get(trackToken_);
+      auto const tracks = tracksPtr.get();
 
       int nTracks = 0;
       for (int i = 0; i < tracks->stride(); ++i) {
@@ -116,8 +116,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
     {
       auto const& count = iEvent.get(vertexCountToken_);
-      auto const& verticesBuf = iEvent.get(vertexToken_);
-      auto const vertices = alpaka::getPtrNative(verticesBuf);
+      auto const& verticesPtr = iEvent.get(vertexToken_);
+      auto const vertices = verticesPtr.get();
 
       auto diff = std::abs(int(vertices->nvFinal) - int(count.nVertices()));
       if (diff != 0) {
diff --git a/src/alpaka/plugin-Validation/alpaka/HistoValidator.cc b/src/alpaka/plugin-Validation/alpaka/HistoValidator.cc
index 29ef61b38..2899e77bd 100644
--- a/src/alpaka/plugin-Validation/alpaka/HistoValidator.cc
+++ b/src/alpaka/plugin-Validation/alpaka/HistoValidator.cc
@@ -1,4 +1,4 @@
-#include "AlpakaCore/alpakaCommon.h"
+#include "AlpakaCore/host_unique_ptr.h"
 #include "AlpakaDataFormats/gpuClusteringConstants.h"
 #include "AlpakaDataFormats/PixelTrackAlpaka.h"
 #include "AlpakaDataFormats/SiPixelClustersAlpaka.h"
@@ -120,39 +120,39 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     auto const nHits = hits.nHits();
 
 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-    // TODO FIXME use the correct device
-    Queue queue(devices[0]);
-    auto const h_adcBuf = digis.adcToHostAsync(queue);
-    auto const h_adc = alpaka::getPtrNative(h_adcBuf);
+    Queue queue(ALPAKA_ACCELERATOR_NAMESPACE::devices[0]);
+    auto const h_adcPtr = digis.adcToHostAsync(queue);
+    auto const h_adc = h_adcPtr.get();
 
     auto const d_clusInModuleView = cms::alpakatools::createDeviceView<uint32_t>(
         alpaka::getDev(queue), clusters.clusInModule(), gpuClustering::MaxNumModules);
-    auto h_clusInModuleBuf{::cms::alpakatools::allocHostBuf<uint32_t>(gpuClustering::MaxNumModules)};
-    alpaka::memcpy(queue, h_clusInModuleBuf, d_clusInModuleView, gpuClustering::MaxNumModules);
-    auto h_clusInModule = alpaka::getPtrNative(h_clusInModuleBuf);
-
-    auto const h_lxBuf = hits.xlToHostAsync(queue);
-    auto const h_lx = alpaka::getPtrNative(h_lxBuf);
-    auto const h_lyBuf = hits.ylToHostAsync(queue);
-    auto const h_ly = alpaka::getPtrNative(h_lyBuf);
-    auto const h_lexBuf = hits.xerrToHostAsync(queue);
-    auto const h_lex = alpaka::getPtrNative(h_lexBuf);
-    auto const h_leyBuf = hits.yerrToHostAsync(queue);
-    auto const h_ley = alpaka::getPtrNative(h_leyBuf);
-    auto const h_gxBuf = hits.xgToHostAsync(queue);
-    auto const h_gx = alpaka::getPtrNative(h_gxBuf);
-    auto const h_gyBuf = hits.ygToHostAsync(queue);
-    auto const h_gy = alpaka::getPtrNative(h_gyBuf);
-    auto const h_gzBuf = hits.zgToHostAsync(queue);
-    auto const h_gz = alpaka::getPtrNative(h_gzBuf);
-    auto const h_grBuf = hits.rgToHostAsync(queue);
-    auto const h_gr = alpaka::getPtrNative(h_grBuf);
-    auto const h_chargeBuf = hits.chargeToHostAsync(queue);
-    auto const h_charge = alpaka::getPtrNative(h_chargeBuf);
-    auto const h_sizexBuf = hits.xsizeToHostAsync(queue);
-    auto const h_sizex = alpaka::getPtrNative(h_sizexBuf);
-    auto const h_sizeyBuf = hits.ysizeToHostAsync(queue);
-    auto const h_sizey = alpaka::getPtrNative(h_sizeyBuf);
+    auto h_clusInModulePtr{cms::alpakatools::make_host_unique<uint32_t>(gpuClustering::MaxNumModules)};
+    auto h_clusInModule{h_clusInModulePtr.get()};
+    auto h_clusInModuleView{cms::alpakatools::createHostView<uint32_t>(h_clusInModule, gpuClustering::MaxNumModules)};
+    alpaka::memcpy(queue, h_clusInModuleView, d_clusInModuleView, gpuClustering::MaxNumModules);
+
+    auto const h_lxPtr = hits.xlToHostAsync(queue);
+    auto const h_lx = h_lxPtr.get();
+    auto const h_lyPtr = hits.ylToHostAsync(queue);
+    auto const h_ly = h_lyPtr.get();
+    auto const h_lexPtr = hits.xerrToHostAsync(queue);
+    auto const h_lex = h_lexPtr.get();
+    auto const h_leyPtr = hits.yerrToHostAsync(queue);
+    auto const h_ley = h_leyPtr.get();
+    auto const h_gxPtr = hits.xgToHostAsync(queue);
+    auto const h_gx = h_gxPtr.get();
+    auto const h_gyPtr = hits.ygToHostAsync(queue);
+    auto const h_gy = h_gyPtr.get();
+    auto const h_gzPtr = hits.zgToHostAsync(queue);
+    auto const h_gz = h_gzPtr.get();
+    auto const h_grPtr = hits.rgToHostAsync(queue);
+    auto const h_gr = h_grPtr.get();
+    auto const h_chargePtr = hits.chargeToHostAsync(queue);
+    auto const h_charge = h_chargePtr.get();
+    auto const h_sizexPtr = hits.xsizeToHostAsync(queue);
+    auto const h_sizex = h_sizexPtr.get();
+    auto const h_sizeyPtr = hits.ysizeToHostAsync(queue);
+    auto const h_sizey = h_sizeyPtr.get();
 
     alpaka::wait(queue);
 #else
@@ -200,8 +200,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     }
 
     {
-      auto const& tracksBuf = iEvent.get(trackToken_);
-      auto const tracks = alpaka::getPtrNative(tracksBuf);
+      auto const& tracksPtr = iEvent.get(trackToken_);
+      auto const tracks = tracksPtr.get();
 
       int nTracks = 0;
       for (int i = 0; i < tracks->stride(); ++i) {
@@ -224,8 +224,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     }
 
     {
-      auto const& verticesBuf = iEvent.get(vertexToken_);
-      auto const vertices = alpaka::getPtrNative(verticesBuf);
+      auto const& verticesPtr = iEvent.get(vertexToken_);
+      auto const vertices = verticesPtr.get();
 
       histos["vertex_n"].fill(vertices->nvFinal);
       for (uint32_t i = 0; i < vertices->nvFinal; ++i) {