diff --git a/src/alpaka/AlpakaCore/ContextState.h b/src/alpaka/AlpakaCore/ContextState.h
index 1263f4f0e..a1efa8913 100644
--- a/src/alpaka/AlpakaCore/ContextState.h
+++ b/src/alpaka/AlpakaCore/ContextState.h
@@ -4,7 +4,6 @@
 #include <memory>
 
 #include "AlpakaCore/alpakaConfig.h"
-#include "AlpakaCore/SharedStreamPtr.h"
 
 namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
 
@@ -15,6 +14,9 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
      */
   class ContextState {
   public:
+    using Queue = ::ALPAKA_ACCELERATOR_NAMESPACE::Queue;
+    using Device = alpaka::Dev<Queue>;
+
     ContextState() = default;
     ~ContextState() = default;
 
@@ -28,24 +30,23 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
     friend class ScopedContextProduce;
     friend class ScopedContextTask;
 
-    void set(int device, SharedStreamPtr stream) {
+    void set(std::shared_ptr<Queue> stream) {
       throwIfStream();
-      device_ = device;
       stream_ = std::move(stream);
     }
 
-    int device() const { return device_; }
+    Device device() const { throwIfNoStream(); return alpaka::getDev(*stream_); }  // guard: stream_ may be empty
 
-    const SharedStreamPtr& streamPtr() const {
+    const std::shared_ptr<Queue>& streamPtr() const {
       throwIfNoStream();
       return stream_;
     }
 
-    SharedStreamPtr releaseStreamPtr() {
+    std::shared_ptr<Queue> releaseStreamPtr() {
       throwIfNoStream();
       // This function needs to effectively reset stream_ (i.e. stream_
       // must be empty after this function). This behavior ensures that
-      // the SharedStreamPtr is not hold for inadvertedly long (i.e. to
+      // the std::shared_ptr<Queue> is not held for inadvertently long (i.e. to
       // the next event), and is checked at run time.
       return std::move(stream_);
     }
@@ -53,8 +54,7 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
     void throwIfStream() const;
     void throwIfNoStream() const;
 
-    SharedStreamPtr stream_;
-    int device_;
+    std::shared_ptr<Queue> stream_;
   };
 
 }  // namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/AlpakaCore/ESProduct.h b/src/alpaka/AlpakaCore/ESProduct.h
index f6a134e11..3dfb1a682 100644
--- a/src/alpaka/AlpakaCore/ESProduct.h
+++ b/src/alpaka/AlpakaCore/ESProduct.h
@@ -8,7 +8,6 @@
 
 #include "AlpakaCore/alpakaConfig.h"
 #include "AlpakaCore/EventCache.h"
-#include "AlpakaCore/SharedEventPtr.h"
 #include "AlpakaCore/currentDevice.h"
 #include "AlpakaCore/deviceCount.h"
 #include "AlpakaCore/eventWorkHasCompleted.h"
@@ -18,11 +17,12 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
   template <typename T>
   class ESProduct {
   public:
+    using Event = ::ALPAKA_ACCELERATOR_NAMESPACE::Event;
+
     template <typename T_Acc>
     ESProduct(T_Acc acc) : gpuDataPerDevice_(::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::deviceCount()) {
       for (size_t i = 0; i < gpuDataPerDevice_.size(); ++i) {
-        gpuDataPerDevice_[i].m_event =
-            ::cms::alpakatools::getEventCache<::ALPAKA_ACCELERATOR_NAMESPACE::Event>().get(acc);
+        gpuDataPerDevice_[i].m_event = ::cms::alpakatools::getEventCache<Event>().get(acc);
       }
     }
 
@@ -92,7 +92,7 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
   private:
     struct Item {
       mutable std::mutex m_mutex;
-      mutable SharedEventPtr m_event;  // guarded by m_mutex
+      mutable std::shared_ptr<Event> m_event;  // guarded by m_mutex
       // non-null if some thread is already filling (cudaStream_t is just a pointer)
       mutable ::ALPAKA_ACCELERATOR_NAMESPACE::Queue* m_fillingStream = nullptr;  // guarded by m_mutex
       mutable std::atomic<bool> m_filled = false;  // easy check if data has been filled already or not
diff --git a/src/alpaka/AlpakaCore/Product.h b/src/alpaka/AlpakaCore/Product.h
index 6a5e0f5f6..a14a2fad6 100644
--- a/src/alpaka/AlpakaCore/Product.h
+++ b/src/alpaka/AlpakaCore/Product.h
@@ -46,12 +46,12 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
     friend class ScopedContextProduce;
     friend class edm::Wrapper<Product<T>>;
 
-    explicit Product(int device, SharedStreamPtr stream, SharedEventPtr event, T data)
-        : ProductBase(device, std::move(stream), std::move(event)), data_(std::move(data)) {}
+    explicit Product(std::shared_ptr<Queue> stream, std::shared_ptr<Event> event, T data)
+        : ProductBase(std::move(stream), std::move(event)), data_(std::move(data)) {}
 
     template <typename... Args>
-    explicit Product(int device, SharedStreamPtr stream, SharedEventPtr event, Args&&... args)
-        : ProductBase(device, std::move(stream), std::move(event)), data_(std::forward<Args>(args)...) {}
+    explicit Product(std::shared_ptr<Queue> stream, std::shared_ptr<Event> event, Args&&... args)
+        : ProductBase(std::move(stream), std::move(event)), data_(std::forward<Args>(args)...) {}
 
     T data_;  //!
   };
diff --git a/src/alpaka/AlpakaCore/ProductBase.h b/src/alpaka/AlpakaCore/ProductBase.h
index 3d1f09cf0..63b071a3a 100644
--- a/src/alpaka/AlpakaCore/ProductBase.h
+++ b/src/alpaka/AlpakaCore/ProductBase.h
@@ -4,8 +4,9 @@
 #include <atomic>
 #include <memory>
 
-#include "AlpakaCore/SharedEventPtr.h"
-#include "AlpakaCore/SharedStreamPtr.h"
+#include <alpaka/alpaka.hpp>
+
+#include "AlpakaCore/alpakaConfigAcc.h"
 
 namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
 
@@ -19,6 +20,9 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
      */
   class ProductBase {
   public:
+    using Queue = ::ALPAKA_ACCELERATOR_NAMESPACE::Queue;
+    using Event = alpaka::Event<Queue>;
+
     ProductBase() = default;  // Needed only for ROOT dictionary generation
     ~ProductBase();
 
@@ -27,43 +31,41 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
     ProductBase(ProductBase&& other)
         : stream_{std::move(other.stream_)},
           event_{std::move(other.event_)},
-          mayReuseStream_{other.mayReuseStream_.load()},
-          device_{other.device_} {}
+          mayReuseStream_{other.mayReuseStream_.load()} {}
     ProductBase& operator=(ProductBase&& other) {
       stream_ = std::move(other.stream_);
       event_ = std::move(other.event_);
       mayReuseStream_ = other.mayReuseStream_.load();
-      device_ = other.device_;
       return *this;
     }
 
     bool isValid() const { return stream_.get() != nullptr; }
     bool isAvailable() const;
 
-    int device() const { return device_; }
+    alpaka::Dev<Queue> device() const { return alpaka::getDev(stream()); }
 
     // cudaStream_t is a pointer to a thread-safe object, for which a
     // mutable access is needed even if the ::cms::alpakatools::ScopedContext itself
     // would be const. Therefore it is ok to return a non-const
     // pointer from a const method here.
-    ::ALPAKA_ACCELERATOR_NAMESPACE::Queue& stream() const { return *(stream_.get()); }
+    Queue& stream() const { return *(stream_.get()); }
 
     // cudaEvent_t is a pointer to a thread-safe object, for which a
     // mutable access is needed even if the ::cms::alpakatools::ScopedContext itself
     // would be const. Therefore it is ok to return a non-const
     // pointer from a const method here.
-    alpaka::Event<::ALPAKA_ACCELERATOR_NAMESPACE::Queue>& event() const { return *(event_.get()); }
+    Event& event() const { return *(event_.get()); }
 
   protected:
-    explicit ProductBase(int device, SharedStreamPtr stream, SharedEventPtr event)
-        : stream_{std::move(stream)}, event_{std::move(event)}, device_{device} {}
+    explicit ProductBase(std::shared_ptr<Queue> stream, std::shared_ptr<Event> event)
+        : stream_{std::move(stream)}, event_{std::move(event)} {}
 
   private:
     friend class impl::ScopedContextBase;
     friend class ScopedContextProduce;
 
     // The following function is intended to be used only from ScopedContext
-    const SharedStreamPtr& streamPtr() const { return stream_; }
+    const std::shared_ptr<Queue>& streamPtr() const { return stream_; }
 
     bool mayReuseStream() const {
       bool expected = true;
@@ -75,17 +77,14 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
 
     // The cudaStream_t is really shared among edm::Event products, so
     // using shared_ptr also here
-    SharedStreamPtr stream_;  //!
+    std::shared_ptr<Queue> stream_;  //!
     // shared_ptr because of caching in ::cms::alpakatools::EventCache
-    SharedEventPtr event_;  //!
+    std::shared_ptr<Event> event_;  //!
 
     // This flag tells whether the CUDA stream may be reused by a
     // consumer or not. The goal is to have a "chain" of modules to
     // queue their work to the same stream.
     mutable std::atomic<bool> mayReuseStream_ = true;  //!
-
-    // The CUDA device associated with this product
-    int device_ = -1;  //!
   };
 
 }  // namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/AlpakaCore/ScopedContext.h b/src/alpaka/AlpakaCore/ScopedContext.h
index c20e8dbed..a146422b2 100644
--- a/src/alpaka/AlpakaCore/ScopedContext.h
+++ b/src/alpaka/AlpakaCore/ScopedContext.h
@@ -7,8 +7,6 @@
 #include "AlpakaCore/ContextState.h"
 #include "AlpakaCore/EventCache.h"
 #include "AlpakaCore/Product.h"
-#include "AlpakaCore/SharedEventPtr.h"
-#include "AlpakaCore/SharedStreamPtr.h"
 #include "Framework/EDGetToken.h"
 #include "Framework/EDPutToken.h"
 #include "Framework/Event.h"
@@ -26,14 +24,17 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
     // This class is intended to be derived by other ScopedContext*, not for general use
     class ScopedContextBase {
     public:
-      int device() const { return currentDevice_; }
+      using Queue = ::ALPAKA_ACCELERATOR_NAMESPACE::Queue;
+      using Device = alpaka::Dev<Queue>;
+
+      Device device() const { return alpaka::getDev(*stream_); }
 
       // cudaStream_t is a pointer to a thread-safe object, for which a
       // mutable access is needed even if the ScopedContext itself
       // would be const. Therefore it is ok to return a non-const
       // pointer from a const method here.
-      ::ALPAKA_ACCELERATOR_NAMESPACE::Queue& stream() const { return *(stream_.get()); }
-      const SharedStreamPtr& streamPtr() const { return stream_; }
+      Queue& stream() const { return *(stream_.get()); }
+      const std::shared_ptr<Queue>& streamPtr() const { return stream_; }
 
     protected:
       // The constructors set the current device, but the device
@@ -43,44 +44,24 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
       // the scope where this context is. The current device doesn't
       // really matter between modules (or across TBB tasks).
 
-      template <typename T_Acc>
-      ScopedContextBase(T_Acc acc, const ProductBase& data) : currentDevice_(data.device()) {
-#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-        cudaSetDevice(currentDevice_);
-#endif
-        if (data.mayReuseStream()) {
-          stream_ = data.streamPtr();
-        } else {
-          stream_ = getStreamCache<::ALPAKA_ACCELERATOR_NAMESPACE::Queue>().get(acc);
-        }
-      }
+      ScopedContextBase(const ProductBase& data)
+          : stream_{data.mayReuseStream() ? data.streamPtr() : getStreamCache<Queue>().get(data.device())} {}
 
-      explicit ScopedContextBase(int device, SharedStreamPtr stream)
-          : currentDevice_(device), stream_(std::move(stream)) {
-#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-        cudaSetDevice(currentDevice_);
-#endif
-      }
+      explicit ScopedContextBase(std::shared_ptr<Queue> stream) : stream_(std::move(stream)) {}
 
-      template <typename T_Acc>
-      explicit ScopedContextBase(T_Acc acc, edm::StreamID streamID)
-          : currentDevice_(::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::chooseDevice(streamID)) {
-#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-        cudaSetDevice(currentDevice_);
-#endif
-        stream_ = getStreamCache<::ALPAKA_ACCELERATOR_NAMESPACE::Queue>().get(acc);
-      }
+      explicit ScopedContextBase(edm::StreamID streamID)
+          : stream_{getStreamCache<Queue>().get(
+                ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::chooseDevice(streamID))} {}
 
     private:
-      int currentDevice_;
-      SharedStreamPtr stream_;
+      std::shared_ptr<Queue> stream_;
     };
 
     class ScopedContextGetterBase : public ScopedContextBase {
     public:
       template <typename T>
       const T& get(const Product<T>& data) {
-        synchronizeStreams(data.device(), data.stream(), data.isAvailable(), data.event());
+        synchronizeStreams(data.stream(), data.isAvailable(), data.event());
         return data.data_;
       }
 
@@ -93,10 +74,7 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
       template <typename... Args>
       ScopedContextGetterBase(Args&&... args) : ScopedContextBase(std::forward<Args>(args)...) {}
 
-      void synchronizeStreams(int dataDevice,
-                              ::ALPAKA_ACCELERATOR_NAMESPACE::Queue& dataStream,
-                              bool available,
-                              alpaka::Event<::ALPAKA_ACCELERATOR_NAMESPACE::Queue> dataEvent);
+      void synchronizeStreams(Queue& dataStream, bool available, alpaka::Event<Queue> dataEvent);
     };
 
     class ScopedContextHolderHelper {
@@ -111,7 +89,7 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
         waitingTaskHolder_ = std::move(waitingTaskHolder);
       }
 
-      void enqueueCallback(int device, ::ALPAKA_ACCELERATOR_NAMESPACE::Queue& stream);
+      void enqueueCallback(ScopedContextBase::Queue& stream);
 
     private:
       edm::WaitingTaskWithArenaHolder waitingTaskHolder_;
@@ -128,30 +106,24 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
   class ScopedContextAcquire : public impl::ScopedContextGetterBase {
   public:
     /// Constructor to create a new CUDA stream (no need for context beyond acquire())
-    template <typename T_Acc>
-    explicit ScopedContextAcquire(T_Acc acc, edm::StreamID streamID, edm::WaitingTaskWithArenaHolder waitingTaskHolder)
-        : ScopedContextGetterBase(acc, streamID), holderHelper_{std::move(waitingTaskHolder)} {}
+    explicit ScopedContextAcquire(edm::StreamID streamID, edm::WaitingTaskWithArenaHolder waitingTaskHolder)
+        : ScopedContextGetterBase(streamID), holderHelper_{std::move(waitingTaskHolder)} {}
 
     // /// Constructor to create a new CUDA stream, and the context is needed after acquire()
-    template <typename T_Acc>
-    explicit ScopedContextAcquire(T_Acc acc,
-                                  edm::StreamID streamID,
+    explicit ScopedContextAcquire(edm::StreamID streamID,
                                   edm::WaitingTaskWithArenaHolder waitingTaskHolder,
                                   ContextState& state)
-        : ScopedContextGetterBase(acc, streamID), holderHelper_{std::move(waitingTaskHolder)}, contextState_{&state} {}
+        : ScopedContextGetterBase(streamID), holderHelper_{std::move(waitingTaskHolder)}, contextState_{&state} {}
 
     // /// Constructor to (possibly) re-use a CUDA stream (no need for context beyond acquire())
-    template <typename T_Acc>
-    explicit ScopedContextAcquire(T_Acc acc, const ProductBase& data, edm::WaitingTaskWithArenaHolder waitingTaskHolder)
-        : ScopedContextGetterBase(acc, data), holderHelper_{std::move(waitingTaskHolder)} {}
+    explicit ScopedContextAcquire(const ProductBase& data, edm::WaitingTaskWithArenaHolder waitingTaskHolder)
+        : ScopedContextGetterBase(data), holderHelper_{std::move(waitingTaskHolder)} {}
 
     // /// Constructor to (possibly) re-use a CUDA stream, and the context is needed after acquire()
-    template <typename T_Acc>
-    explicit ScopedContextAcquire(T_Acc acc,
-                                  const ProductBase& data,
+    explicit ScopedContextAcquire(const ProductBase& data,
                                   edm::WaitingTaskWithArenaHolder waitingTaskHolder,
                                   ContextState& state)
-        : ScopedContextGetterBase(acc, data), holderHelper_{std::move(waitingTaskHolder)}, contextState_{&state} {}
+        : ScopedContextGetterBase(data), holderHelper_{std::move(waitingTaskHolder)}, contextState_{&state} {}
 
     ~ScopedContextAcquire();
 
@@ -182,27 +154,24 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
   class ScopedContextProduce : public impl::ScopedContextGetterBase {
   public:
     /// Constructor to re-use the CUDA stream of acquire() (ExternalWork module)
-    explicit ScopedContextProduce(ContextState& state)
-        : ScopedContextGetterBase(state.device(), state.releaseStreamPtr()) {}
+    explicit ScopedContextProduce(ContextState& state) : ScopedContextGetterBase(state.releaseStreamPtr()) {}
 
-    template <typename T_Acc>
-    explicit ScopedContextProduce(T_Acc acc, const ProductBase& data) : ScopedContextGetterBase(acc, data) {}
+    explicit ScopedContextProduce(const ProductBase& data) : ScopedContextGetterBase(data) {}
 
-    template <typename T_Acc>
-    explicit ScopedContextProduce(T_Acc acc, edm::StreamID streamID) : ScopedContextGetterBase(acc, streamID) {}
+    explicit ScopedContextProduce(edm::StreamID streamID) : ScopedContextGetterBase(streamID) {}
 
     /// Record the CUDA event, all asynchronous work must have been queued before the destructor
     ~ScopedContextProduce();
 
-    template <typename T_Acc, typename T>
-    std::unique_ptr<Product<T>> wrap(T_Acc acc, T data) {
+    template <typename T>
+    std::unique_ptr<Product<T>> wrap(T data) {
       // make_unique doesn't work because of private constructor
-      return std::unique_ptr<Product<T>>(new Product<T>(device(), streamPtr(), getEvent(acc), std::move(data)));
+      return std::unique_ptr<Product<T>>(new Product<T>(streamPtr(), getEvent(), std::move(data)));
     }
 
-    template <typename T_Acc, typename T, typename... Args>
-    auto emplace(T_Acc acc, edm::Event& iEvent, edm::EDPutTokenT<T> token, Args&&... args) {
-      // return iEvent.emplace(token, device(), streamPtr(), getEvent(acc), std::forward<Args>(args)...);
+    template <typename T, typename... Args>
+    auto emplace(edm::Event& iEvent, edm::EDPutTokenT<T> token, Args&&... args) {
+      // return iEvent.emplace(token, streamPtr(), getEvent(), std::forward<Args>(args)...);
       return iEvent.emplace(token, std::forward<Args>(args)...);
       // TODO
     }
@@ -210,13 +179,9 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
   private:
     friend class ::ALPAKA_ACCELERATOR_NAMESPACE::cms::alpakatest::TestScopedContext;
 
-    explicit ScopedContextProduce(int device, SharedStreamPtr stream)
-        : ScopedContextGetterBase(device, std::move(stream)) {}
+    explicit ScopedContextProduce(std::shared_ptr<Queue> stream) : ScopedContextGetterBase(std::move(stream)) {}
 
-    template <typename T_Acc>
-    auto getEvent(T_Acc acc) {
-      return getEventCache<::ALPAKA_ACCELERATOR_NAMESPACE::Event>().get(acc);
-    }
+    auto getEvent() { return getEventCache<::ALPAKA_ACCELERATOR_NAMESPACE::Event>().get(device()); }
 
     // create the CUDA Event upfront to catch possible errors from its creation
   };
@@ -231,7 +196,7 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
   public:
     /// Constructor to re-use the CUDA stream of acquire() (ExternalWork module)
     explicit ScopedContextTask(ContextState const* state, edm::WaitingTaskWithArenaHolder waitingTaskHolder)
-        : ScopedContextBase(state->device(), state->streamPtr()),  // don't move, state is re-used afterwards
+        : ScopedContextBase(state->streamPtr()),  // don't move, state is re-used afterwards
           holderHelper_{std::move(waitingTaskHolder)},
           contextState_{state} {}
 
@@ -260,8 +225,7 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
   class ScopedContextAnalyze : public impl::ScopedContextGetterBase {
   public:
     /// Constructor to (possibly) re-use a CUDA stream
-    template <typename T_Acc>
-    explicit ScopedContextAnalyze(T_Acc acc, const ProductBase& data) : ScopedContextGetterBase(acc, data) {}
+    explicit ScopedContextAnalyze(const ProductBase& data) : ScopedContextGetterBase(data) {}
   };
 
   namespace impl {
diff --git a/src/alpaka/AlpakaCore/SharedEventPtr.h b/src/alpaka/AlpakaCore/SharedEventPtr.h
deleted file mode 100644
index 3582a928c..000000000
--- a/src/alpaka/AlpakaCore/SharedEventPtr.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef HeterogeneousCore_AlpakaUtilities_SharedEventPtr_h
-#define HeterogeneousCore_AlpakaUtilities_SharedEventPtr_h
-
-#include <memory>
-#include <type_traits>
-
-#include "AlpakaCore/alpakaConfig.h"
-
-namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
-
-  using SharedEventPtr = std::shared_ptr<::ALPAKA_ACCELERATOR_NAMESPACE::Event>;
-
-}  // namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE
-
-#endif
diff --git a/src/alpaka/AlpakaCore/SharedStreamPtr.h b/src/alpaka/AlpakaCore/SharedStreamPtr.h
deleted file mode 100644
index 38909af5a..000000000
--- a/src/alpaka/AlpakaCore/SharedStreamPtr.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef HeterogeneousCore_AlpakaUtilities_SharedStreamPtr_h
-#define HeterogeneousCore_AlpakaUtilities_SharedStreamPtr_h
-
-#include <memory>
-#include <type_traits>
-
-#include "AlpakaCore/alpakaConfig.h"
-
-namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
-
-  using SharedStreamPtr = std::shared_ptr<::ALPAKA_ACCELERATOR_NAMESPACE::Queue>;
-
-}  // namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE
-
-#endif
diff --git a/src/alpaka/AlpakaCore/StreamCache.h b/src/alpaka/AlpakaCore/StreamCache.h
index e24f7dfb9..22b93a312 100644
--- a/src/alpaka/AlpakaCore/StreamCache.h
+++ b/src/alpaka/AlpakaCore/StreamCache.h
@@ -22,7 +22,7 @@ namespace cms::alpakatools {
     // Gets a (cached) CUDA stream for the current device. The stream
     // will be returned to the cache by the shared_ptr destructor.
     // This function is thread safe
-    ALPAKA_FN_HOST std::shared_ptr<Queue> get(Device dev) {
+    ALPAKA_FN_HOST std::shared_ptr<Queue> get(Device const& dev) {
       return cache_[cms::alpakatools::getDevIndex(dev)].makeOrGet([dev]() { return std::make_unique<Queue>(dev); });
     }
 
diff --git a/src/alpaka/AlpakaCore/alpaka/ScopedContext.cc b/src/alpaka/AlpakaCore/alpaka/ScopedContext.cc
index 52967f959..2944d9638 100644
--- a/src/alpaka/AlpakaCore/alpaka/ScopedContext.cc
+++ b/src/alpaka/AlpakaCore/alpaka/ScopedContext.cc
@@ -1,28 +1,21 @@
 #include "AlpakaCore/alpakaConfig.h"
 #include "AlpakaCore/ScopedContext.h"
 
-namespace {
-  struct CallbackData {
-    edm::WaitingTaskWithArenaHolder holder;
-    int device;
-  };
-}  // namespace
-
 namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
 
   namespace impl {
-    void ScopedContextGetterBase::synchronizeStreams(int dataDevice,
-                                                     ::ALPAKA_ACCELERATOR_NAMESPACE::Queue& dataStream,
+    void ScopedContextGetterBase::synchronizeStreams(Queue& dataStream,
                                                      bool available,
-                                                     alpaka::Event<::ALPAKA_ACCELERATOR_NAMESPACE::Queue> dataEvent) {
-      if (dataDevice != device()) {
-        // Eventually replace with prefetch to current device (assuming unified memory works)
-        // If we won't go to unified memory, need to figure out something else...
-        throw std::runtime_error("Handling data from multiple devices is not yet supported");
-      }
-
+                                                     alpaka::Event<Queue> dataEvent) {
       if (dataStream != stream()) {
-        // Different streams, need to synchronize
+        // Different streams, check if the underlying device is the same
+        if (alpaka::getDev(dataStream) != device()) {
+          // Eventually replace with prefetch to current device (assuming unified memory works)
+          // If we won't go to unified memory, need to figure out something else...
+          throw std::runtime_error("Handling data from multiple devices is not yet supported");
+        }
+
+        // Synchronize the two streams
         if (not available) {
           // Event not yet occurred, so need to add synchronization
           // here. Sychronization is done by making the CUDA stream to
@@ -34,13 +27,13 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
       }
     }
 
-    void ScopedContextHolderHelper::enqueueCallback(int device, ::ALPAKA_ACCELERATOR_NAMESPACE::Queue& stream) {
-      alpaka::enqueue(stream, [this, device]() {
-        auto data = new CallbackData{waitingTaskHolder_, device};
-        std::unique_ptr<CallbackData> guard{reinterpret_cast<CallbackData*>(data)};
-        edm::WaitingTaskWithArenaHolder& waitingTaskHolder = guard->holder;
-        int device2 = guard->device;
-        waitingTaskHolder.doneWaiting(nullptr);
+    void ScopedContextHolderHelper::enqueueCallback(ScopedContextBase::Queue& stream) {
+      alpaka::enqueue(stream, [holder = waitingTaskHolder_]() {
+        // TODO: The functor is required to be const, so can't use
+        // 'mutable', so I'm copying the object as a workaround. I
+        // wonder if there are any wider implications.
+        auto h = holder;
+        h.doneWaiting(nullptr);
       });
     }
   }  // namespace impl
@@ -48,9 +41,9 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
   ////////////////////
 
   ScopedContextAcquire::~ScopedContextAcquire() {
-    holderHelper_.enqueueCallback(device(), stream());
+    holderHelper_.enqueueCallback(stream());
     if (contextState_) {
-      contextState_->set(device(), streamPtr());
+      contextState_->set(streamPtr());
     }
   }
 
@@ -73,6 +66,6 @@ namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
 
   ////////////////////
 
-  ScopedContextTask::~ScopedContextTask() { holderHelper_.enqueueCallback(device(), stream()); }
+  ScopedContextTask::~ScopedContextTask() { holderHelper_.enqueueCallback(stream()); }
 
 }  // namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/AlpakaCore/alpaka/chooseDevice.cc b/src/alpaka/AlpakaCore/alpaka/chooseDevice.cc
index d7296fd41..ab0364a73 100644
--- a/src/alpaka/AlpakaCore/alpaka/chooseDevice.cc
+++ b/src/alpaka/AlpakaCore/alpaka/chooseDevice.cc
@@ -1,17 +1,18 @@
 #include "AlpakaCore/alpakaConfig.h"
 #include "AlpakaCore/chooseDevice.h"
-#include "AlpakaCore/deviceCount.h"
+#include "AlpakaCore/alpakaDevAcc.h"
 
 namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
 
-  int chooseDevice(edm::StreamID id) {
+  ::ALPAKA_ACCELERATOR_NAMESPACE::Device const& chooseDevice(edm::StreamID id) {
     // For startes we "statically" assign the device based on
     // edm::Stream number. This is suboptimal if the number of
     // edm::Streams is not a multiple of the number of CUDA devices
     // (and even then there is no load balancing).
 
     // TODO: improve the "assignment" logic
-    return id % ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::deviceCount();
+    auto const& devices = ::ALPAKA_ACCELERATOR_NAMESPACE::devices;
+    return devices[id % devices.size()];
   }
 
 }  // namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/AlpakaCore/chooseDevice.h b/src/alpaka/AlpakaCore/chooseDevice.h
index 9580e4439..fec1bdbf4 100644
--- a/src/alpaka/AlpakaCore/chooseDevice.h
+++ b/src/alpaka/AlpakaCore/chooseDevice.h
@@ -6,7 +6,7 @@
 
 namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE {
 
-  int chooseDevice(edm::StreamID id);
+  ::ALPAKA_ACCELERATOR_NAMESPACE::Device const& chooseDevice(edm::StreamID id);
 
 }  // namespace cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE
 
diff --git a/src/alpaka/plugin-BeamSpotProducer/alpaka/BeamSpotToAlpaka.cc b/src/alpaka/plugin-BeamSpotProducer/alpaka/BeamSpotToAlpaka.cc
index 157aca30b..0423aea84 100644
--- a/src/alpaka/plugin-BeamSpotProducer/alpaka/BeamSpotToAlpaka.cc
+++ b/src/alpaka/plugin-BeamSpotProducer/alpaka/BeamSpotToAlpaka.cc
@@ -26,11 +26,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
   void BeamSpotToAlpaka::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
     auto const& bsRaw = iSetup.get<BeamSpotPOD>();
 
-    // TO DO: Add inter-event parallelization. cms::alpaka::ScopedContextProduce?
-    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{::ALPAKA_ACCELERATOR_NAMESPACE::devices[0],
-                                                                               iEvent.streamID()};
+    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{iEvent.streamID()};
     BeamSpotAlpaka bsDevice(&bsRaw, ctx.stream());
-    ctx.emplace(::ALPAKA_ACCELERATOR_NAMESPACE::devices[0], iEvent, bsPutToken_, std::move(bsDevice));
+    ctx.emplace(iEvent, bsPutToken_, std::move(bsDevice));
   }
 
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletAlpaka.cc b/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletAlpaka.cc
index 51801c83f..004228751 100644
--- a/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletAlpaka.cc
+++ b/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletAlpaka.cc
@@ -36,12 +36,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
     auto const& hits = iEvent.get(tokenHitGPU_);
 
-    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{::ALPAKA_ACCELERATOR_NAMESPACE::devices[0],
-                                                                               iEvent.streamID()};
-    ctx.emplace(::ALPAKA_ACCELERATOR_NAMESPACE::devices[0],
-                iEvent,
-                tokenTrackGPU_,
-                gpuAlgo_.makeTuplesAsync(hits, bf, ctx.stream()));
+    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{iEvent.streamID()};
+    ctx.emplace(iEvent, tokenTrackGPU_, gpuAlgo_.makeTuplesAsync(hits, bf, ctx.stream()));
   }
 
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/plugin-PixelTriplets/alpaka/PixelTrackSoAFromAlpaka.cc b/src/alpaka/plugin-PixelTriplets/alpaka/PixelTrackSoAFromAlpaka.cc
index 064648534..6e7c08a6a 100644
--- a/src/alpaka/plugin-PixelTriplets/alpaka/PixelTrackSoAFromAlpaka.cc
+++ b/src/alpaka/plugin-PixelTriplets/alpaka/PixelTrackSoAFromAlpaka.cc
@@ -76,12 +76,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
     auto const& inputData = iEvent.get(tokenAlpaka_);
     auto outputData = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::allocHostBuf<pixelTrack::TrackSoA>(1u);
-    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{::ALPAKA_ACCELERATOR_NAMESPACE::devices[0],
-                                                                               iEvent.streamID()};
+    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{iEvent.streamID()};
     alpaka::memcpy(ctx.stream(), outputData, inputData, 1u);
 
     // DO NOT  make a copy  (actually TWO....)
-    ctx.emplace(::ALPAKA_ACCELERATOR_NAMESPACE::devices[0], iEvent, tokenSOA_, std::move(outputData));
+    ctx.emplace(iEvent, tokenSOA_, std::move(outputData));
 #endif
   }
 
diff --git a/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexProducerAlpaka.cc b/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexProducerAlpaka.cc
index 175d22163..01e238f8b 100644
--- a/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexProducerAlpaka.cc
+++ b/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexProducerAlpaka.cc
@@ -51,12 +51,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     auto const& tracksBuf = iEvent.get(tokenTrack_);
     auto const tracks = alpaka::getPtrNative(tracksBuf);
 
-    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{::ALPAKA_ACCELERATOR_NAMESPACE::devices[0],
-                                                                               iEvent.streamID()};
-    ctx.emplace(::ALPAKA_ACCELERATOR_NAMESPACE::devices[0],
-                iEvent,
-                tokenVertex_,
-                m_gpuAlgo.makeAsync(tracks, m_ptMin, ctx.stream()));
+    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{iEvent.streamID()};
+    ctx.emplace(iEvent, tokenVertex_, m_gpuAlgo.makeAsync(tracks, m_ptMin, ctx.stream()));
   }
 
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexSoAFromAlpaka.cc b/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexSoAFromAlpaka.cc
index eb017e2eb..7fd14cc50 100644
--- a/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexSoAFromAlpaka.cc
+++ b/src/alpaka/plugin-PixelVertexFinding/alpaka/PixelVertexSoAFromAlpaka.cc
@@ -57,12 +57,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
     auto const& inputData = iEvent.get(tokenAlpaka_);
     auto outputData = ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::allocHostBuf<ZVertexSoA>(1u);
-    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{::ALPAKA_ACCELERATOR_NAMESPACE::devices[0],
-                                                                               iEvent.streamID()};
+    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{iEvent.streamID()};
     alpaka::memcpy(ctx.stream(), outputData, inputData, 1u);
 
     // No copies....
-    ctx.emplace(::ALPAKA_ACCELERATOR_NAMESPACE::devices[0], iEvent, tokenSOA_, std::move(outputData));
+    ctx.emplace(iEvent, tokenSOA_, std::move(outputData));
 #endif
   }
 
diff --git a/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToCluster.cc b/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToCluster.cc
index edd08fd14..60ca31bd0 100644
--- a/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToCluster.cc
+++ b/src/alpaka/plugin-SiPixelClusterizer/alpaka/SiPixelRawToCluster.cc
@@ -140,8 +140,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
     }  // end of for loop
 
-    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{::ALPAKA_ACCELERATOR_NAMESPACE::devices[0],
-                                                                               iEvent.streamID()};
+    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{iEvent.streamID()};
     gpuAlgo_.makeClustersAsync(isRun2_,
                                gpuMap,
                                gpuModulesToUnpack,
@@ -156,10 +155,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                ctx.stream());
 
     auto tmp = gpuAlgo_.getResults();
-    ctx.emplace(::ALPAKA_ACCELERATOR_NAMESPACE::devices[0], iEvent, digiPutToken_, std::move(tmp.first));
-    ctx.emplace(::ALPAKA_ACCELERATOR_NAMESPACE::devices[0], iEvent, clusterPutToken_, std::move(tmp.second));
+    ctx.emplace(iEvent, digiPutToken_, std::move(tmp.first));
+    ctx.emplace(iEvent, clusterPutToken_, std::move(tmp.second));
     if (includeErrors_) {
-      ctx.emplace(::ALPAKA_ACCELERATOR_NAMESPACE::devices[0], iEvent, digiErrorPutToken_, gpuAlgo_.getErrors());
+      ctx.emplace(iEvent, digiErrorPutToken_, gpuAlgo_.getErrors());
     }
   }
 
diff --git a/src/alpaka/plugin-SiPixelRecHits/alpaka/SiPixelRecHitAlpaka.cc b/src/alpaka/plugin-SiPixelRecHits/alpaka/SiPixelRecHitAlpaka.cc
index c8b9046b6..2098dd1a8 100644
--- a/src/alpaka/plugin-SiPixelRecHits/alpaka/SiPixelRecHitAlpaka.cc
+++ b/src/alpaka/plugin-SiPixelRecHits/alpaka/SiPixelRecHitAlpaka.cc
@@ -54,12 +54,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     }
 
     // TO DO: Async: Would need to add a queue as a parameter, not async for now!
-    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{::ALPAKA_ACCELERATOR_NAMESPACE::devices[0],
-                                                                               iEvent.streamID()};
-    ctx.emplace(::ALPAKA_ACCELERATOR_NAMESPACE::devices[0],
-                iEvent,
-                tokenHit_,
-                gpuAlgo_.makeHitsAsync(digis, clusters, bs, fcpe.params(), ctx.stream()));
+    ::cms::alpakatools::ALPAKA_ACCELERATOR_NAMESPACE::ScopedContextProduce ctx{iEvent.streamID()};
+    ctx.emplace(iEvent, tokenHit_, gpuAlgo_.makeHitsAsync(digis, clusters, bs, fcpe.params(), ctx.stream()));
   }
 
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE