Skip to content
Merged
189 changes: 189 additions & 0 deletions CUDADataFormats/Common/interface/HeterogeneousSoA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
#ifndef CUDADataFormatsCommonHeterogeneousSoA_H
#define CUDADataFormatsCommonHeterogeneousSoA_H

#include <cassert>

#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"

// Move-only owning handle to a single T that is held by exactly one of three
// smart pointers: cms::cuda::device::unique_ptr, cms::cuda::host::unique_ptr
// or std::unique_ptr. The accessors expose whichever of them is non-null,
// probing them in that order.
template <typename T>
class HeterogeneousSoA {
public:
  using Product = T;

  HeterogeneousSoA() = default;  // make root happy
  ~HeterogeneousSoA() = default;
  HeterogeneousSoA(HeterogeneousSoA &&) = default;
  HeterogeneousSoA &operator=(HeterogeneousSoA &&) = default;

  // Each constructor takes over the given pointer; the other two members
  // remain empty.
  explicit HeterogeneousSoA(cms::cuda::device::unique_ptr<T> &&p) : dm_ptr(std::move(p)) {}
  explicit HeterogeneousSoA(cms::cuda::host::unique_ptr<T> &&p) : hm_ptr(std::move(p)) {}
  explicit HeterogeneousSoA(std::unique_ptr<T> &&p) : std_ptr(std::move(p)) {}

  // Read-only access: first non-null of dm_ptr, hm_ptr, std_ptr
  // (null when all three are empty).
  auto const *get() const {
    if (dm_ptr)
      return dm_ptr.get();
    if (hm_ptr)
      return hm_ptr.get();
    return std_ptr.get();
  }

  auto const &operator*() const { return *get(); }

  auto const *operator->() const { return get(); }

  // Mutable access, same lookup order as the const overload.
  auto *get() {
    if (dm_ptr)
      return dm_ptr.get();
    if (hm_ptr)
      return hm_ptr.get();
    return std_ptr.get();
  }

  auto &operator*() { return *get(); }

  auto *operator->() { return get(); }

  // Allocates a host buffer for one T and enqueues a cudaMemcpyAsync of
  // sizeof(T) bytes from dm_ptr into it on `stream`.
  // In reality valid only for the GPU version: dm_ptr must be set (asserted).
  cms::cuda::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const {
    assert(dm_ptr);
    auto hostCopy = cms::cuda::make_host_unique<T>(stream);
    cudaCheck(cudaMemcpyAsync(hostCopy.get(), dm_ptr.get(), sizeof(T), cudaMemcpyDefault, stream));
    return hostCopy;
  }

private:
  // a union won't do it, a variant will not be more efficient
  // NOTE(review): the trailing "//!" is presumably ROOT's transient-member
  // marker -- keep it attached to each declaration.
  cms::cuda::device::unique_ptr<T> dm_ptr;  //!
  cms::cuda::host::unique_ptr<T> hm_ptr;    //!
  std::unique_ptr<T> std_ptr;               //!
};

namespace cms {
namespace cudacompat {

// Allocation traits whose primary smart pointer is
// cms::cuda::device::unique_ptr. Every helper forwards to the matching
// cms::cuda::make_*_unique factory.
struct GPUTraits {
  template <class T>
  using unique_ptr = cms::cuda::device::unique_ptr<T>;

  // Primary allocation, single object: forwards to make_device_unique.
  template <class T>
  static auto make_unique(cudaStream_t stream) {
    return cms::cuda::make_device_unique<T>(stream);
  }

  // Primary allocation, sized form: forwards `size` to make_device_unique.
  template <class T>
  static auto make_unique(size_t size, cudaStream_t stream) {
    return cms::cuda::make_device_unique<T>(size, stream);
  }

  // Explicit host-side allocation.
  template <class T>
  static auto make_host_unique(cudaStream_t stream) {
    return cms::cuda::make_host_unique<T>(stream);
  }

  // Explicit device-side allocation, single object.
  template <class T>
  static auto make_device_unique(cudaStream_t stream) {
    return cms::cuda::make_device_unique<T>(stream);
  }

  // Explicit device-side allocation, sized form.
  template <class T>
  static auto make_device_unique(size_t size, cudaStream_t stream) {
    return cms::cuda::make_device_unique<T>(size, stream);
  }
};

// Allocation traits whose primary smart pointer is
// cms::cuda::host::unique_ptr. Every helper forwards to the matching
// cms::cuda::make_*_unique factory.
struct HostTraits {
  template <typename T>
  using unique_ptr = cms::cuda::host::unique_ptr<T>;

  // Primary allocation, single object: forwards to make_host_unique.
  template <typename T>
  static auto make_unique(cudaStream_t stream) {
    return cms::cuda::make_host_unique<T>(stream);
  }

  // Primary allocation, sized form. GPUTraits and CPUTraits both offer this
  // overload; without it, generic code written against
  // Traits::make_unique<T>(size, stream) cannot be instantiated with
  // HostTraits.
  // NOTE(review): assumes cms::cuda::make_host_unique has a (size, stream)
  // overload like make_device_unique does -- confirm in host_unique_ptr.h.
  template <typename T>
  static auto make_unique(size_t size, cudaStream_t stream) {
    return cms::cuda::make_host_unique<T>(size, stream);
  }

  // Explicit host-side allocation.
  template <typename T>
  static auto make_host_unique(cudaStream_t stream) {
    return cms::cuda::make_host_unique<T>(stream);
  }

  // Explicit device-side allocation, single object.
  template <typename T>
  static auto make_device_unique(cudaStream_t stream) {
    return cms::cuda::make_device_unique<T>(stream);
  }

  // Explicit device-side allocation, sized form.
  template <typename T>
  static auto make_device_unique(size_t size, cudaStream_t stream) {
    return cms::cuda::make_device_unique<T>(size, stream);
  }
};

// Allocation traits backed by std::unique_ptr / std::make_unique. The
// cudaStream_t argument is accepted only so the signatures line up with the
// other traits; it is left unnamed and never used.
struct CPUTraits {
  template <class T>
  using unique_ptr = std::unique_ptr<T>;

  // Primary allocation, single object.
  template <class T>
  static auto make_unique(cudaStream_t) {
    return std::make_unique<T>();
  }

  // Primary allocation, sized form: `size` is handed to std::make_unique<T>.
  template <class T>
  static auto make_unique(size_t size, cudaStream_t) {
    return std::make_unique<T>(size);
  }

  // "Host" allocation: same as the primary single-object form.
  template <class T>
  static auto make_host_unique(cudaStream_t) {
    return std::make_unique<T>();
  }

  // "Device" allocation, single object: also served by std::make_unique.
  template <class T>
  static auto make_device_unique(cudaStream_t) {
    return std::make_unique<T>();
  }

  // "Device" allocation, sized form.
  template <class T>
  static auto make_device_unique(size_t size, cudaStream_t) {
    return std::make_unique<T>(size);
  }
};

} // namespace cudacompat
} // namespace cms

// Move-only owning handle to a single T whose smart-pointer flavour is chosen
// at compile time through Traits::unique_ptr (a heterogeneous unique pointer
// of a different sort than HeterogeneousSoA).
template <typename T, typename Traits>
class HeterogeneousSoAImpl {
public:
  // Smart pointer type supplied by the traits class.
  template <typename V>
  using unique_ptr = typename Traits::template unique_ptr<V>;

  HeterogeneousSoAImpl() = default;  // make root happy
  ~HeterogeneousSoAImpl() = default;
  HeterogeneousSoAImpl(HeterogeneousSoAImpl &&) = default;
  HeterogeneousSoAImpl &operator=(HeterogeneousSoAImpl &&) = default;

  // Takes over an already allocated object.
  explicit HeterogeneousSoAImpl(unique_ptr<T> &&p) : m_ptr(std::move(p)) {}
  // Defined out of class, below the class body.
  explicit HeterogeneousSoAImpl(cudaStream_t stream);

  // Raw access to the owned object (null when nothing is held).
  T *get() { return m_ptr.get(); }

  T const *get() const { return m_ptr.get(); }

  // Defined out of class, below the class body.
  cms::cuda::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const;

private:
  // NOTE(review): the trailing "//!" is presumably ROOT's transient-member
  // marker -- keep it attached to the declaration.
  unique_ptr<T> m_ptr;  //!
};

// Allocates the owned T through Traits::make_unique<T>(stream).
// m_ptr is initialized directly in the member-initializer list rather than
// default-constructed and then assigned in the body.
template <typename T, typename Traits>
HeterogeneousSoAImpl<T, Traits>::HeterogeneousSoAImpl(cudaStream_t stream)
    : m_ptr(Traits::template make_unique<T>(stream)) {}

// Allocates a host buffer for one T and enqueues a cudaMemcpyAsync of
// sizeof(T) bytes from the owned object into it on `stream`; the buffer is
// returned to the caller.
// In reality valid only for the GPU version...
template <typename T, typename Traits>
cms::cuda::host::unique_ptr<T> HeterogeneousSoAImpl<T, Traits>::toHostAsync(cudaStream_t stream) const {
  auto hostCopy = cms::cuda::make_host_unique<T>(stream);
  T const *source = get();
  cudaCheck(cudaMemcpyAsync(hostCopy.get(), source, sizeof(T), cudaMemcpyDefault, stream));
  return hostCopy;
}

// Convenience aliases binding HeterogeneousSoAImpl to each traits class.
template <class T>
using HeterogeneousSoAGPU = HeterogeneousSoAImpl<T, cms::cudacompat::GPUTraits>;

template <class T>
using HeterogeneousSoACPU = HeterogeneousSoAImpl<T, cms::cudacompat::CPUTraits>;

template <class T>
using HeterogeneousSoAHost = HeterogeneousSoAImpl<T, cms::cudacompat::HostTraits>;

#endif
29 changes: 29 additions & 0 deletions CUDADataFormats/Common/interface/HostProduct.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef CUDADataFormatsCommonHostProduct_H
#define CUDADataFormatsCommonHostProduct_H

#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"

// Move-only, read-only owning handle to a single T held either by a
// cms::cuda::host::unique_ptr or by a std::unique_ptr. The accessors expose
// hm_ptr when it is set and std_ptr otherwise.
template <typename T>
class HostProduct {
public:
  HostProduct() = default;  // make root happy
  ~HostProduct() = default;
  HostProduct(HostProduct&&) = default;
  HostProduct& operator=(HostProduct&&) = default;

  // Each constructor takes over the given pointer; the other member remains
  // empty.
  explicit HostProduct(cms::cuda::host::unique_ptr<T>&& p) : hm_ptr(std::move(p)) {}
  explicit HostProduct(std::unique_ptr<T>&& p) : std_ptr(std::move(p)) {}

  // Pointer to the owned object (null when both members are empty).
  auto const* get() const {
    if (hm_ptr)
      return hm_ptr.get();
    return std_ptr.get();
  }

  auto const& operator*() const { return *get(); }

  auto const* operator->() const { return get(); }

private:
  // NOTE(review): the trailing "//!" is presumably ROOT's transient-member
  // marker -- keep it attached to each declaration.
  cms::cuda::host::unique_ptr<T> hm_ptr;  //!
  std::unique_ptr<T> std_ptr;             //!
};

#endif
5 changes: 5 additions & 0 deletions CUDADataFormats/StdDictionaries/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<use name="rootcore"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
<export>
<lib name="1"/>
</export>
4 changes: 4 additions & 0 deletions CUDADataFormats/StdDictionaries/src/classes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#include <cstddef>
#include <vector>

#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h"
14 changes: 14 additions & 0 deletions CUDADataFormats/StdDictionaries/src/classes_def.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<lcgdict>
<class name="std::vector<std::byte, cms::cuda::HostAllocator<std::byte, 0>>" />
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you remind me, the plan was to eventually get rid of these, right?

(well, eventually HostProduct and HeterogeneousSoA will likely evolve)


<class name="std::vector<uint8_t, cms::cuda::HostAllocator<uint8_t, 0>>" />
<class name="std::vector<uint16_t, cms::cuda::HostAllocator<uint16_t, 0>>" />
<class name="std::vector<uint32_t, cms::cuda::HostAllocator<uint32_t, 0>>" />

<class name="std::vector<int8_t, cms::cuda::HostAllocator<int8_t, 0>>" />
<class name="std::vector<int16_t, cms::cuda::HostAllocator<int16_t, 0>>" />
<class name="std::vector<int32_t, cms::cuda::HostAllocator<int32_t, 0>>" />

<class name="std::vector<float, cms::cuda::HostAllocator<float, 0>>" />
<class name="std::vector<double, cms::cuda::HostAllocator<double, 0>>" />
</lcgdict>