-
Notifications
You must be signed in to change notification settings - Fork 4.6k
Added SoABlocks feature #48629
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added SoABlocks feature #48629
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |
| #define DataFormats_Portable_interface_PortableDeviceCollection_h | ||
|
|
||
| #include <cassert> | ||
| #include <concepts> | ||
| #include <optional> | ||
| #include <type_traits> | ||
|
|
||
|
|
@@ -32,14 +33,16 @@ class PortableDeviceCollection { | |
| explicit PortableDeviceCollection(edm::Uninitialized) noexcept {} | ||
|
|
||
| PortableDeviceCollection(int32_t elements, TDev const& device) | ||
| requires(!portablecollection::hasBlocksNumber<Layout>) | ||
| : buffer_{cms::alpakatools::make_device_buffer<std::byte[]>(device, Layout::computeDataSize(elements))}, | ||
| layout_{buffer_->data(), elements}, | ||
| view_{layout_} { | ||
| // The buffer must start at a multiple of Layout::alignment. Alpaka is configured with a default alignment of 128 bytes by defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 | ||
| assert(reinterpret_cast<uintptr_t>(buffer_->data()) % Layout::alignment == 0); | ||
| } | ||
|
|
||
| template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>> | ||
| template <typename TQueue> | ||
| requires(alpaka::isQueue<TQueue> && (!portablecollection::hasBlocksNumber<Layout>)) | ||
| PortableDeviceCollection(int32_t elements, TQueue const& queue) | ||
| : buffer_{cms::alpakatools::make_device_buffer<std::byte[]>(queue, Layout::computeDataSize(elements))}, | ||
| layout_{buffer_->data(), elements}, | ||
|
|
@@ -48,6 +51,44 @@ class PortableDeviceCollection { | |
| assert(reinterpret_cast<uintptr_t>(buffer_->data()) % Layout::alignment == 0); | ||
| } | ||
|
|
||
| // Constructor for SoA by blocks taking a device and a variadic list of integral sizes, one per block (sizeof...(sizes) must equal Layout::blocksNumber); the sizes are cast to int32_t and forwarded to the constructor that takes an array of sizes | ||
| template <std::integral... Ints> | ||
| requires(portablecollection::hasBlocksNumber<Layout>) | ||
| explicit PortableDeviceCollection(TDev const& device, Ints... sizes) | ||
| requires(sizeof...(sizes) == Layout::blocksNumber) | ||
| : PortableDeviceCollection(device, std::to_array({static_cast<int32_t>(sizes)...})) {} | ||
|
|
||
| // Constructor for SoA by blocks taking a queue and a variadic list of integral sizes, one per block (sizeof...(sizes) must equal Layout::blocksNumber); the sizes are cast to int32_t and forwarded to the constructor that takes an array of sizes | ||
| template <typename TQueue, std::integral... Ints> | ||
| requires(alpaka::isQueue<TQueue> && portablecollection::hasBlocksNumber<Layout>) | ||
| explicit PortableDeviceCollection(TQueue const& queue, Ints... sizes) | ||
| requires(sizeof...(sizes) == Layout::blocksNumber) | ||
| : PortableDeviceCollection(queue, std::to_array({static_cast<int32_t>(sizes)...})) {} | ||
|
|
||
| // constructor for SoA by blocks with an array of sizes | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do you make Wouldn't it be simpler to take as argument an
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Having said that, what is the use case for this constructor, instead of the one that takes a variadic list of integers ?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. First, I need to ensure that the
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know if this constructor is still useful. Someone would like to specify the array of sizes? @felicepantaleo
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
No, you don't. You can write a
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would this work ? explicit PortableDeviceCollection(TDev const& device, std::array<int32_t, Layout::blocksNumber> const& sizes) requires(portablecollection::hasBlocksNumber<Layout>) { ... }
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately no. The compiler sees the constructor's arguments before the requires, so it tries to access Another way could be to add
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, OK, I see.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One more curiosity: why do you require
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, it was a way to address this comment, but now I realize you meant to require that the constructor for SoA without blocks must require the Layout to be without blocks. An SoABlocks with a single block is nothing but a SoA, but I don't see any reason why we should not support this case. |
||
| template <std::size_t N> | ||
| requires(portablecollection::hasBlocksNumber<Layout>) | ||
| explicit PortableDeviceCollection(TDev const& device, std::array<int32_t, N> const& sizes) | ||
| : buffer_{cms::alpakatools::make_device_buffer<std::byte[]>(device, Layout::computeDataSize(sizes))}, | ||
| layout_{buffer_->data(), sizes}, | ||
| view_{layout_} { | ||
| static_assert(Layout::blocksNumber == N, "Number of sizes must match the number of blocks in the Layout"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can this check be moved to the
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, for the same reason of this. The compiler checks both the requirements at the same time, causing this compilation error: |
||
| // Alpaka is configured with a default alignment of 128 bytes by defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 | ||
| assert(reinterpret_cast<uintptr_t>(buffer_->data()) % Layout::alignment == 0); | ||
| } | ||
|
|
||
| // Constructor for SoA by blocks taking a queue and an array of sizes: allocates a device buffer of Layout::computeDataSize(sizes) bytes and builds the layout and the view on top of it; N must equal Layout::blocksNumber, which is enforced by the static_assert in the body | ||
| template <typename TQueue, std::size_t N> | ||
| requires(alpaka::isQueue<TQueue> && portablecollection::hasBlocksNumber<Layout>) | ||
| explicit PortableDeviceCollection(TQueue const& queue, std::array<int32_t, N> const& sizes) | ||
| : buffer_{cms::alpakatools::make_device_buffer<std::byte[]>(queue, Layout::computeDataSize(sizes))}, | ||
| layout_{buffer_->data(), sizes}, | ||
| view_{layout_} { | ||
| static_assert(Layout::blocksNumber == N, "Number of sizes must match the number of blocks in the Layout"); | ||
| // The buffer must start at a multiple of Layout::alignment. Alpaka is configured with a default alignment of 128 bytes by defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 | ||
| assert(reinterpret_cast<uintptr_t>(buffer_->data()) % Layout::alignment == 0); | ||
| } | ||
|
|
||
| // non-copyable | ||
| PortableDeviceCollection(PortableDeviceCollection const&) = delete; | ||
| PortableDeviceCollection& operator=(PortableDeviceCollection const&) = delete; | ||
|
|
@@ -76,13 +117,16 @@ class PortableDeviceCollection { | |
| ConstBuffer const_buffer() const { return *buffer_; } | ||
|
|
||
| // erases the data in the Buffer by writing zeros (bytes containing '\0') to it | ||
| template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>> | ||
| template <typename TQueue> | ||
| requires(alpaka::isQueue<TQueue>) | ||
| void zeroInitialise(TQueue&& queue) { | ||
| alpaka::memset(std::forward<TQueue>(queue), *buffer_, 0x00); | ||
| } | ||
|
|
||
| // Copy column by column heterogeneously for device to host/device data transfer. | ||
| // TODO: implement heterogeneous deepCopy for SoA blocks | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this something you plan for a different PR ?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. I am still thinking about the best design choice for |
||
| template <typename TQueue> | ||
| requires(alpaka::isQueue<TQueue> && (!portablecollection::hasBlocksNumber<Layout>)) | ||
| void deepCopy(ConstView const& view, TQueue& queue) { | ||
| ConstDescriptor desc{view}; | ||
| Descriptor desc_{view_}; | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some comments for the DeviceCollection apply to the HostCollection, and vice versa. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,15 +1,24 @@ | ||
| <bin name="TestDataFormatsPortableOnHost" file="test_catch2_*.cc"> | ||
| <use name="DataFormats/Portable"/> | ||
| <use name="DataFormats/SoATemplate"/> | ||
| <use name="catch2"/> | ||
| <use name="eigen"/> | ||
| <use name="DataFormats/Portable"/> | ||
| <use name="DataFormats/SoATemplate"/> | ||
| </bin> | ||
|
|
||
| <bin name="TestDataFormatsPortableHeterogenous" file="alpaka/test_catch2_heterogeneousDeepCopy.dev.cc"> | ||
| <use name="catch2"/> | ||
| <use name="eigen"/> | ||
| <use name="DataFormats/Portable"/> | ||
| <use name="DataFormats/SoATemplate"/> | ||
| <use name="HeterogeneousCore/AlpakaInterface"/> | ||
| <flags ALPAKA_BACKENDS="1"/> | ||
| </bin> | ||
|
|
||
| <bin name="TestDataFormatsPortableSoABlocks" file="alpaka/test_catch2_heterogeneousSoABlocks.dev.cc"> | ||
| <use name="catch2"/> | ||
| <use name="eigen"/> | ||
| <use name="DataFormats/Portable"/> | ||
| <use name="DataFormats/SoATemplate"/> | ||
| <use name="HeterogeneousCore/AlpakaInterface"/> | ||
| <flags ALPAKA_BACKENDS="1"/> | ||
| </bin> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,161 @@ | ||
| #include <Eigen/Core> | ||
| #include <Eigen/Dense> | ||
|
|
||
| #include <alpaka/alpaka.hpp> | ||
|
|
||
| #define CATCH_CONFIG_MAIN | ||
| #include <catch2/catch_all.hpp> | ||
|
|
||
| #include "DataFormats/SoATemplate/interface/SoABlocks.h" | ||
| #include "DataFormats/Portable/interface/PortableCollection.h" | ||
| #include "HeterogeneousCore/AlpakaInterface/interface/config.h" | ||
| #include "HeterogeneousCore/AlpakaInterface/interface/memory.h" | ||
| #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" | ||
|
|
||
| using namespace ALPAKA_ACCELERATOR_NAMESPACE; | ||
|
|
||
| // This test checks the correctness of using SoABlocks with PortableCollections: a Graph made of a nodes block and an edges block is filled on the device and compared on the host with standalone Nodes and Edges collections filled with the same values. | ||
|
|
||
| GENERATE_SOA_LAYOUT(NodesT, SOA_COLUMN(int, id), SOA_SCALAR(int, count)) | ||
|
|
||
| using Nodes = NodesT<>; | ||
|
|
||
| GENERATE_SOA_LAYOUT(EdgesT, SOA_COLUMN(int, src), SOA_COLUMN(int, dst), SOA_COLUMN(float, cost), SOA_SCALAR(int, count)) | ||
|
|
||
| using Edges = EdgesT<>; | ||
|
|
||
| GENERATE_SOA_BLOCKS(GraphT, SOA_BLOCK(nodes, NodesT), SOA_BLOCK(edges, EdgesT)) | ||
|
|
||
| using Graph = GraphT<>; | ||
| using GraphView = Graph::View; | ||
| using GraphConstView = Graph::ConstView; | ||
|
|
||
| // Kernel functor that fills the standalone Nodes and Edges SoA views | ||
| struct FillSoAs { | ||
| ALPAKA_FN_ACC void operator()(Acc1D const& acc, Nodes::View nodes, Edges::View edges) const { | ||
| const int N = static_cast<int>(nodes.metadata().size()); | ||
| const int E = static_cast<int>(edges.metadata().size()); | ||
|
|
||
| // Fill each node id with its own index, then store the number of nodes in the count scalar inside the once_per_grid guard | ||
| for (auto i : cms::alpakatools::uniform_elements(acc, nodes.metadata().size())) { | ||
| nodes[i].id() = static_cast<int>(i); | ||
| } | ||
| if (cms::alpakatools::once_per_grid(acc)) { | ||
| nodes.count() = N; | ||
| } | ||
|
|
||
| // Fill edges with some arbitrary but deterministic values (src = j % N, dst = (j * 7 + 3) % N, cost = 0.5 * (src + dst)), then store the number of edges in the count scalar | ||
| for (auto j : cms::alpakatools::uniform_elements(acc, edges.metadata().size())) { | ||
| int src = static_cast<int>(j % N); | ||
| int dst = static_cast<int>((j * 7 + 3) % N); | ||
| edges[j].src() = src; | ||
| edges[j].dst() = dst; | ||
| edges[j].cost() = 0.5f * float(src + dst); | ||
| } | ||
| if (cms::alpakatools::once_per_grid(acc)) { | ||
| edges.count() = E; | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| // Kernel functor that fills the nodes and edges blocks of a Graph SoABlocks view with the same values used by FillSoAs | ||
| struct FillBlocks { | ||
| ALPAKA_FN_ACC void operator()(Acc1D const& acc, GraphView blocksView) const { | ||
| const int N = static_cast<int>(blocksView.nodes().metadata().size()); | ||
| const int E = static_cast<int>(blocksView.edges().metadata().size()); | ||
|
|
||
| // Fill each node id with its own index, then store the number of nodes in the count scalar of the nodes block | ||
| for (auto i : cms::alpakatools::uniform_elements(acc, blocksView.nodes().metadata().size())) { | ||
| blocksView.nodes()[i].id() = static_cast<int>(i); | ||
| } | ||
| if (cms::alpakatools::once_per_grid(acc)) { | ||
| blocksView.nodes().count() = N; | ||
| } | ||
|
|
||
| // Fill edges with some arbitrary but deterministic values (src = j % N, dst = (j * 7 + 3) % N, cost = 0.5 * (src + dst)), then store the number of edges in the count scalar of the edges block | ||
| for (auto j : cms::alpakatools::uniform_elements(acc, blocksView.edges().metadata().size())) { | ||
| int src = static_cast<int>(j % N); | ||
| int dst = static_cast<int>((j * 7 + 3) % N); | ||
| blocksView.edges()[j].src() = src; | ||
| blocksView.edges()[j].dst() = dst; | ||
| blocksView.edges()[j].cost() = 0.5f * float(src + dst); | ||
| } | ||
| if (cms::alpakatools::once_per_grid(acc)) { | ||
| blocksView.edges().count() = E; | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| TEST_CASE("SoABlocks minimal graph in heterogeneous environment") { | ||
| auto const& devices = cms::alpakatools::devices<Platform>(); | ||
| if (devices.empty()) { | ||
| std::cout << "No devices available for the " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) | ||
| << " backend, skipping.\n"; | ||
| return; | ||
| } | ||
|
|
||
| for (auto const& device : devices) { | ||
| std::cout << "Running on " << alpaka::getName(device) << std::endl; | ||
| Queue queue(device); | ||
|
|
||
| // Number of elements: N nodes and E edges | ||
| const int N = 50; | ||
| const int E = 120; | ||
|
|
||
| // Portable Collections for the standalone Nodes and Edges SoAs, used as the reference | ||
| PortableCollection<Nodes, Device> nodesCollection(N, queue); | ||
| PortableCollection<Edges, Device> edgesCollection(E, queue); | ||
| Nodes::View& nodesCollectionView = nodesCollection.view(); | ||
| Edges::View& edgesCollectionView = edgesCollection.view(); | ||
|
|
||
| // Portable Collection for the Graph SoABlocks, built with one size per block (nodes, edges) | ||
| PortableCollection<Graph, Device> graphCollection(queue, N, E); | ||
| GraphView& graphCollectionView = graphCollection.view(); | ||
|
|
||
| // Work division: the number of blocks is derived from the larger of N and E with divide_up_by | ||
| const std::size_t blockSize = 256; | ||
| const std::size_t maxElems = std::max<std::size_t>(N, E); | ||
| const std::size_t numberOfBlocks = cms::alpakatools::divide_up_by(maxElems, blockSize); | ||
| const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize); | ||
|
|
||
| // Fill both the separate SoAs and the SoABlocks, then wait for the queue | ||
| alpaka::exec<Acc1D>(queue, workDiv, FillSoAs{}, nodesCollectionView, edgesCollectionView); | ||
| alpaka::exec<Acc1D>(queue, workDiv, FillBlocks{}, graphCollectionView); | ||
| alpaka::wait(queue); | ||
|
|
||
| // Check results on host: copy each device buffer into a host collection created with the same sizes | ||
| PortableHostCollection<Nodes> nodesHost(N, cms::alpakatools::host()); | ||
| PortableHostCollection<Edges> edgesHost(E, cms::alpakatools::host()); | ||
| PortableHostCollection<Graph> graphHost(cms::alpakatools::host(), N, E); | ||
|
|
||
| alpaka::memcpy(queue, nodesHost.buffer(), nodesCollection.buffer()); | ||
| alpaka::memcpy(queue, edgesHost.buffer(), edgesCollection.buffer()); | ||
| alpaka::memcpy(queue, graphHost.buffer(), graphCollection.buffer()); | ||
| alpaka::wait(queue); | ||
|
|
||
| const Nodes::ConstView nodesHostView = nodesHost.const_view(); | ||
| const Edges::ConstView edgesHostView = edgesHost.const_view(); | ||
| const GraphConstView graphHostView = graphHost.const_view(); | ||
|
|
||
| // Nodes: the nodes block must match both the standalone SoA and the expected index | ||
| REQUIRE(graphHostView.nodes().count() == N); | ||
| for (int i = 0; i < N; ++i) { | ||
| REQUIRE(graphHostView.nodes()[i].id() == nodesHostView[i].id()); | ||
| REQUIRE(graphHostView.nodes()[i].id() == i); | ||
| } | ||
|
|
||
| // Edges: the edges block must match both the standalone SoA and the values recomputed on the host | ||
| REQUIRE(graphHostView.edges().count() == E); | ||
| for (int j = 0; j < E; ++j) { | ||
| REQUIRE(graphHostView.edges()[j].src() == edgesHostView[j].src()); | ||
| REQUIRE(graphHostView.edges()[j].dst() == edgesHostView[j].dst()); | ||
| REQUIRE(graphHostView.edges()[j].cost() == edgesHostView[j].cost()); | ||
|
|
||
| int src = j % N; | ||
| int dst = (j * 7 + 3) % N; | ||
| REQUIRE(graphHostView.edges()[j].src() == src); | ||
| REQUIRE(graphHostView.edges()[j].dst() == dst); | ||
| REQUIRE_THAT(graphHostView.edges()[j].cost(), Catch::Matchers::WithinAbs(0.5f * float(src + dst), 1e-6)); | ||
| } | ||
| } | ||
| } |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The constructors that take a single `elements` size should be deleted or disabled using `requires` for the Layouts with blocks.