diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 525356e3d1..05845937e0 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -40,6 +40,8 @@ The Axom project release numbers follow [Semantic Versioning](http://semver.org/
 - 2D and 3D implementations for `axom::for_all` were added.
 - Adds `axom::FlatMapView`, a helper class associated with `axom::FlatMap` to support queries from within a GPU kernel.
+- Adds an `axom::FlatMap::create()` method to support constructing a hash map over a batch of keys
+  and values on the GPU or with OpenMP.
 - Adds support for custom allocators to `axom::FlatMap`.
 - Primal: Adds ability to perform sample-based shaping on tetrahedral shapes.
 - Improves efficiency of volume fraction computation from quadrature samples during sample-based shaping.
diff --git a/src/axom/core/CMakeLists.txt b/src/axom/core/CMakeLists.txt
index 3acb3fdbee..89fa639774 100644
--- a/src/axom/core/CMakeLists.txt
+++ b/src/axom/core/CMakeLists.txt
@@ -68,6 +68,7 @@ set(core_headers
     MapCollection.hpp
     FlatMap.hpp
     FlatMapView.hpp
+    FlatMapUtil.hpp
     DeviceHash.hpp
     NumericArray.hpp
     NumericLimits.hpp
diff --git a/src/axom/core/FlatMap.hpp b/src/axom/core/FlatMap.hpp
index 1796a84984..6365beef45 100644
--- a/src/axom/core/FlatMap.hpp
+++ b/src/axom/core/FlatMap.hpp
@@ -602,7 +602,7 @@ class FlatMap : detail::flat_map::SequentialLookupPolicy
+  template <typename ExecSpace>
+  static FlatMap create(axom::ArrayView<KeyType> keys,
+                        axom::ArrayView<ValueType> values,
+                        Allocator allocator = Allocator {});
+
 private:
  friend class FlatMapView<KeyType, ValueType, Hash>;
  friend class FlatMapView<const KeyType, const ValueType, Hash>;
@@ -715,13 +739,13 @@ FlatMap<KeyType, ValueType, Hash>::FlatMap(IndexType bucket_count, Allocator allocator)
   , m_loadCount(0)
 {
   IndexType minBuckets = MIN_NUM_BUCKETS;
-  bucket_count = axom::utilities::max(minBuckets, bucket_count);
+  bucket_count = axom::utilities::max(minBuckets, bucket_count / MAX_LOAD_FACTOR);
   // Get the smallest power-of-two number of groups satisfying:
   // N * GroupSize - 1 >= minBuckets
   // TODO: we should add a countl_zero overload for 64-bit integers
   {
     std::int32_t numGroups = std::ceil((bucket_count + 1) / (double)BucketsPerGroup);
-    m_numGroups2 = 31 - (axom::utilities::countl_zero(numGroups));
+    m_numGroups2 = 32 - (axom::utilities::countl_zero(numGroups - 1));
   }
   IndexType numGroupsRounded = 1 << m_numGroups2;
@@ -860,4 +884,6 @@ auto FlatMap<KeyType, ValueType, Hash>::erase(const_iterator pos) -> iterator
 
 }  // namespace axom
 
+#include "FlatMapUtil.hpp"
+
 #endif  // Axom_Core_FlatMap_HPP
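Two behavioral fixes land in the FlatMap.hpp constructor hunk above. First, a requested bucket_count is now scaled by 1/MAX_LOAD_FACTOR, so a map constructed for N elements can actually hold N elements without exceeding the maximum load factor (the new prealloc_buckets test below checks exactly this). Second, the group count is now rounded up to a power of two instead of down: 31 - countl_zero(numGroups) is floor(log2(numGroups)), while 32 - countl_zero(numGroups - 1) is ceil(log2(numGroups)). A standalone check of the rounding identity, using C++20 std::countl_zero as a stand-in for axom::utilities::countl_zero (assumed to behave identically on 32-bit values):

    #include <bit>
    #include <cstdint>
    #include <iostream>

    int main()
    {
      for(std::uint32_t numGroups : {1u, 2u, 3u, 5u, 8u, 9u})
      {
        // Old expression: floor(log2(numGroups)); rounds down, so the group
        // count can come out smaller than requested.
        int oldPow2 = 31 - std::countl_zero(numGroups);
        // New expression: ceil(log2(numGroups)); 1 << newPow2 is the smallest
        // power of two >= numGroups.
        int newPow2 = 32 - std::countl_zero(numGroups - 1);
        std::cout << numGroups << " -> old: " << (1 << oldPow2)
                  << " groups, new: " << (1 << newPow2) << " groups\n";
      }
      return 0;
    }

For numGroups = 3, for example, the old expression yields 1 << 1 = 2 groups, fewer than requested, while the new one yields 4.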
diff --git a/src/axom/core/FlatMapUtil.hpp b/src/axom/core/FlatMapUtil.hpp
new file mode 100644
index 0000000000..eb112c70f7
--- /dev/null
+++ b/src/axom/core/FlatMapUtil.hpp
@@ -0,0 +1,210 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Axom Project Developers. See the top-level COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+#ifndef Axom_Core_FlatMap_Util_HPP
+#define Axom_Core_FlatMap_Util_HPP
+
+#include <atomic>
+
+#include "axom/config.hpp"
+#include "axom/core/FlatMap.hpp"
+#include "axom/core/execution/reductions.hpp"
+
+namespace axom
+{
+namespace detail
+{
+
+// A simple test-and-set spinlock. During batched construction, one lock
+// guards the metadata of each group of buckets.
+struct SpinLock
+{
+  int value {0};
+
+  AXOM_HOST_DEVICE bool tryLock()
+  {
+    int still_locked = 0;
+#if defined(__HIP_DEVICE_COMPILE__)
+    still_locked = __hip_atomic_exchange(&value, 1, __ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_AGENT);
+#elif defined(AXOM_USE_RAJA) && defined(__CUDA_ARCH__)
+    still_locked = RAJA::atomicExchange<RAJA::auto_atomic>(&value, 1);
+    // We really want an acquire-fenced atomic here
+    __threadfence();
+#elif defined(AXOM_USE_RAJA) && defined(AXOM_USE_OPENMP)
+    still_locked = RAJA::atomicExchange<RAJA::auto_atomic>(&value, 1);
+    std::atomic_thread_fence(std::memory_order_acquire);
+#endif
+    return !still_locked;
+  }
+
+  AXOM_HOST_DEVICE void unlock()
+  {
+#if defined(__HIP_DEVICE_COMPILE__)
+    __hip_atomic_exchange(&value, 0, __ATOMIC_RELEASE, __HIP_MEMORY_SCOPE_AGENT);
+#elif defined(AXOM_USE_RAJA) && defined(__CUDA_ARCH__)
+    // We really want a release-fenced atomic here
+    __threadfence();
+    RAJA::atomicExchange<RAJA::auto_atomic>(&value, 0);
+#elif defined(AXOM_USE_RAJA) && defined(AXOM_USE_OPENMP)
+    std::atomic_thread_fence(std::memory_order_release);
+    RAJA::atomicExchange<RAJA::auto_atomic>(&value, 0);
+#else
+    value = 0;
+#endif
+  }
+};
+
+}  // namespace detail
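The SpinLock above exposes only a non-blocking tryLock() rather than a blocking lock(). Threads in a GPU warp execute in lockstep, so a thread that spins waiting for a lock held by a sibling thread of its own warp can hang forever; the insertion kernel in create() below instead retries the whole probe iteration whenever the lock is unavailable. A host-only analogue of the same pattern using std::atomic (a hypothetical sketch for illustration; the real implementation above dispatches to HIP, CUDA, or OpenMP atomics):

    #include <atomic>

    struct HostSpinLock
    {
      std::atomic<int> value {0};

      bool tryLock()
      {
        // exchange() returns the previous value: 0 means we acquired the lock.
        return value.exchange(1, std::memory_order_acquire) == 0;
      }

      void unlock() { value.store(0, std::memory_order_release); }
    };

    // Usage mirrors the insertion loop in create(): retry the enclosing probe
    // step instead of spinning inside the acquisition itself.
    void doGuardedWork(HostSpinLock& lock)
    {
      while(true)
      {
        if(lock.tryLock())
        {
          // ...critical section: update the group's metadata...
          lock.unlock();
          break;
        }
        // Lock unavailable: fall through and redo the outer probe iteration.
      }
    }

    int main()
    {
      HostSpinLock lock;
      doGuardedWork(lock);
      return 0;
    }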
+
+template <typename KeyType, typename ValueType, typename Hash>
+template <typename ExecSpace>
+auto FlatMap<KeyType, ValueType, Hash>::create(ArrayView<KeyType> keys,
+                                               ArrayView<ValueType> values,
+                                               Allocator allocator) -> FlatMap
+{
+  assert(keys.size() == values.size());
+
+  const IndexType num_elems = keys.size();
+
+  FlatMap new_map(allocator);
+  new_map.reserve(num_elems);
+
+  using HashResult = typename Hash::result_type;
+  using GroupBucket = detail::flat_map::GroupBucket;
+
+  // Grab some needed internal fields from the flat map.
+  // We're going to be constructing the metadata and the K-V pairs directly
+  // in-place.
+  const int ngroups_pow_2 = new_map.m_numGroups2;
+  const auto meta_group = new_map.m_metadata.view();
+  const auto buckets = new_map.m_buckets.view();
+
+  // Construct an array of locks, one per group. This guards metadata updates
+  // for each insertion.
+  const IndexType num_groups = 1 << ngroups_pow_2;
+  Array<detail::SpinLock> lock_vec(num_groups, num_groups, allocator.getID());
+  const auto group_locks = lock_vec.view();
+
+  // Map bucket slots to k-v pair indices. This is used to deduplicate pairs
+  // with the same key value.
+  Array<IndexType> key_index_dedup_vec(0, 0, allocator.getID());
+  key_index_dedup_vec.resize(num_groups * GroupBucket::Size, -1);
+  const auto key_index_dedup = key_index_dedup_vec.view();
+
+  // Map k-v pair indices to bucket slots. This is essentially the inverse of
+  // the above mapping.
+  Array<IndexType> key_index_to_bucket_vec(num_elems, num_elems, allocator.getID());
+  const auto key_index_to_bucket = key_index_to_bucket_vec.view();
+
+  for_all<ExecSpace>(
+    num_elems,
+    AXOM_LAMBDA(IndexType idx) {
+      // Hash the key.
+      auto hash = Hash {}(keys[idx]);
+
+      // We use the k MSBs of the hash as the initial group probe point,
+      // where ngroups = 2^k.
+      int bitshift_right = ((CHAR_BIT * sizeof(HashResult)) - ngroups_pow_2);
+      HashResult curr_group = hash >> bitshift_right;
+      curr_group &= ((1 << ngroups_pow_2) - 1);
+
+      std::uint8_t hash_8 = static_cast<std::uint8_t>(hash);
+
+      IndexType duplicate_bucket_index = -1;
+      IndexType empty_bucket_index = -1;
+      int iteration = 0;
+      while(iteration < meta_group.size())
+      {
+        // Try to lock the group. We do this in a non-blocking manner to
+        // avoid intra-warp progress hazards.
+        bool group_locked = group_locks[curr_group].tryLock();
+
+        if(group_locked)
+        {
+          // On every bucket visit, check previously-filled buckets for
+          // duplicate keys.
+          int empty_slot_index =
+            meta_group[curr_group].visitHashOrEmptyBucket(hash_8, [&](int matching_slot) {
+              IndexType bucket_index = curr_group * GroupBucket::Size + matching_slot;
+
+              if(keys[key_index_dedup[bucket_index]] == keys[idx])
+              {
+                // The highest-indexed k-v pair wins.
+                axom::atomicMax(&key_index_dedup[bucket_index], idx);
+                key_index_to_bucket[idx] = bucket_index;
+                duplicate_bucket_index = bucket_index;
+              }
+            });
+
+          if(duplicate_bucket_index == -1)
+          {
+            if(empty_slot_index == GroupBucket::InvalidSlot)
+            {
+              // The group is full. Set the overflow bit for the group.
+              meta_group[curr_group].template setOverflow<true>(hash_8);
+            }
+            else
+            {
+              // Got to the end of the probe sequence without a duplicate.
+              // Claim the empty bucket.
+              empty_bucket_index = curr_group * GroupBucket::Size + empty_slot_index;
+              meta_group[curr_group].template setBucket<true>(empty_slot_index, hash_8);
+              key_index_dedup[empty_bucket_index] = idx;
+              key_index_to_bucket[idx] = empty_bucket_index;
+            }
+          }
+          // Unlock the group once we're done.
+          group_locks[curr_group].unlock();
+
+          if(duplicate_bucket_index != -1 || empty_bucket_index != -1)
+          {
+            // We've found an empty slot or a duplicate key to place the
+            // value at. Empty slots should only occur at the end of the
+            // probe sequence, since we're only inserting.
+            break;
+          }
+          else
+          {
+            // Move to the next group.
+            curr_group = (curr_group + LookupPolicy {}.getNext(iteration)) % meta_group.size();
+            iteration++;
+          }
+        }
+      }
+    });
+
+  // Count the number of unique (deduplicated) inserts.
+  axom::ReduceSum<ExecSpace, IndexType> total_inserts(0);
+
+  // Using the key-deduplication map, assign unique k-v pairs to buckets.
+  for_all<ExecSpace>(
+    num_elems,
+    AXOM_LAMBDA(IndexType kv_idx) {
+      IndexType bucket_idx = key_index_to_bucket[kv_idx];
+      IndexType winning_idx = key_index_dedup[bucket_idx];
+      // Place the k-v pair at bucket_idx, but only for the winning index.
+      if(kv_idx == winning_idx)
+      {
+#if defined(__CUDA_ARCH__)
+        // HACK: the std::pair constructor is not host-device annotated, and
+        // CUDA requires passing --expt-relaxed-constexpr for it to work.
+        // Instead of requiring this flag, construct each member of the pair
+        // individually.
+        KeyType& key_dst = const_cast<KeyType&>(buckets[bucket_idx].get().first);
+        ValueType& value_dst = buckets[bucket_idx].get().second;
+        new(&key_dst) KeyType {keys[kv_idx]};
+        new(&value_dst) ValueType {values[kv_idx]};
+#else
+        new(&buckets[bucket_idx]) KeyValuePair(keys[kv_idx], values[kv_idx]);
+#endif
+        total_inserts += 1;
+      }
+    });
+
+  new_map.m_size = total_inserts.get();
+  new_map.m_loadCount = total_inserts.get();
+
+  return new_map;
+}
+
+}  // namespace axom
+
+#endif  // Axom_Core_FlatMap_Util_HPP
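With FlatMap.hpp now pulling in FlatMapUtil.hpp, the batched constructor is available wherever FlatMap is. A minimal host-side usage sketch, assuming a sequential execution space and the default allocator (the typed tests and the performance example later in this diff exercise the same call with device policies and allocators):

    #include "axom/core/Array.hpp"
    #include "axom/core/FlatMap.hpp"
    #include "axom/core/execution/execution_space.hpp"  // for axom::SEQ_EXEC

    int main()
    {
      // Build a batch of keys and values on the host.
      const int n = 1000;
      axom::Array<int> keys(n);
      axom::Array<double> values(n);
      for(int i = 0; i < n; i++)
      {
        keys[i] = i;
        values[i] = 2.0 * i;
      }

      // Construct the whole map in one batched pass. With SEQ_EXEC this runs
      // on the host; a CUDA/HIP/OpenMP policy plus a matching allocator runs
      // the same code on the device.
      auto map = axom::FlatMap<int, double>::create<axom::SEQ_EXEC>(keys.view(), values.view());

      return map.at(41) == 82.0 ? 0 : 1;
    }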
diff --git a/src/axom/core/detail/FlatTable.hpp b/src/axom/core/detail/FlatTable.hpp
index f21ea16952..25a4a1c000 100644
--- a/src/axom/core/detail/FlatTable.hpp
+++ b/src/axom/core/detail/FlatTable.hpp
@@ -175,6 +175,37 @@ struct GroupBucket
     return InvalidSlot;
   }
 
+  /*!
+   * \brief Visits matching hash buckets until an empty bucket is encountered.
+   *
+   * This is used when performing batched insertion: since elements are only
+   * inserted, never deleted, an empty bucket will only be encountered at the
+   * very end of a given probe sequence.
+   * The visitor functor allows the caller to detect duplicate keys.
+   *
+   * \param [in] hash reduced hash to search for
+   * \param [in] visitor functor to call for each matching bucket slot
+   *
+   * \return the first empty slot found, or InvalidSlot
+   */
+  template <typename Func>
+  AXOM_HOST_DEVICE int visitHashOrEmptyBucket(std::uint8_t hash, Func&& visitor) const
+  {
+    std::uint8_t reducedHash = reduceHash(hash);
+    for(int i = 0; i < Size; i++)
+    {
+      if(metadata.buckets[i] == reducedHash)
+      {
+        visitor(i);
+      }
+      else if(metadata.buckets[i] == GroupBucket::Empty)
+      {
+        return i;
+      }
+    }
+    return InvalidSlot;
+  }
+
   template <bool Atomic = true>
   AXOM_HOST_DEVICE void setBucket(int index, std::uint8_t hash)
   {
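visitHashOrEmptyBucket() above leans on the insert-only invariant: batched construction never erases, so group slots fill monotonically and the first Empty slot proves the key cannot live anywhere later in the probe sequence. A standalone model of the loop (Size and Empty are stand-ins here; the real constants live in GroupBucket):

    #include <array>
    #include <cstdint>
    #include <iostream>

    constexpr int Size = 8;            // stand-in for GroupBucket::Size
    constexpr std::uint8_t Empty = 0;  // stand-in for GroupBucket::Empty
    constexpr int InvalidSlot = -1;

    template <typename Func>
    int visitHashOrEmptyBucket(const std::array<std::uint8_t, Size>& slots,
                               std::uint8_t reducedHash,
                               Func&& visitor)
    {
      for(int i = 0; i < Size; i++)
      {
        if(slots[i] == reducedHash)
        {
          // Candidate slot: the caller compares full keys here to filter out
          // unrelated keys whose 8-bit reduced hashes collide.
          visitor(i);
        }
        else if(slots[i] == Empty)
        {
          // Inserts never punch holes in a group, so the first empty slot
          // ends the probe: the key cannot occur later in the sequence.
          return i;
        }
      }
      // Group full: the caller sets the overflow bit and probes the next group.
      return InvalidSlot;
    }

    int main()
    {
      std::array<std::uint8_t, Size> slots = {7, 3, 7, 0, 0, 0, 0, 0};
      int firstEmpty = visitHashOrEmptyBucket(slots, 7, [](int slot) {
        std::cout << "candidate slot " << slot << "\n";  // prints 0, then 2
      });
      std::cout << "first empty slot: " << firstEmpty << "\n";  // prints 3
      return 0;
    }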
diff --git a/src/axom/core/examples/CMakeLists.txt b/src/axom/core/examples/CMakeLists.txt
index 4160b243b0..45d44b9419 100644
--- a/src/axom/core/examples/CMakeLists.txt
+++ b/src/axom/core/examples/CMakeLists.txt
@@ -96,3 +96,10 @@ if(AXOM_ENABLE_TESTS)
     endif()
   endforeach()
 endif()
+
+axom_add_executable(
+    NAME       core_flatmap_perf_ex
+    SOURCES    core_flatmap_perf.cpp
+    OUTPUT_DIR ${EXAMPLE_OUTPUT_DIRECTORY}
+    DEPENDS_ON core
+    FOLDER     axom/core/examples )
diff --git a/src/axom/core/examples/core_flatmap_perf.cpp b/src/axom/core/examples/core_flatmap_perf.cpp
new file mode 100644
index 0000000000..1eef8cb742
--- /dev/null
+++ b/src/axom/core/examples/core_flatmap_perf.cpp
@@ -0,0 +1,188 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Axom Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+
+/*! \file core_flatmap_perf.cpp
+ *  \brief This example measures the performance of the FlatMap batched
+ *  construction interface and FlatMapView, demonstrating portability between
+ *  CPU, GPU, and OpenMP.
+ */
+
+#include <cstdint>
+#include <random>
+
+#include "axom/core/Array.hpp"
+#include "axom/core/FlatMap.hpp"
+#include "axom/core/FlatMapView.hpp"
+#include "axom/core/execution/for_all.hpp"
+#include "axom/core/execution/runtime_policy.hpp"
+#include "axom/core/utilities/Timer.hpp"
+
+#include "axom/fmt.hpp"
+#include "axom/CLI11.hpp"
+
+struct InputParams
+{
+  using RuntimePolicy = axom::runtime_policy::Policy;
+
+  axom::IndexType num_elems = 10000;
+  RuntimePolicy runtime_policy = RuntimePolicy::seq;
+  axom::IndexType rep_count = 100;
+
+public:
+  void parse(int argc, char** argv, axom::CLI::App& app)
+  {
+    app.add_option("-n, --numElems", num_elems)->description("Number of elements to insert");
+
+    app.add_option("-p, --policy", runtime_policy)
+      ->description("Set runtime policy for test")
+      ->capture_default_str()
+      ->transform(axom::CLI::CheckedTransformer(axom::runtime_policy::s_nameToPolicy));
+
+    app.add_option("-r, --repCount", rep_count)->description("Number of repetitions to run");
+
+    app.get_formatter()->column_width(60);
+
+    app.parse(argc, argv);
+  }
+};
+
+/*!
+ * \brief Sample an RNG with lookahead.
+ *  Based on the PCG implementation: https://www.pcg-random.org
+ */
+AXOM_HOST_DEVICE uint64_t SampleRNG(uint64_t seed, int distance)
+{
+  uint64_t a = 6364136223846793005ULL;
+  uint64_t output = seed;
+
+  // LCG recurrence relation:
+  //   x_{n+1} = a * x_n mod m   (m is the size of the integer seed type, i.e. 2^64)
+  // The closed-form solution is:
+  //   x_n = a^n * x_0 mod m
+
+  // Compute the modular exponent a^n mod 2^64 by repeated squaring.
+  if(distance > 0)
+  {
+    uint64_t a_n = 1;
+    int n = distance;
+    while(n > 0)
+    {
+      if(n % 2 == 1)
+      {
+        a_n *= a;
+      }
+      a = a * a;
+      n /= 2;
+    }
+    output *= a_n;
+  }
+
+  // PCG XSH-RR output permutation: xorshift, then a data-dependent rotation.
+  uint32_t xorshifted = ((output >> 18u) ^ output) >> 27u;
+  uint32_t rot = output >> 59u;
+  return (xorshifted >> rot) | (xorshifted << ((-rot) & 31));
+}
+
+template <typename ExecSpace, typename T>
+void test_flatmap_init_and_query(axom::IndexType num_elems, axom::IndexType rep_count)
+{
+  int allocatorID = axom::execution_space<ExecSpace>::allocatorID();
+
+  axom::utilities::Timer initTimer(false);
+  axom::utilities::Timer findTimer(false);
+  std::random_device rnd_dev {};
+  for(int i = 0; i < rep_count; i++)
+  {
+    axom::Array<T> keys_vec(num_elems, num_elems, allocatorID);
+    axom::Array<T> values_vec(num_elems, num_elems, allocatorID);
+    const auto keys = keys_vec.view();
+    const auto values = values_vec.view();
+
+    // Generate a fresh 64-bit seed for this repetition.
+    uint64_t rnd = rnd_dev();
+    rnd <<= 32;
+    rnd += rnd_dev();
+
+    // Generate random keys and values.
+    axom::for_all<ExecSpace>(
+      num_elems,
+      AXOM_LAMBDA(axom::IndexType index) {
+        keys[index] = SampleRNG(rnd, 2 * index);
+        values[index] = SampleRNG(rnd, 2 * index + 1);
+      });
+
+    // Construct a flat map in a single batch.
+    initTimer.start();
+    auto new_map =
+      axom::FlatMap<T, T>::template create<ExecSpace>(keys, values, axom::Allocator {allocatorID});
+    initTimer.stop();
+
+    // Get a device-compatible view of the map.
+    auto map_view = new_map.view();
+
+    findTimer.start();
+    // Test use of FlatMapView::find() within a kernel.
+    axom::for_all<ExecSpace>(
+      num_elems,
+      AXOM_LAMBDA(axom::IndexType index) {
+        auto it = map_view.find(keys[index]);
+        if(it != map_view.end())
+        {
+          T value = it->second;
+          values[index] = value * 2;
+        }
+      });
+    findTimer.stop();
+  }
+
+  axom::fmt::print("  - Average construction time: {:.8f} seconds\n",
+                   initTimer.elapsedTimeInSec() / rep_count);
+  axom::fmt::print("  - Average construction throughput: {:.3f} keys/s\n",
+                   (num_elems * rep_count) / initTimer.elapsedTimeInSec());
+  axom::fmt::print("  - Average query time: {:.8f} seconds\n",
+                   findTimer.elapsedTimeInSec() / rep_count);
+  axom::fmt::print("  - Average query throughput: {:.3f} keys/s\n",
+                   (num_elems * rep_count) / findTimer.elapsedTimeInSec());
+}
+
+int main(int argc, char** argv)
+{
+  axom::CLI::App app {"Driver for flat-map performance tests"};
+
+  InputParams params;
+  try
+  {
+    params.parse(argc, argv, app);
+  }
+  catch(const axom::CLI::ParseError& e)
+  {
+    auto retval = app.exit(e);
+    exit(retval);
+  }
+
+  axom::fmt::print("Runtime policy: {}\n",
+                   axom::runtime_policy::policyToName(params.runtime_policy));
+  axom::fmt::print("Number of key-value pairs: {}\n", params.num_elems);
+  axom::fmt::print("Repetition count: {}\n", params.rep_count);
+
+  using RuntimePolicy = axom::runtime_policy::Policy;
+
+  if(params.runtime_policy == RuntimePolicy::seq)
+  {
+    test_flatmap_init_and_query<axom::SEQ_EXEC, int>(params.num_elems, params.rep_count);
+  }
+#ifdef AXOM_RUNTIME_POLICY_USE_OPENMP
+  else if(params.runtime_policy == RuntimePolicy::omp)
+  {
+    test_flatmap_init_and_query<axom::OMP_EXEC, int>(params.num_elems, params.rep_count);
+  }
+#endif
+#ifdef AXOM_RUNTIME_POLICY_USE_CUDA
+  else if(params.runtime_policy == RuntimePolicy::cuda)
+  {
+    test_flatmap_init_and_query<axom::CUDA_EXEC<256>, int>(params.num_elems, params.rep_count);
+  }
+#endif
+#ifdef AXOM_RUNTIME_POLICY_USE_HIP
+  else if(params.runtime_policy == RuntimePolicy::hip)
+  {
+    test_flatmap_init_and_query<axom::HIP_EXEC<256>, int>(params.num_elems, params.rep_count);
+  }
+#endif
+  return 0;
+}
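SampleRNG() jumps the multiplicative congruential recurrence ahead by distance steps in O(log distance) multiplies via exponentiation by squaring, which is what lets every GPU thread draw an independent sample with no shared RNG state. A standalone check (hypothetical, not part of this PR) that the jump-ahead agrees with stepping the recurrence one multiply at a time; the PCG output permutation is omitted since it is applied identically in both cases:

    #include <cassert>
    #include <cstdint>

    // Plain LCG step: x_{n+1} = a * x_n (mod 2^64 via unsigned wraparound).
    std::uint64_t lcgStep(std::uint64_t x)
    {
      return x * 6364136223846793005ULL;
    }

    // Jump ahead n steps in O(log n): x_n = a^n * x_0 (mod 2^64).
    std::uint64_t lcgJump(std::uint64_t x, int n)
    {
      std::uint64_t a = 6364136223846793005ULL;
      std::uint64_t a_n = 1;
      while(n > 0)
      {
        if(n % 2 == 1)
        {
          a_n *= a;
        }
        a = a * a;
        n /= 2;
      }
      return x * a_n;
    }

    int main()
    {
      const std::uint64_t seed = 0x853c49e6748fea9bULL;  // arbitrary seed
      std::uint64_t x = seed;
      for(int i = 0; i < 1000; i++)
      {
        x = lcgStep(x);
      }
      // The closed form a^n * x_0 matches n applications of the recurrence.
      assert(lcgJump(seed, 1000) == x);
      return 0;
    }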
diff --git a/src/axom/core/tests/core_flatmap.hpp b/src/axom/core/tests/core_flatmap.hpp
index 7840e90052..e2f1c9ca38 100644
--- a/src/axom/core/tests/core_flatmap.hpp
+++ b/src/axom/core/tests/core_flatmap.hpp
@@ -114,6 +114,20 @@ AXOM_TYPED_TEST(core_flatmap, default_init)
   EXPECT_EQ(true, test_map.empty());
 }
 
+AXOM_TYPED_TEST(core_flatmap, prealloc_buckets)
+{
+  using MapType = typename TestFixture::MapType;
+
+  const std::vector<int> sizes = {100, 400, 1000, 4000, 10000};
+
+  for(int size : sizes)
+  {
+    MapType test_map(size);
+    EXPECT_EQ(0, test_map.size());
+    // A map constructed for `size` elements must be able to hold them all
+    // without exceeding the maximum load factor.
+    EXPECT_LE(size, test_map.bucket_count() * test_map.max_load_factor());
+  }
+}
+
 AXOM_TYPED_TEST(core_flatmap, insert_only)
 {
   using MapType = typename TestFixture::MapType;
diff --git a/src/axom/core/tests/core_flatmap_for_all.hpp b/src/axom/core/tests/core_flatmap_for_all.hpp
index 23b1329261..f53389484d 100644
--- a/src/axom/core/tests/core_flatmap_for_all.hpp
+++ b/src/axom/core/tests/core_flatmap_for_all.hpp
@@ -206,3 +206,194 @@ AXOM_TYPED_TEST(core_flatmap_forall, insert_and_modify)
     EXPECT_EQ(test_map.count(i), false);
   }
 }
+
+AXOM_TYPED_TEST(core_flatmap_forall, insert_batched)
+{
+  using MapType = typename TestFixture::MapType;
+  using ExecSpace = typename TestFixture::ExecSpace;
+  using KeyType = typename TestFixture::KeyType;
+  using ValueType = typename TestFixture::ValueType;
+
+  const int NUM_ELEMS = 100;
+
+  axom::Array<KeyType> keys_vec(NUM_ELEMS);
+  axom::Array<ValueType> values_vec(NUM_ELEMS);
+  // Create a batch of array elements.
+  for(int i = 0; i < NUM_ELEMS; i++)
+  {
+    auto key = this->getKey(i);
+    auto value = this->getValue(i * 10.0 + 5.0);
+
+    keys_vec[i] = key;
+    values_vec[i] = value;
+  }
+
+  // Copy keys and values to GPU memory.
+  axom::Array<KeyType> keys_gpu(keys_vec, this->getKernelAllocatorID());
+  axom::Array<ValueType> values_gpu(values_vec, this->getKernelAllocatorID());
+
+  // Construct a flat map with the key-value pairs.
+  MapType test_map_gpu =
+    MapType::template create<ExecSpace>(keys_gpu,
+                                        values_gpu,
+                                        axom::Allocator {this->getKernelAllocatorID()});
+
+  // Copy the flat map back to the host for testing.
+  MapType test_map(test_map_gpu, axom::Allocator {this->getHostAllocatorID()});
+
+  // Check contents on the host.
+  EXPECT_EQ(NUM_ELEMS, test_map.size());
+
+  // Check that every element we inserted is in the map.
+  for(int i = 0; i < NUM_ELEMS; i++)
+  {
+    auto expected_key = this->getKey(i);
+    auto expected_val = this->getValue(i * 10.0 + 5.0);
+    EXPECT_EQ(1, test_map.count(expected_key));
+    EXPECT_EQ(expected_val, test_map.at(expected_key));
+  }
+}
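The deduplication rule in create(), atomicMax over batch indices so the highest-indexed pair wins, is deliberately equivalent to last-write-wins when assigning into a map sequentially, and that equivalence is what the insert_batched_with_dups test that follows asserts. A host-side model of the policy using std::unordered_map:

    #include <cassert>
    #include <unordered_map>
    #include <vector>

    int main()
    {
      std::vector<int> keys = {1, 2, 1, 3, 2};
      std::vector<int> values = {10, 20, 30, 40, 50};

      // Sequential assignment: later batch entries overwrite earlier ones,
      // matching the "highest batch index wins" rule in FlatMap::create().
      std::unordered_map<int, int> model;
      for(std::size_t i = 0; i < keys.size(); i++)
      {
        model[keys[i]] = values[i];
      }

      assert(model.size() == 3);  // duplicates collapse to one entry per key
      assert(model.at(1) == 30);
      assert(model.at(2) == 50);
      assert(model.at(3) == 40);
      return 0;
    }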
+
+AXOM_TYPED_TEST(core_flatmap_forall, insert_batched_with_dups)
+{
+  using MapType = typename TestFixture::MapType;
+  using ExecSpace = typename TestFixture::ExecSpace;
+  using KeyType = typename TestFixture::KeyType;
+  using ValueType = typename TestFixture::ValueType;
+
+  const int NUM_ELEMS = 100;
+
+  axom::Array<KeyType> keys_vec(NUM_ELEMS * 2);
+  axom::Array<ValueType> values_vec(NUM_ELEMS * 2);
+  // Create a batch of array elements.
+  for(int i = 0; i < NUM_ELEMS; i++)
+  {
+    auto key = this->getKey(i);
+    auto value = this->getValue(i * 10.0 + 5.0);
+
+    keys_vec[i] = key;
+    values_vec[i] = value;
+  }
+
+  // Add some duplicate key values.
+  for(int i = 0; i < NUM_ELEMS; i++)
+  {
+    auto key = this->getKey(i);
+    auto value = this->getValue(i * 10.0 + 7.0);
+
+    keys_vec[i + NUM_ELEMS] = key;
+    values_vec[i + NUM_ELEMS] = value;
+  }
+
+  // Copy keys and values to GPU memory.
+  axom::Array<KeyType> keys_gpu(keys_vec, this->getKernelAllocatorID());
+  axom::Array<ValueType> values_gpu(values_vec, this->getKernelAllocatorID());
+
+  // Construct a flat map with the key-value pairs.
+  MapType test_map_gpu =
+    MapType::template create<ExecSpace>(keys_gpu,
+                                        values_gpu,
+                                        axom::Allocator {this->getKernelAllocatorID()});
+
+  // Copy the flat map back to the host for testing.
+  MapType test_map(test_map_gpu, axom::Allocator {this->getHostAllocatorID()});
+
+  // Check contents on the host. Only one of each pair of duplicate keys
+  // should remain.
+  EXPECT_EQ(NUM_ELEMS, test_map.size());
+
+  // Check that every element we inserted is in the map.
+  for(int i = 0; i < NUM_ELEMS; i++)
+  {
+    auto expected_key = this->getKey(i);
+    auto expected_val1 = this->getValue(i * 10.0 + 5.0);
+    auto expected_val2 = this->getValue(i * 10.0 + 7.0);
+    EXPECT_EQ(1, test_map.count(expected_key));
+    // The second key-value pair in batch order should overwrite the first
+    // pair with the same key.
+    EXPECT_EQ(expected_val2, test_map.at(expected_key));
+    EXPECT_NE(expected_val1, test_map.at(expected_key));
+  }
+
+  // Check that we only have one instance of every key in the map.
+  axom::Array<std::pair<KeyType, ValueType>> kv_out(NUM_ELEMS);
+  int index = 0;
+  for(auto& pair : test_map)
+  {
+    EXPECT_LT(index, NUM_ELEMS);
+    kv_out[index++] = {pair.first, pair.second};
+  }
+
+  std::sort(kv_out.begin(),
+            kv_out.end(),
+            [](const std::pair<KeyType, ValueType>& first,
+               const std::pair<KeyType, ValueType>& second) -> bool {
+              return first.first < second.first;
+            });
+
+  for(int i = 0; i < NUM_ELEMS; i++)
+  {
+    auto expected_key = this->getKey(i);
+    auto expected_val1 = this->getValue(i * 10.0 + 5.0);
+    auto expected_val2 = this->getValue(i * 10.0 + 7.0);
+    EXPECT_EQ(kv_out[i].first, expected_key);
+    EXPECT_EQ(expected_val2, test_map.at(expected_key));
+    EXPECT_NE(expected_val1, test_map.at(expected_key));
+  }
+}
+
+template <typename KeyType>
+struct ConstantHash
+{
+  using argument_type = KeyType;
+  using result_type = axom::IndexType;
+
+  AXOM_HOST_DEVICE axom::IndexType operator()(KeyType) const { return 0; }
+};
+
+/**
+ * Test the hash map with a hash that returns the same value for all elements.
+ * Even with this worst-case hash collision behavior, the FlatMap should
+ * nevertheless be correctly constructible and queryable.
+ */
+AXOM_TYPED_TEST(core_flatmap_forall, insert_batched_constant_hash)
+{
+  using ExecSpace = typename TestFixture::ExecSpace;
+  using KeyType = typename TestFixture::KeyType;
+  using ValueType = typename TestFixture::ValueType;
+
+  using MapType = axom::FlatMap<KeyType, ValueType, ConstantHash<KeyType>>;
+
+  const int NUM_ELEMS = 100;
+
+  axom::Array<KeyType> keys_vec(NUM_ELEMS);
+  axom::Array<ValueType> values_vec(NUM_ELEMS);
+  // Create a batch of array elements.
+  for(int i = 0; i < NUM_ELEMS; i++)
+  {
+    auto key = this->getKey(i);
+    auto value = this->getValue(i * 10.0 + 5.0);
+
+    keys_vec[i] = key;
+    values_vec[i] = value;
+  }
+
+  // Copy keys and values to GPU memory.
+  axom::Array<KeyType> keys_gpu(keys_vec, this->getKernelAllocatorID());
+  axom::Array<ValueType> values_gpu(values_vec, this->getKernelAllocatorID());
+
+  // Construct a flat map with the key-value pairs.
+  MapType test_map_gpu =
+    MapType::template create<ExecSpace>(keys_gpu,
+                                        values_gpu,
+                                        axom::Allocator {this->getKernelAllocatorID()});
+
+  // Copy the flat map back to the host for testing.
+  MapType test_map(test_map_gpu, axom::Allocator {this->getHostAllocatorID()});
+
+  // Check contents on the host.
+  EXPECT_EQ(NUM_ELEMS, test_map.size());
+
+  // Check that every element we inserted is in the map.
+  for(int i = 0; i < NUM_ELEMS; i++)
+  {
+    auto expected_key = this->getKey(i);
+    auto expected_val = this->getValue(i * 10.0 + 5.0);
+    EXPECT_EQ(1, test_map.count(expected_key));
+    EXPECT_EQ(expected_val, test_map.at(expected_key));
+  }
+}
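ConstantHash above also documents the shape a user-supplied hash needs: these tests rely only on argument_type, result_type, and a host-device-callable operator(). As a sketch, a well-distributed device-compatible hash in the same shape (hypothetical, not part of this PR; it uses the SplitMix64 finalizer, and AXOM_HOST_DEVICE comes from axom/core/Macros.hpp):

    #include <cstdint>

    #include "axom/core/Macros.hpp"  // for AXOM_HOST_DEVICE

    // Hypothetical device-compatible hash functor in the same shape as
    // ConstantHash: nothing more than a stateless, host-device operator().
    struct Mix64Hash
    {
      using argument_type = std::uint64_t;
      using result_type = std::uint64_t;

      AXOM_HOST_DEVICE std::uint64_t operator()(std::uint64_t x) const
      {
        // SplitMix64 finalizer: cheap, stateless, and well-distributed, so
        // the group indices and 8-bit reduced hashes derived from the result
        // spread evenly across the table.
        x ^= x >> 30;
        x *= 0xbf58476d1ce4e5b9ULL;
        x ^= x >> 27;
        x *= 0x94d049bb133111ebULL;
        x ^= x >> 31;
        return x;
      }
    };

    // Usage, mirroring the constant-hash test above:
    //   using MapType = axom::FlatMap<std::uint64_t, int, Mix64Hash>;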