diff --git a/src/realm/transfer/transfer.cc b/src/realm/transfer/transfer.cc index 54020a174f..84c0ab59e9 100644 --- a/src/realm/transfer/transfer.cc +++ b/src/realm/transfer/transfer.cc @@ -3534,6 +3534,19 @@ namespace Realm { } } + TransferDesc::TransferDesc(TestTag, TransferDomain *_domain) + : refcount(1) + , deferred_analysis(this) + , prs() + , analysis_complete(false) + , analysis_successful(false) + , fill_data(0) + , fill_size(0) + , analysis_init_done(false) + { + domain = _domain; + } + void TransferDesc::check_analysis_preconditions() { log_xplan.info() << "created: plan=" << (void *)this << " domain=" << *domain @@ -3609,7 +3622,7 @@ namespace Realm { } // no (untriggered) preconditions, so we fall through to immediate analysis - perform_analysis(); + perform_analysis(TimeLimit()); } static size_t compute_ib_size(size_t combined_field_size, size_t domain_size, @@ -3678,65 +3691,81 @@ namespace Realm { const std::vector &edges; }; - void TransferDesc::perform_analysis() + bool TransferDesc::perform_analysis(TimeLimit work_until) { - // initialize profiling data - prof_usage.source = Memory::NO_MEMORY; - prof_usage.target = Memory::NO_MEMORY; - prof_usage.size = 0; + if(!analysis_init_done) { + // initialize profiling data + prof_usage.source = Memory::NO_MEMORY; + prof_usage.target = Memory::NO_MEMORY; + prof_usage.size = 0; + + // quick check - if the domain is empty, there's nothing to actually do + if(domain->empty()) { + log_xplan.debug() << "analysis: plan=" << (void *)this << " empty"; + + // well, we still have to poke pending ops + std::vector to_alloc; + { + AutoLock<> al(mutex); + to_alloc.swap(pending_ops); + // release before the mutex is released so to_alloc is visible before the + // analysis_complete flag is set + analysis_complete.store_release(true); + } - // quick check - if the domain is empty, there's nothing to actually do - if(domain->empty()) { - log_xplan.debug() << "analysis: plan=" << (void *)this << " empty"; + for(size_t i = 
0; i < to_alloc.size(); i++) { + to_alloc[i]->allocate_ibs(); + } + return true; + } - // well, we still have to poke pending ops - std::vector to_alloc; - { - AutoLock<> al(mutex); - to_alloc.swap(pending_ops); - // release before the mutex is released so to_alloc is visible before the - // analysis_complete flag is set - analysis_complete.store_release(true); + // first, scan over the sources and figure out how much space we need + // for fill data - don't need to know field order yet + for(size_t i = 0; i < srcs.size(); i++) { + if(srcs[i].field_id == FieldID(-1)) { + fill_size += srcs[i].size; + } } - for(size_t i = 0; i < to_alloc.size(); i++) { - to_alloc[i]->allocate_ibs(); + if(fill_size > 0) { + fill_data = malloc(fill_size); + assert(fill_data); } - return; - } - size_t domain_size = domain->volume(); + // for now, pick a global dimension ordering + // TODO: allow this to vary for independent subgraphs (or dependent ones + // with transposes in line) + domain->choose_dim_order(dim_order, srcs, dsts, indirects, + false /*!force_fortran_order*/, 65536 /*max_stride*/); - // first, scan over the sources and figure out how much space we need - // for fill data - don't need to know field order yet - for(size_t i = 0; i < srcs.size(); i++) { - if(srcs[i].field_id == FieldID(-1)) { - fill_size += srcs[i].size; - } - } + src_fields.resize(srcs.size()); + dst_fields.resize(dsts.size()); - size_t fill_ofs = 0; - if(fill_size > 0) { - fill_data = malloc(fill_size); - assert(fill_data); + analysis_init_done = true; + analysis_field_idx = 0; + analysis_fld_start = 0; + analysis_fill_ofs = 0; + analysis_field_done.assign(srcs.size(), false); } - // for now, pick a global dimension ordering - // TODO: allow this to vary for independent subgraphs (or dependent ones - // with transposes in line) - domain->choose_dim_order(dim_order, srcs, dsts, indirects, - false /*!force_fortran_order*/, 65536 /*max_stride*/); - - src_fields.resize(srcs.size()); - 
dst_fields.resize(dsts.size()); + size_t domain_size = domain->volume(); // TODO: look at layouts and decide if fields should be grouped into // a smaller number of copies assert(srcs.size() == dsts.size()); - std::vector field_done(srcs.size(), false); + size_t fill_ofs = analysis_fill_ofs; + size_t fld_start = analysis_fld_start; + auto &field_done = analysis_field_done; // fields will get reordered to be contiguous per xd subgraph - size_t fld_start = 0; - for(size_t i = 0; i < srcs.size(); i++) { + for(size_t i = analysis_field_idx; i < srcs.size(); i++) { + // check time limit between field iterations + if(work_until.is_expired()) { + analysis_fill_ofs = fill_ofs; + analysis_fld_start = fld_start; + analysis_field_idx = i; + return false; + } + // did this field already get grouped into a previous path? if(field_done[i]) { continue; @@ -4257,6 +4286,7 @@ namespace Realm { for(size_t i = 0; i < to_alloc.size(); i++) { to_alloc[i]->allocate_ibs(); } + return true; } void TransferDesc::cancel_analysis(Event failed_precondition) @@ -4294,7 +4324,7 @@ namespace Realm { if(poisoned) { desc->cancel_analysis(precondition); } else { - desc->perform_analysis(); + desc->perform_analysis(TimeLimit()); } } diff --git a/src/realm/transfer/transfer.h b/src/realm/transfer/transfer.h index 8a68a240c1..c751e9210a 100644 --- a/src/realm/transfer/transfer.h +++ b/src/realm/transfer/transfer.h @@ -29,6 +29,8 @@ #include "realm/transfer/channel.h" #include "realm/profiling.h" +class PerformAnalysisTest; + namespace Realm { // the data transfer engine has too much code to have it all be templated on the @@ -424,8 +426,13 @@ namespace Realm { protected: atomic refcount; + // test-only constructor: creates a TransferDesc with the given domain, + // bypassing check_analysis_preconditions and TransferDomain::construct + struct TestTag {}; + TransferDesc(TestTag, TransferDomain *_domain); + void check_analysis_preconditions(); - void perform_analysis(); + bool perform_analysis(TimeLimit 
work_until); void cancel_analysis(Event failed_precondition); class DeferredAnalysis : public EventWaiter { @@ -441,6 +448,7 @@ namespace Realm { DeferredAnalysis deferred_analysis; friend class TransferOperation; + friend class ::PerformAnalysisTest; TransferDomain *domain; std::vector srcs, dsts; @@ -456,6 +464,11 @@ namespace Realm { std::vector src_fields, dst_fields; void *fill_data; size_t fill_size; + bool analysis_init_done; + size_t analysis_field_idx; + size_t analysis_fld_start; + size_t analysis_fill_ofs; + std::vector analysis_field_done; ProfilingMeasurements::OperationMemoryUsage prof_usage; ProfilingMeasurements::OperationCopyInfo prof_cpinfo; }; diff --git a/src/realm/transfer/transfer.inl b/src/realm/transfer/transfer.inl index 7188130ebb..06b0d6ebe9 100644 --- a/src/realm/transfer/transfer.inl +++ b/src/realm/transfer/transfer.inl @@ -189,6 +189,7 @@ namespace Realm { , analysis_successful(false) , fill_data(0) , fill_size(0) + , analysis_init_done(false) { domain = TransferDomain::construct(_is); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a6213d8b46..fb241a1ed8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -112,6 +112,7 @@ list( nodeset_test.cc transfer_iterator_test.cc lowlevel_dma_test.cc + perform_analysis_test.cc circ_queue_test.cc gather_scatter_test.cc sparsity_map_test.cc diff --git a/tests/unit_tests/perform_analysis_test.cc b/tests/unit_tests/perform_analysis_test.cc new file mode 100644 index 0000000000..be0a33f8b4 --- /dev/null +++ b/tests/unit_tests/perform_analysis_test.cc @@ -0,0 +1,200 @@ +/* + * Copyright 2025 Stanford University, NVIDIA Corporation + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "realm/transfer/transfer.h"
+#include "realm/timers.h"
+#include <gtest/gtest.h>
+
+using namespace Realm;
+
+// Minimal TransferDomain implementation that avoids runtime dependencies.
+// It reports a non-empty domain so that perform_analysis takes the non-empty
+// path and enters the main field loop.
+class MockTransferDomain : public TransferDomain {
+public:
+  TransferDomain *clone() const override { return new MockTransferDomain; }
+  Event request_metadata() override { return Event::NO_EVENT; }
+  bool empty() const override { return false; }
+  size_t volume() const override { return 10; }
+  void choose_dim_order(std::vector<int> &dim_order,
+                        const std::vector<CopySrcDstField> &srcs,
+                        const std::vector<CopySrcDstField> &dsts,
+                        const std::vector<IndirectionInfo *> &indirects,
+                        bool force_fortran_order, size_t max_stride) const override
+  {
+    ++choose_dim_order_calls;
+    dim_order.clear();
+    dim_order.push_back(0);
+  }
+  void count_fragments(RegionInstance inst, const std::vector<int> &dim_order,
+                       const std::vector<FieldID> &fields,
+                       const std::vector<size_t> &fld_sizes,
+                       std::vector<size_t> &fragments) const override
+  {
+    fragments.clear();
+  }
+  TransferIterator *create_iterator(RegionInstance inst,
+                                    const std::vector<int> &dim_order,
+                                    const std::vector<FieldID> &fields,
+                                    const std::vector<size_t> &fld_offsets,
+                                    const std::vector<size_t> &fld_sizes) const override
+  {
+    return nullptr;
+  }
+  TransferIterator *create_iterator(RegionInstance inst, RegionInstance peer,
+                                    const std::vector<FieldID> &fields,
+                                    const std::vector<size_t> &fld_offsets,
+                                    const std::vector<size_t> &fld_sizes) const override
+  {
+    return nullptr;
+  }
+  void print(std::ostream &os) const override { os << "MockTransferDomain"; }
+
+  // Number of choose_dim_order calls; mutable because that method is const.
+  mutable unsigned choose_dim_order_calls = 0;
+};
+
+// PerformAnalysisTest is a friend of TransferDesc, allowing direct access to
+// protected members for testing the incremental analysis behavior.
+class PerformAnalysisTest : public ::testing::Test {
+protected:
+  // Helper to construct a TransferDesc using the test-only constructor.
+  // This bypasses TransferDomain::construct and check_analysis_preconditions,
+  // avoiding runtime dependencies.
+  static TransferDesc *create_desc(TransferDomain *domain)
+  {
+    return new TransferDesc(TransferDesc::TestTag{}, domain);
+  }
+
+  // Set up dummy src/dst field pairs on a TransferDesc. NO_INST is used because
+  // no real instances exist here; the mock's choose_dim_order ignores the
+  // fields, and the loop body won't execute in timeout tests since
+  // is_expired() is checked before any per-field work.
+  static void setup_dummy_fields(TransferDesc *desc, size_t num_fields)
+  {
+    for(size_t i = 0; i < num_fields; i++) {
+      CopySrcDstField src;
+      src.set_field(RegionInstance::NO_INST, FieldID(i), /*size=*/8);
+      CopySrcDstField dst;
+      dst.set_field(RegionInstance::NO_INST, FieldID(i), /*size=*/8);
+      desc->srcs.push_back(src);
+      desc->dsts.push_back(dst);
+    }
+  }
+
+  // Accessors for protected members (friendship is not inherited by TEST_F
+  // subclasses, so all access must go through PerformAnalysisTest methods).
+  static bool call_perform_analysis(TransferDesc *desc, TimeLimit work_until)
+  {
+    return desc->perform_analysis(work_until);
+  }
+  static bool get_analysis_complete(TransferDesc *desc)
+  {
+    return desc->analysis_complete.load();
+  }
+  static bool get_analysis_init_done(TransferDesc *desc)
+  {
+    return desc->analysis_init_done;
+  }
+  static size_t get_analysis_field_idx(TransferDesc *desc)
+  {
+    return desc->analysis_field_idx;
+  }
+  static size_t get_dim_order_size(TransferDesc *desc) { return desc->dim_order.size(); }
+  static size_t get_src_fields_size(TransferDesc *desc)
+  {
+    return desc->src_fields.size();
+  }
+  static size_t get_dst_fields_size(TransferDesc *desc)
+  {
+    return desc->dst_fields.size();
+  }
+};
+
+// Test that perform_analysis returns true immediately for an empty domain.
+TEST_F(PerformAnalysisTest, EmptyDomainCompletesImmediately)
+{
+  // MockEmptyDomain returns empty() == true
+  class MockEmptyDomain : public MockTransferDomain {
+  public:
+    bool empty() const override { return true; }
+    size_t volume() const override { return 0; }
+  };
+
+  TransferDesc *desc = create_desc(new MockEmptyDomain);
+
+  bool completed = call_perform_analysis(desc, TimeLimit());
+  EXPECT_TRUE(completed);
+  EXPECT_TRUE(get_analysis_complete(desc));
+
+  desc->remove_reference();
+}
+
+// Test that perform_analysis with an immediately-expired TimeLimit returns
+// false (timed out) without completing the analysis.
+TEST_F(PerformAnalysisTest, ExpiredTimeLimitCausesTimeout)
+{
+  const size_t num_fields = 5;
+  TransferDesc *desc = create_desc(new MockTransferDomain);
+  setup_dummy_fields(desc, num_fields);
+
+  // Call perform_analysis with an already-expired time limit.
+  // The init phase runs (it doesn't check the time limit), but the loop
+  // should immediately detect the expired limit and return false.
+  bool completed = call_perform_analysis(desc, TimeLimit::relative(0));
+
+  EXPECT_FALSE(completed);
+  // Init should have completed
+  EXPECT_TRUE(get_analysis_init_done(desc));
+  // But the field loop should not have progressed
+  EXPECT_EQ(get_analysis_field_idx(desc), 0u);
+  // analysis_complete should still be false
+  EXPECT_FALSE(get_analysis_complete(desc));
+
+  desc->remove_reference();
+}
+
+// Test that after a timeout, calling perform_analysis again with an expired
+// TimeLimit preserves the init state (doesn't redo it).
+TEST_F(PerformAnalysisTest, InitStatePreservedAcrossTimeoutCalls)
+{
+  const size_t num_fields = 3;
+  MockTransferDomain *domain = new MockTransferDomain;
+  TransferDesc *desc = create_desc(domain);
+  setup_dummy_fields(desc, num_fields);
+
+  // First call: runs init exactly once, then times out immediately
+  bool completed = call_perform_analysis(desc, TimeLimit::relative(0));
+  ASSERT_FALSE(completed);
+  ASSERT_TRUE(get_analysis_init_done(desc));
+  EXPECT_EQ(domain->choose_dim_order_calls, 1u);
+
+  // Verify that dim_order was set during init (1D domain -> single entry)
+  EXPECT_EQ(get_dim_order_size(desc), 1u);
+  // Verify src/dst fields were resized during init
+  EXPECT_EQ(get_src_fields_size(desc), num_fields);
+  EXPECT_EQ(get_dst_fields_size(desc), num_fields);
+
+  // Second call: also times out. With an expired limit only the init phase can
+  // reach choose_dim_order, so an unchanged call count shows init was skipped.
+  completed = call_perform_analysis(desc, TimeLimit::relative(0));
+  EXPECT_FALSE(completed);
+  EXPECT_TRUE(get_analysis_init_done(desc));
+  EXPECT_EQ(domain->choose_dim_order_calls, 1u);
+
+  desc->remove_reference();
+}