Skip to content

Commit 2ee8e4c

Browse files
committed
[RFC] dash::copy: Implement global-to-global
Active team selection is now done by tag struct argument. # Conflicts: # dash/test/algorithm/CopyTest.cc
1 parent f2439c1 commit 2ee8e4c

File tree

2 files changed

+217
-8
lines changed

2 files changed

+217
-8
lines changed

dash/include/dash/algorithm/Copy.h

+149-8
Original file line numberDiff line numberDiff line change
@@ -650,25 +650,166 @@ copy_async(
650650
}
651651
#endif
652652

653+
struct ActiveDestination{};
654+
struct ActiveSource{};
655+
656+
/**
657+
* Specialization of \c dash::copy as global-to-global blocking copy
658+
* operation.
659+
*
660+
* \ingroup DashAlgorithms
661+
*/
662+
template <
663+
class GlobInputIt,
664+
class GlobOutputIt,
665+
bool UseHandles = false>
666+
GlobOutputIt copy(
667+
GlobInputIt in_first,
668+
GlobInputIt in_last,
669+
GlobOutputIt out_first,
670+
ActiveDestination /*unused*/)
671+
{
672+
DASH_LOG_TRACE("dash::copy()", "blocking, global to global, active destination");
673+
674+
using size_type = typename GlobInputIt::size_type;
675+
using input_value_type = typename GlobInputIt::value_type;
676+
using output_value_type = typename GlobOutputIt::value_type;
677+
678+
size_type num_elem_total = dash::distance(in_first, in_last);
679+
if (num_elem_total <= 0) {
680+
DASH_LOG_TRACE("dash::copy", "input range empty");
681+
return out_first;
682+
}
683+
684+
auto g_out_first = out_first;
685+
auto g_out_last = g_out_first + num_elem_total;
686+
687+
internal::ContiguousRangeSet<GlobOutputIt> range_set{g_out_first, g_out_last};
688+
689+
const auto & out_team = out_first.team();
690+
out_team.barrier();
691+
692+
std::vector<dart_handle_t> handles;
693+
std::vector<dart_handle_t>* handles_arg = UseHandles ? &handles : nullptr;
694+
695+
dash::internal::local_copy_chunks<input_value_type, output_value_type> local_chunks;
696+
697+
size_type num_elem_processed = 0;
698+
699+
for (auto range : range_set) {
700+
701+
auto cur_out_first = range.first;
702+
auto num_copy_elem = range.second;
703+
704+
DASH_ASSERT_GT(num_copy_elem, 0,
705+
"Number of elements to copy is 0");
706+
707+
// handle local data only
708+
if (cur_out_first.is_local()) {
709+
auto dest_ptr = cur_out_first.local();
710+
auto src_ptr = in_first + num_elem_processed;
711+
internal::copy_impl(src_ptr,
712+
src_ptr + num_copy_elem,
713+
dest_ptr,
714+
handles_arg,
715+
local_chunks);
716+
}
717+
num_elem_processed += num_copy_elem;
718+
}
719+
720+
dash::internal::do_local_copies(local_chunks);
721+
722+
if (!handles.empty()) {
723+
DASH_LOG_TRACE("dash::copy", "Waiting for remote transfers to complete,",
724+
"num_handles: ", handles.size());
725+
dart_waitall_local(handles.data(), handles.size());
726+
} else if (!UseHandles) {
727+
dart_flush_local_all(in_first.dart_gptr());
728+
}
729+
out_team.barrier();
730+
731+
DASH_ASSERT_EQ(num_elem_processed, num_elem_total,
732+
"Failed to find all contiguous subranges in range");
733+
734+
return g_out_last;
735+
}
736+
653737
/**
654738
* Specialization of \c dash::copy as global-to-global blocking copy
655739
* operation.
656740
*
657741
* \ingroup DashAlgorithms
658742
*/
659-
template <typename ValueType, class GlobInputIt, class GlobOutputIt>
743+
template <
744+
class GlobInputIt,
745+
class GlobOutputIt,
746+
bool UseHandles = false>
660747
GlobOutputIt copy(
661-
GlobInputIt /*in_first*/,
662-
GlobInputIt /*in_last*/,
663-
GlobOutputIt /*out_first*/)
748+
GlobInputIt in_first,
749+
GlobInputIt in_last,
750+
GlobOutputIt out_first,
751+
ActiveSource /*unused*/)
664752
{
665753
DASH_LOG_TRACE("dash::copy()", "blocking, global to global");
666754

667-
// TODO:
668-
// - Implement adapter for local-to-global dash::copy here
669-
// - Return if global input range has no local sub-range
755+
using size_type = typename GlobInputIt::size_type;
756+
using input_value_type = typename GlobInputIt::value_type;
757+
using output_value_type = typename GlobOutputIt::value_type;
758+
759+
size_type num_elem_total = dash::distance(in_first, in_last);
760+
if (num_elem_total <= 0) {
761+
DASH_LOG_TRACE("dash::copy", "input range empty");
762+
return out_first;
763+
}
764+
765+
internal::ContiguousRangeSet<GlobOutputIt> range_set{in_first, in_last};
766+
767+
const auto & in_team = in_first.team();
768+
in_team.barrier();
769+
770+
std::vector<dart_handle_t> handles;
771+
std::vector<dart_handle_t>* handles_arg = UseHandles ? &handles : nullptr;
772+
773+
dash::internal::local_copy_chunks<input_value_type, output_value_type> local_chunks;
774+
775+
size_type num_elem_processed = 0;
776+
777+
for (auto range : range_set) {
778+
779+
auto cur_in_first = range.first;
780+
auto num_copy_elem = range.second;
781+
782+
DASH_ASSERT_GT(num_copy_elem, 0,
783+
"Number of elements to copy is 0");
784+
785+
// handle local data only
786+
if (cur_in_first.is_local()) {
787+
auto src_ptr = cur_in_first.local();
788+
auto dest_ptr = out_first + num_elem_processed;
789+
internal::copy_impl(src_ptr,
790+
src_ptr + num_copy_elem,
791+
dest_ptr,
792+
handles_arg,
793+
local_chunks);
794+
}
795+
num_elem_processed += num_copy_elem;
796+
}
797+
798+
internal::do_local_copies(local_chunks);
799+
800+
if (!handles.empty()) {
801+
DASH_LOG_TRACE("dash::copy", "Waiting for remote transfers to complete,",
802+
"num_handles: ", handles.size());
803+
dart_waitall(handles.data(), handles.size());
804+
} else if (!UseHandles) {
805+
dart_flush_all(out_first.dart_gptr());
806+
}
807+
in_team.barrier();
808+
809+
DASH_ASSERT_EQ(num_elem_processed, num_elem_total,
810+
"Failed to find all contiguous subranges in range");
670811

671-
return GlobOutputIt();
812+
return out_first + num_elem_total;
672813
}
673814

674815
#endif // DOXYGEN

dash/test/algorithm/CopyTest.cc

+68
Original file line numberDiff line numberDiff line change
@@ -1040,3 +1040,71 @@ TEST_F(CopyTest, InputOutputTypeTest)
10401040
ASSERT_TRUE_U((dash::internal::is_dash_copyable<const point_t, point_t>::value));
10411041

10421042
}
1043+
1044+
TEST_F(CopyTest, MatrixTransfersGlobalToGlobal)
1045+
{
1046+
if (_dash_size < 2) {
1047+
SKIP_TEST_MSG("At least 2 units required for this test.");
1048+
}
1049+
1050+
using TeamSpecT = dash::TeamSpec<2>;
1051+
using MatrixT = dash::NArray<double, 2>;
1052+
using PatternT = typename MatrixT::pattern_type;
1053+
using SizeSpecT = dash::SizeSpec<2>;
1054+
using DistSpecT = dash::DistributionSpec<2>;
1055+
1056+
auto& team_all = dash::Team::All();
1057+
TeamSpecT team_all_spec(team_all.size(), 1);
1058+
team_all_spec.balance_extents();
1059+
1060+
auto size_spec = SizeSpecT(4*team_all_spec.extent(1),
1061+
4*team_all_spec.extent(1));
1062+
auto dist_spec = DistSpecT(dash::BLOCKED, dash::BLOCKED);
1063+
1064+
MatrixT grid_more(size_spec, dist_spec, team_all, team_all_spec);
1065+
dash::fill(grid_more.begin(), grid_more.end(), (double)team_all.myid());
1066+
team_all.barrier();
1067+
1068+
// create a smaller team
1069+
dash::Team& team_fewer= team_all.split(2);
1070+
team_all.barrier();
1071+
if (!team_fewer.is_null() && 0 == team_fewer.position()) {
1072+
TeamSpecT team_fewer_spec(team_fewer.size(), 1);
1073+
team_fewer_spec.balance_extents();
1074+
1075+
MatrixT grid_fewer(size_spec, dist_spec, team_fewer, team_fewer_spec);
1076+
dash::fill(grid_fewer.begin(), grid_fewer.end(), -1.0);
1077+
1078+
auto lextents= grid_fewer.pattern().local_extents();
1079+
1080+
dash::copy(grid_more.begin(), grid_more.end(),
1081+
grid_fewer.begin(), dash::ActiveDestination());
1082+
1083+
if (team_fewer.myid() == 0) {
1084+
auto gextents = grid_fewer.extents();
1085+
for (uint32_t y = 0; y < gextents[0]; ++y) {
1086+
for (uint32_t x = 0; x < gextents[1]; ++x) {
1087+
ASSERT_EQ_U(grid_more(y, x), grid_fewer(y, x));
1088+
}
1089+
}
1090+
}
1091+
1092+
team_fewer.barrier();
1093+
1094+
dash::fill(grid_fewer.begin(), grid_fewer.end(), (double)team_fewer.myid());
1095+
1096+
dash::copy(grid_fewer.begin(), grid_fewer.end(),
1097+
grid_more.begin(), dash::ActiveSource());
1098+
1099+
if (team_fewer.myid() == 0) {
1100+
auto gextents = grid_fewer.extents();
1101+
for (uint32_t y = 0; y < gextents[0]; ++y) {
1102+
for (uint32_t x = 0; x < gextents[1]; ++x) {
1103+
ASSERT_EQ_U(grid_more(y, x), grid_fewer(y, x));
1104+
}
1105+
}
1106+
}
1107+
1108+
team_fewer.barrier();
1109+
}
1110+
}

0 commit comments

Comments
 (0)