Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 090cb5c

Browse files
committed Aug 8, 2023
Adding hierarchical operation to index_queue spawning
- flyby: fixing integral conversion warnings
- flyby: fixing target.num_pus
1 parent e7c31a4 commit 090cb5c

File tree

6 files changed: +204 -84 lines changed
 

‎libs/core/algorithms/tests/performance/foreach_scaling.cpp

+26 -18
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ std::uint64_t averageout_plain_for(std::size_t vector_size)
3535
std::iota(
3636
std::begin(data_representation), std::end(data_representation), gen());
3737

38-
std::uint64_t start = hpx::chrono::high_resolution_clock::now();
38+
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
3939

4040
// average out 100 executions to avoid varying results
4141
for (auto i = 0; i < test_count; i++)
@@ -52,7 +52,7 @@ std::uint64_t averageout_plain_for_iter(std::size_t vector_size)
5252
std::iota(
5353
std::begin(data_representation), std::end(data_representation), gen());
5454

55-
std::uint64_t start = hpx::chrono::high_resolution_clock::now();
55+
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
5656

5757
// average out 100 executions to avoid varying results
5858
for (auto i = 0; i < test_count; i++)
@@ -72,7 +72,7 @@ std::uint64_t averageout_parallel_foreach(
7272
std::iota(
7373
std::begin(data_representation), std::end(data_representation), gen());
7474

75-
std::uint64_t start = hpx::chrono::high_resolution_clock::now();
75+
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
7676

7777
// average out 100 executions to avoid varying results
7878
for (auto i = 0; i < test_count; i++)
@@ -92,7 +92,7 @@ std::uint64_t averageout_task_foreach(std::size_t vector_size, Executor&& exec)
9292

9393
if (num_overlapping_loops <= 0)
9494
{
95-
std::uint64_t start = hpx::chrono::high_resolution_clock::now();
95+
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
9696

9797
for (auto i = 0; i < test_count; i++)
9898
measure_task_foreach(data_representation, exec).wait();
@@ -103,7 +103,7 @@ std::uint64_t averageout_task_foreach(std::size_t vector_size, Executor&& exec)
103103
std::vector<hpx::shared_future<void>> tests;
104104
tests.resize(num_overlapping_loops);
105105

106-
std::uint64_t start = hpx::chrono::high_resolution_clock::now();
106+
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
107107

108108
for (auto i = 0; i < test_count; i++)
109109
{
@@ -124,7 +124,7 @@ std::uint64_t averageout_sequential_foreach(std::size_t vector_size)
124124
std::iota(
125125
std::begin(data_representation), std::end(data_representation), gen());
126126

127-
std::uint64_t start = hpx::chrono::high_resolution_clock::now();
127+
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
128128

129129
// average out 100 executions to avoid varying results
130130
for (auto i = 0; i < test_count; i++)
@@ -142,7 +142,7 @@ std::uint64_t averageout_parallel_forloop(
142142
std::iota(
143143
std::begin(data_representation), std::end(data_representation), gen());
144144

145-
std::uint64_t start = hpx::chrono::high_resolution_clock::now();
145+
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
146146

147147
// average out 100 executions to avoid varying results
148148
for (auto i = 0; i < test_count; i++)
@@ -167,7 +167,7 @@ std::uint64_t averageout_task_forloop(std::size_t vector_size, Executor&& exec)
167167

168168
if (num_overlapping_loops <= 0)
169169
{
170-
std::uint64_t start = hpx::chrono::high_resolution_clock::now();
170+
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
171171

172172
for (auto i = 0; i < test_count; i++)
173173
measure_task_forloop(data_representation, exec).wait();
@@ -178,7 +178,7 @@ std::uint64_t averageout_task_forloop(std::size_t vector_size, Executor&& exec)
178178
std::vector<hpx::shared_future<void>> tests;
179179
tests.resize(num_overlapping_loops);
180180

181-
std::uint64_t start = hpx::chrono::high_resolution_clock::now();
181+
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
182182

183183
for (auto i = 0; i < test_count; i++)
184184
{
@@ -199,7 +199,7 @@ std::uint64_t averageout_sequential_forloop(std::size_t vector_size)
199199
std::iota(
200200
std::begin(data_representation), std::end(data_representation), gen());
201201

202-
std::uint64_t start = hpx::chrono::high_resolution_clock::now();
202+
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
203203

204204
// average out 100 executions to avoid varying results
205205
for (auto i = 0; i < test_count; i++)
@@ -212,8 +212,8 @@ std::uint64_t averageout_sequential_forloop(std::size_t vector_size)
212212
int hpx_main(hpx::program_options::variables_map& vm)
213213
{
214214
// pull values from cmd
215-
std::size_t vector_size = vm["vector_size"].as<std::size_t>();
216-
bool csvoutput = vm.count("csv_output") != 0;
215+
std::size_t const vector_size = vm["vector_size"].as<std::size_t>();
216+
bool const csvoutput = vm.count("csv_output") != 0;
217217
delay = vm["work_delay"].as<int>();
218218
test_count = vm["test_count"].as<int>();
219219
chunk_size = vm["chunk_size"].as<int>();
@@ -264,8 +264,8 @@ int hpx_main(hpx::program_options::variables_map& vm)
264264
std::uint64_t task_time_forloop = 0;
265265
std::uint64_t seq_time_forloop = 0;
266266

267-
std::uint64_t plain_time_for = averageout_plain_for(vector_size);
268-
std::uint64_t plain_time_for_iter =
267+
std::uint64_t const plain_time_for = averageout_plain_for(vector_size);
268+
std::uint64_t const plain_time_for_iter =
269269
averageout_plain_for_iter(vector_size);
270270

271271
if (vm["executor"].as<std::string>() == "forkjoin")
@@ -467,11 +467,15 @@ int hpx_main(hpx::program_options::variables_map& vm)
467467
<< std::left
468468
<< "Parallel Scale : " << std::right
469469
<< std::setw(8)
470-
<< (double(seq_time_foreach) / par_time_foreach) << "\n"
470+
<< (static_cast<double>(seq_time_foreach) /
471+
par_time_foreach)
472+
<< "\n"
471473
<< std::left
472474
<< "Task Scale : " << std::right
473475
<< std::setw(8)
474-
<< (double(seq_time_foreach) / task_time_foreach) << "\n"
476+
<< (static_cast<double>(seq_time_foreach) /
477+
task_time_foreach)
478+
<< "\n"
475479
<< std::flush;
476480

477481
std::cout << "-------------Average-(for_loop)----------------\n"
@@ -490,11 +494,15 @@ int hpx_main(hpx::program_options::variables_map& vm)
490494
<< std::left
491495
<< "Parallel Scale : " << std::right
492496
<< std::setw(8)
493-
<< (double(seq_time_forloop) / par_time_forloop) << "\n"
497+
<< (static_cast<double>(seq_time_forloop) /
498+
par_time_forloop)
499+
<< "\n"
494500
<< std::left
495501
<< "Task Scale : " << std::right
496502
<< std::setw(8)
497-
<< (double(seq_time_forloop) / task_time_forloop) << "\n";
503+
<< (static_cast<double>(seq_time_forloop) /
504+
task_time_forloop)
505+
<< "\n";
498506
}
499507
}
500508

‎libs/core/compute_local/include/hpx/compute_local/host/target.hpp

+5 -5
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ namespace hpx::compute::host {
3333
{
3434
}
3535

36-
explicit native_handle_type(hpx::threads::mask_type mask)
36+
explicit native_handle_type(hpx::threads::mask_type const& mask)
3737
: mask_(mask)
3838
{
3939
}
@@ -58,7 +58,7 @@ namespace hpx::compute::host {
5858
target() = default;
5959

6060
// Constructs target from a given mask of processing units
61-
explicit target(hpx::threads::mask_type mask)
61+
explicit target(hpx::threads::mask_type const& mask)
6262
: handle_(mask)
6363
{
6464
}
@@ -74,12 +74,12 @@ namespace hpx::compute::host {
7474

7575
std::pair<std::size_t, std::size_t> num_pus() const;
7676

77-
constexpr void synchronize() const noexcept
77+
static constexpr void synchronize() noexcept
7878
{
7979
// nothing to do here...
8080
}
8181

82-
hpx::future<void> get_future() const
82+
static hpx::future<void> get_future()
8383
{
8484
return hpx::make_ready_future();
8585
}
@@ -98,7 +98,7 @@ namespace hpx::compute::host {
9898
friend class hpx::serialization::access;
9999

100100
void serialize(serialization::input_archive& ar, unsigned int);
101-
void serialize(serialization::output_archive& ar, unsigned int);
101+
void serialize(serialization::output_archive& ar, unsigned int) const;
102102

103103
native_handle_type handle_;
104104
};

‎libs/core/compute_local/src/host_target.cpp

+17 -6
Original file line number | Diff line number | Diff line change
@@ -21,30 +21,41 @@ namespace hpx::compute::host {
2121

2222
std::pair<std::size_t, std::size_t> target::num_pus() const
2323
{
24-
auto& rp = hpx::resource::get_partitioner();
25-
std::size_t num_os_threads = hpx::get_os_thread_count();
24+
auto const& rp = hpx::resource::get_partitioner();
25+
std::size_t const num_os_threads = hpx::get_os_thread_count();
2626

27-
hpx::threads::mask_type mask = native_handle().get_device();
28-
std::size_t mask_size = hpx::threads::mask_size(mask);
27+
hpx::threads::mask_type const mask = native_handle().get_device();
28+
std::size_t const mask_size = hpx::threads::mask_size(mask);
29+
30+
bool found_one = false;
2931

3032
std::size_t num_thread = 0;
3133
for (/**/; num_thread != num_os_threads; ++num_thread)
3234
{
3335
if (hpx::threads::bit_and(
3436
mask, rp.get_pu_mask(num_thread), mask_size))
3537
{
38+
found_one = true;
3639
break;
3740
}
3841
}
39-
return std::make_pair(num_thread, hpx::threads::count(mask));
42+
43+
if (!found_one)
44+
{
45+
return std::make_pair(static_cast<std::size_t>(-1), 0);
46+
}
47+
48+
return std::make_pair(
49+
num_thread, (std::min)(num_os_threads, hpx::threads::count(mask)));
4050
}
4151

4252
void target::serialize(serialization::input_archive& ar, unsigned int)
4353
{
4454
ar >> handle_.mask_;
4555
}
4656

47-
void target::serialize(serialization::output_archive& ar, unsigned int)
57+
void target::serialize(
58+
serialization::output_archive& ar, unsigned int) const
4859
{
4960
ar << handle_.mask_;
5061
}

0 commit comments

Comments
 (0)
Please sign in to comment.