STEllAR-GROUP · Aug 8, 2023
diff --git a/libs/core/algorithms/tests/performance/foreach_scaling.cpp b/libs/core/algorithms/tests/performance/foreach_scaling.cpp
+26 -18
diff --git a/libs/core/compute_local/include/hpx/compute_local/host/target.hpp b/libs/core/compute_local/include/hpx/compute_local/host/target.hpp
+5 -5
diff --git a/libs/core/compute_local/src/host_target.cpp b/libs/core/compute_local/src/host_target.cpp
+17 -6
@@ -35,7 +35,7 @@ std::uint64_t averageout_plain_for(std::size_t vector_size)
     std::iota(
         std::begin(data_representation), std::end(data_representation), gen());
 
-    std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+    std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
 
     // average out 100 executions to avoid varying results
     for (auto i = 0; i < test_count; i++)
@@ -52,7 +52,7 @@ std::uint64_t averageout_plain_for_iter(std::size_t vector_size)
     std::iota(
         std::begin(data_representation), std::end(data_representation), gen());
 
-    std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+    std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
 
     // average out 100 executions to avoid varying results
     for (auto i = 0; i < test_count; i++)
@@ -72,7 +72,7 @@ std::uint64_t averageout_parallel_foreach(
     std::iota(
         std::begin(data_representation), std::end(data_representation), gen());
 
-    std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+    std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
 
     // average out 100 executions to avoid varying results
     for (auto i = 0; i < test_count; i++)
@@ -92,7 +92,7 @@ std::uint64_t averageout_task_foreach(std::size_t vector_size, Executor&& exec)
 
     if (num_overlapping_loops <= 0)
     {
-        std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+        std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
 
         for (auto i = 0; i < test_count; i++)
             measure_task_foreach(data_representation, exec).wait();
@@ -103,7 +103,7 @@ std::uint64_t averageout_task_foreach(std::size_t vector_size, Executor&& exec)
     std::vector<hpx::shared_future<void>> tests;
     tests.resize(num_overlapping_loops);
 
-    std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+    std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
 
     for (auto i = 0; i < test_count; i++)
     {
@@ -124,7 +124,7 @@ std::uint64_t averageout_sequential_foreach(std::size_t vector_size)
     std::iota(
         std::begin(data_representation), std::end(data_representation), gen());
 
-    std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+    std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
 
     // average out 100 executions to avoid varying results
     for (auto i = 0; i < test_count; i++)
@@ -142,7 +142,7 @@ std::uint64_t averageout_parallel_forloop(
     std::iota(
         std::begin(data_representation), std::end(data_representation), gen());
 
-    std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+    std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
 
     // average out 100 executions to avoid varying results
     for (auto i = 0; i < test_count; i++)
@@ -167,7 +167,7 @@ std::uint64_t averageout_task_forloop(std::size_t vector_size, Executor&& exec)
 
     if (num_overlapping_loops <= 0)
     {
-        std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+        std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
 
         for (auto i = 0; i < test_count; i++)
             measure_task_forloop(data_representation, exec).wait();
@@ -178,7 +178,7 @@ std::uint64_t averageout_task_forloop(std::size_t vector_size, Executor&& exec)
     std::vector<hpx::shared_future<void>> tests;
     tests.resize(num_overlapping_loops);
 
-    std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+    std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
 
     for (auto i = 0; i < test_count; i++)
     {
@@ -199,7 +199,7 @@ std::uint64_t averageout_sequential_forloop(std::size_t vector_size)
     std::iota(
         std::begin(data_representation), std::end(data_representation), gen());
 
-    std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+    std::uint64_t const start = hpx::chrono::high_resolution_clock::now();
 
     // average out 100 executions to avoid varying results
     for (auto i = 0; i < test_count; i++)
@@ -212,8 +212,8 @@ std::uint64_t averageout_sequential_forloop(std::size_t vector_size)
 int hpx_main(hpx::program_options::variables_map& vm)
 {
     // pull values from cmd
-    std::size_t vector_size = vm["vector_size"].as<std::size_t>();
-    bool csvoutput = vm.count("csv_output") != 0;
+    std::size_t const vector_size = vm["vector_size"].as<std::size_t>();
+    bool const csvoutput = vm.count("csv_output") != 0;
     delay = vm["work_delay"].as<int>();
     test_count = vm["test_count"].as<int>();
     chunk_size = vm["chunk_size"].as<int>();
@@ -264,8 +264,8 @@ int hpx_main(hpx::program_options::variables_map& vm)
         std::uint64_t task_time_forloop = 0;
         std::uint64_t seq_time_forloop = 0;
 
-        std::uint64_t plain_time_for = averageout_plain_for(vector_size);
-        std::uint64_t plain_time_for_iter =
+        std::uint64_t const plain_time_for = averageout_plain_for(vector_size);
+        std::uint64_t const plain_time_for_iter =
             averageout_plain_for_iter(vector_size);
 
         if (vm["executor"].as<std::string>() == "forkjoin")
@@ -467,11 +467,15 @@ int hpx_main(hpx::program_options::variables_map& vm)
                       << std::left
                       << "Parallel Scale                    : " << std::right
                       << std::setw(8)
-                      << (double(seq_time_foreach) / par_time_foreach) << "\n"
+                      << (static_cast<double>(seq_time_foreach) /
+                             par_time_foreach)
+                      << "\n"
                       << std::left
                       << "Task Scale                        : " << std::right
                       << std::setw(8)
-                      << (double(seq_time_foreach) / task_time_foreach) << "\n"
+                      << (static_cast<double>(seq_time_foreach) /
+                             task_time_foreach)
+                      << "\n"
                       << std::flush;
 
             std::cout << "-------------Average-(for_loop)----------------\n"
@@ -490,11 +494,15 @@ int hpx_main(hpx::program_options::variables_map& vm)
                       << std::left
                       << "Parallel Scale                    : " << std::right
                       << std::setw(8)
-                      << (double(seq_time_forloop) / par_time_forloop) << "\n"
+                      << (static_cast<double>(seq_time_forloop) /
+                             par_time_forloop)
+                      << "\n"
                       << std::left
                       << "Task Scale                        : " << std::right
                       << std::setw(8)
-                      << (double(seq_time_forloop) / task_time_forloop) << "\n";
+                      << (static_cast<double>(seq_time_forloop) /
+                             task_time_forloop)
+                      << "\n";
         }
     }
 
 
@@ -33,7 +33,7 @@ namespace hpx::compute::host {
             {
             }
 
-            explicit native_handle_type(hpx::threads::mask_type mask)
+            explicit native_handle_type(hpx::threads::mask_type const& mask)
               : mask_(mask)
             {
             }
@@ -58,7 +58,7 @@ namespace hpx::compute::host {
         target() = default;
 
         // Constructs target from a given mask of processing units
-        explicit target(hpx::threads::mask_type mask)
+        explicit target(hpx::threads::mask_type const& mask)
           : handle_(mask)
         {
         }
@@ -74,12 +74,12 @@ namespace hpx::compute::host {
 
         std::pair<std::size_t, std::size_t> num_pus() const;
 
-        constexpr void synchronize() const noexcept
+        static constexpr void synchronize() noexcept
         {
             // nothing to do here...
         }
 
-        hpx::future<void> get_future() const
+        static hpx::future<void> get_future()
         {
             return hpx::make_ready_future();
         }
@@ -98,7 +98,7 @@ namespace hpx::compute::host {
         friend class hpx::serialization::access;
 
         void serialize(serialization::input_archive& ar, unsigned int);
-        void serialize(serialization::output_archive& ar, unsigned int);
+        void serialize(serialization::output_archive& ar, unsigned int) const;
 
         native_handle_type handle_;
     };
 
@@ -21,30 +21,41 @@ namespace hpx::compute::host {
 
     std::pair<std::size_t, std::size_t> target::num_pus() const
     {
-        auto& rp = hpx::resource::get_partitioner();
-        std::size_t num_os_threads = hpx::get_os_thread_count();
+        auto const& rp = hpx::resource::get_partitioner();
+        std::size_t const num_os_threads = hpx::get_os_thread_count();
 
-        hpx::threads::mask_type mask = native_handle().get_device();
-        std::size_t mask_size = hpx::threads::mask_size(mask);
+        hpx::threads::mask_type const mask = native_handle().get_device();
+        std::size_t const mask_size = hpx::threads::mask_size(mask);
+
+        bool found_one = false;
 
         std::size_t num_thread = 0;
         for (/**/; num_thread != num_os_threads; ++num_thread)
         {
             if (hpx::threads::bit_and(
                     mask, rp.get_pu_mask(num_thread), mask_size))
             {
+                found_one = true;
                 break;
             }
         }
-        return std::make_pair(num_thread, hpx::threads::count(mask));
+
+        if (!found_one)
+        {
+            return std::make_pair(static_cast<std::size_t>(-1), 0);
+        }
+
+        return std::make_pair(
+            num_thread, (std::min)(num_os_threads, hpx::threads::count(mask)));
     }
 
     void target::serialize(serialization::input_archive& ar, unsigned int)
     {
         ar >> handle_.mask_;
     }
 
-    void target::serialize(serialization::output_archive& ar, unsigned int)
+    void target::serialize(
+        serialization::output_archive& ar, unsigned int) const
     {
         ar << handle_.mask_;
     }
Original file line number	Diff line number	Diff line change
`@@ -33,7 +33,7 @@ namespace hpx::compute::host {`
`33`	`33`	`{`
`34`	`34`	`}`
`35`	`35`
`36`		`- explicit native_handle_type(hpx::threads::mask_type mask)`
	`36`	`+ explicit native_handle_type(hpx::threads::mask_type const& mask)`
`37`	`37`	`: mask_(mask)`
`38`	`38`	`{`
`39`	`39`	`}`
`@@ -58,7 +58,7 @@ namespace hpx::compute::host {`
`58`	`58`	`target() = default;`
`59`	`59`
`60`	`60`	`// Constructs target from a given mask of processing units`
`61`		`- explicit target(hpx::threads::mask_type mask)`
	`61`	`+ explicit target(hpx::threads::mask_type const& mask)`
`62`	`62`	`: handle_(mask)`
`63`	`63`	`{`
`64`	`64`	`}`
`@@ -74,12 +74,12 @@ namespace hpx::compute::host {`
`74`	`74`
`75`	`75`	`std::pair<std::size_t, std::size_t> num_pus() const;`
`76`	`76`
`77`		`- constexpr void synchronize() const noexcept`
	`77`	`+ static constexpr void synchronize() noexcept`
`78`	`78`	`{`
`79`	`79`	`// nothing to do here...`
`80`	`80`	`}`
`81`	`81`
`82`		`- hpx::future<void> get_future() const`
	`82`	`+ static hpx::future<void> get_future()`
`83`	`83`	`{`
`84`	`84`	`return hpx::make_ready_future();`
`85`	`85`	`}`
`@@ -98,7 +98,7 @@ namespace hpx::compute::host {`
`98`	`98`	`friend class hpx::serialization::access;`
`99`	`99`
`100`	`100`	`void serialize(serialization::input_archive& ar, unsigned int);`
`101`		`- void serialize(serialization::output_archive& ar, unsigned int);`
	`101`	`+ void serialize(serialization::output_archive& ar, unsigned int) const;`
`102`	`102`
`103`	`103`	`native_handle_type handle_;`
`104`	`104`	`};`
Original file line number	Diff line number	Diff line change
`@@ -21,30 +21,41 @@ namespace hpx::compute::host {`
`21`	`21`
`22`	`22`	`std::pair<std::size_t, std::size_t> target::num_pus() const`
`23`	`23`	`{`
`24`		`- auto& rp = hpx::resource::get_partitioner();`
`25`		`- std::size_t num_os_threads = hpx::get_os_thread_count();`
	`24`	`+ auto const& rp = hpx::resource::get_partitioner();`
	`25`	`+ std::size_t const num_os_threads = hpx::get_os_thread_count();`
`26`	`26`
`27`		`- hpx::threads::mask_type mask = native_handle().get_device();`
`28`		`- std::size_t mask_size = hpx::threads::mask_size(mask);`
	`27`	`+ hpx::threads::mask_type const mask = native_handle().get_device();`
	`28`	`+ std::size_t const mask_size = hpx::threads::mask_size(mask);`
	`29`	`+`
	`30`	`+ bool found_one = false;`
`29`	`31`
`30`	`32`	`std::size_t num_thread = 0;`
`31`	`33`	`for (/**/; num_thread != num_os_threads; ++num_thread)`
`32`	`34`	`{`
`33`	`35`	`if (hpx::threads::bit_and(`
`34`	`36`	`mask, rp.get_pu_mask(num_thread), mask_size))`
`35`	`37`	`{`
	`38`	`+ found_one = true;`
`36`	`39`	`break;`
`37`	`40`	`}`
`38`	`41`	`}`
`39`		`- return std::make_pair(num_thread, hpx::threads::count(mask));`
	`42`	`+`
	`43`	`+ if (!found_one)`
	`44`	`+ {`
	`45`	`+ return std::make_pair(static_cast<std::size_t>(-1), 0);`
	`46`	`+ }`
	`47`	`+`
	`48`	`+ return std::make_pair(`
	`49`	`+ num_thread, (std::min)(num_os_threads, hpx::threads::count(mask)));`
`40`	`50`	`}`
`41`	`51`
`42`	`52`	`void target::serialize(serialization::input_archive& ar, unsigned int)`
`43`	`53`	`{`
`44`	`54`	`ar >> handle_.mask_;`
`45`	`55`	`}`
`46`	`56`
`47`		`- void target::serialize(serialization::output_archive& ar, unsigned int)`
	`57`	`+ void target::serialize(`
	`58`	`+ serialization::output_archive& ar, unsigned int) const`
`48`	`59`	`{`
`49`	`60`	`ar << handle_.mask_;`
`50`	`61`	`}`