@@ -65,7 +65,8 @@ namespace hpx::parallel::execution::detail {
65
65
template <typename F, typename Ts>
66
66
void do_work_chunk (F&& f, Ts&& ts, std::uint32_t const index) const
67
67
{
68
- #if HPX_HAVE_ITTNOTIFY != 0 && !defined(HPX_HAVE_APEX)
68
+ #if defined(HPX_HAVE_ITTNOTIFY) && HPX_HAVE_ITTNOTIFY != 0 && \
69
+ !defined (HPX_HAVE_APEX)
69
70
static hpx::util::itt::event notify_event (
70
71
" set_value_loop_visitor_static::do_work_chunk(chunking)" );
71
72
@@ -150,7 +151,7 @@ namespace hpx::parallel::execution::detail {
150
151
// Finish the work for one worker thread. If this is not the last worker
151
152
// thread to finish, it will only decrement the counter. If it is the
152
153
// last thread, it will call set_exception if there is an exception.
153
- // Otherwise it will call set_value on the shared state.
154
+ // Otherwise, it will call set_value on the shared state.
154
155
void finish () const
155
156
{
156
157
if (--(state->tasks_remaining .data_ ) == 0 )
@@ -438,17 +439,21 @@ namespace hpx::parallel::execution::detail {
438
439
439
440
// Initialize the queues for all worker threads so that worker
440
441
// threads can start stealing immediately when they start.
441
- for (std:: uint32_t worker_thread = 0 ; worker_thread != num_threads;
442
- ++worker_thread )
442
+ if (hint. placement_mode () == placement::breadth_first ||
443
+ hint. placement_mode () == placement::breadth_first_reverse )
443
444
{
444
- if (hint. placement_mode () == placement::breadth_first ||
445
- hint. placement_mode () == placement::breadth_first_reverse )
445
+ for (std:: uint32_t worker_thread = 0 ;
446
+ worker_thread != num_threads; ++worker_thread )
446
447
{
447
448
init_queue_breadth_first (worker_thread, num_chunks);
448
449
}
449
- else
450
+ }
451
+ else
452
+ {
453
+ // the default for this executor is depth-first placement
454
+ for (std::uint32_t worker_thread = 0 ;
455
+ worker_thread != num_threads; ++worker_thread)
450
456
{
451
- // the default for this executor is depth-first placement
452
457
init_queue_depth_first (worker_thread, num_chunks);
453
458
}
454
459
}
@@ -546,8 +551,8 @@ namespace hpx::parallel::execution::detail {
546
551
auto launch_data = generate_launch_data ();
547
552
std::size_t const size = launch_data.size ();
548
553
549
- // Do straight spawning if hierarchical spawning was disabled or we
550
- // have less chunks than our threshold.
554
+ // Do straight spawning if hierarchical spawning was disabled or if
555
+ // we have fewer chunks than our threshold.
551
556
if (hierarchical_threshold == 0 || hierarchical_threshold >= size)
552
557
{
553
558
for (std::size_t i = 0 ; i != size; ++i)
@@ -558,36 +563,50 @@ namespace hpx::parallel::execution::detail {
558
563
return ;
559
564
}
560
565
561
- auto task = [desc, pool, launch_data](auto b, auto e) {
562
- for (std::size_t i = b; i != e - 1 ; ++i)
566
+ auto task = [desc, pool, launch_data = HPX_MOVE (launch_data)](
567
+ auto b, auto e) mutable {
568
+ HPX_ASSERT (b != e);
569
+ for (std::size_t i = b + 1 ; i != e; ++i)
563
570
{
564
571
auto state = launch_data[i].func .state ;
565
572
state->template do_work_task <false >(desc, pool,
566
- launch_data[i].bind_to_core , launch_data[i].func );
573
+ launch_data[i].bind_to_core ,
574
+ HPX_MOVE (launch_data[i].func ));
567
575
}
568
576
569
- // directly execute last task
570
- auto state = launch_data[e - 1 ].func .state ;
577
+ // directly execute first task
578
+ auto state = launch_data[b ].func .state ;
571
579
state->template do_work_task <true >(
572
- desc, pool, false , launch_data[e - 1 ].func );
580
+ desc, pool, false , HPX_MOVE ( launch_data[b ].func ) );
573
581
};
574
582
575
583
// run task on small stack
576
584
auto post_policy = hpx::execution::experimental::with_stacksize (
577
585
policy, threads::thread_stacksize::small_);
586
+ auto post_policy_hint =
587
+ hpx::execution::experimental::get_hint (post_policy);
588
+ post_policy_hint.mode =
589
+ hpx::threads::thread_schedule_hint_mode::thread;
590
+
578
591
std::size_t start = 0 ;
579
- while (true )
592
+ while (start < size )
580
593
{
594
+ // place the helper thread on the first core of the thread block
595
+ post_policy_hint.hint =
596
+ first_thread + static_cast <std::uint16_t >(start);
597
+ auto core_policy = hpx::execution::experimental::with_hint (
598
+ post_policy, post_policy_hint);
599
+
581
600
auto const stop = start + hierarchical_threshold;
582
601
if (stop > size)
583
602
{
584
603
hpx::detail::post_policy_dispatch<Launch>::call (
585
- post_policy , desc, pool, HPX_MOVE (task), start, size);
604
+ core_policy , desc, pool, HPX_MOVE (task), start, size);
586
605
break ;
587
606
}
588
607
589
608
hpx::detail::post_policy_dispatch<Launch>::call (
590
- post_policy , desc, pool, task, start, stop);
609
+ core_policy , desc, pool, task, start, stop);
591
610
start = stop;
592
611
}
593
612
}
0 commit comments