diff --git a/cscs2016/session6/README.md b/cscs2016/session6/README.md index d18bcf4..b7e8d85 100644 --- a/cscs2016/session6/README.md +++ b/cscs2016/session6/README.md @@ -293,7 +293,7 @@ a reference to the underlying data. at once --- -##Latency V1 +##Latency V0 * Synchronous send and receive of a message * An action is spawned to do nothing other than send a message back @@ -306,8 +306,77 @@ a reference to the underlying data. * Changing the window size has no effect [See the source code](https://github.com/STEllAR-GROUP/tutorials/blob/master/examples/01_latency/latency.cpp#L75) + +* Note the use of DIRECT_ACTION, `serialize_buffer` + +--- +##Latency V1 +* Vector of futures + +* Spawn N actions and store the futures in a vector + +* Wait on the vector of futures until all complete + +* Take the time and compute the average for one message + +* Gives a more realistic answer than v0, but we are not really measuring N + +[See the source code](https://github.com/STEllAR-GROUP/tutorials/blob/master/examples/01_latency/latency.cpp#L111) + +* Note : We are actually measuring a sawtooth from 0 to N + + + + +--- +##Latency V2 +* Simple Atomic Counter and Condition Variable + +* Spawn N messages, each time one returns, increment a counter + +* When the counter reaches N, restart + +* Simple, but still a sawtooth + +[See the source code](https://github.com/STEllAR-GROUP/tutorials/blob/master/examples/01_latency/latency.cpp#L154) + +--- +##Latency V3 +* Sliding Semaphore +* Loop over sends, and track how many are in flight with a sliding semaphore +* This will maintain N in flight using a sliding window (note: the semaphore uses `>` and not `>=` as the test internally) +* We have got past the sawtooth, but there's a nasty bug +* The Nth message may return before the N-1 (or N-2 etc)th message because when multiple +threads are used, on the remote node, one might get suspended by the OS and return after +a later one +* Our semaphore is therefore 'noisy' and we don't have exactly N in flight +* Can 
segfault if one late message returns after the semaphore goes out of scope +* Add an extra condition variable at the end to make sure we keep the semaphore alive +until the last message has returned + * (this also means the timing is correct on the last iteration) + +[See the source code](https://github.com/STEllAR-GROUP/tutorials/blob/master/examples/01_latency/latency.cpp#L211) + --- +##Latency V4 +* Sliding Semaphore with Atomic +* The bug in V3 is caused by the noisy/random return of messages + +* We can easily fix this by using an atomic counter instead of the loop index +for triggering our semaphore. + +* We no longer need the condition variable at the end to prevent segfaults on the +semaphore access. + +[See the source code](https://github.com/STEllAR-GROUP/tutorials/blob/master/examples/01_latency/latency.cpp#L275) + +* V5 : Suggestions welcome for an even better version + +--- class: center, middle ## Next diff --git a/cscs2016/session6/images/sawtooth.jpg b/cscs2016/session6/images/sawtooth.jpg new file mode 100644 index 0000000..78066e8 Binary files /dev/null and b/cscs2016/session6/images/sawtooth.jpg differ diff --git a/examples/01_latency/latency.cpp b/examples/01_latency/latency.cpp index 1d169e3..f60c8c5 100644 --- a/examples/01_latency/latency.cpp +++ b/examples/01_latency/latency.cpp @@ -258,16 +258,13 @@ double receive_v3( } // --------------------------------------------------------------------------------- -// Send a message and receives the reply using a sliding_semaphore to -// track messages in flight. There are always 'window_size' messages in transit +// Sends a message and receives the reply using a sliding_semaphore (as per v3) to +// track messages in flight, but we trigger the signal using an atomic counter. 
+// As before, there are always 'window_size' messages in transit // at any time -// Warning : message N might be returned after message N+M because at the remote -// end each message return is triggered on an HPX task which may or may not -// be suspended and delay the current return message. -// This means that when message N completes- we cannot be 100% that 'window_size' -// messages are really in flight, but we get close. Also when the loop terminates -// there may be one or more messages still uncompleted, so we wait for them at the end -// to avoid destroying the CV before it is done with +// The use of the atomic counter means that we always signal with the correct +// number of messages in flight and we do not need an additional wait on the +// condition variable at the end. double receive_v4( hpx::naming::id_type dest, char * send_buffer, @@ -307,14 +304,13 @@ double receive_v4( sem.signal(counter++); } ); - // sem.wait(parcel_count); // parcel_count++; } sem.wait(parcel_count + window_size - 2); - // + // double d = (static_cast<double>(window_size*num_loops)); return (t.elapsed() * 1e6) / (2.0*d); }