-
Notifications
You must be signed in to change notification settings - Fork 3
First draft for the section on atomics #34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 7 commits
b39e750
57d9e9f
1118350
202c636
ec614b4
5a8b7b1
a43f031
83b62b1
069c84a
2483079
22b7c21
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -69,6 +69,8 @@ | |||||
| row{1}={bg=lightmain}, | ||||||
| } | ||||||
| } | ||||||
| \colorlet{thread1}{gray!25} | ||||||
| \colorlet{thread6}{example!25} | ||||||
|
|
||||||
| \graphicspath{{../../images/}} | ||||||
|
|
||||||
|
|
@@ -648,12 +650,267 @@ \section{Subviews} | |||||
|
|
||||||
| \section{Atomics} | ||||||
|
|
||||||
| \begin{frame}[fragile]{Race condition} | ||||||
| Porting code that builds a histogram: | ||||||
PaulGannay marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| \begin{columns} | ||||||
| \begin{column}{0.5\linewidth} | ||||||
| \begin{minted}{C++} | ||||||
| double histo[5] = {0}; | ||||||
|
|
||||||
| for (int i=0; i < N; i++) { | ||||||
| histo[i%5] += i; | ||||||
| } | ||||||
| \end{minted} | ||||||
| \end{column} | ||||||
| \pause | ||||||
| \begin{column}{0.5\linewidth} | ||||||
| \begin{minted}{C++} | ||||||
| Kokkos::View<double*> histo("histo", 5); | ||||||
| Kokkos::deep_copy(histo, 0); | ||||||
|
||||||
|
|
||||||
| Kokkos::parallel_for( | ||||||
| Kokkos::RangePolicy(0,N), | ||||||
| KOKKOS_LAMBDA(int i) { | ||||||
| histo(i%5) += i; | ||||||
| }); | ||||||
| \end{minted} | ||||||
| \end{column} | ||||||
| \end{columns} | ||||||
| \end{frame} | ||||||
|
|
||||||
| \begin{frame}[fragile]{Race condition} | ||||||
| \begin{columns} | ||||||
| \begin{column}{0.5\linewidth} | ||||||
| Even simple instructions like increment are decomposed into several smaller assembly instructions: | ||||||
| \begin{minted}{C++} | ||||||
| histo(i%5) += i; | ||||||
| \end{minted} | ||||||
| \end{column} | ||||||
| \begin{column}{0.5\linewidth} | ||||||
| \SetTblrInner{rowsep=0pt} | ||||||
| \begin{tblr}{colspec={cccc},rowspec={Q[lightmain]Q[white]Q[thread1]Q[thread1]Q[thread1]Q[white]Q[thread6]Q[thread6]Q[thread6]Q[white]}} | ||||||
| \textbf{Thread 1} & \textbf{Thread 6} & & \textbf{res} \\ | ||||||
| & & & 0 \\ | ||||||
| read value & & ← & 0 \\ | ||||||
| add 1 & & & 0 \\ | ||||||
| write value & & → & 1 \\ | ||||||
| & & & 1 \\ | ||||||
| & read value & ← & 1 \\ | ||||||
| & add 6 & & 1 \\ | ||||||
| & write value & → & 7 \\ | ||||||
| & & & 7 \\ | ||||||
| \end{tblr} | ||||||
| \end{column} | ||||||
| \end{columns} | ||||||
| \end{frame} | ||||||
|
|
||||||
| % Trainees can play with the following program to check that it really presents a race condition: | ||||||
| %#include <iostream> | ||||||
| %#include <Kokkos_Core.hpp> | ||||||
| % | ||||||
| %int main(int argc, char *argv[]) { | ||||||
| % Kokkos::initialize(argc, argv); | ||||||
| % { | ||||||
| % const int N = 10000; | ||||||
| % Kokkos::View<double*> v("v", N); | ||||||
| % Kokkos::deep_copy(v, 4); | ||||||
| % | ||||||
| % Kokkos::View<double> res("res"); | ||||||
| % | ||||||
| % Kokkos::parallel_for(Kokkos::RangePolicy(0, N), | ||||||
| % KOKKOS_LAMBDA(int i) { | ||||||
| % //Kokkos::atomic_add(&res(), v(i)); | ||||||
| % res() = res() + v(i); | ||||||
| % }); | ||||||
| % | ||||||
| % double res_; | ||||||
| % | ||||||
| % deep_copy(res_, res); | ||||||
| % | ||||||
| % std::cout << "res_:" << res_ << std::endl; | ||||||
| % std::cout << "4*N:" << 4*N << std::endl; | ||||||
| % } | ||||||
| % Kokkos::finalize(); | ||||||
| %} | ||||||
|
|
||||||
| \begin{frame}[fragile]{Race condition} | ||||||
| \begin{columns} | ||||||
| \begin{column}{0.5\linewidth} | ||||||
| Execution between threads is independent. There is no guarantee over the order of instructions: | ||||||
| \begin{minted}{C++} | ||||||
| histo(i%5) += i; | ||||||
| \end{minted} | ||||||
|
|
||||||
| When several threads access the same data concurrently, this can lead to \structure{race conditions}. | ||||||
PaulGannay marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| \end{column} | ||||||
| \begin{column}{0.5\linewidth} | ||||||
| \SetTblrInner{rowsep=0pt} | ||||||
| \begin{tblr}{colspec={cccc},rowspec={Q[lightmain]Q[white]Q[thread1]Q[thread6]Q[thread1]Q[thread1]Q[white]Q[thread6]Q[thread6]Q[white]}} | ||||||
| \textbf{Thread 1} & \textbf{Thread 6} & & \textbf{res} \\ | ||||||
| & & & 0 \\ | ||||||
| read value & & ← & 0 \\ | ||||||
| & read value & ← & 0 \\ | ||||||
| add 1 & & & 0 \\ | ||||||
| write value & & → & 1 \\ | ||||||
| & & & 1 \\ | ||||||
| & add 6 & & 1 \\ | ||||||
| & write value & → & 6 \\ | ||||||
| & & & 6 \\ | ||||||
| \end{tblr} | ||||||
| \end{column} | ||||||
| \end{columns} | ||||||
| \end{frame} | ||||||
|
|
||||||
| \begin{frame}[fragile]{Atomic operation} | ||||||
| Replacing the addition with its atomic counterpart solves the problem: | ||||||
PaulGannay marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| \begin{columns} | ||||||
| \begin{column}{0.40\linewidth} | ||||||
| \begin{minted}{C++} | ||||||
| Kokkos::parallel_for( | ||||||
| Kokkos::RangePolicy(0,N), | ||||||
| KOKKOS_LAMBDA(int i) { | ||||||
| histo(i%5) += i; | ||||||
| }); | ||||||
| \end{minted} | ||||||
| \end{column} | ||||||
| \begin{column}{0.56\linewidth} | ||||||
| \begin{minted}{C++} | ||||||
| Kokkos::parallel_for( | ||||||
| Kokkos::RangePolicy(0,N), | ||||||
| KOKKOS_LAMBDA(int i) { | ||||||
| Kokkos::atomic_add(&histo(i%5), i); | ||||||
| }); | ||||||
| \end{minted} | ||||||
| \end{column} | ||||||
| \end{columns} | ||||||
| Note that the \texttt{atomic\_add} instruction takes a pointer and not a | ||||||
PaulGannay marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| reference as its first argument, as the instruction needs access to | ||||||
| the actual memory address of the modified variable. | ||||||
| \end{frame} | ||||||
|
|
||||||
| \begin{frame}[fragile]{Atomic operation} | ||||||
| \begin{columns} | ||||||
| \begin{column}{0.55\linewidth} | ||||||
| \texttt{atomic\_add} executes the \texttt{Read}, \texttt{Add} and \texttt{Write} in a single atomic step, | ||||||
|
||||||
| guaranteeing the absence of race conditions during the operation: | ||||||
PaulGannay marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| \begin{minted}{C++} | ||||||
| Kokkos::atomic_add(&histo(i%5), i); | ||||||
| \end{minted} | ||||||
| \end{column} | ||||||
| \begin{column}{0.5\linewidth} | ||||||
| \noindent Either: | ||||||
|
|
||||||
| \vspace{0.5em} | ||||||
| \SetTblrInner{rowsep=0pt} | ||||||
| \begin{tblr}{colspec={cccc},rowspec={Q[lightmain]Q[white]Q[thread1]Q[thread6]}} | ||||||
| \textbf{Thread 1} & \textbf{Thread 6} & & \textbf{res} \\ | ||||||
| & & & 0 \\ | ||||||
| atomic add & & ←→ & 1 \\ | ||||||
| & atomic add & ←→ & 7 \\ | ||||||
| & & & 7 \\ | ||||||
| \end{tblr} | ||||||
| \pause | ||||||
| \vspace{0.5em} | ||||||
|
|
||||||
| \noindent Or: | ||||||
|
|
||||||
| \vspace{0.5em} | ||||||
| \SetTblrInner{rowsep=0pt} | ||||||
| \begin{tblr}{colspec={cccc},rowspec={Q[lightmain]Q[white]Q[thread6]Q[thread1]}} | ||||||
| \textbf{Thread 1} & \textbf{Thread 6} & & \textbf{res} \\ | ||||||
| & & & 0 \\ | ||||||
| & atomic add & ←→ & 6 \\ | ||||||
| atomic add & & ←→ & 7 \\ | ||||||
| & & & 7 \\ | ||||||
| \end{tblr} | ||||||
| \end{column} | ||||||
| \end{columns} | ||||||
| \end{frame} | ||||||
|
|
||||||
| \begin{frame}[fragile]{Operations} | ||||||
| \begin{columns} | ||||||
| \begin{column}{0.35\linewidth} | ||||||
| Other common operations are available with the format \texttt{Kokkos::atomic\_[op]}: | ||||||
| \end{column} | ||||||
| \begin{column}{0.75\linewidth} | ||||||
| \SetTblrInner{rowsep=0pt} | ||||||
| \begin{tblr}[theme=kokkostable]{lc} | ||||||
| Operation & Replaces \\ | ||||||
| \texttt{Kokkos::atomic\_add(\&x, y)} & \texttt{x += y} \\ | ||||||
| \texttt{Kokkos::atomic\_and(\&x, y)} & \texttt{x \&= y} \\ | ||||||
| \texttt{Kokkos::atomic\_dec(\&x)} & \texttt{x--} \\ | ||||||
| \texttt{Kokkos::atomic\_inc(\&x)} & \texttt{x++} \\ | ||||||
| \texttt{Kokkos::atomic\_lshift(\&x, y)} & \texttt{x = x << y} \\ | ||||||
| \texttt{Kokkos::atomic\_max(\&x, y)} & \texttt{x = std::max(x, y)} \\ | ||||||
| \texttt{Kokkos::atomic\_min(\&x, y)} & \texttt{x = std::min(x, y)} \\ | ||||||
| \texttt{Kokkos::atomic\_mod(\&x, y)} & \texttt{x \%= y} \\ | ||||||
| \texttt{Kokkos::atomic\_nand(\&x, y)} & \texttt{x = \textasciitilde{}(x \& y)} \\ | ||||||
| \texttt{Kokkos::atomic\_or(\&x, y)} & \texttt{x |= y} \\ | ||||||
| \texttt{Kokkos::atomic\_rshift(\&x, y)} & \texttt{x = x >> y} \\ | ||||||
| \texttt{Kokkos::atomic\_sub(\&x, y)} & \texttt{x -= y} \\ | ||||||
| \texttt{Kokkos::atomic\_store(\&x, y)} & \texttt{x = y} \\ | ||||||
| \texttt{Kokkos::atomic\_xor(\&x, y)} & \texttt{x \^{}= y} \\ | ||||||
| \end{tblr} | ||||||
| \end{column} | ||||||
| \end{columns} | ||||||
| \end{frame} | ||||||
|
|
||||||
| \begin{frame}[fragile]{Atomic Memory Trait} | ||||||
| \begin{columns} | ||||||
| \begin{column}{0.50\linewidth} | ||||||
| \begin{itemize} | ||||||
| \item If you need to access a View exclusively through atomic operations, you | ||||||
PaulGannay marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| can also create an alias for this View with the \texttt{Atomic} \texttt{MemoryTraits} | ||||||
PaulGannay marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| \item It guarantees that all operations done through the alias are performed atomically | ||||||
| \end{itemize} | ||||||
| \end{column} | ||||||
| \begin{column}{0.50\linewidth} | ||||||
| \begin{minted}{C++} | ||||||
| Kokkos::View<double*> histo("histo", 5); | ||||||
| Kokkos::deep_copy(histo, 0); | ||||||
|
|
||||||
| View<double*, MemoryTraits<Atomic>> | ||||||
PaulGannay marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| histo_atomic = histo; | ||||||
|
|
||||||
| Kokkos::parallel_for( | ||||||
| Kokkos::RangePolicy(0,N), | ||||||
| KOKKOS_LAMBDA(int i) { | ||||||
| histo_atomic(i%5) += i; | ||||||
| }); | ||||||
| \end{minted} | ||||||
| \end{column} | ||||||
| \end{columns} | ||||||
| \end{frame} | ||||||
|
|
||||||
| \begin{frame}[fragile]{Performance} | ||||||
| \begin{columns} | ||||||
| \begin{column}{0.45\linewidth} | ||||||
| Atomics can have a huge impact on performance: | ||||||
| \begin{itemize} | ||||||
| \item The instruction itself is slower than the one it replaces | ||||||
| \item They may generate extra synchronisation points | ||||||
| \item They bypass and invalidate cache lines | ||||||
| \end{itemize} | ||||||
| \end{column} | ||||||
| \begin{column}{0.55\linewidth} | ||||||
| \begin{block} | ||||||
|
||||||
| \begin{block} | |
| \begin{block}{Remarks} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would select two tones of gray instead
Or plainly use colors:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I initially tried with the different levels of gray but found it hard to read, especially on slide 30.
The light red + light blue looks nice in colour but is harder to differentiate in B&W.
I'll do the change, we'll revert it if you think readability in B&W is important.