// Reciprocating.c
// Dave Dice and Alex Kogan
// Reciprocating Locks
// PPoPP 2025
// https://doi.org/10.1145/3710848.3710862 (ACM)
// The code below closely follows Listing-1 in the above.
// https://arxiv.org/abs/2501.02380 (Long form)
// The Reciprocating lock gains performance by a controlled (small) amount of unfairness.
#include <stdbool.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdatomic.h>
// Per-thread wait element.
// Gate is the word a waiting thread polls in Acquire(): the thread clears it to NULL
// before announcing itself in the lock's Arrivals word, and a releasing thread opens it
// by storing a non-NULL value (the end-of-segment marker being passed along).
// NOTE(review): CALIGN is defined outside this file; presumably an alignment attribute — confirm.
typedef struct _WaitElement {
_Atomic(struct _WaitElement *) Gate CALIGN;
} WaitElement;
// Sentinel pointer value with only the low address bit set; it is never dereferenced.
// Release() installs it in Arrivals when it detaches the waiting threads while the lock
// remains held. Acquire() masks the low bit off, so a thread that arrives directly on
// top of LOCKEDEMPTY records no successor.
static WaitElement * const LOCKEDEMPTY = (WaitElement *)(uintptr_t) 1;
// Lock body: a single atomic word.
// Arrivals is NULL while the lock is free (ctor stores NULL; the uncontended path in
// Release swings it back to NULL). While the lock is held it contains either the wait
// element of the most recently arrived thread or the LOCKEDEMPTY sentinel.
typedef struct {
_Atomic(WaitElement *) Arrivals;
} ReciprocatingLock;
// E: the calling thread's own wait element (one instance per thread via __thread);
// Acquire() publishes its address and spins on its Gate.
static __thread WaitElement E CALIGN;
// lock: the single lock instance exercised by Worker() and reset by ctor().
static ReciprocatingLock lock CALIGN;
// Acquire the lock, spinning on this thread's element E if the lock is already held.
//
// lock          - the lock to acquire.
// _EndOfSegment - out-parameter; receives the context value that must be passed,
//                 together with the return value, to the matching Release().
// Returns the wait element this thread must hand the lock to when it releases,
// or NULL when there is no such successor.
static inline WaitElement * Acquire(ReciprocatingLock * lock, WaitElement ** _EndOfSegment ) {
// Close our gate before publishing E, so that only a releaser's store can open it.
atomic_store_explicit( &E.Gate, NULL, memory_order_release );
WaitElement * succ = NULL;
// Uncontended default: we are our own end-of-segment marker.
WaitElement * EndOfSegment = &E;
// Announce ourselves as the most recent arrival; tail is whoever was there before us
// (NULL if the lock was free, possibly LOCKEDEMPTY).
WaitElement * const tail = atomic_exchange( &lock->Arrivals, &E );
assert( tail != &E );
if ( tail != NULL ) {
// coerce LOCKEDEMPTY to null
// succ will be our successor when we subsequently release
succ = (WaitElement *)(((uintptr_t) tail) & ~1);
assert(succ != &E);
// contention : waiting phase
// Consider : could use HemLock CTR optimization here and spin using exchange
// That, in turn, would obviate the need to clear Gate at the top of Acquire
// and would avoid the MESI/MOESI/MESIF S->M coherence upgrade.
for ( ;; ) {
// A non-NULL Gate means a releaser has passed us the lock; the value stored is
// the end-of-segment marker that releaser was carrying.
EndOfSegment = atomic_load_explicit( &E.Gate, memory_order_acquire );
if ( EndOfSegment != NULL ) break;
Pause();
}
assert( EndOfSegment != &E );
// Detect logical end-of-segment terminus address
// If our would-be successor is the end-of-segment marker itself, the segment ends
// with us: drop the successor and report LOCKEDEMPTY, so that Release() operates on
// the Arrivals word instead of attempting a direct handoff.
if ( succ == EndOfSegment ) {
succ = NULL; // quash
EndOfSegment = LOCKEDEMPTY;
}
}
// Publish the context for the matching Release().
*_EndOfSegment = EndOfSegment;
return succ;
}
// Release the lock obtained by the matching Acquire() call on this thread.
//
// lock         - the lock being released; its Arrivals word must be non-NULL (held).
// EndOfSegment - context value produced by Acquire() through its out-parameter.
// succ         - successor returned by Acquire(), or NULL if there is none.
//
// Three outcomes, tried in order:
//   1. succ != NULL: hand the lock directly to succ by storing EndOfSegment into
//      its Gate, passing the end-of-segment marker along unchanged.
//   2. Arrivals still equals EndOfSegment (nobody has arrived behind us): swing
//      Arrivals back to NULL, leaving the lock free.
//   3. Otherwise: install LOCKEDEMPTY in Arrivals, which detaches the most recent
//      arrival w, and open w's Gate so that w becomes the next owner.
static inline void Release( ReciprocatingLock * lock, WaitElement * EndOfSegment, WaitElement * succ ) {
	assert( EndOfSegment != NULL );
	assert( atomic_load_explicit( &lock->Arrivals, memory_order_acquire ) != NULL );

	if ( succ != NULL ) {
		// Direct handoff: the successor is spinning on its Gate, which must still be closed.
		assert( atomic_load( &succ->Gate ) == NULL );
		atomic_store_explicit( &succ->Gate, EndOfSegment, memory_order_release );
		return;
	}

	// Without a successor the context is either the sentinel or our own element.
	assert( EndOfSegment == LOCKEDEMPTY || EndOfSegment == &E );

#if 0
	// Alternative kept for experimentation: attempt the CAS unconditionally.
	WaitElement * v = EndOfSegment;
	if ( atomic_compare_exchange_strong_explicit( &lock->Arrivals, &v, NULL, memory_order_seq_cst, memory_order_seq_cst ) ) {
		// uncontended fast-path return
		return;
	}
#else
	// Load first, so the CAS is skipped when Arrivals has already changed and it would fail.
	if ( atomic_load_explicit( &lock->Arrivals, memory_order_acquire ) == EndOfSegment ) {
		WaitElement * v = EndOfSegment;
		if ( atomic_compare_exchange_strong_explicit( &lock->Arrivals, &v, NULL, memory_order_seq_cst, memory_order_seq_cst ) ) {
			// uncontended fast-path return
			return;
		}
	}
#endif

	// Threads arrived while we held the lock: detach them, leaving LOCKEDEMPTY behind so the
	// lock still reads as held, and pass ownership to the most recent arrival.
	WaitElement * w = atomic_exchange( &lock->Arrivals, LOCKEDEMPTY );
	assert( w != NULL );
	assert( w != LOCKEDEMPTY );
	assert( w != &E );
	assert( atomic_load_explicit( &w->Gate, memory_order_acquire ) == NULL );
	atomic_store_explicit( &w->Gate, EndOfSegment, memory_order_release );
}
// Benchmark thread body.
// arg carries the thread's numeric id, cast through size_t. For each of RUNS experiments
// the thread repeatedly runs NCS, Acquire, CS(id), Release until `stop` becomes non-zero,
// counting iterations in `entry`; it then records the checksum and the entry count and
// waits for `stop` to return to zero before starting the next experiment.
// NOTE(review): TYPE, RTYPE, NCS_DECL, NCS, CS, Fai, RUNS, stop, entries, Arrived,
// sumOfThreadChecksums, startpoint, cycleUp and NoStartPoints are defined outside this
// file (presumably by the test harness) — confirm their semantics there.
// Always returns NULL.
static void * Worker( void * arg ) {
TYPE id = (size_t)arg;
uint64_t entry;
#ifdef FAST
unsigned int cnt = 0, oid = id;
#endif // FAST
NCS_DECL;
// Note: the wait element E used by Acquire/Release is declared at file scope as
// thread-local storage (static __thread); it does not live on this stack frame.
for ( int r = 0; r < RUNS; r += 1 ) {
RTYPE randomThreadChecksum = 0;
for ( entry = 0; stop == 0; entry += 1 ) {
NCS;
// EndOfSegment and succ carry context from Acquire to the corresponding Release.
// CLH, MCS, and MCSH are similarly _not_ context-free.
// With a slightly more clever encoding the 2 fields can readily be collapsed into 1,
// but 2 fields are easier for the purpose of explication.
// The context could also be passed via fields in the lock body or via TLS.
// C++ RAII (std::scoped_lock or friends) could also be used to carry the context.
WaitElement * EndOfSegment;
WaitElement * succ = Acquire( &lock, &EndOfSegment );
randomThreadChecksum += CS( id );
Release( &lock, EndOfSegment, succ );
#ifdef FAST
id = startpoint( cnt ); // different starting point each experiment
cnt = cycleUp( cnt, NoStartPoints );
#endif // FAST
} // for
// Fold this thread's checksum into the shared total.
Fai( sumOfThreadChecksums, randomThreadChecksum );
#ifdef FAST
// Restore the original id, which the FAST loop above overwrote.
id = oid;
#endif // FAST
entries[r][id] = entry;
// Signal arrival at the end of this run, wait for `stop` to be cleared, then sign out.
Fai( Arrived, 1 );
while ( stop != 0 ) Pause();
Fai( Arrived, -1 );
} // for
return NULL;
} // Worker
// Put the global lock into its unlocked state by storing NULL into Arrivals.
// Takes no arguments; declared with an explicit (void) prototype.
// NOTE(review): presumably invoked by the harness before worker threads start — confirm.
void __attribute__((noinline)) ctor( void ) {
	atomic_store_explicit( &lock.Arrivals, NULL, memory_order_release );
} // ctor
// Teardown hook paired with ctor(). The lock owns no resources, so there is nothing to do.
// Takes no arguments; declared with an explicit (void) prototype.
void __attribute__((noinline)) dtor( void ) {
} // dtor
// Local Variables: //
// tab-width: 4 //
// compile-command: "gcc -Wall -Wextra -std=gnu11 -O3 -DNDEBUG -fno-reorder-functions -DPIN -DAlgorithm=Reciprocating Harness.c -DNDEBUG=1 -lpthread -lm -D`hostname` -DCFMT -DCNT=0" //
// End: //