// Copyright (c) 2014-2017 Michael J. Sullivan
// Use of this source code is governed by an MIT-style license that can be
// found in the LICENSE file.
// BUG: this whole thing depends on the specifics of how the clang version I
// am using emits llvm bitcode for the hacky RMC protocol.
// BUG: the handling of the edge cut map is bogus. Right now we are
// working around this by only ever having syncs at the start of
// blocks.
#include "RMCInternal.h"
#include "PathCache.h"
#include <llvm/Pass.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/InlineAsm.h>
#include <llvm/IR/CFG.h>
#include <llvm/IR/InstIterator.h>
#include <llvm/InitializePasses.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include <llvm/Transforms/Utils.h>
#include <llvm/ADT/ArrayRef.h>
#include <llvm/ADT/SmallVector.h>
#include <llvm/ADT/DenseMap.h>
#include <llvm/ADT/SmallPtrSet.h>
#include <llvm/ADT/iterator_range.h>
#include <llvm/IR/Dominators.h>
#include <llvm/Analysis/LoopInfo.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#include <llvm/IR/LegacyPassManager.h>
#include <ostream>
#include <fstream>
#include <sstream>
#include <iostream>
#include <map>
#include <set>
#include <stdlib.h> // for exit() in rmc_error
#undef NDEBUG
#include <assert.h>
// Which data dep hiding strategy to use?
static const bool kUseTransitiveHiding = true;
using namespace llvm;
cl::opt<bool> DebugSpew("rmc-debug-spew",
cl::desc("Enable RMC debug spew"));
static void rmc_error() {
exit(1);
}
///////
// Printing functions I wish didn't have to be here
namespace llvm {
raw_ostream& operator<<(raw_ostream& os, const RMCEdgeType& t) {
switch (t) {
case VisibilityEdge:
os << "v";
break;
case ExecutionEdge:
os << "x";
break;
case PushEdge:
os << "p";
break;
case NoEdge:
os << "no";
break;
default:
os << "?";
break;
}
return os;
}
// ew
#define SPEW_CASE(x) case x: os << #x; break
raw_ostream& operator<<(raw_ostream& os, const ActionType& t) {
switch (t) {
SPEW_CASE(ActionPrePost);
SPEW_CASE(ActionNop);
SPEW_CASE(ActionComplex);
SPEW_CASE(ActionSimpleRead);
SPEW_CASE(ActionSimpleWrites);
SPEW_CASE(ActionSimpleRMW);
SPEW_CASE(ActionGive);
SPEW_CASE(ActionTake);
}
return os;
}
raw_ostream& operator<<(raw_ostream& os, const RMCEdge& e) {
e.print(os);
return os;
}
}
// Compute the transitive closure of the action graph
template <typename Range, typename F>
void transitiveClosure(Range &actions,
RMCEdgeType type,
F merge) {
// Use Warshall's algorithm to compute the transitive closure. More
// or less. I can probably be a little more clever since I have
// adjacency lists, right? But n is small and this is really easy.
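// For example (illustrative only): if there is an i -> k edge bound
// at block B1 and a k -> j edge bound at block B2, the closure adds
// an i -> j edge bound at merge(B1, B2) (with the merge used below,
// their nearest common dominator).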
for (auto & k : actions) {
for (auto & i : actions) {
for (auto & j : actions) {
// OK, now we need to transitively join all of the ik and kj
// edges by merging the bind sites of each combination.
// (Although probably there is only one of each.)
// If there *aren't* both ik and kj edges, we skip the inner
// loop to avoid empty entries being automagically created.
if (!(i.transEdges[type].count(&k) &&
k.transEdges[type].count(&j))) continue;
for (BasicBlock *bind_ik : i.transEdges[type][&k]) {
for (BasicBlock *bind_kj : k.transEdges[type][&j]) {
BasicBlock *bind_ij = merge(bind_ik, bind_kj);
i.transEdges[type][&j].insert(bind_ij);
}
}
}
}
}
}
///////////////////////////////////////////////////////////////////////////
// It is sort of bogus to make this global.
RMCTarget target;
bool isARM(RMCTarget target) {
return target == TargetARM || target == TargetARMv8;
}
// Generate a unique str that we add to comments in our inline
// assembly to keep llvm from getting clever and merging them.
// This is awful.
std::string uniqueStr() {
// This isn't threadsafe but whatever
static int i = 0;
std::ostringstream buffer;
buffer << i++;
return buffer.str();
}
// We use this wrapper to make sure that llvm won't try to merge the
// inline assembly operations.
InlineAsm *makeAsm(FunctionType *Ty,
const char *AsmString, StringRef Constraints,
bool hasSideEffects) {
return InlineAsm::get(Ty, AsmString + (" #" + uniqueStr()),
Constraints, hasSideEffects);
}
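// (Illustrative: successive calls with AsmString "# barrier" produce
// "# barrier #0", "# barrier #1", ..., so two otherwise identical
// barriers never compare equal and never get merged.)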
// Some llvm nonsense. I should probably find a way to clean this up.
// do we put ~{dirflag},~{fpsr},~{flags} for the x86 ones? don't think so.
Instruction *makeBarrier(Instruction *to_precede) {
LLVMContext &C = to_precede->getContext();
FunctionType *f_ty = FunctionType::get(FunctionType::getVoidTy(C), false);
InlineAsm *a = makeAsm(f_ty, "# barrier", "~{memory}", true);
return CallInst::Create(a, None, "", to_precede);
}
Instruction *makeSync(Instruction *to_precede) {
LLVMContext &C = to_precede->getContext();
FunctionType *f_ty = FunctionType::get(FunctionType::getVoidTy(C), false);
InlineAsm *a = nullptr;
if (isARM(target)) {
a = makeAsm(f_ty, "dmb ish // sync", "~{memory}", true);
} else if (target == TargetPOWER) {
a = makeAsm(f_ty, "sync # sync", "~{memory}", true);
} else if (target == TargetX86) {
a = makeAsm(f_ty, "mfence # sync", "~{memory}", true);
}
return CallInst::Create(a, None, "", to_precede);
}
Instruction *makeLwsync(Instruction *to_precede) {
LLVMContext &C = to_precede->getContext();
FunctionType *f_ty = FunctionType::get(FunctionType::getVoidTy(C), false);
InlineAsm *a = nullptr;
if (target == TargetARMv8) {
// Because ARM strengthened their memory model to be "Other
// multi-copy atomic", we can fake an lwsync (or at least the
// properties of lwsync we require) by doing a dmb ld; dmb st!
// This actually performs well too!
a = makeAsm(f_ty, "dmb ishld; dmb ishst // lwsync", "~{memory}", true);
} else if (target == TargetARM) {
a = makeAsm(f_ty, "dmb ish // lwsync", "~{memory}", true);
} else if (target == TargetPOWER) {
a = makeAsm(f_ty, "lwsync # lwsync", "~{memory}", true);
} else if (target == TargetX86) {
a = makeAsm(f_ty, "# lwsync", "~{memory}", true);
}
return CallInst::Create(a, None, "", to_precede);
}
Instruction *makeDmbSt(Instruction *to_precede) {
LLVMContext &C = to_precede->getContext();
FunctionType *f_ty = FunctionType::get(FunctionType::getVoidTy(C), false);
InlineAsm *a = nullptr;
if (isARM(target)) {
a = makeAsm(f_ty, "dmb ishst // dmb st", "~{memory}", true);
} else if (target == TargetPOWER) {
a = makeAsm(f_ty, "lwsync # dmb st", "~{memory}", true);
} else if (target == TargetX86) {
a = makeAsm(f_ty, "# dmb st", "~{memory}", true);
}
return CallInst::Create(a, None, "", to_precede);
}
Instruction *makeDmbLd(Instruction *to_precede) {
LLVMContext &C = to_precede->getContext();
FunctionType *f_ty = FunctionType::get(FunctionType::getVoidTy(C), false);
InlineAsm *a = nullptr;
if (target == TargetARMv8) {
a = makeAsm(f_ty, "dmb ishld // dmb ld", "~{memory}", true);
} else if (target == TargetARM) {
a = makeAsm(f_ty, "dmb ish // dmb ld", "~{memory}", true);
} else if (target == TargetPOWER) {
a = makeAsm(f_ty, "lwsync # dmb ld", "~{memory}", true);
} else if (target == TargetX86) {
a = makeAsm(f_ty, "# dmb ld", "~{memory}", true);
}
return CallInst::Create(a, None, "", to_precede);
}
Instruction *makeIsync(Instruction *to_precede) {
LLVMContext &C = to_precede->getContext();
FunctionType *f_ty =
FunctionType::get(FunctionType::getVoidTy(C), false);
InlineAsm *a = nullptr;
if (isARM(target)) {
a = makeAsm(f_ty, "isb // isync", "~{memory}", true);
} else if (target == TargetPOWER) {
a = makeAsm(f_ty, "isync # isync", "~{memory}", true);
} else if (target == TargetX86) {
a = makeAsm(f_ty, "# isync", "~{memory}", true);
}
return CallInst::Create(a, None, "", to_precede);
}
Instruction *makeCtrl(Value *v, Instruction *to_precede) {
LLVMContext &C = v->getContext();
FunctionType *f_ty =
FunctionType::get(FunctionType::getVoidTy(C), v->getType(), false);
InlineAsm *a = nullptr;
if (isARM(target)) {
a = makeAsm(f_ty, "cmp $0, $0;beq 1f;1: // ctrl", "r,~{memory},~{cc}", true);
} else if (target == TargetPOWER) {
// Use cr7 so as to not conflict with "." instructions that can only
// write to cr0.
a = makeAsm(f_ty, "cmpw 7, $0, $0;bne- 7, 1f;1: # ctrl", "r,~{memory},~{cr7}", true);
} else if (target == TargetX86) {
a = makeAsm(f_ty, "# ctrl", "r,~{memory}", true);
}
return CallInst::Create(a, v, "", to_precede);
}
Instruction *makeCtrlIsync(Value *v, Instruction *to_precede) {
Instruction *i = makeIsync(to_precede);
makeCtrl(v, i);
return i;
}
Instruction *makeCopy(Value *v, Instruction *to_precede) {
Value *getRealValue(Value *v);
FunctionType *f_ty = FunctionType::get(v->getType(), v->getType(), false);
InlineAsm *a = makeAsm(f_ty, "# bs_copy", "=r,0", false); /* false?? */
return CallInst::Create(a, v,
getRealValue(v)->getName() + ".__rmc_bs_copy",
to_precede);
}
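// (Sketch of what the copy does: the asm body is just an assembler
// comment, so no instruction is emitted; the "=r,0" constraint ties
// the output register to the input, so the value passes through
// unchanged but llvm can no longer see where it came from.)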
// We also need to add a thing for fake data deps, which is more annoying.
///////////////////////////////////////////////////////////////////////////
//// Some annoying LLVM version specific stuff
#if LLVM_VERSION_MAJOR == 12
LoopInfo &getLoopInfo(const Pass &pass) {
return pass.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
}
BasicBlock *RealizeRMC::splitBlock(BasicBlock *Old, Instruction *SplitPt) {
return llvm::SplitBlock(Old, SplitPt, &domTree_, &loopInfo_);
}
#else
#error Unsupported LLVM version
#endif
bool keepValueNames(Function &func) {
LLVMContext &ctx = func.getContext();
bool discard = ctx.shouldDiscardValueNames();
ctx.setDiscardValueNames(false);
return discard;
}
void restoreValueNames(Function &func, bool discard) {
LLVMContext &ctx = func.getContext();
ctx.setDiscardValueNames(discard);
}
///////////////////////////////////////////////////////////////////////////
//// Code to pull random crap out of LLVM functions
namespace llvm {
StringRef getStringArg(Value *v) {
const Value *g = cast<Constant>(v)->stripPointerCasts();
const Constant *ptr = cast<GlobalVariable>(g)->getInitializer();
StringRef str = cast<ConstantDataSequential>(ptr)->getAsString();
// Drop the '\0' at the end
return str.drop_back();
}
Instruction *getNextInstr(Instruction *i) {
BasicBlock::iterator I(*i);
return ++I == i->getParent()->end() ? nullptr : &*I;
}
Instruction *getNextInsertionPt(Instruction *i) {
BasicBlock::iterator I(*i);
// Sometimes we need to insert something immediately after an invoke
// instruction. In that case we insert it into the normal
// destination block. Since we split critical edges, that block
// can't have any other predecessors.
if (InvokeInst *invoke = dyn_cast<InvokeInst>(i)) {
I = invoke->getNormalDest()->getFirstInsertionPt();
} else {
assert(!i->isTerminator() && "Can't insert after non-invoke terminators!");
++I;
}
while (isa<LandingPadInst>(I) || isa<PHINode>(I)) ++I;
return &*I;
}
Instruction *getPrevInstr(Instruction *i) {
BasicBlock::iterator I(*i);
return I == i->getParent()->begin() ? nullptr : &*--I;
}
// Sigh. LLVM 3.7 has a method inside BasicBlock for this, but
// earlier ones don't.
BasicBlock *getSingleSuccessor(BasicBlock *bb) {
Instruction *term = bb->getTerminator();
return term->getNumSuccessors() == 1 ? term->getSuccessor(0) : nullptr;
}
}
// Code to detect our inline asm things
bool isInstrInlineAsm(Instruction *i, const char *string) {
if (CallInst *call = dyn_cast_or_null<CallInst>(i)) {
if (InlineAsm *iasm = dyn_cast<InlineAsm>(call->getCalledOperand())) {
if (iasm->getAsmString().find(string) != std::string::npos) {
return true;
}
}
}
return false;
}
bool isInstrIsync(Instruction *i) { return isInstrInlineAsm(i, " isync #"); }
bool isInstrBarrier(Instruction *i) { return isInstrInlineAsm(i, " barrier #");}
void deleteRegisterCall(Instruction *i) {
// Delete a bogus registration call. There might be uses if we didn't mem2reg.
BasicBlock::iterator ii(i);
ReplaceInstWithValue(i->getParent()->getInstList(),
ii, ConstantInt::get(i->getType(), 0));
}
///////////////////////////////////////////////////////////////////////////
//// Actual code for the pass
////////////// RMC analysis routines
Action *RealizeRMC::makePrePostAction(BasicBlock *bb) {
if (bb2action_[bb]) {
assert(bb2action_[bb]->type == ActionPrePost);
return bb2action_[bb];
}
actions_.emplace_back(bb, bb);
Action *a = &actions_.back();
bb2action_[bb] = a;
a->type = ActionPrePost;
return a;
}
Action *RealizeRMC::getPreAction(Action *a) {
return makePrePostAction(a->bb->getSinglePredecessor());
}
Action *RealizeRMC::getPostAction(Action *a) {
return makePrePostAction(getSingleSuccessor(a->outBlock));
}
void registerEdge(std::vector<RMCEdge> &edges,
RMCEdgeType edgeType,
BasicBlock *bindSite,
Action *src, Action *dst) {
// Insert into the graph and the list of edges
src->edges[edgeType][dst].insert(bindSite);
edges.push_back({edgeType, src, dst, bindSite});
}
TinyPtrVector<Action *> RealizeRMC::collectEdges(StringRef name) {
TinyPtrVector<Action *> matches;
if (name == "pre" || name == "post") return matches;
// Since multiple blocks can have the same tag, we search for
// them by name.
// We could make this more efficient by building maps but I don't think
// it is going to matter.
for (auto & a : actions_) {
if (a.name == name) {
matches.push_back(&a);
}
}
if (matches.size() == 0) {
errs() << "Error: use of nonexistent label '" << name
<< "' in function '" << func_.getName() << "'\n";
rmc_error();
}
return matches;
}
void RealizeRMC::processEdge(CallInst *call) {
// Pull out what the operands have to be.
// We just assert if something is wrong, which is not great UX.
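// Judging from the operand accesses below, the registration call is
// expected to look like (sketch):
//   __rmc_edge_register(<edge type>, "src label", "dst label", <bind here?>)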
uint64_t val = cast<ConstantInt>(call->getOperand(0))
->getValue().getLimitedValue();
RMCEdgeType edgeType = (RMCEdgeType)val; // a bit dubious
StringRef srcName = getStringArg(call->getOperand(1));
StringRef dstName = getStringArg(call->getOperand(2));
uint64_t bindHere = cast<ConstantInt>(call->getOperand(3))
->getValue().getLimitedValue();
auto srcs = collectEdges(srcName);
auto dsts = collectEdges(dstName);
// If bindHere was set, then the binding site is this basic block,
// otherwise it is nullptr to represent outside the function.
BasicBlock *bindSite = bindHere ? call->getParent() : nullptr;
for (auto src : srcs) {
for (auto dst : dsts) {
registerEdge(edges_, edgeType, bindSite, src, dst);
}
}
// Handle pre and post edges now
if (srcName == "pre") {
for (auto dst : dsts) {
Action *src = getPreAction(dst);
registerEdge(edges_, edgeType, bindSite, src, dst);
}
}
if (dstName == "post") {
for (auto src : srcs) {
Action *dst = getPostAction(src);
registerEdge(edges_, edgeType, bindSite, src, dst);
}
}
}
bool RealizeRMC::processPush(CallInst *call) {
Action *a = bb2action_[call->getParent()]; // This is dubious.
// Ignore pushes not in actions. Needed to deal with having wrapper
// push functions in Rust/C++. Indicate that we should leave the
// __rmc_push call in place so that if this function gets RMC'd and
// then inlined into another function before it has been RMC'd, it
// properly sees the __rmc_push. Any function that wraps an
// __rmc_push without it being in an action must be always_inline.
if (a) {
// We do explicit pushes by generating a push edge from its
// pre-action to its post-action. Originally we hoped to generate
// push edges based on all "a -vo-> push -xo-> b" triples, but we
// can't: pre and post edges mean we can't actually find them all.
registerEdge(edges_, PushEdge, nullptr, getPreAction(a), getPostAction(a));
return true;
} else {
return false;
}
}
void RealizeRMC::findEdges() {
for (inst_iterator is = inst_begin(func_), ie = inst_end(func_); is != ie;) {
// Grab the instruction and advance the iterator at the start, since
// we might delete the instruction.
Instruction *i = &*is++;
CallInst *call = dyn_cast<CallInst>(i);
if (!call) continue;
Function *target = call->getCalledFunction();
// We look for calls to the bogus functions __rmc_edge_register
// and __rmc_push, pull out the information about them, and delete
// the calls.
if (!target) continue;
if (target->getName() == "__rmc_edge_register") {
processEdge(call);
} else if (target->getName() == "__rmc_push") {
if (!processPush(call)) continue;
} else {
continue;
}
deleteRegisterCall(i);
}
}
// XXX: document this scheme more?
// And think a bit more about whether the disconnect between the
// action location and where things are actually happening can cause
// trouble.
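// (Sketch, from the operand accesses below: a transfer call has the
// shape __rmc_transfer_<...>(value, is_take).)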
void handleTransfer(Action &info, CallInst *call) {
uint64_t is_take = cast<ConstantInt>(call->getOperand(1))
->getValue().getLimitedValue();
Value *value = call->getOperand(0);
if (is_take) {
// The dependency tracking code wants everything to be an
// instruction. This doesn't obtain when we are trying to trace a
// dependency from a parameter, so in that case we make a dummy
// copy and replace all uses with it.
if (!isa<Instruction>(value)) {
// XXX: won't work if there are uses /before/ the TAKE, which
// there probably shouldn't be, so...
Instruction *newval = makeCopy(value, call);
value->replaceAllUsesWith(newval);
newval->setOperand(0, value);
value = newval;
}
info.type = ActionTake;
info.outgoingDep = value;
} else {
// Get the one Use of the output that we need to make sure to push
// the value to.
// XXX: might be fucked up if we didn't run mem2reg
assert(call->hasOneUse());
info.type = ActionGive;
// XXX: This name is a little misleading since the dep isn't actually
// in this action...
info.incomingDep = &*call->use_begin();
}
// Get rid of the call
BasicBlock::iterator ii(call);
ReplaceInstWithValue(call->getParent()->getInstList(),
ii, value);
}
template <typename T>
bool actionIsSC(T *i) {
return i->getOrdering() == AtomicOrdering::SequentiallyConsistent &&
i->getSyncScopeID() == SyncScope::System;
}
bool actionIsSC(AtomicCmpXchgInst *i) {
return i->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent &&
i->getFailureOrdering() == AtomicOrdering::SequentiallyConsistent &&
i->getSyncScopeID() == SyncScope::System;
}
void analyzeAction(Action &info) {
// Don't analyze the dummy pre/post actions!
if (info.type == ActionPrePost) return;
// We search through info.outBlock instead of info.bb because if the
// action is a multiblock LTAKE, the __rmc_transfer_ call will be in
// the final block. If it doesn't end up being a transfer, then we
// call any action where the parts don't match "complex".
bool allSC = true;
Instruction *soleLoad = nullptr;
for (auto & i : *info.outBlock) {
if (auto *load = dyn_cast<LoadInst>(&i)) {
++info.loads;
soleLoad = &i;
allSC &= actionIsSC(load);
} else if (auto *store = dyn_cast<StoreInst>(&i)) {
++info.stores;
allSC &= actionIsSC(store);
} else if (auto *call = dyn_cast<CallInst>(&i)) {
// If this is a transfer, mark it as such
if (Function *target = call->getCalledFunction()) {
// This is *really* silly. We declare appropriate __rmc_transfer
// functions as needed at use sites, but if this happens
// inside of namespaces, the name gets mangled. So we look
// through the whole string, not just the prefix. Sigh.
if (target->getName().find("__rmc_transfer_") != StringRef::npos) {
handleTransfer(info, call);
return;
}
}
// Don't count functions that don't access memory
// (for example, critically, llvm.dbg.* intrinsics)
if (!(call->getCalledFunction() &&
call->getCalledFunction()->doesNotAccessMemory())) {
++info.calls;
}
allSC = false;
// What else counts as a call? I'm counting fences I guess.
} else if (isa<FenceInst>(i)) {
++info.calls;
allSC = false;
} else if (auto *rmw = dyn_cast<AtomicRMWInst>(&i)) {
++info.RMWs;
soleLoad = &i;
allSC &= actionIsSC(rmw);
} else if (auto *cas = dyn_cast<AtomicCmpXchgInst>(&i)) {
++info.RMWs;
soleLoad = &i;
allSC &= actionIsSC(cas);
}
}
// Now that we know it isn't a transfer,
// if the action has multiple basic blocks, call it Complex
if (info.outBlock != info.bb) {
info.type = ActionComplex;
return;
}
// Now that we're past all the return cases, we can safely call it
// allSC if it is.
info.allSC = allSC;
// Try to characterize what this action does.
// These categories might not be the best.
if (info.loads == 1 && info.stores+info.calls+info.RMWs == 0) {
info.outgoingDep = soleLoad;
info.incomingDep = &soleLoad->getOperandUse(0);
info.type = ActionSimpleRead;
} else if (info.stores >= 1 && info.loads+info.calls+info.RMWs == 0) {
info.type = ActionSimpleWrites;
} else if (info.RMWs == 1 && info.stores+info.loads+info.calls == 0) {
info.outgoingDep = soleLoad;
info.type = ActionSimpleRMW;
} else if (info.RMWs+info.stores+info.loads+info.calls == 0) {
info.type = ActionNop;
} else {
info.type = ActionComplex;
}
}
void RealizeRMC::findActions() {
// First, collect all calls to register actions
SmallPtrSet<CallInst *, 8> registrations;
for (inst_iterator is = inst_begin(func_), ie = inst_end(func_); is != ie;
is++) {
if (CallInst *call = dyn_cast<CallInst>(&*is)) {
if (Function *target = call->getCalledFunction()) {
if (target->getName() == "__rmc_action_register") {
registrations.insert(call);
}
}
}
}
// Now, make the vector of actions and a mapping from BasicBlock *.
// We, somewhat tastelessly, reserve space for 3x the number of
// actions we actually have so that we have space for new pre/post
// actions that we might need to dummy up.
// We need to have all the space reserved in advance so that our
// pointers don't get invalidated when a resize happens.
actions_.reserve(3 * registrations.size());
numNormalActions_ = registrations.size();
for (auto reg : registrations) {
// FIXME: this scheme only works if we've run mem2reg. Otherwise we
// need to chase through the alloca...
assert(reg->hasOneUse());
Instruction *close = cast<Instruction>(*reg->user_begin());
StringRef name = getStringArg(reg->getOperand(0));
// Now that we have found the start and the end of the action,
// split the action into its own (group of) basic blocks so that
// we can work with it more easily.
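// Schematically (names as assigned below), the splits produce:
//   [original] -> _rmc_start_N (empty) -> _rmc_N (main) -> ...
//     -> out -> _rmc_end_N
// where "out" is either an existing unique predecessor of the end
// block or a freshly shaved-off _rmc_out_N block.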
// Split once to make a start block
BasicBlock *start = splitBlock(reg->getParent(), reg);
start->setName("_rmc_start_" + name);
// Split it again so the start block is empty and we have our main block
BasicBlock *main = splitBlock(start, reg);
main->setName("_rmc_" + name);
// Now split the end to get our tail block
BasicBlock *end = splitBlock(close->getParent(), close);
// Every action needs to have a well-defined single "out block"
// that is the last block of the action that doesn't contain any
// code from after the action (like the end block does). If there
// isn't such a block, we shave off an empty one from the end
// block. Note that we can use an existing out block even in
// multi-block actions as long as there is a unique one.
BasicBlock *out = end->getSinglePredecessor();
if (!out) {
out = end;
end = splitBlock(close->getParent(), close);
out->setName("_rmc_out_" + name);
}
end->setName("_rmc_end_" + name);
// There is a subtlety here: further processing of actions can cause
// the out block to get split, leaving our pointer to it wrong
// (since if it gets split we want the *later* block).
// We work around this in a hacky way by storing the *end* block
// instead and then patching them up once we have processed all
// the actions.
actions_.emplace_back(main, end, name.data());
bb2action_[main] = &actions_.back();
deleteRegisterCall(reg);
deleteRegisterCall(close);
}
// Now we need to go fix up our out blocks.
for (auto & action : actions_) {
BasicBlock *out = action.outBlock->getSinglePredecessor();
assert(out);
action.outBlock = out;
}
}
void dumpGraph(std::vector<Action> &actions) {
// Debug spew!!
for (auto & src : actions) {
errs() << "Action: " << src.bb->getName().substr(5) << ": " <<
src.type << "\n";
}
for (auto & src : actions) {
for (auto edgeType : kEdgeTypes) {
for (auto entry : src.transEdges[edgeType]) {
auto *dst = entry.first;
for (auto *bindSite : entry.second) {
errs() << "Edge: " << RMCEdge{edgeType, &src, dst, bindSite} << "\n";
}
}
}
}
errs() << "\n";
}
BasicBlock *mergeBindPoints(DominatorTree &domTree,
BasicBlock *b1, BasicBlock *b2) {
if (!b1 || !b2) return nullptr;
BasicBlock *dom = domTree.findNearestCommonDominator(b1, b2);
assert(dom == b1 || dom == b2); // XXX testing
return dom;
}
// argh, MapVector doesn't have .insert() for ranges
template <typename A, typename B>
void insert(A &to, B &from) { for (auto & x : from) to.insert(x); }
void buildActionGraph(std::vector<Action> &actions, int numReal,
DominatorTree &domTree) {
// Copy the initial edge specifications into the transitive graph
for (auto & a : actions) {
for (auto edgeType : kEdgeTypes) {
insert(a.transEdges[edgeType], a.edges[edgeType]);
}
// Visibility implies execution.
insert(a.transEdges[ExecutionEdge], a.edges[VisibilityEdge]);
// Push implies visibility and execution, but not in a way that we
// need to track explicitly. Because push edges can't be useless,
// they'll never get dropped from the graph, so it isn't important
// to push them into visibility and execution.
// (Although maybe we ought to, for consistency?)
}
auto merge = [&] (BasicBlock *b1, BasicBlock *b2) {
return mergeBindPoints(domTree, b1, b2);
};
// Now compute the closures. We previously ignored pre/post edges,
// which was wrong; was it on to /anything/, though?
//auto realActions = make_range(actions.begin(), actions.begin() + numReal);
for (auto edgeType : kEdgeTypes) {
transitiveClosure(actions, edgeType, merge);
}
}
////////////// Chicanery to handle disguising operands
// Return the copied value if the value is a bs copy, otherwise null
Value *getBSCopyValue(Value *v) {
CallInst *call = dyn_cast<CallInst>(v);
if (!call) return nullptr;
InlineAsm *iasm = dyn_cast<InlineAsm>(call->getCalledOperand());
if (!iasm) return nullptr;
// This is kind of dubious
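// (makeAsm appends " #<n>" to the "# bs_copy" template, so every
// copy's asm string contains the "# bs_copy #" substring we match.)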
return iasm->getAsmString().find("# bs_copy #") != StringRef::npos ?
call->getOperand(0) : nullptr;
}
// Look through a possible bs copy to find the real underlying value
Value *getRealValue(Value *v) {
Value *copyVal;
while ((copyVal = getBSCopyValue(v)) != nullptr)
v = copyVal;
return v;
}
// Rewrite an instruction so that an operand is a dummy copy to
// hide information from llvm's optimizer
void hideOperand(Instruction *instr, int i) {
Value *v = instr->getOperand(i);
// Only put in the copy if we haven't done it already.
if (!getBSCopyValue(v)) {
Instruction *dummyCopy = makeCopy(v, instr);
instr->setOperand(i, dummyCopy);
}
}
void hideOperands(Instruction *instr) {
for (unsigned i = 0; i < instr->getNumOperands(); i++) {
hideOperand(instr, i);
}
}
void hideUses(Instruction *instr, User *skip) {
Instruction *dummyCopy = nullptr;
for (auto is = instr->use_begin(), ie = instr->use_end(); is != ie;) {
Use &use = *is++; // Grab and advance, so we can delete
if (use.getUser() != skip && !getBSCopyValue(use.getUser())) {
if (!dummyCopy) {
dummyCopy = makeCopy(instr, getNextInsertionPt(instr));
}
//errs() << "Hiding " << instr->getName() << " as "
// << dummyCopy->getName() << " in " << *use.getUser() << "\n";
use.set(dummyCopy);
}
}
}
//
void enforceBranchOn(BasicBlock *next, ICmpInst *icmp, int idx) {
if (!icmp) return;
// In order to keep LLVM from optimizing our stuff away we
// insert dummy copies of the operands and a compiler barrier in the
// target.
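// (Sketch: for "%c = icmp eq i32 %x, %y; br i1 %c, ...", both %x and
// %y get routed through bs_copy asm, and a "# barrier" is placed at
// the top of the target block unless one is already there.)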
hideOperands(icmp);
Instruction *front = &*next->getFirstInsertionPt();
if (!isInstrBarrier(front)) makeBarrier(front);
}
void enforceAddrDeps(Use *use, std::vector<Instruction *> &trail) {
Instruction *end = cast<Instruction>(use->getUser());
//errs() << "enforcing for: " << *end << "\n";
for (auto is = trail.begin(), ie = trail.end(); is != ie; is++) {
// Hide all the uses except for the other ones in the dep chain
Instruction *next = is+1 != ie ? *(is+1) : end;
hideUses(*is, next);
}
}
// Find everything that transitively depends on some value
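// (Standard worklist search: seed with the direct uses of v, then
// keep expanding through each user's own uses until no new Use is
// found; `seen` doubles as the result set.)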
using UseSet = SmallPtrSet<Use *, 8>;
UseSet findTransitiveUses(Value *v) {
UseSet seen;
SmallVector<Use *, 8> wl{};
auto processValue = [&] (Value *v) {
for (auto is = v->use_begin(), ie = v->use_end(); is != ie; is++) {
Use *use = &*is;
wl.push_back(use);
}
};
processValue(v);
while (!wl.empty()) {
auto *use = wl.back();
wl.pop_back();
if (seen.count(use)) continue;
seen.insert(use);
processValue(use->getUser());
}
return seen;
}
// We trace through GEP, BitCast, SExt, IntToPtr, and loads.
// TODO: less heavily restrict what we use?
bool isAddrDepSafe(Value *v) {
return isa<GetElementPtrInst>(v) || isa<BitCastInst>(v) ||
isa<SExtInst>(v) || isa<IntToPtrInst>(v) ||
isa<LoadInst>(v);
}
// A different approach for hiding address deps, in which we find all
// transitive uses and hide operands to uses that could cause trouble.
// (As opposed to hiding *all* uses off the main path of a trail.)
void enforceAddrDeps(Value *src) {
auto uses = findTransitiveUses(src);
for (Use *use : uses) {
Value *v = use->getUser();
// TODO: what else can we allow? Probably a lot.
// We *might* be able to *blacklist*, even?? Though I am afraid.
// But notionally I think all we need worry about is comparisons
// and function calls (because they may be inlined and have
// comparisons).
// Also important to disallow returns and stores, which can propagate
// information to a calling function if *this* function is inlined.
// Disallowing returns is super annoying, though.
// Comparisons against null should work.
if (!isAddrDepSafe(v) && !isa<PHINode>(v) &&
!getBSCopyValue(v)) {
Instruction *instr = dyn_cast<Instruction>(v);
assert(instr);
hideOperand(instr, use->getOperandNo());
}
}
}
////////////// Program analysis that we use
// FIXME: reorganize the namespace stuff?. Or put this in the class.
namespace llvm {
// Look for control dependencies on a read.
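// For example (illustrative): given
//   %c = icmp eq i32 %v, 0
//   br i1 %c, label %a, label %b
// with load == %v, this returns true with *icmpOut = %c, *outIdx = 0.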
bool branchesOn(BasicBlock *bb, Value *load,
ICmpInst **icmpOut, int *outIdx) {
// XXX: make this platform configured; on some platforms maybe an
// atomic cmpxchg does /not/ behave like it branches on the old value
if (isa<AtomicCmpXchgInst>(load) || isa<AtomicRMWInst>(load)) {
if (icmpOut) *icmpOut = nullptr;
if (outIdx) *outIdx = 0;
return true;
}
// TODO: we should be able to follow values through phi nodes,
// since we are path dependent anyways.
BranchInst *br = dyn_cast<BranchInst>(bb->getTerminator());
if (!br || !br->isConditional()) return false;
// TODO: We only check one level of things. Check deeper?
// We pretty heavily restrict what operations we handle here.
// Some would just be wrong (like call), but really icmp is
// the main one, so. Probably we should be able to also
// pick through casts and wideness changes.
ICmpInst *icmp = dyn_cast<ICmpInst>(br->getCondition());
if (!icmp) return false;
int idx = 0;
for (auto v : icmp->operand_values()) {
if (getRealValue(v) == load) {
if (icmpOut) *icmpOut = icmp;
if (outIdx) *outIdx = idx;
return true;
}
++idx;
}
return false;
}
typedef SmallPtrSet<Value *, 4> PendingPhis;
// Look for address dependencies on a read.
template<class F>
bool addrDepsOnSearch(Value *pointer, Value *load,
F reachable,
PendingPhis &phis,
std::vector<std::vector<Instruction *> > *trails) {
Instruction *instr = dyn_cast<Instruction>(pointer);
if (!instr) return false;
unsigned init_size = trails ? trails->size() : 0;
auto extend_trails = [&] (bool ret) {
if (!trails || !ret) return ret;
for (unsigned i = init_size; i < trails->size(); i++) {
(*trails)[i].push_back(instr);
}
return true;
};
if (pointer == load || phis.count(pointer)) {
// Push an empty trail on the back to fill up
if (trails) trails->push_back({});
return extend_trails(true);
} else if (getBSCopyValue(pointer)) {
bool succ = addrDepsOnSearch(getRealValue(pointer),
load, reachable, phis, trails);