-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LAA] Only use inbounds/nusw in isNoWrap if the GEP is dereferenced. #161445
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -800,11 +800,11 @@ class AccessAnalysis { | |||||
typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList; | ||||||
|
||||||
AccessAnalysis(const Loop *TheLoop, AAResults *AA, const LoopInfo *LI, | ||||||
MemoryDepChecker::DepCandidates &DA, | ||||||
DominatorTree &DT, MemoryDepChecker::DepCandidates &DA, | ||||||
PredicatedScalarEvolution &PSE, | ||||||
SmallPtrSetImpl<MDNode *> &LoopAliasScopes) | ||||||
: TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE), | ||||||
LoopAliasScopes(LoopAliasScopes) { | ||||||
: TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DT(DT), DepCands(DA), | ||||||
PSE(PSE), LoopAliasScopes(LoopAliasScopes) { | ||||||
// We're analyzing dependences across loop iterations. | ||||||
BAA.enableCrossIterationMode(); | ||||||
} | ||||||
|
@@ -928,6 +928,9 @@ class AccessAnalysis { | |||||
/// The LoopInfo of the loop being checked. | ||||||
const LoopInfo *LI; | ||||||
|
||||||
/// The dominator tree of the function. | ||||||
DominatorTree &DT; | ||||||
|
||||||
/// Sets of potentially dependent accesses - members of one set share an | ||||||
/// underlying pointer. The set "CheckDeps" identfies which sets really need a | ||||||
/// dependence check. | ||||||
|
@@ -1009,7 +1012,8 @@ getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy, | |||||
/// information from the IR pointer value to determine no-wrap. | ||||||
static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR, | ||||||
Value *Ptr, Type *AccessTy, const Loop *L, bool Assume, | ||||||
std::optional<int64_t> Stride = std::nullopt) { | ||||||
std::optional<int64_t> Stride = std::nullopt, | ||||||
DominatorTree *DT = nullptr) { | ||||||
// FIXME: This should probably only return true for NUW. | ||||||
if (AR->getNoWrapFlags(SCEV::NoWrapMask)) | ||||||
return true; | ||||||
|
@@ -1023,8 +1027,22 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR, | |||||
// case, the GEP would be poison and any memory access dependent on it would | ||||||
// be immediate UB when executed. | ||||||
if (auto *GEP = dyn_cast_if_present<GetElementPtrInst>(Ptr); | ||||||
GEP && GEP->hasNoUnsignedSignedWrap()) | ||||||
return true; | ||||||
GEP && GEP->hasNoUnsignedSignedWrap()) { | ||||||
// For the above reasoning to apply, the pointer must be dereferenced in | ||||||
// every iteration. | ||||||
if (L->getHeader() == L->getLoopLatch() || | ||||||
any_of(GEP->users(), [L, DT](User *U) { | ||||||
if (!isa<LoadInst, StoreInst>(U)) | ||||||
return false; | ||||||
BasicBlock *UserBB = cast<Instruction>(U)->getParent(); | ||||||
if (DT && !LoopAccessInfo::blockNeedsPredication(UserBB, L, DT)) | ||||||
return true; | ||||||
return UserBB == L->getHeader() || | ||||||
(L->getExitingBlock() == L->getLoopLatch() && | ||||||
UserBB == L->getLoopLatch()); | ||||||
})) | ||||||
return true; | ||||||
} | ||||||
|
||||||
if (!Stride) | ||||||
Stride = getStrideFromAddRec(AR, L, AccessTy, Ptr, PSE); | ||||||
|
@@ -1287,7 +1305,7 @@ bool AccessAnalysis::createCheckForAccess( | |||||
} | ||||||
|
||||||
if (!isNoWrap(PSE, AR, RTCheckPtrs.size() == 1 ? Ptr : nullptr, AccessTy, | ||||||
TheLoop, Assume)) | ||||||
TheLoop, Assume, std::nullopt, &DT)) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
return false; | ||||||
} | ||||||
|
||||||
|
@@ -1602,7 +1620,7 @@ std::optional<int64_t> | |||||
llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, | ||||||
const Loop *Lp, | ||||||
const DenseMap<Value *, const SCEV *> &StridesMap, | ||||||
bool Assume, bool ShouldCheckWrap) { | ||||||
bool Assume, bool ShouldCheckWrap, DominatorTree *DT) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
Would it be good to forbid nullptr here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You cannot place a parameter without a default argument after one that has a default argument, I think. |
||||||
const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); | ||||||
if (PSE.getSE()->isLoopInvariant(PtrScev, Lp)) | ||||||
return 0; | ||||||
|
@@ -1624,7 +1642,7 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, | |||||
if (!ShouldCheckWrap || !Stride) | ||||||
return Stride; | ||||||
|
||||||
if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, Assume, Stride)) | ||||||
if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, Assume, Stride, DT)) | ||||||
return Stride; | ||||||
|
||||||
LLVM_DEBUG( | ||||||
|
@@ -2041,10 +2059,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( | |||||
BPtr->getType()->getPointerAddressSpace()) | ||||||
return MemoryDepChecker::Dependence::Unknown; | ||||||
|
||||||
std::optional<int64_t> StrideAPtr = | ||||||
getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true); | ||||||
std::optional<int64_t> StrideBPtr = | ||||||
getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true); | ||||||
std::optional<int64_t> StrideAPtr = getPtrStride( | ||||||
PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true, DT); | ||||||
std::optional<int64_t> StrideBPtr = getPtrStride( | ||||||
PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true, DT); | ||||||
|
||||||
const SCEV *Src = PSE.getSCEV(APtr); | ||||||
const SCEV *Sink = PSE.getSCEV(BPtr); | ||||||
|
@@ -2621,7 +2639,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI, | |||||
} | ||||||
|
||||||
MemoryDepChecker::DepCandidates DepCands; | ||||||
AccessAnalysis Accesses(TheLoop, AA, LI, DepCands, *PSE, LoopAliasScopes); | ||||||
AccessAnalysis Accesses(TheLoop, AA, LI, *DT, DepCands, *PSE, | ||||||
LoopAliasScopes); | ||||||
|
||||||
// Holds the analyzed pointers. We don't want to call getUnderlyingObjects | ||||||
// multiple times on the same object. If the ptr is accessed twice, once | ||||||
|
@@ -2685,7 +2704,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI, | |||||
bool IsReadOnlyPtr = false; | ||||||
Type *AccessTy = getLoadStoreType(LD); | ||||||
if (Seen.insert({Ptr, AccessTy}).second || | ||||||
!getPtrStride(*PSE, AccessTy, Ptr, TheLoop, SymbolicStrides)) { | ||||||
!getPtrStride(*PSE, AccessTy, Ptr, TheLoop, SymbolicStrides, false, | ||||||
true, DT)) { | ||||||
++NumReads; | ||||||
IsReadOnlyPtr = true; | ||||||
} | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,18 +17,33 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea | |
; RV32-LABEL: @foo4( | ||
; RV32-NEXT: entry: | ||
; RV32-NEXT: br label [[VECTOR_MEMCHECK:%.*]] | ||
; RV32: vector.scevcheck: | ||
; RV32-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 128, i32 624) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Strange check: I wonder why it didn't get constant-folded by SCEV? |
||
; RV32-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 | ||
; RV32-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 | ||
; RV32-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[MUL_RESULT]] | ||
; RV32-NEXT: [[TMP1:%.*]] = icmp ult ptr [[TMP0]], [[A]] | ||
; RV32-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[MUL_OVERFLOW]] | ||
; RV32-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 256, i32 624) | ||
; RV32-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 | ||
; RV32-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 | ||
; RV32-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[MUL_RESULT2]] | ||
; RV32-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[B]] | ||
; RV32-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW3]] | ||
; RV32-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[TMP5]] | ||
; RV32-NEXT: br i1 [[TMP6]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK1:%.*]] | ||
; RV32: vector.memcheck: | ||
; RV32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 79880 | ||
; RV32-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i32 39940 | ||
; RV32-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 159752 | ||
; RV32-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] | ||
; RV32-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]] | ||
; RV32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i32 79880 | ||
; RV32-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B]], i32 159752 | ||
; RV32-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]] | ||
; RV32-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] | ||
; RV32-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] | ||
; RV32-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[A]], [[SCEVGEP2]] | ||
; RV32-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]] | ||
; RV32-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] | ||
; RV32-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] | ||
; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] | ||
; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] | ||
; RV32: vector.ph: | ||
; RV32-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() | ||
; RV32-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 16) | ||
|
@@ -43,25 +58,26 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea | |
; RV32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0 | ||
; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer | ||
; RV32-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], <vscale x 2 x i64> [[VEC_IND]] | ||
; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 [[TMP13]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]] | ||
; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 [[TMP13]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] | ||
; RV32-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 2 x i32> [[WIDE_MASKED_GATHER]], splat (i32 100) | ||
; RV32-NEXT: [[TMP15:%.*]] = shl nuw nsw <vscale x 2 x i64> [[VEC_IND]], splat (i64 1) | ||
; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[B]], <vscale x 2 x i64> [[TMP15]] | ||
; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 [[TMP16]], <vscale x 2 x i1> [[TMP14]], i32 [[TMP10]]), !alias.scope [[META3:![0-9]+]] | ||
; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 [[TMP16]], <vscale x 2 x i1> [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]] | ||
; RV32-NEXT: [[TMP17:%.*]] = sitofp <vscale x 2 x i32> [[WIDE_MASKED_GATHER]] to <vscale x 2 x double> | ||
; RV32-NEXT: [[TMP18:%.*]] = fadd <vscale x 2 x double> [[WIDE_MASKED_GATHER6]], [[TMP17]] | ||
; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], <vscale x 2 x i64> [[VEC_IND]] | ||
; RV32-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[TMP18]], <vscale x 2 x ptr> align 8 [[TMP19]], <vscale x 2 x i1> [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] | ||
; RV32-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[TMP18]], <vscale x 2 x ptr> align 8 [[TMP19]], <vscale x 2 x i1> [[TMP14]], i32 [[TMP10]]), !alias.scope [[META3]], !noalias [[META5]] | ||
; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] | ||
; RV32-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]] | ||
; RV32-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 | ||
; RV32-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] | ||
; RV32-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] | ||
; RV32: middle.block: | ||
; RV32-NEXT: br label [[FOR_END:%.*]] | ||
; RV32: scalar.ph: | ||
; RV32-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_MEMCHECK1]] ] | ||
; RV32-NEXT: br label [[FOR_BODY:%.*]] | ||
; RV32: for.body: | ||
; RV32-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] | ||
; RV32-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] | ||
; RV32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] | ||
; RV32-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 | ||
; RV32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP21]], 100 | ||
|
@@ -78,7 +94,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea | |
; RV32: for.inc: | ||
; RV32-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16 | ||
; RV32-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000 | ||
; RV32-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]] | ||
; RV32-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP10:![0-9]+]] | ||
; RV32: for.end: | ||
; RV32-NEXT: ret void | ||
; | ||
|
@@ -146,7 +162,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea | |
; RV64: for.inc: | ||
; RV64-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16 | ||
; RV64-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000 | ||
; RV64-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]] | ||
; RV64-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP11:![0-9]+]] | ||
; RV64: for.end: | ||
; RV64-NEXT: ret void | ||
; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if the GEP is used by another GEP, which is then used by a load/store, as opposed to an icmp and then never used by a load/store? Is it safe to bail out like this?