Skip to content

Commit 2ae996c

Browse files
authored
[LAA] Support assumptions in evaluatePtrAddRecAtMaxBTCWillNotWrap (#147047)
This patch extends the logic added in #128061 to support dereferenceability information from assumptions as well. Unfortunately, both the assumption cache and the dominator tree need to be threaded through multiple layers to make them available where needed. PR: #147047
1 parent 6da1a09 commit 2ae996c

File tree

8 files changed

+111
-45
lines changed

8 files changed

+111
-45
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -180,10 +180,12 @@ class MemoryDepChecker {
180180
const SmallVectorImpl<Instruction *> &Instrs) const;
181181
};
182182

183-
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L,
183+
MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC,
184+
DominatorTree *DT, const Loop *L,
184185
const DenseMap<Value *, const SCEV *> &SymbolicStrides,
185186
unsigned MaxTargetVectorWidthInBits)
186-
: PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides),
187+
: PSE(PSE), AC(AC), DT(DT), InnermostLoop(L),
188+
SymbolicStrides(SymbolicStrides),
187189
MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}
188190

189191
/// Register the location (instructions are given increasing numbers)
@@ -288,6 +290,15 @@ class MemoryDepChecker {
288290
return PointerBounds;
289291
}
290292

293+
DominatorTree *getDT() const {
294+
assert(DT && "requested DT, but it is not available");
295+
return DT;
296+
}
297+
AssumptionCache *getAC() const {
298+
assert(AC && "requested AC, but it is not available");
299+
return AC;
300+
}
301+
291302
private:
292303
/// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
293304
/// applies dynamic knowledge to simplify SCEV expressions and convert them
@@ -296,6 +307,10 @@ class MemoryDepChecker {
296307
/// example we might assume a unit stride for a pointer in order to prove
297308
/// that a memory access is strided and doesn't wrap.
298309
PredicatedScalarEvolution &PSE;
310+
311+
AssumptionCache *AC;
312+
DominatorTree *DT;
313+
299314
const Loop *InnermostLoop;
300315

301316
/// Reference to map of pointer values to
@@ -670,7 +685,7 @@ class LoopAccessInfo {
670685
LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE,
671686
const TargetTransformInfo *TTI,
672687
const TargetLibraryInfo *TLI, AAResults *AA,
673-
DominatorTree *DT, LoopInfo *LI,
688+
DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC,
674689
bool AllowPartial = false);
675690

676691
/// Return true if we can analyze the memory accesses in the loop and there are
@@ -922,7 +937,8 @@ LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
922937
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
923938
const SCEV *MaxBTC, ScalarEvolution *SE,
924939
DenseMap<std::pair<const SCEV *, Type *>,
925-
std::pair<const SCEV *, const SCEV *>> *PointerBounds);
940+
std::pair<const SCEV *, const SCEV *>> *PointerBounds,
941+
DominatorTree *DT, AssumptionCache *AC);
926942

927943
class LoopAccessInfoManager {
928944
/// The cache.
@@ -935,12 +951,13 @@ class LoopAccessInfoManager {
935951
LoopInfo &LI;
936952
TargetTransformInfo *TTI;
937953
const TargetLibraryInfo *TLI = nullptr;
954+
AssumptionCache *AC;
938955

939956
public:
940957
LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT,
941958
LoopInfo &LI, TargetTransformInfo *TTI,
942-
const TargetLibraryInfo *TLI)
943-
: SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {}
959+
const TargetLibraryInfo *TLI, AssumptionCache *AC)
960+
: SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI), AC(AC) {}
944961

945962
LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false);
946963

llvm/lib/Analysis/Loads.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(
342342
: SE.getConstantMaxBackedgeTakenCount(L);
343343
}
344344
const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
345-
L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr);
345+
L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr, &DT, AC);
346346
if (isa<SCEVCouldNotCompute>(AccessStart) ||
347347
isa<SCEVCouldNotCompute>(AccessEnd))
348348
return false;

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 49 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include "llvm/ADT/SmallVector.h"
2424
#include "llvm/Analysis/AliasAnalysis.h"
2525
#include "llvm/Analysis/AliasSetTracker.h"
26+
#include "llvm/Analysis/AssumeBundleQueries.h"
27+
#include "llvm/Analysis/AssumptionCache.h"
2628
#include "llvm/Analysis/LoopAnalysisManager.h"
2729
#include "llvm/Analysis/LoopInfo.h"
2830
#include "llvm/Analysis/LoopIterator.h"
@@ -208,28 +210,46 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
208210

209211
/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
210212
/// \p MaxBTC is guaranteed inbounds of the accessed object.
211-
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
212-
const SCEV *MaxBTC,
213-
const SCEV *EltSize,
214-
ScalarEvolution &SE,
215-
const DataLayout &DL) {
213+
static bool
214+
evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
215+
const SCEV *MaxBTC, const SCEV *EltSize,
216+
ScalarEvolution &SE, const DataLayout &DL,
217+
DominatorTree *DT, AssumptionCache *AC) {
216218
auto *PointerBase = SE.getPointerBase(AR->getStart());
217219
auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
218220
if (!StartPtr)
219221
return false;
222+
const Loop *L = AR->getLoop();
220223
bool CheckForNonNull, CheckForFreed;
221-
uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes(
224+
Value *StartPtrV = StartPtr->getValue();
225+
uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
222226
DL, CheckForNonNull, CheckForFreed);
223227

224-
if (CheckForNonNull || CheckForFreed)
228+
if (DerefBytes && (CheckForNonNull || CheckForFreed))
225229
return false;
226230

227231
const SCEV *Step = AR->getStepRecurrence(SE);
232+
Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
233+
const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
234+
235+
// Check if we have a suitable dereferenceable assumption we can use.
236+
if (!StartPtrV->canBeFreed()) {
237+
RetainedKnowledge DerefRK = getKnowledgeValidInContext(
238+
StartPtrV, {Attribute::Dereferenceable}, *AC,
239+
L->getLoopPredecessor()->getTerminator(), DT);
240+
if (DerefRK) {
241+
DerefBytesSCEV = SE.getUMaxExpr(
242+
DerefBytesSCEV, SE.getConstant(WiderTy, DerefRK.ArgValue));
243+
}
244+
}
245+
246+
if (DerefBytesSCEV->isZero())
247+
return false;
248+
228249
bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
229250
if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
230251
return false;
231252

232-
Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
233253
Step = SE.getNoopOrSignExtend(Step, WiderTy);
234254
MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
235255

@@ -256,24 +276,23 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
256276
const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
257277
if (!EndBytes)
258278
return false;
259-
return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes,
260-
SE.getConstant(WiderTy, DerefBytes));
279+
return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
261280
}
262281

263282
// For negative steps check if
264283
// * StartOffset >= (MaxBTC * Step + EltSize)
265284
// * StartOffset <= DerefBytes.
266285
assert(SE.isKnownNegative(Step) && "must be known negative");
267286
return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
268-
SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset,
269-
SE.getConstant(WiderTy, DerefBytes));
287+
SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
270288
}
271289

272290
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
273291
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
274292
const SCEV *MaxBTC, ScalarEvolution *SE,
275293
DenseMap<std::pair<const SCEV *, Type *>,
276-
std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
294+
std::pair<const SCEV *, const SCEV *>> *PointerBounds,
295+
DominatorTree *DT, AssumptionCache *AC) {
277296
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
278297
if (PointerBounds) {
279298
auto [Iter, Ins] = PointerBounds->insert(
@@ -308,8 +327,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
308327
// sets ScEnd to the maximum unsigned value for the type. Note that LAA
309328
// separately checks that accesses cannot wrap, so unsigned max
310329
// represents an upper bound.
311-
if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE,
312-
DL)) {
330+
if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
331+
DT, AC)) {
313332
ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
314333
} else {
315334
ScEnd = SE->getAddExpr(
@@ -356,9 +375,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
356375
bool NeedsFreeze) {
357376
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
358377
const SCEV *BTC = PSE.getBackedgeTakenCount();
359-
const auto &[ScStart, ScEnd] =
360-
getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
361-
PSE.getSE(), &DC.getPointerBounds());
378+
const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
379+
Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
380+
&DC.getPointerBounds(), DC.getDT(), DC.getAC());
362381
assert(!isa<SCEVCouldNotCompute>(ScStart) &&
363382
!isa<SCEVCouldNotCompute>(ScEnd) &&
364383
"must be able to compute both start and end expressions");
@@ -1961,13 +1980,15 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
19611980
const SCEV *BTC = PSE.getBackedgeTakenCount();
19621981
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
19631982
ScalarEvolution &SE = *PSE.getSE();
1964-
const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
1965-
InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
1983+
const auto &[SrcStart_, SrcEnd_] =
1984+
getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
1985+
&SE, &PointerBounds, DT, AC);
19661986
if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
19671987
return false;
19681988

1969-
const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
1970-
InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
1989+
const auto &[SinkStart_, SinkEnd_] =
1990+
getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
1991+
&SE, &PointerBounds, DT, AC);
19711992
if (isa<SCEVCouldNotCompute>(SinkStart_) ||
19721993
isa<SCEVCouldNotCompute>(SinkEnd_))
19731994
return false;
@@ -3002,7 +3023,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
30023023
const TargetTransformInfo *TTI,
30033024
const TargetLibraryInfo *TLI, AAResults *AA,
30043025
DominatorTree *DT, LoopInfo *LI,
3005-
bool AllowPartial)
3026+
AssumptionCache *AC, bool AllowPartial)
30063027
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
30073028
PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
30083029
unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
@@ -3012,8 +3033,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
30123033
MaxTargetVectorWidthInBits =
30133034
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;
30143035

3015-
DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
3016-
MaxTargetVectorWidthInBits);
3036+
DepChecker = std::make_unique<MemoryDepChecker>(
3037+
*PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
30173038
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
30183039
if (canAnalyzeLoop())
30193040
CanVecMem = analyzeLoop(AA, LI, TLI, DT);
@@ -3082,7 +3103,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
30823103
// or if it was created with a different value of AllowPartial.
30833104
if (Inserted || It->second->hasAllowPartial() != AllowPartial)
30843105
It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
3085-
&LI, AllowPartial);
3106+
&LI, AC, AllowPartial);
30863107

30873108
return *It->second;
30883109
}
@@ -3125,7 +3146,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
31253146
auto &LI = FAM.getResult<LoopAnalysis>(F);
31263147
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
31273148
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
3128-
return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI);
3149+
auto &AC = FAM.getResult<AssumptionAnalysis>(F);
3150+
return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC);
31293151
}
31303152

31313153
AnalysisKey LoopAccessAnalysis::Key;

llvm/lib/Transforms/Scalar/LoopFlatten.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1009,7 +1009,8 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
10091009
// in simplified form, and also needs LCSSA. Running
10101010
// this pass will simplify all loops that contain inner loops,
10111011
// regardless of whether anything ends up being flattened.
1012-
LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr);
1012+
LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr,
1013+
&AR.AC);
10131014
for (Loop *InnerLoop : LN.getLoops()) {
10141015
auto *OuterLoop = InnerLoop->getParentLoop();
10151016
if (!OuterLoop)

llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,7 +551,7 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
551551
const Function *F = L.getHeader()->getParent();
552552
OptimizationRemarkEmitter ORE(F);
553553

554-
LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr);
554+
LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr, &LAR.AC);
555555
if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT))
556556
return PreservedAnalyses::all();
557557
return getLoopPassPreservedAnalyses();

llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ e.1:
505505
ret i32 1
506506
}
507507

508-
define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) {
508+
define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) nosync nofree {
509509
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption'
510510
; CHECK-NEXT: loop.header:
511511
; CHECK-NEXT: Memory dependences are safe with run-time checks
@@ -518,10 +518,10 @@ define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_kno
518518
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
519519
; CHECK-NEXT: Grouped accesses:
520520
; CHECK-NEXT: Group GRP0:
521-
; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
521+
; CHECK-NEXT: (Low: %B High: (2000 + %B))
522522
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
523523
; CHECK-NEXT: Group GRP1:
524-
; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
524+
; CHECK-NEXT: (Low: %A High: (2000 + %A))
525525
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
526526
; CHECK-EMPTY:
527527
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
@@ -565,7 +565,7 @@ e.2:
565565
ret void
566566
}
567567

568-
define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) {
568+
define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) nosync nofree {
569569
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small'
570570
; CHECK-NEXT: loop.header:
571571
; CHECK-NEXT: Memory dependences are safe with run-time checks

llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,46 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 1024) ]
99
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ]
10+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
11+
; CHECK: vector.ph:
1012
; CHECK-NEXT: br label [[LOOP:%.*]]
13+
; CHECK: vector.body:
14+
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
15+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]]
16+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
17+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]]
18+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
19+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
20+
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
21+
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
22+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024
23+
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
24+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
25+
; CHECK: middle.split:
26+
; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
27+
; CHECK: middle.block:
28+
; CHECK-NEXT: br label [[LOOP_END:%.*]]
29+
; CHECK: vector.early.exit:
30+
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true)
31+
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[TMP8]]
32+
; CHECK-NEXT: br label [[LOOP_END]]
33+
; CHECK: scalar.ph:
34+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
35+
; CHECK-NEXT: br label [[LOOP1:%.*]]
1136
; CHECK: loop:
12-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
37+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1338
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
1439
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1540
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
1641
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
1742
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
18-
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
43+
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
1944
; CHECK: loop.inc:
2045
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
2146
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024
22-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
47+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
2348
; CHECK: loop.end:
24-
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ -1, [[LOOP_INC]] ]
49+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP1]] ], [ -1, [[LOOP_INC]] ], [ -1, [[MIDDLE_BLOCK]] ], [ [[TMP9]], [[VECTOR_EARLY_EXIT]] ]
2550
; CHECK-NEXT: ret i64 [[RETVAL]]
2651
;
2752
entry:

llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ class VPlanSlpTest : public VPlanTestIRBase {
4141
AARes.reset(new AAResults(*TLI));
4242
AARes->addAAResult(*BasicAA);
4343
PSE.reset(new PredicatedScalarEvolution(*SE, *L));
44-
LAI.reset(new LoopAccessInfo(L, &*SE, nullptr, &*TLI, &*AARes, &*DT, &*LI));
44+
LAI.reset(
45+
new LoopAccessInfo(L, &*SE, nullptr, &*TLI, &*AARes, &*DT, &*LI, &*AC));
4546
IAI.reset(new InterleavedAccessInfo(*PSE, L, &*DT, &*LI, &*LAI));
4647
IAI->analyzeInterleaving(false);
4748
return {Plan, *IAI};

0 commit comments

Comments
 (0)