23
23
#include " llvm/ADT/SmallVector.h"
24
24
#include " llvm/Analysis/AliasAnalysis.h"
25
25
#include " llvm/Analysis/AliasSetTracker.h"
26
+ #include " llvm/Analysis/AssumeBundleQueries.h"
27
+ #include " llvm/Analysis/AssumptionCache.h"
26
28
#include " llvm/Analysis/LoopAnalysisManager.h"
27
29
#include " llvm/Analysis/LoopInfo.h"
28
30
#include " llvm/Analysis/LoopIterator.h"
@@ -208,28 +210,46 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
208
210
209
211
/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
210
212
/// \p MaxBTC is guaranteed inbounds of the accessed object.
211
- static bool evaluatePtrAddRecAtMaxBTCWillNotWrap ( const SCEVAddRecExpr *AR,
212
- const SCEV *MaxBTC ,
213
- const SCEV *EltSize,
214
- ScalarEvolution &SE ,
215
- const DataLayout &DL ) {
213
+ static bool
214
+ evaluatePtrAddRecAtMaxBTCWillNotWrap ( const SCEVAddRecExpr *AR ,
215
+ const SCEV *MaxBTC, const SCEV *EltSize,
216
+ ScalarEvolution &SE, const DataLayout &DL ,
217
+ DominatorTree *DT, AssumptionCache *AC ) {
216
218
auto *PointerBase = SE.getPointerBase (AR->getStart ());
217
219
auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
218
220
if (!StartPtr)
219
221
return false ;
222
+ const Loop *L = AR->getLoop ();
220
223
bool CheckForNonNull, CheckForFreed;
221
- uint64_t DerefBytes = StartPtr->getValue ()->getPointerDereferenceableBytes (
224
+ Value *StartPtrV = StartPtr->getValue ();
225
+ uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes (
222
226
DL, CheckForNonNull, CheckForFreed);
223
227
224
- if (CheckForNonNull || CheckForFreed)
228
+ if (DerefBytes && ( CheckForNonNull || CheckForFreed) )
225
229
return false ;
226
230
227
231
const SCEV *Step = AR->getStepRecurrence (SE);
232
+ Type *WiderTy = SE.getWiderType (MaxBTC->getType (), Step->getType ());
233
+ const SCEV *DerefBytesSCEV = SE.getConstant (WiderTy, DerefBytes);
234
+
235
+ // Check if we have a suitable dereferenceable assumption we can use.
236
+ if (!StartPtrV->canBeFreed ()) {
237
+ RetainedKnowledge DerefRK = getKnowledgeValidInContext (
238
+ StartPtrV, {Attribute::Dereferenceable}, *AC,
239
+ L->getLoopPredecessor ()->getTerminator (), DT);
240
+ if (DerefRK) {
241
+ DerefBytesSCEV = SE.getUMaxExpr (
242
+ DerefBytesSCEV, SE.getConstant (WiderTy, DerefRK.ArgValue ));
243
+ }
244
+ }
245
+
246
+ if (DerefBytesSCEV->isZero ())
247
+ return false ;
248
+
228
249
bool IsKnownNonNegative = SE.isKnownNonNegative (Step);
229
250
if (!IsKnownNonNegative && !SE.isKnownNegative (Step))
230
251
return false ;
231
252
232
- Type *WiderTy = SE.getWiderType (MaxBTC->getType (), Step->getType ());
233
253
Step = SE.getNoopOrSignExtend (Step, WiderTy);
234
254
MaxBTC = SE.getNoopOrZeroExtend (MaxBTC, WiderTy);
235
255
@@ -256,24 +276,23 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
256
276
const SCEV *EndBytes = addSCEVNoOverflow (StartOffset, OffsetEndBytes, SE);
257
277
if (!EndBytes)
258
278
return false ;
259
- return SE.isKnownPredicate (CmpInst::ICMP_ULE, EndBytes,
260
- SE.getConstant (WiderTy, DerefBytes));
279
+ return SE.isKnownPredicate (CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
261
280
}
262
281
263
282
// For negative steps check if
264
283
// * StartOffset >= (MaxBTC * Step + EltSize)
265
284
// * StartOffset <= DerefBytes.
266
285
assert (SE.isKnownNegative (Step) && " must be known negative" );
267
286
return SE.isKnownPredicate (CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
268
- SE.isKnownPredicate (CmpInst::ICMP_ULE, StartOffset,
269
- SE.getConstant (WiderTy, DerefBytes));
287
+ SE.isKnownPredicate (CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
270
288
}
271
289
272
290
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess (
273
291
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
274
292
const SCEV *MaxBTC, ScalarEvolution *SE,
275
293
DenseMap<std::pair<const SCEV *, Type *>,
276
- std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
294
+ std::pair<const SCEV *, const SCEV *>> *PointerBounds,
295
+ DominatorTree *DT, AssumptionCache *AC) {
277
296
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
278
297
if (PointerBounds) {
279
298
auto [Iter, Ins] = PointerBounds->insert (
@@ -308,8 +327,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
308
327
// sets ScEnd to the maximum unsigned value for the type. Note that LAA
309
328
// separately checks that accesses cannot wrap, so unsigned max
310
329
// represents an upper bound.
311
- if (evaluatePtrAddRecAtMaxBTCWillNotWrap (AR, MaxBTC, EltSizeSCEV, *SE,
312
- DL )) {
330
+ if (evaluatePtrAddRecAtMaxBTCWillNotWrap (AR, MaxBTC, EltSizeSCEV, *SE, DL,
331
+ DT, AC )) {
313
332
ScEnd = AR->evaluateAtIteration (MaxBTC, *SE);
314
333
} else {
315
334
ScEnd = SE->getAddExpr (
@@ -356,9 +375,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
356
375
bool NeedsFreeze) {
357
376
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount ();
358
377
const SCEV *BTC = PSE.getBackedgeTakenCount ();
359
- const auto &[ScStart, ScEnd] =
360
- getStartAndEndForAccess ( Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
361
- PSE. getSE (), & DC.getPointerBounds ());
378
+ const auto &[ScStart, ScEnd] = getStartAndEndForAccess (
379
+ Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE. getSE () ,
380
+ &DC. getPointerBounds (), DC. getDT (), DC.getAC ());
362
381
assert (!isa<SCEVCouldNotCompute>(ScStart) &&
363
382
!isa<SCEVCouldNotCompute>(ScEnd) &&
364
383
" must be able to compute both start and end expressions" );
@@ -1961,13 +1980,15 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
1961
1980
const SCEV *BTC = PSE.getBackedgeTakenCount ();
1962
1981
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount ();
1963
1982
ScalarEvolution &SE = *PSE.getSE ();
1964
- const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess (
1965
- InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
1983
+ const auto &[SrcStart_, SrcEnd_] =
1984
+ getStartAndEndForAccess (InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
1985
+ &SE, &PointerBounds, DT, AC);
1966
1986
if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
1967
1987
return false ;
1968
1988
1969
- const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess (
1970
- InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
1989
+ const auto &[SinkStart_, SinkEnd_] =
1990
+ getStartAndEndForAccess (InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
1991
+ &SE, &PointerBounds, DT, AC);
1971
1992
if (isa<SCEVCouldNotCompute>(SinkStart_) ||
1972
1993
isa<SCEVCouldNotCompute>(SinkEnd_))
1973
1994
return false ;
@@ -3002,7 +3023,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
3002
3023
const TargetTransformInfo *TTI,
3003
3024
const TargetLibraryInfo *TLI, AAResults *AA,
3004
3025
DominatorTree *DT, LoopInfo *LI,
3005
- bool AllowPartial)
3026
+ AssumptionCache *AC, bool AllowPartial)
3006
3027
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
3007
3028
PtrRtChecking (nullptr ), TheLoop(L), AllowPartial(AllowPartial) {
3008
3029
unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned >::max ();
@@ -3012,8 +3033,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
3012
3033
MaxTargetVectorWidthInBits =
3013
3034
TTI->getRegisterBitWidth (TargetTransformInfo::RGK_FixedWidthVector) * 2 ;
3014
3035
3015
- DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
3016
- MaxTargetVectorWidthInBits);
3036
+ DepChecker = std::make_unique<MemoryDepChecker>(
3037
+ *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
3017
3038
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
3018
3039
if (canAnalyzeLoop ())
3019
3040
CanVecMem = analyzeLoop (AA, LI, TLI, DT);
@@ -3082,7 +3103,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
3082
3103
// or if it was created with a different value of AllowPartial.
3083
3104
if (Inserted || It->second ->hasAllowPartial () != AllowPartial)
3084
3105
It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
3085
- &LI, AllowPartial);
3106
+ &LI, AC, AllowPartial);
3086
3107
3087
3108
return *It->second ;
3088
3109
}
@@ -3125,7 +3146,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
3125
3146
auto &LI = FAM.getResult <LoopAnalysis>(F);
3126
3147
auto &TTI = FAM.getResult <TargetIRAnalysis>(F);
3127
3148
auto &TLI = FAM.getResult <TargetLibraryAnalysis>(F);
3128
- return LoopAccessInfoManager (SE, AA, DT, LI, &TTI, &TLI);
3149
+ auto &AC = FAM.getResult <AssumptionAnalysis>(F);
3150
+ return LoopAccessInfoManager (SE, AA, DT, LI, &TTI, &TLI, &AC);
3129
3151
}
3130
3152
3131
3153
AnalysisKey LoopAccessAnalysis::Key;
0 commit comments