Skip to content

Commit 08f50e9

Browse files
authored
[VPlan] Use vector tripcount if computable when simplifying conds. (#151034)
Update isConditionTrueViaVFAndUF to use the vector trip count when a SCEV expression can be computed for it, which is the case once it has been materialized to a constant. Otherwise, fall back to the original trip count. PR: #151034
1 parent c304a2b commit 08f50e9

File tree

3 files changed

+19
-26
lines changed

3 files changed

+19
-26
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1429,15 +1429,15 @@ static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan,
14291429
// count is not conveniently available as SCEV so far, so we compare directly
14301430
// against the original trip count. This is stricter than necessary, as we
14311431
// will only return true if the trip count == vector trip count.
1432-
// TODO: Use SCEV for vector trip count once available, to cover cases where
1433-
// vector trip count == UF * VF, but original trip count != UF * VF.
1434-
const SCEV *TripCount =
1435-
vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
1436-
assert(!isa<SCEVCouldNotCompute>(TripCount) &&
1432+
const SCEV *VectorTripCount =
1433+
vputils::getSCEVExprForVPValue(&Plan.getVectorTripCount(), SE);
1434+
if (isa<SCEVCouldNotCompute>(VectorTripCount))
1435+
VectorTripCount = vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
1436+
assert(!isa<SCEVCouldNotCompute>(VectorTripCount) &&
14371437
"Trip count SCEV must be computable");
14381438
ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
1439-
const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
1440-
return SE.isKnownPredicate(CmpInst::ICMP_EQ, TripCount, C);
1439+
const SCEV *C = SE.getElementCount(VectorTripCount->getType(), NumElements);
1440+
return SE.isKnownPredicate(CmpInst::ICMP_EQ, VectorTripCount, C);
14411441
}
14421442

14431443
/// Try to simplify the branch condition of \p Plan. This may restrict the

llvm/lib/Transforms/Vectorize/VPlanUtils.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,11 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
7373
}
7474

7575
const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
76-
if (V->isLiveIn())
77-
return SE.getSCEV(V->getLiveInIRValue());
76+
if (V->isLiveIn()) {
77+
if (Value *LiveIn = V->getLiveInIRValue())
78+
return SE.getSCEV(LiveIn);
79+
return SE.getCouldNotCompute();
80+
}
7881

7982
// TODO: Support constructing SCEVs for more recipes as needed.
8083
return TypeSwitch<const VPRecipeBase *, const SCEV *>(V->getDefiningRecipe())

llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -329,19 +329,14 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn
329329
; VF8UF2: [[VECTOR_PH]]:
330330
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
331331
; VF8UF2: [[VECTOR_BODY]]:
332-
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
333-
; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
334-
; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8
335-
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
332+
; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8
333+
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
336334
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
337335
; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
338336
; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer
339-
; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
340337
; VF8UF2-NEXT: [[TMP4:%.*]] = or <8 x i1> [[TMP2]], [[TMP3]]
341338
; VF8UF2-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP4]])
342-
; VF8UF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
343-
; VF8UF2-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
344-
; VF8UF2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
339+
; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]]
345340
; VF8UF2: [[MIDDLE_SPLIT]]:
346341
; VF8UF2-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
347342
; VF8UF2: [[MIDDLE_BLOCK]]:
@@ -360,7 +355,7 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn
360355
; VF8UF2: [[LOOP_LATCH]]:
361356
; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
362357
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
363-
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
358+
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP4:![0-9]+]]
364359
; VF8UF2: [[EXIT]]:
365360
; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
366361
; VF8UF2-NEXT: ret i8 [[RES]]
@@ -372,15 +367,10 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn
372367
; VF16UF1: [[VECTOR_PH]]:
373368
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
374369
; VF16UF1: [[VECTOR_BODY]]:
375-
; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
376-
; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
377-
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
370+
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[A]], align 1
378371
; VF16UF1-NEXT: [[TMP1:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
379-
; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
380372
; VF16UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP1]])
381-
; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
382-
; VF16UF1-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
383-
; VF16UF1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
373+
; VF16UF1-NEXT: br label %[[MIDDLE_SPLIT:.*]]
384374
; VF16UF1: [[MIDDLE_SPLIT]]:
385375
; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
386376
; VF16UF1: [[MIDDLE_BLOCK]]:
@@ -399,7 +389,7 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn
399389
; VF16UF1: [[LOOP_LATCH]]:
400390
; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
401391
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
402-
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
392+
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP4:![0-9]+]]
403393
; VF16UF1: [[EXIT]]:
404394
; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
405395
; VF16UF1-NEXT: ret i8 [[RES]]

0 commit comments

Comments
 (0)