Skip to content

Commit 7250b66

Browse files
authored
[VPlan] Create AVL as a phi from TC -> 0 with EVL tail folding (#151481)
This implements the first half of #151459, by changing the AVL so it's no longer computed as `trip-count - EVL-based IV`, but instead a separate scalar phi that is decremented by EVL each iteration. This shortens the dependency chain for computing the AVL and should eventually allow us to convert the branch condition to `branch-count avl-next, 0`. `simplifyBranchConditionForVFAndUF` had to be updated to prevent a regression because this introduces a VPPhi in the header block.
1 parent b2e5303 commit 7250b66

35 files changed

+323
-162
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1483,11 +1483,11 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
14831483
auto *CanIVTy = Plan.getCanonicalIV()->getScalarType();
14841484
if (all_of(Header->phis(),
14851485
IsaPred<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
1486-
VPFirstOrderRecurrencePHIRecipe>)) {
1486+
VPFirstOrderRecurrencePHIRecipe, VPPhi>)) {
14871487
for (VPRecipeBase &HeaderR : make_early_inc_range(Header->phis())) {
1488-
auto *HeaderPhiR = cast<VPHeaderPHIRecipe>(&HeaderR);
1489-
HeaderPhiR->replaceAllUsesWith(HeaderPhiR->getStartValue());
1490-
HeaderPhiR->eraseFromParent();
1488+
auto *Phi = cast<VPPhiAccessors>(&HeaderR);
1489+
HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
1490+
HeaderR.eraseFromParent();
14911491
}
14921492

14931493
VPBlockBase *Preheader = VectorRegion->getSinglePredecessor();
@@ -2292,10 +2292,12 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
22922292
/// ...
22932293
/// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
22942294
/// [ %NextEVLIV, %vector.body ]
2295-
/// %AVL = sub original TC, %EVLPhi
2295+
/// %AVL = phi [ trip-count, %vector.ph ], [ %NextAVL, %vector.body ]
22962296
/// %VPEVL = EXPLICIT-VECTOR-LENGTH %AVL
22972297
/// ...
2298-
/// %NextEVLIV = add IVSize (cast i32 %VPEVVL to IVSize), %EVLPhi
2298+
/// %OpEVL = cast i32 %VPEVL to IVSize
2299+
/// %NextEVLIV = add IVSize %OpEVL, %EVLPhi
2300+
/// %NextAVL = sub IVSize nuw %AVL, %OpEVL
22992301
/// ...
23002302
///
23012303
/// If MaxSafeElements is provided, the function adds the following recipes:
@@ -2306,12 +2308,14 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
23062308
/// ...
23072309
/// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
23082310
/// [ %NextEVLIV, %vector.body ]
2309-
/// %AVL = sub original TC, %EVLPhi
2311+
/// %AVL = phi [ trip-count, %vector.ph ], [ %NextAVL, %vector.body ]
23102312
/// %cmp = cmp ult %AVL, MaxSafeElements
23112313
/// %SAFE_AVL = select %cmp, %AVL, MaxSafeElements
23122314
/// %VPEVL = EXPLICIT-VECTOR-LENGTH %SAFE_AVL
23132315
/// ...
2314-
/// %NextEVLIV = add IVSize (cast i32 %VPEVL to IVSize), %EVLPhi
2316+
/// %OpEVL = cast i32 %VPEVL to IVSize
2317+
/// %NextEVLIV = add IVSize %OpEVL, %EVLPhi
2318+
/// %NextAVL = sub IVSize nuw %AVL, %OpEVL
23152319
/// ...
23162320
///
23172321
bool VPlanTransforms::tryAddExplicitVectorLength(
@@ -2333,9 +2337,12 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
23332337
auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
23342338
EVLPhi->insertAfter(CanonicalIVPHI);
23352339
VPBuilder Builder(Header, Header->getFirstNonPhi());
2336-
// Compute original TC - IV as the AVL (application vector length).
2337-
VPValue *AVL = Builder.createNaryOp(
2338-
Instruction::Sub, {Plan.getTripCount(), EVLPhi}, DebugLoc(), "avl");
2340+
// Create the AVL (application vector length), starting from TC -> 0 in steps
2341+
// of EVL.
2342+
VPPhi *AVLPhi = Builder.createScalarPhi(
2343+
{Plan.getTripCount()}, DebugLoc::getCompilerGenerated(), "avl");
2344+
VPValue *AVL = AVLPhi;
2345+
23392346
if (MaxSafeElements) {
23402347
// Support for MaxSafeDist for correct loop emission.
23412348
VPValue *AVLSafe =
@@ -2362,6 +2369,11 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
23622369
CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
23632370
EVLPhi->addOperand(NextEVLIV);
23642371

2372+
VPValue *NextAVL = Builder.createOverflowingOp(
2373+
Instruction::Sub, {AVLPhi, OpVPEVL}, {/*hasNUW=*/true, /*hasNSW=*/false},
2374+
DebugLoc::getCompilerGenerated(), "avl.next");
2375+
AVLPhi->addOperand(NextAVL);
2376+
23652377
transformRecipestoEVLRecipes(Plan, *VPEVL);
23662378

23672379
// Replace all uses of VPCanonicalIVPHIRecipe by

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
172172
[&](const VPInstructionWithType *S) { return VerifyEVLUse(*S, 0); })
173173
.Case<VPInstruction>([&](const VPInstruction *I) {
174174
if (I->getOpcode() == Instruction::PHI ||
175-
I->getOpcode() == Instruction::ICmp)
175+
I->getOpcode() == Instruction::ICmp ||
176+
I->getOpcode() == Instruction::Sub)
176177
return VerifyEVLUse(*I, 1);
177178
switch (I->getOpcode()) {
178179
case Instruction::Add:

llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) {
2525
; CHECK: vector.body:
2626
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
2727
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_BODY]] ]
28-
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
28+
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[ENTRY]] ], [ [[AVL_NEXT:%.*]], [[FOR_BODY]] ]
2929
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
3030
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
3131
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP12]]
@@ -35,6 +35,7 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) {
3535
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VEC_IND]], ptr align 8 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
3636
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP11]] to i64
3737
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
38+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
3839
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
3940
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
4041
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]

llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,14 +132,15 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
132132
; IF-EVL-OUTLOOP: vector.body:
133133
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
134134
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
135-
; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
135+
; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = phi i32 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
136136
; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 4, i1 true)
137137
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[EVL_BASED_IV]]
138138
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr align 2 [[TMP7]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
139139
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i16> [[VP_OP_LOAD]] to <vscale x 4 x i32>
140140
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP9]]
141141
; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP5]])
142142
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
143+
; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP5]]
143144
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
144145
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
145146
; IF-EVL-OUTLOOP: middle.block:
@@ -185,14 +186,15 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
185186
; IF-EVL-INLOOP: vector.body:
186187
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
187188
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
188-
; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
189+
; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = phi i32 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
189190
; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP5]], i32 8, i1 true)
190191
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[EVL_BASED_IV]]
191192
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP8]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
192193
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32>
193194
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
194195
; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
195196
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]]
197+
; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i32 [[TMP5]], [[TMP6]]
196198
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
197199
; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
198200
; IF-EVL-INLOOP: middle.block:
@@ -350,7 +352,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
350352
; IF-EVL-OUTLOOP: vector.body:
351353
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
352354
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
353-
; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
355+
; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
354356
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
355357
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
356358
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -359,6 +361,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
359361
; IF-EVL-OUTLOOP-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]])
360362
; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
361363
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
364+
; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
362365
; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
363366
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
364367
; IF-EVL-OUTLOOP: middle.block:
@@ -398,14 +401,15 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
398401
; IF-EVL-INLOOP: vector.body:
399402
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
400403
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
401-
; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
404+
; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
402405
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
403406
; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
404407
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
405408
; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 2147483647, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
406409
; IF-EVL-INLOOP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP13]], i32 [[VEC_PHI]])
407410
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64
408411
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
412+
; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
409413
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
410414
; IF-EVL-INLOOP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
411415
; IF-EVL-INLOOP: middle.block:

llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali
117117
; PREDICATED_DATA-WITH-EVL: vector.body:
118118
; PREDICATED_DATA-WITH-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
119119
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
120-
; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL:%.*]] = sub i32 1024, [[EVL_BASED_IV]]
120+
; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL:%.*]] = phi i32 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
121121
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 16, i1 true)
122122
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
123123
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
@@ -139,6 +139,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali
139139
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP13]]
140140
; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP12]], <vscale x 16 x ptr> align 1 [[TMP14]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
141141
; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]]
142+
; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]]
142143
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
143144
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024
144145
; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -325,7 +326,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali
325326
; PREDICATED_DATA-WITH-EVL: vector.body:
326327
; PREDICATED_DATA-WITH-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
327328
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
328-
; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL:%.*]] = sub i32 1024, [[EVL_BASED_IV]]
329+
; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL:%.*]] = phi i32 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
329330
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 16, i1 true)
330331
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
331332
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
@@ -363,6 +364,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali
363364
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP25]]
364365
; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP18]], <vscale x 16 x ptr> align 1 [[TMP26]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
365366
; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]]
367+
; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]]
366368
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
367369
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP27:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024
368370
; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]

llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ define void @test(ptr %p, i64 %a, i8 %b) {
3232
; CHECK: vector.body:
3333
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_COND]] ]
3434
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ]
35-
; CHECK-NEXT: [[AVL:%.*]] = sub i32 9, [[EVL_BASED_IV]]
35+
; CHECK-NEXT: [[AVL:%.*]] = phi i32 [ 9, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[FOR_COND]] ]
3636
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 2, i1 true)
3737
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP11]], i64 0
3838
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -48,6 +48,7 @@ define void @test(ptr %p, i64 %a, i8 %b) {
4848
; CHECK-NEXT: [[TMP17:%.*]] = trunc <vscale x 2 x i32> [[TMP16]] to <vscale x 2 x i8>
4949
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> [[TMP17]], <vscale x 2 x ptr> align 1 [[BROADCAST_SPLAT4]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
5050
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP11]], [[EVL_BASED_IV]]
51+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP11]]
5152
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT8]]
5253
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 9
5354
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]

0 commit comments

Comments
 (0)