Skip to content

[VPlan] Support VPWidenPointerInductionRecipes with EVL tail folding #152110

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8450,11 +8450,9 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
*Plan, CM.getMinimalBitwidths());
VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
// TODO: try to put it close to addActiveLaneMask().
// Discard the plan if it is not EVL-compatible
if (CM.foldTailWithEVL() && !HasScalarVF &&
!VPlanTransforms::runPass(VPlanTransforms::tryAddExplicitVectorLength,
*Plan, CM.getMaxSafeElements()))
break;
if (CM.foldTailWithEVL() && !HasScalarVF)
VPlanTransforms::runPass(VPlanTransforms::addExplicitVectorLength,
*Plan, CM.getMaxSafeElements());
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
VPlans.push_back(std::move(Plan));
}
Expand Down
30 changes: 18 additions & 12 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2180,6 +2180,21 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
"User of VF that we can't transform to EVL.");
Plan.getVF().replaceAllUsesWith(&EVL);

assert(all_of(Plan.getVFxUF().users(),
[&Plan](VPUser *U) {
return match(U, m_c_Binary<Instruction::Add>(
m_Specific(Plan.getCanonicalIV()),
m_Specific(&Plan.getVFxUF()))) ||
isa<VPWidenPointerInductionRecipe>(U);
}) &&
"Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
"increment of the canonical induction.");
Plan.getVFxUF().replaceUsesWithIf(&EVL, [](VPUser &U, unsigned Idx) {
// Only replace uses in VPWidenPointerInductionRecipe; The increment of the
// canonical induction must not be updated.
return isa<VPWidenPointerInductionRecipe>(U);
});

// Defer erasing recipes till the end so that we don't invalidate the
// VPTypeAnalysis cache.
SmallVector<VPRecipeBase *> ToErase;
Expand Down Expand Up @@ -2315,16 +2330,9 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
/// %NextAVL = sub IVSize nuw %AVL, %OpEVL
/// ...
///
bool VPlanTransforms::tryAddExplicitVectorLength(
void VPlanTransforms::addExplicitVectorLength(
VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
// The transform updates all users of inductions to work based on EVL, instead
// of the VF directly. At the moment, widened pointer inductions cannot be
// updated, so bail out if the plan contains any.
bool ContainsWidenPointerInductions =
any_of(Header->phis(), IsaPred<VPWidenPointerInductionRecipe>);
if (ContainsWidenPointerInductions)
return false;

auto *CanonicalIVPHI = Plan.getCanonicalIV();
auto *CanIVTy = CanonicalIVPHI->getScalarType();
Expand Down Expand Up @@ -2379,7 +2387,6 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
// TODO: support unroll factor > 1.
Plan.setUF(1);
return true;
}

void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
Expand Down Expand Up @@ -2803,13 +2810,12 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R,
R->replaceAllUsesWith(PtrAdd);

// Create the backedge value for the scalar pointer phi.
Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock();
Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator());
Comment on lines +2813 to +2814
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this related to the EVL change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously the backedge value was created after the first non PHI, which uses VF.

However now that the VF passed might be the defined in the vector loop body (i.e. the VPInstruction::ExplicitVectorLength) this inserts it at the end of the region where it should dominate.

VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
DL);
VPValue *Inc = Builder.createNaryOp(Instruction::Mul, {Step, VF});

VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock();
Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator());
VPValue *InductionGEP =
Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
ScalarPtrPhi->addOperand(InductionGEP);
Expand Down
7 changes: 3 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,9 @@ struct VPlanTransforms {
/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe.
/// VPCanonicalIVPHIRecipe is only used to control the loop after
/// this transformation.
/// \returns true if the transformation succeeds, or false if it doesn't.
static bool
tryAddExplicitVectorLength(VPlan &Plan,
const std::optional<unsigned> &MaxEVLSafeElements);
static void
addExplicitVectorLength(VPlan &Plan,
const std::optional<unsigned> &MaxEVLSafeElements);

// For each Interleave Group in \p InterleaveGroups replace the Recipes
// widening its memory instructions with a single VPInterleaveRecipe at its
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
return VerifyEVLUse(*S, S->getNumOperands() - 1);
})
.Case<VPWidenStoreEVLRecipe, VPReductionEVLRecipe,
VPWidenIntOrFpInductionRecipe>(
VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>(
[&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); })
.Case<VPScalarIVStepsRecipe>([&](auto *R) {
if (R->getNumOperands() != 3) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP6]]
; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP15:%.*]] = mul <vscale x 2 x i64> [[TMP13]], splat (i64 1)
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP15]]
Expand All @@ -48,6 +47,7 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], splat (i8 1)
; CHECK-NEXT: store <vscale x 2 x i8> [[TMP20]], ptr [[TMP18]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP6]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down Expand Up @@ -119,7 +119,6 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP6]]
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 2 x i64> [[TMP12]], splat (i64 1)
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP14]]
Expand All @@ -128,6 +127,7 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
; CHECK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], splat (i8 1)
; CHECK-NEXT: store <vscale x 2 x i8> [[TMP17]], ptr [[TMP15]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP6]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP5]], 3
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i64> [[TMP9]], splat (i64 2)
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP10]]
Expand All @@ -250,6 +249,7 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
; CHECK-NEXT: [[TMP12]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: store <vscale x 2 x ptr> [[VECTOR_GEP]], ptr [[NEXT_GEP]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP5]], 3
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
Expand Down Expand Up @@ -313,14 +313,14 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr %
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl <vscale x 2 x i64> [[TMP4]], splat (i64 1)
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x ptr> [[VECTOR_GEP]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i64 0
; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> zeroinitializer, ptr [[TMP7]], i32 2, <vscale x 2 x i1> [[TMP6]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
Expand Down
48 changes: 42 additions & 6 deletions llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,50 @@ define void @test_wide_ptr_induction(ptr noalias %a, ptr noalias %b, i64 %N) {
; CHECK-LABEL: define void @test_wide_ptr_induction(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[B]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 8)
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[EVL_BASED_IV]]
; CHECK-NEXT: call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[VECTOR_GEP]], ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 8, [[TMP10]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[B]], [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[VECTOR_BODY]] ], [ [[B]], [[VECTOR_PH]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[ADDR]], i64 8
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[EVL_BASED_IV]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: store ptr [[ADDR]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw nsw i64 [[EVL_BASED_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
Expand All @@ -109,4 +143,6 @@ for.cond.cleanup:
; CHECK: [[META2]] = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"}
; CHECK: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META3]], [[META1]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]], [[META3]]}
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META3]], [[META1]]}
;.
Original file line number Diff line number Diff line change
@@ -1,29 +1,36 @@
; RUN: opt -passes=loop-vectorize \
; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
; RUN: -mtriple=riscv64 -mattr=+v -S -debug %s 2>&1 | FileCheck %s
; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S -debug %s 2>&1 | FileCheck %s

; REQUIRES: asserts

; Make sure we do not vectorize a loop with a widened pointer induction.
define void @test_wide_pointer_induction(ptr noalias %a, i64 %N) {
; For %for.1, we are fine initially, because the previous value %for.1.next dominates the
; user of %for.1. But for %for.2, we have to sink the user (%for.1.next) past the previous
; value %for.2.next. This however breaks the condition we have for %for.1. We cannot fix
; both first order recurrences and cannot vectorize the loop.
;
; Make sure we don't compute costs if there are no vector VPlans.

; CHECK-NOT: LV: Vector loop of width {{.+}} costs:
;
; CHECK: define void @test_wide_pointer_induction(
; CHECK: define i32 @test(
; CHECK-NOT: vector.body
;
define i32 @test(i32 %N) {
entry:
br label %loop
br label %for.body

loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%iv.ptr = phi ptr [ %a, %entry ], [ %iv.ptr.next, %loop ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
store ptr %iv.ptr, ptr %arrayidx, align 8
%iv.next = add nuw nsw i64 %iv, 1
%iv.ptr.next = getelementptr i64, ptr %iv.ptr, i32 1
%exitcond.not = icmp eq i64 %iv.next, %N
br i1 %exitcond.not, label %exit, label %loop
for.body: ; preds = %for.body.preheader, %for.body
%iv = phi i32 [ %inc, %for.body ], [ 10, %entry ]
%for.1 = phi i32 [ %for.1.next, %for.body ], [ 20, %entry ]
%for.2 = phi i32 [ %for.2.next, %for.body ], [ 11, %entry ]
%for.1.next = add nsw i32 %for.2, 1
%for.2.next = shl i32 %for.1, 24
%inc = add nsw i32 %iv, 1
%exitcond = icmp eq i32 %inc, %N
br i1 %exitcond, label %for.cond1.for.end_crit_edge, label %for.body

exit:
ret void
for.cond1.for.end_crit_edge: ; preds = %for.body
%add.lcssa = phi i32 [ %for.1.next, %for.body ]
%sext.lcssa = phi i32 [ %for.2.next, %for.body ]
%res = add i32 %add.lcssa, %sext.lcssa
ret i32 %res
}
Loading