Skip to content

Commit 94a6cd4

Browse files
authored
[VPlan] Expand VPWidenPointerInductionRecipe into separate recipes (#148274)
This is the VPWidenPointerInductionRecipe equivalent of #118638, with the motivation of allowing us to use the EVL as the induction step. A new VPInstruction, WidePtrAdd, is added to allow adding the step vector to the induction phi, since VPInstruction::PtrAdd only handles scalars or multiple scalar lanes. Originally this transformation was copied from the original recipe's execute code, but it has since been simplified by teaching `unrollWidenInductionByUF` to unroll the recipe, which brings it in line with VPWidenIntOrFpInductionRecipe.
1 parent fbd1864 commit 94a6cd4

File tree

13 files changed

+156
-189
lines changed

13 files changed

+156
-189
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,13 @@ class VPBuilder {
263263
new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset},
264264
GEPNoWrapFlags::inBounds(), DL, Name));
265265
}
266+
VPInstruction *createWidePtrAdd(VPValue *Ptr, VPValue *Offset,
267+
DebugLoc DL = DebugLoc::getUnknown(),
268+
const Twine &Name = "") {
269+
return tryInsertInstruction(
270+
new VPInstruction(VPInstruction::WidePtrAdd, {Ptr, Offset},
271+
GEPNoWrapFlags::none(), DL, Name));
272+
}
266273

267274
VPPhi *createScalarPhi(ArrayRef<VPValue *> IncomingValues, DebugLoc DL,
268275
const Twine &Name = "") {

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,21 +1038,6 @@ void VPlan::execute(VPTransformState *State) {
10381038
if (isa<VPWidenPHIRecipe>(&R))
10391039
continue;
10401040

1041-
if (auto *WidenPhi = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
1042-
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1043-
"recipe generating only scalars should have been replaced");
1044-
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1045-
PHINode *Phi = cast<PHINode>(GEP->getPointerOperand());
1046-
1047-
Phi->setIncomingBlock(1, VectorLatchBB);
1048-
1049-
// Move the last step to the end of the latch block. This ensures
1050-
// consistent placement of all induction updates.
1051-
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1052-
Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
1053-
continue;
1054-
}
1055-
10561041
auto *PhiR = cast<VPSingleDefRecipe>(&R);
10571042
// VPInstructions currently model scalar Phis only.
10581043
bool NeedsScalar = isa<VPInstruction>(PhiR) ||

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,9 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
991991
// operand). Only generates scalar values (either for the first lane only or
992992
// for all lanes, depending on its uses).
993993
PtrAdd,
994+
// Add a vector offset in bytes (second operand) to a scalar base pointer
995+
// (first operand).
996+
WidePtrAdd,
994997
// Returns a scalar boolean value, which is true if any lane of its
995998
// (boolean) vector operands is true. It produces the reduced value across
996999
// all unrolled iterations. Unrolling will add all copies of its original
@@ -1979,6 +1982,9 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
19791982
/// Update the step value of the recipe.
19801983
void setStepValue(VPValue *V) { setOperand(1, V); }
19811984

1985+
VPValue *getVFValue() { return getOperand(2); }
1986+
const VPValue *getVFValue() const { return getOperand(2); }
1987+
19821988
/// Returns the number of incoming values, also number of incoming blocks.
19831989
/// Note that at the moment, VPWidenPointerInductionRecipe only has a single
19841990
/// incoming value, its start value.
@@ -2068,9 +2074,6 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
20682074
VPSlotTracker &SlotTracker) const override;
20692075
#endif
20702076

2071-
VPValue *getVFValue() { return getOperand(2); }
2072-
const VPValue *getVFValue() const { return getOperand(2); }
2073-
20742077
VPValue *getSplatVFValue() {
20752078
// If the recipe has been unrolled return the VPValue for the induction
20762079
// increment.
@@ -2106,8 +2109,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
21062109
}
21072110
};
21082111

2109-
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
2110-
public VPUnrollPartAccessor<4> {
2112+
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
21112113
bool IsScalarAfterVectorization;
21122114

21132115
public:
@@ -2136,18 +2138,14 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
21362138
VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
21372139

21382140
/// Generate vector values for the pointer induction.
2139-
void execute(VPTransformState &State) override;
2141+
void execute(VPTransformState &State) override {
2142+
llvm_unreachable("cannot execute this recipe, should be expanded via "
2143+
"expandVPWidenPointerInduction");
2144+
};
21402145

21412146
/// Returns true if only scalar values will be generated.
21422147
bool onlyScalarsGenerated(bool IsScalable);
21432148

2144-
/// Returns the VPValue representing the value of this induction at
2145-
/// the first unrolled part, if it exists. Returns itself if unrolling did not
2146-
/// take place.
2147-
VPValue *getFirstUnrolledPartOperand() {
2148-
return getUnrollPart(*this) == 0 ? this : getOperand(3);
2149-
}
2150-
21512149
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
21522150
/// Print the recipe.
21532151
void print(raw_ostream &O, const Twine &Indent,

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
128128
return IntegerType::get(Ctx, 1);
129129
case VPInstruction::Broadcast:
130130
case VPInstruction::PtrAdd:
131+
case VPInstruction::WidePtrAdd:
131132
// Return the type based on first operand.
132133
return inferScalarType(R->getOperand(0));
133134
case VPInstruction::BranchOnCond:

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 15 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
478478
case VPInstruction::FirstOrderRecurrenceSplice:
479479
case VPInstruction::LogicalAnd:
480480
case VPInstruction::PtrAdd:
481+
case VPInstruction::WidePtrAdd:
481482
case VPInstruction::WideIVStep:
482483
return 2;
483484
case Instruction::Select:
@@ -858,6 +859,12 @@ Value *VPInstruction::generate(VPTransformState &State) {
858859
Value *Addend = State.get(getOperand(1), VPLane(0));
859860
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
860861
}
862+
case VPInstruction::WidePtrAdd: {
863+
Value *Ptr =
864+
State.get(getOperand(0), vputils::isSingleScalar(getOperand(0)));
865+
Value *Addend = State.get(getOperand(1));
866+
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
867+
}
861868
case VPInstruction::AnyOf: {
862869
Value *Res = State.get(getOperand(0));
863870
for (VPValue *Op : drop_begin(operands()))
@@ -1085,6 +1092,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
10851092
case VPInstruction::Not:
10861093
case VPInstruction::PtrAdd:
10871094
case VPInstruction::WideIVStep:
1095+
case VPInstruction::WidePtrAdd:
10881096
case VPInstruction::StepVector:
10891097
case VPInstruction::ReductionStartVector:
10901098
return false;
@@ -1123,6 +1131,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
11231131
return true;
11241132
case VPInstruction::PtrAdd:
11251133
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
1134+
case VPInstruction::WidePtrAdd:
1135+
return Op == getOperand(0);
11261136
case VPInstruction::ComputeAnyOfResult:
11271137
case VPInstruction::ComputeFindIVResult:
11281138
return Op == getOperand(1);
@@ -1231,6 +1241,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
12311241
case VPInstruction::PtrAdd:
12321242
O << "ptradd";
12331243
break;
1244+
case VPInstruction::WidePtrAdd:
1245+
O << "wide-ptradd";
1246+
break;
12341247
case VPInstruction::AnyOf:
12351248
O << "any-of";
12361249
break;
@@ -1817,7 +1830,8 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
18171830
return Opcode == Instruction::AShr;
18181831
case OperationType::GEPOp:
18191832
return Opcode == Instruction::GetElementPtr ||
1820-
Opcode == VPInstruction::PtrAdd;
1833+
Opcode == VPInstruction::PtrAdd ||
1834+
Opcode == VPInstruction::WidePtrAdd;
18211835
case OperationType::FPMathOp:
18221836
return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
18231837
Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||
@@ -3682,87 +3696,6 @@ bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
36823696
(!IsScalable || vputils::onlyFirstLaneUsed(this));
36833697
}
36843698

3685-
void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
3686-
assert(getInductionDescriptor().getKind() ==
3687-
InductionDescriptor::IK_PtrInduction &&
3688-
"Not a pointer induction according to InductionDescriptor!");
3689-
assert(State.TypeAnalysis.inferScalarType(this)->isPointerTy() &&
3690-
"Unexpected type.");
3691-
assert(!onlyScalarsGenerated(State.VF.isScalable()) &&
3692-
"Recipe should have been replaced");
3693-
3694-
unsigned CurrentPart = getUnrollPart(*this);
3695-
3696-
// Build a pointer phi
3697-
Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
3698-
Type *ScStValueType = ScalarStartValue->getType();
3699-
3700-
BasicBlock *VectorPH =
3701-
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
3702-
PHINode *NewPointerPhi = nullptr;
3703-
if (CurrentPart == 0) {
3704-
IRBuilder<>::InsertPointGuard Guard(State.Builder);
3705-
if (State.Builder.GetInsertPoint() !=
3706-
State.Builder.GetInsertBlock()->getFirstNonPHIIt())
3707-
State.Builder.SetInsertPoint(
3708-
State.Builder.GetInsertBlock()->getFirstNonPHIIt());
3709-
NewPointerPhi = State.Builder.CreatePHI(ScStValueType, 2, "pointer.phi");
3710-
NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
3711-
NewPointerPhi->setDebugLoc(getDebugLoc());
3712-
} else {
3713-
// The recipe has been unrolled. In that case, fetch the single pointer phi
3714-
// shared among all unrolled parts of the recipe.
3715-
auto *GEP =
3716-
cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
3717-
NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
3718-
}
3719-
3720-
// A pointer induction, performed by using a gep
3721-
BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
3722-
Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
3723-
Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
3724-
Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3725-
// Add induction update using an incorrect block temporarily. The phi node
3726-
// will be fixed after VPlan execution. Note that at this point the latch
3727-
// block cannot be used, as it does not exist yet.
3728-
// TODO: Model increment value in VPlan, by turning the recipe into a
3729-
// multi-def and a subclass of VPHeaderPHIRecipe.
3730-
if (CurrentPart == 0) {
3731-
// The recipe represents the first part of the pointer induction. Create the
3732-
// GEP to increment the phi across all unrolled parts.
3733-
Value *NumUnrolledElems = State.get(getOperand(2), true);
3734-
3735-
Value *InductionGEP = GetElementPtrInst::Create(
3736-
State.Builder.getInt8Ty(), NewPointerPhi,
3737-
State.Builder.CreateMul(
3738-
ScalarStepValue,
3739-
State.Builder.CreateTrunc(NumUnrolledElems, PhiType)),
3740-
"ptr.ind", InductionLoc);
3741-
3742-
NewPointerPhi->addIncoming(InductionGEP, VectorPH);
3743-
}
3744-
3745-
// Create actual address geps that use the pointer phi as base and a
3746-
// vectorized version of the step value (<step*0, ..., step*N>) as offset.
3747-
Type *VecPhiType = VectorType::get(PhiType, State.VF);
3748-
Value *StartOffsetScalar = State.Builder.CreateMul(
3749-
RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
3750-
Value *StartOffset =
3751-
State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
3752-
// Create a vector of consecutive numbers from zero to VF.
3753-
StartOffset = State.Builder.CreateAdd(
3754-
StartOffset, State.Builder.CreateStepVector(VecPhiType));
3755-
3756-
assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
3757-
"scalar step must be the same across all parts");
3758-
Value *GEP = State.Builder.CreateGEP(
3759-
State.Builder.getInt8Ty(), NewPointerPhi,
3760-
State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
3761-
State.VF, ScalarStepValue)),
3762-
"vector.gep");
3763-
State.set(this, GEP);
3764-
}
3765-
37663699
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
37673700
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
37683701
VPSlotTracker &SlotTracker) const {
@@ -3921,11 +3854,6 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) {
39213854
Value *Op0 = State.get(getOperand(0));
39223855
Type *VecTy = Op0->getType();
39233856
Instruction *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
3924-
// Manually move it with the other PHIs in case PHI recipes above this one
3925-
// also inserted non-phi instructions.
3926-
// TODO: Remove once VPWidenPointerInductionRecipe is also expanded in
3927-
// convertToConcreteRecipes.
3928-
VecPhi->moveBefore(State.Builder.GetInsertBlock()->getFirstNonPHIIt());
39293857
State.set(this, VecPhi);
39303858
}
39313859

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -963,6 +963,7 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
963963
RFlags.getGEPNoWrapFlags());
964964
}
965965
case VPInstruction::PtrAdd:
966+
case VPInstruction::WidePtrAdd:
966967
return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
967968
Ops[1],
968969
cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
@@ -2750,6 +2751,70 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
27502751
WidenIVR->replaceAllUsesWith(WidePHI);
27512752
}
27522753

2754+
/// Expand a VPWidenPointerInductionRecipe into executable recipes, for the
2755+
/// initial value, phi and backedge value. In the following example:
2756+
///
2757+
/// <x1> vector loop: {
2758+
/// vector.body:
2759+
/// EMIT ir<%ptr.iv> = WIDEN-POINTER-INDUCTION %start, %step, %vf
2760+
/// ...
2761+
/// EMIT branch-on-count ...
2762+
/// }
2763+
///
2764+
/// WIDEN-POINTER-INDUCTION will get expanded to:
2765+
///
2766+
/// <x1> vector loop: {
2767+
/// vector.body:
2768+
/// EMIT-SCALAR %pointer.phi = phi %start, %ptr.ind
2769+
/// EMIT %mul = mul %stepvector, %step
2770+
/// EMIT %vector.gep = wide-ptradd %pointer.phi, %mul
2771+
/// ...
2772+
/// EMIT %ptr.ind = ptradd %pointer.phi, %vf
2773+
/// EMIT branch-on-count ...
2774+
/// }
2775+
static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R,
2776+
VPTypeAnalysis &TypeInfo) {
2777+
VPlan *Plan = R->getParent()->getPlan();
2778+
VPValue *Start = R->getStartValue();
2779+
VPValue *Step = R->getStepValue();
2780+
VPValue *VF = R->getVFValue();
2781+
2782+
assert(R->getInductionDescriptor().getKind() ==
2783+
InductionDescriptor::IK_PtrInduction &&
2784+
"Not a pointer induction according to InductionDescriptor!");
2785+
assert(TypeInfo.inferScalarType(R)->isPointerTy() && "Unexpected type.");
2786+
assert(!R->onlyScalarsGenerated(Plan->hasScalableVF()) &&
2787+
"Recipe should have been replaced");
2788+
2789+
VPBuilder Builder(R);
2790+
DebugLoc DL = R->getDebugLoc();
2791+
2792+
// Build a scalar pointer phi.
2793+
VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
2794+
2795+
// Create actual address geps that use the pointer phi as base and a
2796+
// vectorized version of the step value (<step*0, ..., step*N>) as offset.
2797+
Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
2798+
Type *StepTy = TypeInfo.inferScalarType(Step);
2799+
VPValue *Offset = Builder.createNaryOp(VPInstruction::StepVector, {}, StepTy);
2800+
Offset = Builder.createNaryOp(Instruction::Mul, {Offset, Step});
2801+
VPValue *PtrAdd = Builder.createNaryOp(
2802+
VPInstruction::WidePtrAdd, {ScalarPtrPhi, Offset}, DL, "vector.gep");
2803+
R->replaceAllUsesWith(PtrAdd);
2804+
2805+
// Create the backedge value for the scalar pointer phi.
2806+
Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
2807+
VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
2808+
DL);
2809+
VPValue *Inc = Builder.createNaryOp(Instruction::Mul, {Step, VF});
2810+
2811+
VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock();
2812+
Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator());
2813+
VPValue *InductionGEP =
2814+
Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
2815+
ScalarPtrPhi->addOperand(InductionGEP);
2816+
}
2817+
27532818
void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) {
27542819
// Replace loop regions with explicit CFG.
27552820
SmallVector<VPRegionBlock *> LoopRegions;
@@ -2775,6 +2840,12 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
27752840
continue;
27762841
}
27772842

2843+
if (auto *WidenIVR = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
2844+
expandVPWidenPointerInduction(WidenIVR, TypeInfo);
2845+
ToRemove.push_back(WidenIVR);
2846+
continue;
2847+
}
2848+
27782849
// Expand VPBlendRecipe into VPInstruction::Select.
27792850
VPBuilder Builder(&R);
27802851
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {

0 commit comments

Comments
 (0)