Skip to content

[VPlan] Materialize Build(Struct)Vectors for VPReplicateRecipes. (NFCI) #151487

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7284,6 +7284,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// cost model is complete for better cost estimates.
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
OrigLoop->getHeader()->getContext());
VPlanTransforms::runPass(VPlanTransforms::materializeBuildVectors, BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
bool HasBranchWeights =
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,8 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
set(Def, VectorValue);
} else {
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
assert(isa<VPInstruction>(Def) && "Explicit BuildVector recipes must "
"handle packing for non-VPInstructions.");
// Initialize packing with insertelements to start from poison.
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,8 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case Instruction::Load:
case VPInstruction::AnyOf:
case VPInstruction::BranchOnCond:
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ExplicitVectorLength:
Expand Down
47 changes: 47 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3176,6 +3176,53 @@ void VPlanTransforms::materializeVectorTripCount(
Plan.getVectorTripCount().setUnderlyingValue(NewC->getValue());
}

void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
if (Plan.hasScalarVFOnly())
return;

VPTypeAnalysis TypeInfo(Plan);
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()));
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()));
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
// excluding ones in replicate regions. Those are not unrolled explicitly yet.
for (VPBasicBlock *VPBB :
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
if (!RepR || RepR->isSingleScalar())
continue;
VPInstruction *BuildVector = nullptr;
for (VPUser *U : to_vector(RepR->users())) {
VPRegionBlock *ParentRegion =
cast<VPRecipeBase>(U)->getParent()->getParent();
if (U->usesScalars(RepR) && ParentRegion == LoopRegion)
continue;

if (!BuildVector) {
Type *ScalarTy = TypeInfo.inferScalarType(RepR);
unsigned Opc = ScalarTy->isStructTy()
? VPInstruction::BuildStructVector
: VPInstruction::BuildVector;
BuildVector = new VPInstruction(Opc, {RepR});
BuildVector->insertAfter(RepR);
}

// Only update a single operand per users, as the same user is added
// multiple times, once per use.
// TODO: Introduce de-duplicating iterator over users.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe not the right place for this TODO?

for (unsigned Idx = 0; Idx != U->getNumOperands(); ++Idx)
if (U->getOperand(Idx) == RepR) {
U->setOperand(Idx, BuildVector);
break;
}
}
}
}
}

/// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
/// converted to a narrower recipe. \p V is used by a wide recipe that feeds a
/// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,10 @@ struct VPlanTransforms {
unsigned BestUF,
PredicatedScalarEvolution &PSE);

/// Add explicit Build[Struct]Vector recipes that combine scalar values
/// produced by VPReplicateRecipes to a single vector.
static void materializeBuildVectors(VPlan &Plan);

/// Try to convert a plan with interleave groups with VF elements to a plan
/// with the interleave groups replaced by wide loads and stores processing VF
/// elements, if all transformed interleave groups access the full vector
Expand Down
47 changes: 31 additions & 16 deletions llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -463,9 +463,10 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
}

/// Create a single-scalar clone of \p RepR for lane \p Lane.
static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
Type *IdxTy, VPReplicateRecipe *RepR,
VPLane Lane) {
static VPReplicateRecipe *
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
VPReplicateRecipe *RepR, VPLane Lane,
DenseMap<VPValue *, SmallVector<VPValue *>> &Value2Lanes) {
// Collect the operands at Lane, creating extracts as needed.
SmallVector<VPValue *> NewOps;
for (VPValue *Op : RepR->operands()) {
Expand All @@ -478,6 +479,11 @@ static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
continue;
}
if (Value2Lanes.contains(Op)) {
NewOps.push_back(Value2Lanes[Op][Lane.getKnownLane()]);
continue;
}

// Look through buildvector to avoid unnecessary extracts.
if (match(Op, m_BuildVector())) {
NewOps.push_back(
Expand Down Expand Up @@ -510,6 +516,8 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
auto VPBBsToUnroll =
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
DenseMap<VPValue *, SmallVector<VPValue *>> Value2Lanes;
SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
Expand All @@ -521,36 +529,43 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
vputils::isSingleScalar(RepR->getOperand(1))) {
// Stores to invariant addresses need to store the last lane only.
cloneForLane(Plan, Builder, IdxTy, RepR,
VPLane::getLastLaneForVF(VF));
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF),
Value2Lanes);
} else {
// Create single-scalar version of RepR for all lanes.
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I));
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Value2Lanes);
}
RepR->eraseFromParent();
continue;
}
/// Create single-scalar version of RepR for all lanes.
SmallVector<VPValue *> LaneDefs;
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)));
LaneDefs.push_back(
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Value2Lanes));

Value2Lanes[RepR] = LaneDefs;
/// Users that only demand the first lane can use the definition for lane
/// 0.
RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
return U.onlyFirstLaneUsed(RepR);
});

// If needed, create a Build(Struct)Vector recipe to insert the scalar
// lane values into a vector.
Type *ResTy = RepR->getUnderlyingInstr()->getType();
VPValue *VecRes = Builder.createNaryOp(
ResTy->isStructTy() ? VPInstruction::BuildStructVector
: VPInstruction::BuildVector,
LaneDefs);
RepR->replaceAllUsesWith(VecRes);
RepR->eraseFromParent();
for (VPUser *U : to_vector(RepR->users())) {
auto *VPI = dyn_cast<VPInstruction>(U);
if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
VPI->getOpcode() != VPInstruction::BuildStructVector))
continue;
assert(VPI->getNumOperands() == 1 &&
"Build(Struct)Vector must have a single operand");
VPI->setOperand(0, LaneDefs[0]);
for (VPValue *Def : drop_begin(LaneDefs))
VPI->addOperand(Def);
}
ToRemove.push_back(RepR);
}
}
for (auto *R : reverse(ToRemove))
R->eraseFromParent();
}