Skip to content

[LV][EVL] Support interleaved access with tail folding by EVL #152070

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4228,6 +4228,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPWidenIntOrFpInductionSC:
case VPDef::VPWidenPointerInductionSC:
case VPDef::VPReductionPHISC:
case VPDef::VPInterleaveEVLSC:
case VPDef::VPInterleaveSC:
case VPDef::VPWidenLoadEVLSC:
case VPDef::VPWidenLoadSC:
Expand Down Expand Up @@ -4256,8 +4257,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,

// If no def nor is a store, e.g., branches, continue - no value to check.
if (R.getNumDefinedValues() == 0 &&
!isa<VPWidenStoreRecipe, VPWidenStoreEVLRecipe, VPInterleaveRecipe>(
&R))
!isa<VPWidenStoreRecipe, VPWidenStoreEVLRecipe, VPInterleaveBase>(&R))
continue;
// For multi-def recipes, currently only interleaved loads, suffice to
// check first def only.
Expand Down
178 changes: 139 additions & 39 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPPartialReductionSC:
return true;
case VPRecipeBase::VPBranchOnMaskSC:
case VPRecipeBase::VPInterleaveEVLSC:
case VPRecipeBase::VPInterleaveSC:
case VPRecipeBase::VPIRInstructionSC:
case VPRecipeBase::VPWidenLoadEVLSC:
Expand Down Expand Up @@ -2371,11 +2372,14 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
}
};

/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
/// or stores into one wide load/store and shuffles. The first operand of a
/// VPInterleave recipe is the address, followed by the stored values, followed
/// by an optional mask.
class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
/// A common base class for interleaved memory operations.
/// Interleaved memory operation is a memory access method that combines
/// multiple strided loads/stores into a single wide load/store with shuffles.
/// The first operand must be the address. The optional operands are, in order,
/// the stored values and the mask.
/// TODO: Inherit from VPIRMetadata
class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase {
protected:
const InterleaveGroup<Instruction> *IG;

/// Indicates if the interleave group is in a conditional block and requires a
Expand All @@ -2386,90 +2390,186 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
/// unused gaps can be loaded speculatively.
bool NeedsMaskForGaps = false;

public:
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
bool NeedsMaskForGaps, DebugLoc DL)
: VPRecipeBase(VPDef::VPInterleaveSC, {Addr},
DL),

IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) {
VPInterleaveBase(const unsigned char SC,
const InterleaveGroup<Instruction> *IG,
ArrayRef<VPValue *> Operands,
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
bool NeedsMaskForGaps, DebugLoc DL)
: VPRecipeBase(SC, Operands, DL), IG(IG),
NeedsMaskForGaps(NeedsMaskForGaps) {
// TODO: extend the masked interleaved-group support to reversed access.
assert((!Mask || !IG->isReverse()) &&
"Reversed masked interleave-group not supported.");
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (Instruction *I = IG->getMember(i)) {
if (I->getType()->isVoidTy())
for (unsigned I = 0; I < IG->getFactor(); ++I)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This better to implement in a separate NFC patch

if (Instruction *Inst = IG->getMember(I)) {
if (Inst->getType()->isVoidTy())
continue;
new VPValue(I, this);
new VPValue(Inst, this);
}

for (auto *SV : StoredValues)
addOperand(SV);

if (Mask) {
HasMask = true;
addOperand(Mask);
}
}
~VPInterleaveRecipe() override = default;

VPInterleaveRecipe *clone() override {
return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
NeedsMaskForGaps, getDebugLoc());
public:
VPInterleaveBase *clone() override {
llvm_unreachable("cloning not supported");
}

VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
}

static inline bool classof(const VPUser *U) {
auto *R = dyn_cast<VPRecipeBase>(U);
return R && classof(R);
}

/// Return the address accessed by this recipe.
VPValue *getAddr() const {
return getOperand(0); // Address is the 1st, mandatory operand.
}

/// Return true if the access needs a mask because of the gaps.
bool needsMaskForGaps() const { return NeedsMaskForGaps; }

/// Return the mask used by this recipe. Note that a full mask is represented
/// by a nullptr.
VPValue *getMask() const {
// Mask is optional and therefore the last, currently 2nd operand.
// Mask is optional and the last operand.
return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
}

const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }

Instruction *getInsertPos() const { return IG->getInsertPos(); }

void execute(VPTransformState &State) override {
llvm_unreachable("VPInterleaveBase should not be instantiated.");
}

/// Return the cost of this VPInterleaveRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

/// Returns true if the recipe only uses the first lane of operand \p Op.
virtual bool onlyFirstLaneUsed(const VPValue *Op) const = 0;

/// Returns the number of stored operands of this interleave group. Returns 0
/// for load interleave groups.
virtual unsigned getNumStoreOperands() const = 0;

/// Return the VPValues stored by this interleave group. If it is a load
/// interleave group, return an empty ArrayRef.
ArrayRef<VPValue *> getStoredValues() const {
// The first operand is the address, followed by the stored values, followed
// by an optional mask.
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
.slice(1, getNumStoreOperands());
virtual ArrayRef<VPValue *> getStoredValues() const = 0;
};

/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
/// or stores into one wide load/store and shuffles. The first operand of a
/// VPInterleave recipe is the address, followed by the stored values, followed
/// by an optional mask.
class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase {
public:
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
bool NeedsMaskForGaps, DebugLoc DL)
: VPInterleaveBase(VPDef::VPInterleaveSC, IG, ArrayRef<VPValue *>({Addr}),
StoredValues, Mask, NeedsMaskForGaps, DL) {}

~VPInterleaveRecipe() override = default;

VPInterleaveRecipe *clone() override {
return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
NeedsMaskForGaps, getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)

/// Generate the wide load or store, and shuffles.
void execute(VPTransformState &State) override;

/// Return the cost of this VPInterleaveRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif

const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
}

/// Returns the number of stored operands of this interleave group. Returns 0
/// for load interleave groups.
unsigned getNumStoreOperands() const {
unsigned getNumStoreOperands() const override {
return getNumOperands() - (HasMask ? 2 : 1);
}

/// The recipe only uses the first lane of the address.
ArrayRef<VPValue *> getStoredValues() const override {
// The first operand is the address, followed by the stored values, followed
// by an optional mask.
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
.slice(1, getNumStoreOperands());
}
};

/// A recipe for interleaved access operations with vector-predication
/// intrinsics. The first operand is the address, the second operand is the
/// explicit vector length. Stored values and mask are optional operands.
class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
public:
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask,
DebugLoc DL = {})
: VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
ArrayRef<VPValue *>({R.getAddr(), &EVL}),
R.getStoredValues(), Mask, R.needsMaskForGaps(), DL) {
assert(!IG->isReverse() &&
"Reversed interleave-group with tail folding is not supported.");
}

~VPInterleaveEVLRecipe() override = default;

VPInterleaveEVLRecipe *clone() override {
llvm_unreachable("cloning not implemented yet");
}

VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)

/// The VPValue of the explicit vector length.
VPValue *getEVL() const { return getOperand(1); }

/// Generate the wide load or store, and shuffles.
void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif

/// The recipe only uses the first lane of the address and the EVL operand.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op) ||
Op == getEVL();
}

Instruction *getInsertPos() const { return IG->getInsertPos(); }
unsigned getNumStoreOperands() const override {
return getNumOperands() - (HasMask ? 3 : 2);
}

ArrayRef<VPValue *> getStoredValues() const override {
// The first operand is the address, and the second operand is EVL, followed
// by the stored values, followed by an optional mask.
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
.slice(2, getNumStoreOperands());
}
Comment on lines +2567 to +2572
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be moved to VPInterleaveBase if we compute it as iterator_range(op_end() - getNumStoreOperands(), op_end())?

};

/// A recipe to represent inloop reduction operations, performing a reduction on
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
.Case<VPInterleaveRecipe, VPInterleaveEVLRecipe>([V](const auto *R) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this work?

Suggested change
.Case<VPInterleaveRecipe, VPInterleaveEVLRecipe>([V](const auto *R) {
.Case<VPInterleaveBase>([V](const auto *R) {

// TODO: Use info from interleave group.
return V->getUnderlyingValue()->getType();
})
Expand Down
Loading