-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[LV][EVL] Support interleaved access with tail folding by EVL #152070
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Mel-Chen
wants to merge
1
commit into
llvm:main
Choose a base branch
from
Mel-Chen:evl-interleave-base
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -557,6 +557,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { | |
case VPRecipeBase::VPPartialReductionSC: | ||
return true; | ||
case VPRecipeBase::VPBranchOnMaskSC: | ||
case VPRecipeBase::VPInterleaveEVLSC: | ||
case VPRecipeBase::VPInterleaveSC: | ||
case VPRecipeBase::VPIRInstructionSC: | ||
case VPRecipeBase::VPWidenLoadEVLSC: | ||
|
@@ -2371,11 +2372,14 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe { | |
} | ||
}; | ||
|
||
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load | ||
/// or stores into one wide load/store and shuffles. The first operand of a | ||
/// VPInterleave recipe is the address, followed by the stored values, followed | ||
/// by an optional mask. | ||
class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase { | ||
/// A common base class for interleaved memory operations. | ||
/// Interleaved memory operation is a memory access method that combines | ||
/// multiple strided loads/stores into a single wide load/store with shuffles. | ||
/// The first operand must be the address. The optional operands are, in order, | ||
/// the stored values and the mask. | ||
/// TODO: Inherit from VPIRMetadata | ||
class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase { | ||
protected: | ||
const InterleaveGroup<Instruction> *IG; | ||
|
||
/// Indicates if the interleave group is in a conditional block and requires a | ||
|
@@ -2386,90 +2390,186 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase { | |
/// unused gaps can be loaded speculatively. | ||
bool NeedsMaskForGaps = false; | ||
|
||
public: | ||
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr, | ||
ArrayRef<VPValue *> StoredValues, VPValue *Mask, | ||
bool NeedsMaskForGaps, DebugLoc DL) | ||
: VPRecipeBase(VPDef::VPInterleaveSC, {Addr}, | ||
DL), | ||
|
||
IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) { | ||
VPInterleaveBase(const unsigned char SC, | ||
const InterleaveGroup<Instruction> *IG, | ||
ArrayRef<VPValue *> Operands, | ||
ArrayRef<VPValue *> StoredValues, VPValue *Mask, | ||
bool NeedsMaskForGaps, DebugLoc DL) | ||
: VPRecipeBase(SC, Operands, DL), IG(IG), | ||
NeedsMaskForGaps(NeedsMaskForGaps) { | ||
// TODO: extend the masked interleaved-group support to reversed access. | ||
assert((!Mask || !IG->isReverse()) && | ||
"Reversed masked interleave-group not supported."); | ||
for (unsigned i = 0; i < IG->getFactor(); ++i) | ||
if (Instruction *I = IG->getMember(i)) { | ||
if (I->getType()->isVoidTy()) | ||
for (unsigned I = 0; I < IG->getFactor(); ++I) | ||
if (Instruction *Inst = IG->getMember(I)) { | ||
if (Inst->getType()->isVoidTy()) | ||
continue; | ||
new VPValue(I, this); | ||
new VPValue(Inst, this); | ||
} | ||
|
||
for (auto *SV : StoredValues) | ||
addOperand(SV); | ||
|
||
if (Mask) { | ||
HasMask = true; | ||
addOperand(Mask); | ||
} | ||
} | ||
~VPInterleaveRecipe() override = default; | ||
|
||
VPInterleaveRecipe *clone() override { | ||
return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(), | ||
NeedsMaskForGaps, getDebugLoc()); | ||
public: | ||
VPInterleaveBase *clone() override { | ||
llvm_unreachable("cloning not supported"); | ||
} | ||
|
||
VP_CLASSOF_IMPL(VPDef::VPInterleaveSC) | ||
static inline bool classof(const VPRecipeBase *R) { | ||
return R->getVPDefID() == VPRecipeBase::VPInterleaveSC || | ||
R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC; | ||
} | ||
|
||
static inline bool classof(const VPUser *U) { | ||
auto *R = dyn_cast<VPRecipeBase>(U); | ||
return R && classof(R); | ||
} | ||
|
||
/// Return the address accessed by this recipe. | ||
VPValue *getAddr() const { | ||
return getOperand(0); // Address is the 1st, mandatory operand. | ||
} | ||
|
||
/// Return true if the access needs a mask because of the gaps. | ||
bool needsMaskForGaps() const { return NeedsMaskForGaps; } | ||
|
||
/// Return the mask used by this recipe. Note that a full mask is represented | ||
/// by a nullptr. | ||
VPValue *getMask() const { | ||
// Mask is optional and therefore the last, currently 2nd operand. | ||
// Mask is optional and the last operand. | ||
return HasMask ? getOperand(getNumOperands() - 1) : nullptr; | ||
} | ||
|
||
const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; } | ||
|
||
Instruction *getInsertPos() const { return IG->getInsertPos(); } | ||
|
||
void execute(VPTransformState &State) override { | ||
llvm_unreachable("VPInterleaveBase should not be instantiated."); | ||
} | ||
|
||
/// Return the cost of this VPInterleaveRecipe. | ||
InstructionCost computeCost(ElementCount VF, | ||
VPCostContext &Ctx) const override; | ||
|
||
/// Returns true if the recipe only uses the first lane of operand \p Op. | ||
virtual bool onlyFirstLaneUsed(const VPValue *Op) const = 0; | ||
|
||
/// Returns the number of stored operands of this interleave group. Returns 0 | ||
/// for load interleave groups. | ||
virtual unsigned getNumStoreOperands() const = 0; | ||
|
||
/// Return the VPValues stored by this interleave group. If it is a load | ||
/// interleave group, return an empty ArrayRef. | ||
ArrayRef<VPValue *> getStoredValues() const { | ||
// The first operand is the address, followed by the stored values, followed | ||
// by an optional mask. | ||
return ArrayRef<VPValue *>(op_begin(), getNumOperands()) | ||
.slice(1, getNumStoreOperands()); | ||
virtual ArrayRef<VPValue *> getStoredValues() const = 0; | ||
}; | ||
|
||
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load | ||
/// or stores into one wide load/store and shuffles. The first operand of a | ||
/// VPInterleave recipe is the address, followed by the stored values, followed | ||
/// by an optional mask. | ||
class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase { | ||
public: | ||
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr, | ||
ArrayRef<VPValue *> StoredValues, VPValue *Mask, | ||
bool NeedsMaskForGaps, DebugLoc DL) | ||
: VPInterleaveBase(VPDef::VPInterleaveSC, IG, ArrayRef<VPValue *>({Addr}), | ||
StoredValues, Mask, NeedsMaskForGaps, DL) {} | ||
|
||
~VPInterleaveRecipe() override = default; | ||
|
||
VPInterleaveRecipe *clone() override { | ||
return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(), | ||
NeedsMaskForGaps, getDebugLoc()); | ||
} | ||
|
||
VP_CLASSOF_IMPL(VPDef::VPInterleaveSC) | ||
|
||
/// Generate the wide load or store, and shuffles. | ||
void execute(VPTransformState &State) override; | ||
|
||
/// Return the cost of this VPInterleaveRecipe. | ||
InstructionCost computeCost(ElementCount VF, | ||
VPCostContext &Ctx) const override; | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
/// Print the recipe. | ||
void print(raw_ostream &O, const Twine &Indent, | ||
VPSlotTracker &SlotTracker) const override; | ||
#endif | ||
|
||
const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; } | ||
bool onlyFirstLaneUsed(const VPValue *Op) const override { | ||
assert(is_contained(operands(), Op) && | ||
"Op must be an operand of the recipe"); | ||
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op); | ||
} | ||
|
||
/// Returns the number of stored operands of this interleave group. Returns 0 | ||
/// for load interleave groups. | ||
unsigned getNumStoreOperands() const { | ||
unsigned getNumStoreOperands() const override { | ||
return getNumOperands() - (HasMask ? 2 : 1); | ||
} | ||
|
||
/// The recipe only uses the first lane of the address. | ||
ArrayRef<VPValue *> getStoredValues() const override { | ||
// The first operand is the address, followed by the stored values, followed | ||
// by an optional mask. | ||
return ArrayRef<VPValue *>(op_begin(), getNumOperands()) | ||
.slice(1, getNumStoreOperands()); | ||
} | ||
}; | ||
|
||
/// A recipe for interleaved access operations with vector-predication | ||
/// intrinsics. The first operand is the address, the second operand is the | ||
/// explicit vector length. Stored values and mask are optional operands. | ||
class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase { | ||
public: | ||
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask, | ||
DebugLoc DL = {}) | ||
: VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(), | ||
ArrayRef<VPValue *>({R.getAddr(), &EVL}), | ||
R.getStoredValues(), Mask, R.needsMaskForGaps(), DL) { | ||
assert(!IG->isReverse() && | ||
"Reversed interleave-group with tail folding is not supported."); | ||
} | ||
|
||
~VPInterleaveEVLRecipe() override = default; | ||
|
||
VPInterleaveEVLRecipe *clone() override { | ||
llvm_unreachable("cloning not implemented yet"); | ||
} | ||
|
||
VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC) | ||
|
||
/// The VPValue of the explicit vector length. | ||
VPValue *getEVL() const { return getOperand(1); } | ||
|
||
/// Generate the wide load or store, and shuffles. | ||
void execute(VPTransformState &State) override; | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
/// Print the recipe. | ||
void print(raw_ostream &O, const Twine &Indent, | ||
VPSlotTracker &SlotTracker) const override; | ||
#endif | ||
|
||
/// The recipe only uses the first lane of the address and of the EVL operand. | ||
bool onlyFirstLaneUsed(const VPValue *Op) const override { | ||
assert(is_contained(operands(), Op) && | ||
"Op must be an operand of the recipe"); | ||
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op); | ||
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op) || | ||
Op == getEVL(); | ||
} | ||
|
||
Instruction *getInsertPos() const { return IG->getInsertPos(); } | ||
unsigned getNumStoreOperands() const override { | ||
return getNumOperands() - (HasMask ? 3 : 2); | ||
} | ||
|
||
ArrayRef<VPValue *> getStoredValues() const override { | ||
// The first operand is the address, and the second operand is EVL, followed | ||
// by the stored values, followed by an optional mask. | ||
return ArrayRef<VPValue *>(op_begin(), getNumOperands()) | ||
.slice(2, getNumStoreOperands()); | ||
} | ||
Comment on lines
+2567
to
+2572
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be moved to VPInterleaveBase if we compute it as |
||
}; | ||
|
||
/// A recipe to represent inloop reduction operations, performing a reduction on | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -295,7 +295,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { | |||||
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe, | ||||||
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>( | ||||||
[this](const auto *R) { return inferScalarTypeForRecipe(R); }) | ||||||
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) { | ||||||
.Case<VPInterleaveRecipe, VPInterleaveEVLRecipe>([V](const auto *R) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this work?
Suggested change
|
||||||
// TODO: Use info from interleave group. | ||||||
return V->getUnderlyingValue()->getType(); | ||||||
}) | ||||||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This would be better implemented in a separate NFC patch.