Skip to content

Commit 16d38aa

Browse files
committed
Support EVL interleave access
1 parent 767de32 commit 16d38aa

File tree

9 files changed

+364
-120
lines changed

9 files changed

+364
-120
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4228,6 +4228,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
42284228
case VPDef::VPWidenIntOrFpInductionSC:
42294229
case VPDef::VPWidenPointerInductionSC:
42304230
case VPDef::VPReductionPHISC:
4231+
case VPDef::VPInterleaveEVLSC:
42314232
case VPDef::VPInterleaveSC:
42324233
case VPDef::VPWidenLoadEVLSC:
42334234
case VPDef::VPWidenLoadSC:
@@ -4256,7 +4257,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
42564257

42574258
// If the recipe defines no value and is not a store, e.g., branches, continue - no value to check.
42584259
if (R.getNumDefinedValues() == 0 &&
4259-
!isa<VPWidenStoreRecipe, VPWidenStoreEVLRecipe, VPInterleaveRecipe>(
4260+
!isa<VPWidenStoreRecipe, VPWidenStoreEVLRecipe, VPInterleaveBase>(
42604261
&R))
42614262
continue;
42624263
// For multi-def recipes, currently only interleaved loads, suffice to

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 139 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
557557
case VPRecipeBase::VPPartialReductionSC:
558558
return true;
559559
case VPRecipeBase::VPBranchOnMaskSC:
560+
case VPRecipeBase::VPInterleaveEVLSC:
560561
case VPRecipeBase::VPInterleaveSC:
561562
case VPRecipeBase::VPIRInstructionSC:
562563
case VPRecipeBase::VPWidenLoadEVLSC:
@@ -2371,11 +2372,14 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
23712372
}
23722373
};
23732374

2374-
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2375-
/// or stores into one wide load/store and shuffles. The first operand of a
2376-
/// VPInterleave recipe is the address, followed by the stored values, followed
2377-
/// by an optional mask.
2378-
class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
2375+
/// A common base class for interleaved memory operations.
2376+
/// An interleaved memory operation is a memory access method that combines
2377+
/// multiple strided loads/stores into a single wide load/store with shuffles.
2378+
/// The first operand must be the address. The optional operands are, in order,
2379+
/// the stored values and the mask.
2380+
/// TODO: Inherit from VPIRMetadata
2381+
class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase {
2382+
protected:
23792383
const InterleaveGroup<Instruction> *IG;
23802384

23812385
/// Indicates if the interleave group is in a conditional block and requires a
@@ -2386,90 +2390,186 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
23862390
/// unused gaps can be loaded speculatively.
23872391
bool NeedsMaskForGaps = false;
23882392

2389-
public:
2390-
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2391-
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2392-
bool NeedsMaskForGaps, DebugLoc DL)
2393-
: VPRecipeBase(VPDef::VPInterleaveSC, {Addr},
2394-
DL),
2395-
2396-
IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) {
2393+
VPInterleaveBase(const unsigned char SC,
2394+
const InterleaveGroup<Instruction> *IG,
2395+
ArrayRef<VPValue *> Operands,
2396+
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2397+
bool NeedsMaskForGaps, DebugLoc DL)
2398+
: VPRecipeBase(SC, Operands, DL), IG(IG),
2399+
NeedsMaskForGaps(NeedsMaskForGaps) {
23972400
// TODO: extend the masked interleaved-group support to reversed access.
23982401
assert((!Mask || !IG->isReverse()) &&
23992402
"Reversed masked interleave-group not supported.");
2400-
for (unsigned i = 0; i < IG->getFactor(); ++i)
2401-
if (Instruction *I = IG->getMember(i)) {
2402-
if (I->getType()->isVoidTy())
2403+
for (unsigned I = 0; I < IG->getFactor(); ++I)
2404+
if (Instruction *Inst = IG->getMember(I)) {
2405+
if (Inst->getType()->isVoidTy())
24032406
continue;
2404-
new VPValue(I, this);
2407+
new VPValue(Inst, this);
24052408
}
24062409

24072410
for (auto *SV : StoredValues)
24082411
addOperand(SV);
2412+
24092413
if (Mask) {
24102414
HasMask = true;
24112415
addOperand(Mask);
24122416
}
24132417
}
2414-
~VPInterleaveRecipe() override = default;
24152418

2416-
VPInterleaveRecipe *clone() override {
2417-
return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2418-
NeedsMaskForGaps, getDebugLoc());
2419+
public:
2420+
VPInterleaveBase *clone() override {
2421+
llvm_unreachable("cloning not supported");
24192422
}
24202423

2421-
VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2424+
static inline bool classof(const VPRecipeBase *R) {
2425+
return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2426+
R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2427+
}
2428+
2429+
static inline bool classof(const VPUser *U) {
2430+
auto *R = dyn_cast<VPRecipeBase>(U);
2431+
return R && classof(R);
2432+
}
24222433

24232434
/// Return the address accessed by this recipe.
24242435
VPValue *getAddr() const {
24252436
return getOperand(0); // Address is the 1st, mandatory operand.
24262437
}
24272438

2439+
/// Return true if the access needs a mask because of the gaps.
2440+
bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2441+
24282442
/// Return the mask used by this recipe. Note that a full mask is represented
24292443
/// by a nullptr.
24302444
VPValue *getMask() const {
2431-
// Mask is optional and therefore the last, currently 2nd operand.
2445+
// Mask is optional and the last operand.
24322446
return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
24332447
}
24342448

2449+
const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
2450+
2451+
Instruction *getInsertPos() const { return IG->getInsertPos(); }
2452+
2453+
void execute(VPTransformState &State) override {
2454+
llvm_unreachable("VPInterleaveBase should not be instantiated.");
2455+
}
2456+
2457+
/// Return the cost of this interleave recipe.
2458+
InstructionCost computeCost(ElementCount VF,
2459+
VPCostContext &Ctx) const override;
2460+
2461+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2462+
virtual bool onlyFirstLaneUsed(const VPValue *Op) const = 0;
2463+
2464+
/// Returns the number of stored operands of this interleave group. Returns 0
2465+
/// for load interleave groups.
2466+
virtual unsigned getNumStoreOperands() const = 0;
2467+
24352468
/// Return the VPValues stored by this interleave group. If it is a load
24362469
/// interleave group, return an empty ArrayRef.
2437-
ArrayRef<VPValue *> getStoredValues() const {
2438-
// The first operand is the address, followed by the stored values, followed
2439-
// by an optional mask.
2440-
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
2441-
.slice(1, getNumStoreOperands());
2470+
virtual ArrayRef<VPValue *> getStoredValues() const = 0;
2471+
};
2472+
2473+
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2474+
/// or stores into one wide load/store and shuffles. The first operand of a
2475+
/// VPInterleave recipe is the address, followed by the stored values, followed
2476+
/// by an optional mask.
2477+
class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase {
2478+
public:
2479+
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2480+
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2481+
bool NeedsMaskForGaps, DebugLoc DL)
2482+
: VPInterleaveBase(VPDef::VPInterleaveSC, IG, ArrayRef<VPValue *>({Addr}),
2483+
StoredValues, Mask, NeedsMaskForGaps, DL) {}
2484+
2485+
~VPInterleaveRecipe() override = default;
2486+
2487+
VPInterleaveRecipe *clone() override {
2488+
return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2489+
NeedsMaskForGaps, getDebugLoc());
24422490
}
24432491

2492+
VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2493+
24442494
/// Generate the wide load or store, and shuffles.
24452495
void execute(VPTransformState &State) override;
24462496

2447-
/// Return the cost of this VPInterleaveRecipe.
2448-
InstructionCost computeCost(ElementCount VF,
2449-
VPCostContext &Ctx) const override;
2450-
24512497
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
24522498
/// Print the recipe.
24532499
void print(raw_ostream &O, const Twine &Indent,
24542500
VPSlotTracker &SlotTracker) const override;
24552501
#endif
24562502

2457-
const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
2503+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2504+
assert(is_contained(operands(), Op) &&
2505+
"Op must be an operand of the recipe");
2506+
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2507+
}
24582508

2459-
/// Returns the number of stored operands of this interleave group. Returns 0
2460-
/// for load interleave groups.
2461-
unsigned getNumStoreOperands() const {
2509+
unsigned getNumStoreOperands() const override {
24622510
return getNumOperands() - (HasMask ? 2 : 1);
24632511
}
24642512

2465-
/// The recipe only uses the first lane of the address.
2513+
ArrayRef<VPValue *> getStoredValues() const override {
2514+
// The first operand is the address, followed by the stored values, followed
2515+
// by an optional mask.
2516+
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
2517+
.slice(1, getNumStoreOperands());
2518+
}
2519+
};
2520+
2521+
/// A recipe for interleaved access operations with vector-predication
2522+
/// intrinsics. The first operand is the address, the second operand is the
2523+
/// explicit vector length. Stored values and mask are optional operands.
2524+
class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
2525+
public:
2526+
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask,
2527+
DebugLoc DL = {})
2528+
: VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2529+
ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2530+
R.getStoredValues(), Mask, R.needsMaskForGaps(), DL) {
2531+
assert(!IG->isReverse() &&
2532+
"Reversed interleave-group with tail folding is not supported.");
2533+
}
2534+
2535+
~VPInterleaveEVLRecipe() override = default;
2536+
2537+
VPInterleaveEVLRecipe *clone() override {
2538+
llvm_unreachable("cloning not implemented yet");
2539+
}
2540+
2541+
VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2542+
2543+
/// The VPValue of the explicit vector length.
2544+
VPValue *getEVL() const { return getOperand(1); }
2545+
2546+
/// Generate the wide load or store, and shuffles.
2547+
void execute(VPTransformState &State) override;
2548+
2549+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2550+
/// Print the recipe.
2551+
void print(raw_ostream &O, const Twine &Indent,
2552+
VPSlotTracker &SlotTracker) const override;
2553+
#endif
2554+
2555+
/// The recipe only uses the first lane of the address and of the EVL operand.
24662556
bool onlyFirstLaneUsed(const VPValue *Op) const override {
24672557
assert(is_contained(operands(), Op) &&
24682558
"Op must be an operand of the recipe");
2469-
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2559+
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op) ||
2560+
Op == getEVL();
24702561
}
24712562

2472-
Instruction *getInsertPos() const { return IG->getInsertPos(); }
2563+
unsigned getNumStoreOperands() const override {
2564+
return getNumOperands() - (HasMask ? 3 : 2);
2565+
}
2566+
2567+
ArrayRef<VPValue *> getStoredValues() const override {
2568+
// The first operand is the address, and the second operand is EVL, followed
2569+
// by the stored values, followed by an optional mask.
2570+
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
2571+
.slice(2, getNumStoreOperands());
2572+
}
24732573
};
24742574

24752575
/// A recipe to represent inloop reduction operations, performing a reduction on

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
295295
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
296296
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
297297
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
298-
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
298+
.Case<VPInterleaveRecipe, VPInterleaveEVLRecipe>([V](const auto *R) {
299299
// TODO: Use info from interleave group.
300300
return V->getUnderlyingValue()->getType();
301301
})

0 commit comments

Comments
 (0)