Skip to content

Commit f92563f

Browse files
committed
[VectorUtils][X86] De-templatize scaleShuffleMask and 2 X86 shuffle mask helpers and move their implementation to cpp files
Summary: These were templated because SelectionDAG uses int masks for shuffles while IR used unsigned masks for shuffles. Now that D72467 has landed, we have an int mask version of IRBuilder::CreateShuffleVector, so just use int instead of a template. Reviewers: spatel, efriedma, RKSimon. Reviewed By: efriedma. Subscribers: hiraditya, llvm-commits. Differential Revision: https://reviews.llvm.org/D77183
1 parent 15f34ff commit f92563f

File tree

7 files changed

+84
-74
lines changed

7 files changed

+84
-74
lines changed

llvm/include/llvm/Analysis/VectorUtils.h

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -339,22 +339,8 @@ bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);
339339
///
340340
/// This is the reverse process of "canWidenShuffleElements", but can always
341341
/// succeed.
342-
template <typename T>
343-
void scaleShuffleMask(size_t Scale, ArrayRef<T> Mask,
344-
SmallVectorImpl<T> &ScaledMask) {
345-
assert(Scale > 0 && "Unexpected scaling factor");
346-
347-
// Fast-path: if no scaling, then it is just a copy.
348-
if (Scale == 1) {
349-
ScaledMask.assign(Mask.begin(), Mask.end());
350-
return;
351-
}
352-
353-
ScaledMask.clear();
354-
for (int MaskElt : Mask)
355-
for (int ScaleElt = 0; ScaleElt != (int)Scale; ++ScaleElt)
356-
ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + ScaleElt);
357-
}
342+
void scaleShuffleMask(size_t Scale, ArrayRef<int> Mask,
343+
SmallVectorImpl<int> &ScaledMask);
358344

359345
/// Compute a map of integer instructions to their minimum legal type
360346
/// size.

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,22 @@ bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
397397
return false;
398398
}
399399

400+
void llvm::scaleShuffleMask(size_t Scale, ArrayRef<int> Mask,
401+
SmallVectorImpl<int> &ScaledMask) {
402+
assert(Scale > 0 && "Unexpected scaling factor");
403+
404+
// Fast-path: if no scaling, then it is just a copy.
405+
if (Scale == 1) {
406+
ScaledMask.assign(Mask.begin(), Mask.end());
407+
return;
408+
}
409+
410+
ScaledMask.clear();
411+
for (int MaskElt : Mask)
412+
for (int ScaleElt = 0; ScaleElt != (int)Scale; ++ScaleElt)
413+
ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + ScaleElt);
414+
}
415+
400416
MapVector<Instruction *, uint64_t>
401417
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
402418
const TargetTransformInfo *TTI) {

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19815,8 +19815,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
1981519815
ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
1981619816
SmallVector<int, 8> InnerMask;
1981719817
SmallVector<int, 8> OuterMask;
19818-
scaleShuffleMask<int>(InnerScale, InnerSVN->getMask(), InnerMask);
19819-
scaleShuffleMask<int>(OuterScale, SVN->getMask(), OuterMask);
19818+
scaleShuffleMask(InnerScale, InnerSVN->getMask(), InnerMask);
19819+
scaleShuffleMask(OuterScale, SVN->getMask(), OuterMask);
1982019820

1982119821
// Merge the shuffle masks.
1982219822
SmallVector<int, 8> NewMask;

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6133,6 +6133,35 @@ static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
61336133
return SDValue();
61346134
}
61356135

6136+
void llvm::createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
6137+
bool Lo, bool Unary) {
6138+
assert(Mask.empty() && "Expected an empty shuffle mask vector");
6139+
int NumElts = VT.getVectorNumElements();
6140+
int NumEltsInLane = 128 / VT.getScalarSizeInBits();
6141+
for (int i = 0; i < NumElts; ++i) {
6142+
unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
6143+
int Pos = (i % NumEltsInLane) / 2 + LaneStart;
6144+
Pos += (Unary ? 0 : NumElts * (i % 2));
6145+
Pos += (Lo ? 0 : NumEltsInLane / 2);
6146+
Mask.push_back(Pos);
6147+
}
6148+
}
6149+
6150+
/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
6151+
/// imposed by AVX and specific to the unary pattern. Example:
6152+
/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
6153+
/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
6154+
void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
6155+
bool Lo) {
6156+
assert(Mask.empty() && "Expected an empty shuffle mask vector");
6157+
int NumElts = VT.getVectorNumElements();
6158+
for (int i = 0; i < NumElts; ++i) {
6159+
int Pos = i / 2;
6160+
Pos += (Lo ? 0 : NumElts / 2);
6161+
Mask.push_back(Pos);
6162+
}
6163+
}
6164+
61366165
/// Returns a vector_shuffle node for an unpackl operation.
61376166
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
61386167
SDValue V1, SDValue V2) {
@@ -7320,8 +7349,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
73207349

73217350
size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
73227351
SmallVector<int, 64> Mask0, Mask1;
7323-
scaleShuffleMask<int>(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
7324-
scaleShuffleMask<int>(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
7352+
scaleShuffleMask(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
7353+
scaleShuffleMask(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
73257354
for (size_t i = 0; i != MaskSize; ++i) {
73267355
if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
73277356
Mask.push_back(SM_SentinelUndef);
@@ -7379,7 +7408,7 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
73797408
if ((NumSubElts % SubMask.size()) == 0) {
73807409
int Scale = NumSubElts / SubMask.size();
73817410
SmallVector<int,64> ScaledSubMask;
7382-
scaleShuffleMask<int>(Scale, SubMask, ScaledSubMask);
7411+
scaleShuffleMask(Scale, SubMask, ScaledSubMask);
73837412
SubMask = ScaledSubMask;
73847413
} else {
73857414
int Scale = SubMask.size() / NumSubElts;
@@ -16279,7 +16308,7 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1627916308
SmallVector<int, 2> RepeatedMask;
1628016309
if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
1628116310
SmallVector<int, 4> PSHUFDMask;
16282-
scaleShuffleMask<int>(2, RepeatedMask, PSHUFDMask);
16311+
scaleShuffleMask(2, RepeatedMask, PSHUFDMask);
1628316312
return DAG.getBitcast(
1628416313
MVT::v4i64,
1628516314
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
@@ -16928,7 +16957,7 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
1692816957
SmallVector<int, 2> Widened256Mask;
1692916958
if (canWidenShuffleElements(Widened128Mask, Widened256Mask)) {
1693016959
Widened128Mask.clear();
16931-
llvm::scaleShuffleMask<int>(2, Widened256Mask, Widened128Mask);
16960+
llvm::scaleShuffleMask(2, Widened256Mask, Widened128Mask);
1693216961
}
1693316962

1693416963
// Try to lower to vshuf64x2/vshuf32x4.
@@ -17079,7 +17108,7 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1707917108
SmallVector<int, 2> Repeated128Mask;
1708017109
if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) {
1708117110
SmallVector<int, 4> PSHUFDMask;
17082-
scaleShuffleMask<int>(2, Repeated128Mask, PSHUFDMask);
17111+
scaleShuffleMask(2, Repeated128Mask, PSHUFDMask);
1708317112
return DAG.getBitcast(
1708417113
MVT::v8i64,
1708517114
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32,
@@ -20166,7 +20195,7 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
2016620195
// Scale shuffle mask to avoid bitcasts and help ComputeNumSignBits.
2016720196
SmallVector<int, 64> Mask;
2016820197
int Scale = 64 / OutVT.getScalarSizeInBits();
20169-
scaleShuffleMask<int>(Scale, ArrayRef<int>({ 0, 2, 1, 3 }), Mask);
20198+
scaleShuffleMask(Scale, { 0, 2, 1, 3 }, Mask);
2017020199
Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask);
2017120200

2017220201
if (DstVT.is256BitVector())
@@ -33612,7 +33641,7 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
3361233641
// Narrow the repeated mask to create 32-bit element permutes.
3361333642
SmallVector<int, 4> WordMask = RepeatedMask;
3361433643
if (MaskScalarSizeInBits == 64)
33615-
scaleShuffleMask<int>(2, RepeatedMask, WordMask);
33644+
scaleShuffleMask(2, RepeatedMask, WordMask);
3361633645

3361733646
Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
3361833647
ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
@@ -34065,7 +34094,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3406534094
if (BaseMaskEltSizeInBits > 64) {
3406634095
assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
3406734096
int MaskScale = BaseMaskEltSizeInBits / 64;
34068-
scaleShuffleMask<int>(MaskScale, BaseMask, Mask);
34097+
scaleShuffleMask(MaskScale, BaseMask, Mask);
3406934098
} else {
3407034099
Mask = SmallVector<int, 64>(BaseMask.begin(), BaseMask.end());
3407134100
}
@@ -38189,7 +38218,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
3818938218
if ((NumSrcElts % Mask.size()) == 0) {
3819038219
SmallVector<int, 16> ScaledMask;
3819138220
int Scale = NumSrcElts / Mask.size();
38192-
scaleShuffleMask<int>(Scale, Mask, ScaledMask);
38221+
scaleShuffleMask(Scale, Mask, ScaledMask);
3819338222
Mask = std::move(ScaledMask);
3819438223
} else if ((Mask.size() % NumSrcElts) == 0) {
3819538224
// Simplify Mask based on demanded element.

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 3 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1564,35 +1564,14 @@ namespace llvm {
15641564
};
15651565

15661566
/// Generate unpacklo/unpackhi shuffle mask.
1567-
template <typename T = int>
1568-
void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1569-
bool Unary) {
1570-
assert(Mask.empty() && "Expected an empty shuffle mask vector");
1571-
int NumElts = VT.getVectorNumElements();
1572-
int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1573-
for (int i = 0; i < NumElts; ++i) {
1574-
unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1575-
int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1576-
Pos += (Unary ? 0 : NumElts * (i % 2));
1577-
Pos += (Lo ? 0 : NumEltsInLane / 2);
1578-
Mask.push_back(Pos);
1579-
}
1580-
}
1567+
void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
1568+
bool Unary);
15811569

15821570
/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
15831571
/// imposed by AVX and specific to the unary pattern. Example:
15841572
/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
15851573
/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
1586-
template <typename T = int>
1587-
void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo) {
1588-
assert(Mask.empty() && "Expected an empty shuffle mask vector");
1589-
int NumElts = VT.getVectorNumElements();
1590-
for (int i = 0; i < NumElts; ++i) {
1591-
int Pos = i / 2;
1592-
Pos += (Lo ? 0 : NumElts / 2);
1593-
Mask.push_back(Pos);
1594-
}
1595-
}
1574+
void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
15961575

15971576
} // end namespace llvm
15981577

llvm/lib/Target/X86/X86InterleavedAccess.cpp

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -325,19 +325,19 @@ void X86InterleavedAccessGroup::interleave8bitStride4VF8(
325325

326326
MVT VT = MVT::v8i16;
327327
TransposedMatrix.resize(2);
328-
SmallVector<uint32_t, 16> MaskLow;
329-
SmallVector<uint32_t, 32> MaskLowTemp1, MaskLowWord;
330-
SmallVector<uint32_t, 32> MaskHighTemp1, MaskHighWord;
328+
SmallVector<int, 16> MaskLow;
329+
SmallVector<int, 32> MaskLowTemp1, MaskLowWord;
330+
SmallVector<int, 32> MaskHighTemp1, MaskHighWord;
331331

332332
for (unsigned i = 0; i < 8; ++i) {
333333
MaskLow.push_back(i);
334334
MaskLow.push_back(i + 8);
335335
}
336336

337-
createUnpackShuffleMask<uint32_t>(VT, MaskLowTemp1, true, false);
338-
createUnpackShuffleMask<uint32_t>(VT, MaskHighTemp1, false, false);
339-
scaleShuffleMask<uint32_t>(2, MaskHighTemp1, MaskHighWord);
340-
scaleShuffleMask<uint32_t>(2, MaskLowTemp1, MaskLowWord);
337+
createUnpackShuffleMask(VT, MaskLowTemp1, true, false);
338+
createUnpackShuffleMask(VT, MaskHighTemp1, false, false);
339+
scaleShuffleMask(2, MaskHighTemp1, MaskHighWord);
340+
scaleShuffleMask(2, MaskLowTemp1, MaskLowWord);
341341
// IntrVec1Low = c0 m0 c1 m1 c2 m2 c3 m3 c4 m4 c5 m5 c6 m6 c7 m7
342342
// IntrVec2Low = y0 k0 y1 k1 y2 k2 y3 k3 y4 k4 y5 k5 y6 k6 y7 k7
343343
Value *IntrVec1Low =
@@ -367,25 +367,25 @@ void X86InterleavedAccessGroup::interleave8bitStride4(
367367
MVT HalfVT = scaleVectorType(VT);
368368

369369
TransposedMatrix.resize(4);
370-
SmallVector<uint32_t, 32> MaskHigh;
371-
SmallVector<uint32_t, 32> MaskLow;
372-
SmallVector<uint32_t, 32> LowHighMask[2];
373-
SmallVector<uint32_t, 32> MaskHighTemp;
374-
SmallVector<uint32_t, 32> MaskLowTemp;
370+
SmallVector<int, 32> MaskHigh;
371+
SmallVector<int, 32> MaskLow;
372+
SmallVector<int, 32> LowHighMask[2];
373+
SmallVector<int, 32> MaskHighTemp;
374+
SmallVector<int, 32> MaskLowTemp;
375375

376376
// MaskHigh and MaskLow are built in the vpunpckhbw and vpunpcklbw X86
377377
// shuffle pattern.
378378

379-
createUnpackShuffleMask<uint32_t>(VT, MaskLow, true, false);
380-
createUnpackShuffleMask<uint32_t>(VT, MaskHigh, false, false);
379+
createUnpackShuffleMask(VT, MaskLow, true, false);
380+
createUnpackShuffleMask(VT, MaskHigh, false, false);
381381

382382
// MaskHighTemp and MaskLowTemp are built in the vpunpckhwd and vpunpcklwd X86
383383
// shuffle pattern.
384384

385-
createUnpackShuffleMask<uint32_t>(HalfVT, MaskLowTemp, true, false);
386-
createUnpackShuffleMask<uint32_t>(HalfVT, MaskHighTemp, false, false);
387-
scaleShuffleMask<uint32_t>(2, MaskLowTemp, LowHighMask[0]);
388-
scaleShuffleMask<uint32_t>(2, MaskHighTemp, LowHighMask[1]);
385+
createUnpackShuffleMask(HalfVT, MaskLowTemp, true, false);
386+
createUnpackShuffleMask(HalfVT, MaskHighTemp, false, false);
387+
scaleShuffleMask(2, MaskLowTemp, LowHighMask[0]);
388+
scaleShuffleMask(2, MaskHighTemp, LowHighMask[1]);
389389

390390
// IntrVec1Low = c0 m0 c1 m1 ... c7 m7 | c16 m16 c17 m17 ... c23 m23
391391
// IntrVec1High = c8 m8 c9 m9 ... c15 m15 | c24 m24 c25 m25 ... c31 m31

llvm/unittests/Analysis/VectorUtilsTest.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,10 @@ TEST_F(BasicTest, isSplat) {
100100

101101
TEST_F(BasicTest, scaleShuffleMask) {
102102
SmallVector<int, 16> ScaledMask;
103-
scaleShuffleMask<int>(1, {3,2,0,-2}, ScaledMask);
104-
EXPECT_EQ(makeArrayRef<int>(ScaledMask), makeArrayRef<int>({3,2,0,-2}));
105-
scaleShuffleMask<int>(4, {3,2,0,-1}, ScaledMask);
106-
EXPECT_EQ(makeArrayRef<int>(ScaledMask), makeArrayRef<int>({12,13,14,15,8,9,10,11,0,1,2,3,-1,-1,-1,-1}));
103+
scaleShuffleMask(1, {3,2,0,-2}, ScaledMask);
104+
EXPECT_EQ(makeArrayRef(ScaledMask), makeArrayRef({3,2,0,-2}));
105+
scaleShuffleMask(4, {3,2,0,-1}, ScaledMask);
106+
EXPECT_EQ(makeArrayRef(ScaledMask), makeArrayRef({12,13,14,15,8,9,10,11,0,1,2,3,-1,-1,-1,-1}));
107107
}
108108

109109
TEST_F(BasicTest, getSplatIndex) {

0 commit comments

Comments
 (0)