Skip to content

Commit 26d70f7

Browse files
committed
[X86][SSE] Add support for combining target shuffles to UNPCKL/UNPCKH.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288663 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 55f84f1 commit 26d70f7

File tree

6 files changed, 79 insertions(+), 57 deletions(-)

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 63 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -25846,8 +25846,10 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
2584625846
static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
2584725847
SDValue &V1, SDValue &V2,
2584825848
const X86Subtarget &Subtarget,
25849-
unsigned &Shuffle, MVT &ShuffleVT) {
25849+
unsigned &Shuffle, MVT &ShuffleVT,
25850+
bool IsUnary) {
2585025851
bool FloatDomain = MaskVT.isFloatingPoint();
25852+
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
2585125853

2585225854
if (MaskVT.is128BitVector()) {
2585325855
if (isTargetShuffleEquivalent(Mask, {0, 0}) && FloatDomain) {
@@ -25875,33 +25877,65 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
2587525877
ShuffleVT = MVT::v4f32;
2587625878
return true;
2587725879
}
25878-
if (isTargetShuffleEquivalent(Mask, {0, 0, 1, 1}) && FloatDomain) {
25879-
V2 = V1;
25880-
Shuffle = X86ISD::UNPCKL;
25881-
ShuffleVT = MVT::v4f32;
25882-
return true;
25883-
}
25884-
if (isTargetShuffleEquivalent(Mask, {2, 2, 3, 3}) && FloatDomain) {
25885-
V2 = V1;
25886-
Shuffle = X86ISD::UNPCKH;
25887-
ShuffleVT = MVT::v4f32;
25888-
return true;
25889-
}
25890-
if (isTargetShuffleEquivalent(Mask, {0, 0, 1, 1, 2, 2, 3, 3}) ||
25891-
isTargetShuffleEquivalent(
25892-
Mask, {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7})) {
25893-
V2 = V1;
25894-
Shuffle = X86ISD::UNPCKL;
25895-
ShuffleVT = Mask.size() == 8 ? MVT::v8i16 : MVT::v16i8;
25896-
return true;
25897-
}
25898-
if (isTargetShuffleEquivalent(Mask, {4, 4, 5, 5, 6, 6, 7, 7}) ||
25899-
isTargetShuffleEquivalent(Mask, {8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
25900-
13, 14, 14, 15, 15})) {
25901-
V2 = V1;
25902-
Shuffle = X86ISD::UNPCKH;
25903-
ShuffleVT = Mask.size() == 8 ? MVT::v8i16 : MVT::v16i8;
25904-
return true;
25880+
}
25881+
25882+
// Attempt to match against either a unary or binary UNPCKL/UNPCKH shuffle.
25883+
if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) ||
25884+
(MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
25885+
(MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) ||
25886+
(MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
25887+
(MaskVT.is512BitVector() && Subtarget.hasAVX512())) {
25888+
MVT LegalVT = MaskVT;
25889+
if (LegalVT.is256BitVector() && !Subtarget.hasAVX2())
25890+
LegalVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
25891+
25892+
SmallVector<int, 64> Unpckl, Unpckh;
25893+
if (IsUnary) {
25894+
createUnpackShuffleMask(MaskVT, Unpckl, true, true);
25895+
if (isTargetShuffleEquivalent(Mask, Unpckl)) {
25896+
V2 = V1;
25897+
Shuffle = X86ISD::UNPCKL;
25898+
ShuffleVT = LegalVT;
25899+
return true;
25900+
}
25901+
25902+
createUnpackShuffleMask(MaskVT, Unpckh, false, true);
25903+
if (isTargetShuffleEquivalent(Mask, Unpckh)) {
25904+
V2 = V1;
25905+
Shuffle = X86ISD::UNPCKH;
25906+
ShuffleVT = LegalVT;
25907+
return true;
25908+
}
25909+
} else {
25910+
createUnpackShuffleMask(MaskVT, Unpckl, true, false);
25911+
if (isTargetShuffleEquivalent(Mask, Unpckl)) {
25912+
Shuffle = X86ISD::UNPCKL;
25913+
ShuffleVT = LegalVT;
25914+
return true;
25915+
}
25916+
25917+
createUnpackShuffleMask(MaskVT, Unpckh, false, false);
25918+
if (isTargetShuffleEquivalent(Mask, Unpckh)) {
25919+
Shuffle = X86ISD::UNPCKH;
25920+
ShuffleVT = LegalVT;
25921+
return true;
25922+
}
25923+
25924+
ShuffleVectorSDNode::commuteMask(Unpckl);
25925+
if (isTargetShuffleEquivalent(Mask, Unpckl)) {
25926+
std::swap(V1, V2);
25927+
Shuffle = X86ISD::UNPCKL;
25928+
ShuffleVT = LegalVT;
25929+
return true;
25930+
}
25931+
25932+
ShuffleVectorSDNode::commuteMask(Unpckh);
25933+
if (isTargetShuffleEquivalent(Mask, Unpckh)) {
25934+
std::swap(V1, V2);
25935+
Shuffle = X86ISD::UNPCKH;
25936+
ShuffleVT = LegalVT;
25937+
return true;
25938+
}
2590525939
}
2590625940
}
2590725941

@@ -26167,7 +26201,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
2616726201
}
2616826202

2616926203
if (matchBinaryVectorShuffle(MaskVT, Mask, V1, V2, Subtarget, Shuffle,
26170-
ShuffleVT)) {
26204+
ShuffleVT, UnaryShuffle)) {
2617126205
if (Depth == 1 && Root.getOpcode() == Shuffle)
2617226206
return false; // Nothing to do!
2617326207
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))

test/CodeGen/X86/combine-srem.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -56,12 +56,11 @@ define <4 x i32> @combine_vec_srem_by_pos1(<4 x i32> %x) {
5656
; SSE-NEXT: andl $7, %eax
5757
; SSE-NEXT: movd %eax, %xmm2
5858
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
59-
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
6059
; SSE-NEXT: pextrd $1, %xmm0, %eax
6160
; SSE-NEXT: andl $3, %eax
6261
; SSE-NEXT: movd %eax, %xmm0
6362
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
64-
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
63+
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
6564
; SSE-NEXT: retq
6665
;
6766
; AVX-LABEL: combine_vec_srem_by_pos1:
@@ -74,12 +73,11 @@ define <4 x i32> @combine_vec_srem_by_pos1(<4 x i32> %x) {
7473
; AVX-NEXT: andl $7, %eax
7574
; AVX-NEXT: vmovd %eax, %xmm2
7675
; AVX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
77-
; AVX-NEXT: vpbroadcastq %xmm1, %xmm1
7876
; AVX-NEXT: vpextrd $1, %xmm0, %eax
7977
; AVX-NEXT: andl $3, %eax
8078
; AVX-NEXT: vmovd %eax, %xmm0
8179
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
82-
; AVX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
80+
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
8381
; AVX-NEXT: retq
8482
%1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
8583
%2 = srem <4 x i32> %1, <i32 1, i32 4, i32 8, i32 16>

test/CodeGen/X86/combine-urem.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -54,12 +54,11 @@ define <4 x i32> @combine_vec_urem_by_pow2b(<4 x i32> %x) {
5454
; SSE-NEXT: andl $7, %eax
5555
; SSE-NEXT: movd %eax, %xmm2
5656
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
57-
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
5857
; SSE-NEXT: pextrd $1, %xmm0, %eax
5958
; SSE-NEXT: andl $3, %eax
6059
; SSE-NEXT: movd %eax, %xmm0
6160
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
62-
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
61+
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
6362
; SSE-NEXT: retq
6463
;
6564
; AVX-LABEL: combine_vec_urem_by_pow2b:
@@ -71,12 +70,11 @@ define <4 x i32> @combine_vec_urem_by_pow2b(<4 x i32> %x) {
7170
; AVX-NEXT: andl $7, %eax
7271
; AVX-NEXT: vmovd %eax, %xmm2
7372
; AVX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
74-
; AVX-NEXT: vpbroadcastq %xmm1, %xmm1
7573
; AVX-NEXT: vpextrd $1, %xmm0, %eax
7674
; AVX-NEXT: andl $3, %eax
7775
; AVX-NEXT: vmovd %eax, %xmm0
7876
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
79-
; AVX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
77+
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
8078
; AVX-NEXT: retq
8179
%1 = urem <4 x i32> %x, <i32 1, i32 4, i32 8, i32 16>
8280
ret <4 x i32> %1

test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -907,14 +907,12 @@ define <16 x i32> @combine_vpermi2var_16i32_identity(<16 x i32> %x0, <16 x i32>
907907
define <16 x float> @combine_vpermt2var_vpermi2var_16f32_as_unpckhps(<16 x float> %a0, <16 x float> %a1) {
908908
; X32-LABEL: combine_vpermt2var_vpermi2var_16f32_as_unpckhps:
909909
; X32: # BB#0:
910-
; X32-NEXT: vmovaps {{.*#+}} zmm2 = [18,2,19,3,22,6,23,7,26,10,27,11,30,14,31,15]
911-
; X32-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
910+
; X32-NEXT: vunpckhps {{.*#+}} zmm0 = zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[14],zmm0[14],zmm1[15],zmm0[15]
912911
; X32-NEXT: retl
913912
;
914913
; X64-LABEL: combine_vpermt2var_vpermi2var_16f32_as_unpckhps:
915914
; X64: # BB#0:
916-
; X64-NEXT: vmovaps {{.*#+}} zmm2 = [18,2,19,3,22,6,23,7,26,10,27,11,30,14,31,15]
917-
; X64-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
915+
; X64-NEXT: vunpckhps {{.*#+}} zmm0 = zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[14],zmm0[14],zmm1[15],zmm0[15]
918916
; X64-NEXT: retq
919917
%res0 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %a0, <16 x i32> <i32 18, i32 2, i32 19, i32 3, i32 22, i32 6, i32 23, i32 7, i32 26, i32 10, i32 27, i32 11, i32 30, i32 14, i32 31, i32 15>, <16 x float> %a1, i16 -1)
920918
ret <16 x float> %res0
@@ -923,14 +921,12 @@ define <16 x float> @combine_vpermt2var_vpermi2var_16f32_as_unpckhps(<16 x float
923921
define <16 x i32> @vpermt2var_vpermi2var_16i32_as_unpckldq(<16 x i32> %a0, <16 x i32> %a1) {
924922
; X32-LABEL: vpermt2var_vpermi2var_16i32_as_unpckldq:
925923
; X32: # BB#0:
926-
; X32-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,1,17,4,20,5,21,8,24,9,25,12,28,13,29]
927-
; X32-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
924+
; X32-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
928925
; X32-NEXT: retl
929926
;
930927
; X64-LABEL: vpermt2var_vpermi2var_16i32_as_unpckldq:
931928
; X64: # BB#0:
932-
; X64-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,1,17,4,20,5,21,8,24,9,25,12,28,13,29]
933-
; X64-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
929+
; X64-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
934930
; X64-NEXT: retq
935931
%res0 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>, <16 x i32> %a1, i16 -1)
936932
ret <16 x i32> %res0

test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -78,14 +78,12 @@ define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_vperm2(<16 x i16> %x0,
7878
define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_unpckhwd(<16 x i16> %a0, <16 x i16> %a1) {
7979
; X32-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpckhwd:
8080
; X32: # BB#0:
81-
; X32-NEXT: vmovdqu16 {{.*#+}} ymm2 = [20,4,21,5,22,6,23,7,28,12,29,13,30,14,31,15]
82-
; X32-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
81+
; X32-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
8382
; X32-NEXT: retl
8483
;
8584
; X64-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpckhwd:
8685
; X64: # BB#0:
87-
; X64-NEXT: vmovdqu16 {{.*#+}} ymm2 = [20,4,21,5,22,6,23,7,28,12,29,13,30,14,31,15]
88-
; X64-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
86+
; X64-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
8987
; X64-NEXT: retq
9088
%res0 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %a0, <16 x i16> <i16 20, i16 4, i16 21, i16 5, i16 22, i16 6, i16 23, i16 7, i16 28, i16 12, i16 29, i16 13, i16 30, i16 14, i16 31, i16 15>, <16 x i16> %a1, i16 -1)
9189
ret <16 x i16> %res0
@@ -94,14 +92,12 @@ define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_unpckhwd(<16 x i16> %a
9492
define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_unpcklwd(<16 x i16> %a0, <16 x i16> %a1) {
9593
; X32-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpcklwd:
9694
; X32: # BB#0:
97-
; X32-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,8,24,9,25,10,26,11,27]
98-
; X32-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
95+
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
9996
; X32-NEXT: retl
10097
;
10198
; X64-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpcklwd:
10299
; X64: # BB#0:
103-
; X64-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,8,24,9,25,10,26,11,27]
104-
; X64-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
100+
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
105101
; X64-NEXT: retq
106102
%res0 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> <i16 0, i16 16, i16 1, i16 17, i16 2, i16 18, i16 3, i16 19, i16 8, i16 24, i16 9, i16 25, i16 10, i16 26, i16 11, i16 27>, <16 x i16> %a0, <16 x i16> %a1, i16 -1)
107103
ret <16 x i16> %res0

test/CodeGen/X86/vector-shuffle-combining-xop.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -230,12 +230,12 @@ define <16 x i8> @combine_vpperm_as_unary_unpckhbw(<16 x i8> %a0, <16 x i8> %a1)
230230
define <16 x i8> @combine_vpperm_as_unpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
231231
; X32-LABEL: combine_vpperm_as_unpckhbw:
232232
; X32: # BB#0:
233-
; X32-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
233+
; X32-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
234234
; X32-NEXT: retl
235235
;
236236
; X64-LABEL: combine_vpperm_as_unpckhbw:
237237
; X64: # BB#0:
238-
; X64-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
238+
; X64-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
239239
; X64-NEXT: retq
240240
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
241241
ret <16 x i8> %res0
@@ -244,12 +244,12 @@ define <16 x i8> @combine_vpperm_as_unpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
244244
define <16 x i8> @combine_vpperm_as_unpcklbw(<16 x i8> %a0, <16 x i8> %a1) {
245245
; X32-LABEL: combine_vpperm_as_unpcklbw:
246246
; X32: # BB#0:
247-
; X32-NEXT: vpperm {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
247+
; X32-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
248248
; X32-NEXT: retl
249249
;
250250
; X64-LABEL: combine_vpperm_as_unpcklbw:
251251
; X64: # BB#0:
252-
; X64-NEXT: vpperm {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
252+
; X64-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
253253
; X64-NEXT: retq
254254
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 16, i8 0, i8 17, i8 1, i8 18, i8 2, i8 19, i8 3, i8 20, i8 4, i8 21, i8 5, i8 22, i8 6, i8 23, i8 7>)
255255
ret <16 x i8> %res0

Commit comments: 0