Skip to content

Commit 17e91b7

Browse files
committed
[X86][SSE] combineBitcastvxi1 - add pre-AVX512 v64i1 handling
1 parent eee6a45 commit 17e91b7

File tree

3 files changed

+43
-773
lines changed

3 files changed

+43
-773
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36441,7 +36441,14 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
3644136441
case MVT::v64i1:
3644236442
// If we have AVX512F but not AVX512BW, and the input is truncated from
3644336443
// v64i8 (checked earlier), then split the input and make two pmovmskbs.
36444-
if (Subtarget.hasAVX512() && !Subtarget.hasBWI()) {
36444+
if (Subtarget.hasAVX512()) {
36445+
if (Subtarget.hasBWI())
36446+
return SDValue();
36447+
SExtVT = MVT::v64i8;
36448+
break;
36449+
}
36450+
// Split if this is a <64 x i8> comparison result.
36451+
if (checkBitcastSrcVectorSize(Src, 512)) {
3644536452
SExtVT = MVT::v64i8;
3644636453
break;
3644736454
}

llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -534,28 +534,28 @@ define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x floa
534534
define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
535535
; SSE-LABEL: v64i8:
536536
; SSE: # %bb.0:
537-
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
538-
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
539537
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
540538
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
541-
; SSE-NEXT: pcmpgtb %xmm7, %xmm3
542-
; SSE-NEXT: pcmpgtb %xmm6, %xmm2
543-
; SSE-NEXT: pcmpgtb %xmm5, %xmm1
539+
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
540+
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
544541
; SSE-NEXT: pcmpgtb %xmm4, %xmm0
545-
; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm9
546-
; SSE-NEXT: pand %xmm3, %xmm9
547-
; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm8
548-
; SSE-NEXT: pand %xmm2, %xmm8
542+
; SSE-NEXT: pcmpgtb %xmm5, %xmm1
543+
; SSE-NEXT: pcmpgtb %xmm6, %xmm2
544+
; SSE-NEXT: pcmpgtb %xmm7, %xmm3
549545
; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm11
550-
; SSE-NEXT: pand %xmm1, %xmm11
546+
; SSE-NEXT: pand %xmm0, %xmm11
551547
; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm10
552-
; SSE-NEXT: pand %xmm0, %xmm10
553-
; SSE-NEXT: pmovmskb %xmm10, %eax
554-
; SSE-NEXT: pmovmskb %xmm11, %ecx
548+
; SSE-NEXT: pand %xmm1, %xmm10
549+
; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm9
550+
; SSE-NEXT: pand %xmm2, %xmm9
551+
; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm8
552+
; SSE-NEXT: pand %xmm3, %xmm8
553+
; SSE-NEXT: pmovmskb %xmm11, %eax
554+
; SSE-NEXT: pmovmskb %xmm10, %ecx
555555
; SSE-NEXT: shll $16, %ecx
556556
; SSE-NEXT: orl %eax, %ecx
557-
; SSE-NEXT: pmovmskb %xmm8, %edx
558-
; SSE-NEXT: pmovmskb %xmm9, %eax
557+
; SSE-NEXT: pmovmskb %xmm9, %edx
558+
; SSE-NEXT: pmovmskb %xmm8, %eax
559559
; SSE-NEXT: shll $16, %eax
560560
; SSE-NEXT: orl %edx, %eax
561561
; SSE-NEXT: shlq $32, %rax
@@ -599,12 +599,12 @@ define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
599599
;
600600
; AVX2-LABEL: v64i8:
601601
; AVX2: # %bb.0:
602-
; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
603602
; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
604-
; AVX2-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm2
605-
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
603+
; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
606604
; AVX2-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2
607605
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
606+
; AVX2-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm2
607+
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
608608
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
609609
; AVX2-NEXT: vpmovmskb %ymm1, %eax
610610
; AVX2-NEXT: shlq $32, %rax

0 commit comments

Comments
 (0)