@@ -2680,9 +2680,12 @@ define <32 x i16> @avx512_psllv_w_512_undef(<32 x i16> %v) {
2680
2680
2681
2681
define <8 x i16 > @sse2_psrai_w_128_masked (<8 x i16 > %v , i32 %a ) {
2682
2682
; CHECK-LABEL: @sse2_psrai_w_128_masked(
2683
- ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 15
2684
- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> [[V:%.*]], i32 [[TMP1]])
2685
- ; CHECK-NEXT: ret <8 x i16> [[TMP2]]
2683
+ ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
2684
+ ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
2685
+ ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
2686
+ ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
2687
+ ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[DOTSPLAT]]
2688
+ ; CHECK-NEXT: ret <8 x i16> [[TMP3]]
2686
2689
;
2687
2690
%1 = and i32 %a , 15
2688
2691
%2 = tail call <8 x i16 > @llvm.x86.sse2.psrai.w (<8 x i16 > %v , i32 %1 )
@@ -2692,7 +2695,9 @@ define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) {
2692
2695
define <8 x i32 > @avx2_psrai_d_256_masked (<8 x i32 > %v , i32 %a ) {
2693
2696
; CHECK-LABEL: @avx2_psrai_d_256_masked(
2694
2697
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31
2695
- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> [[V:%.*]], i32 [[TMP1]])
2698
+ ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 0
2699
+ ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
2700
+ ; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[DOTSPLAT]]
2696
2701
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
2697
2702
;
2698
2703
%1 = and i32 %a , 31
@@ -2703,8 +2708,11 @@ define <8 x i32> @avx2_psrai_d_256_masked(<8 x i32> %v, i32 %a) {
2703
2708
define <8 x i64 > @avx512_psrai_q_512_masked (<8 x i64 > %v , i32 %a ) {
2704
2709
; CHECK-LABEL: @avx512_psrai_q_512_masked(
2705
2710
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63
2706
- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[V:%.*]], i32 [[TMP1]])
2707
- ; CHECK-NEXT: ret <8 x i64> [[TMP2]]
2711
+ ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
2712
+ ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0
2713
+ ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
2714
+ ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[DOTSPLAT]]
2715
+ ; CHECK-NEXT: ret <8 x i64> [[TMP3]]
2708
2716
;
2709
2717
%1 = and i32 %a , 63
2710
2718
%2 = tail call <8 x i64 > @llvm.x86.avx512.psrai.q.512 (<8 x i64 > %v , i32 %1 )
@@ -2714,7 +2722,9 @@ define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) {
2714
2722
define <4 x i32 > @sse2_psrli_d_128_masked (<4 x i32 > %v , i32 %a ) {
2715
2723
; CHECK-LABEL: @sse2_psrli_d_128_masked(
2716
2724
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31
2717
- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> [[V:%.*]], i32 [[TMP1]])
2725
+ ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
2726
+ ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
2727
+ ; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[V:%.*]], [[DOTSPLAT]]
2718
2728
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
2719
2729
;
2720
2730
%1 = and i32 %a , 31
@@ -2725,8 +2735,11 @@ define <4 x i32> @sse2_psrli_d_128_masked(<4 x i32> %v, i32 %a) {
2725
2735
define <4 x i64 > @avx2_psrli_q_256_masked (<4 x i64 > %v , i32 %a ) {
2726
2736
; CHECK-LABEL: @avx2_psrli_q_256_masked(
2727
2737
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63
2728
- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> [[V:%.*]], i32 [[TMP1]])
2729
- ; CHECK-NEXT: ret <4 x i64> [[TMP2]]
2738
+ ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
2739
+ ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TMP2]], i32 0
2740
+ ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
2741
+ ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[DOTSPLAT]]
2742
+ ; CHECK-NEXT: ret <4 x i64> [[TMP3]]
2730
2743
;
2731
2744
%1 = and i32 %a , 63
2732
2745
%2 = tail call <4 x i64 > @llvm.x86.avx2.psrli.q (<4 x i64 > %v , i32 %1 )
@@ -2735,9 +2748,12 @@ define <4 x i64> @avx2_psrli_q_256_masked(<4 x i64> %v, i32 %a) {
2735
2748
2736
2749
define <32 x i16 > @avx512_psrli_w_512_masked (<32 x i16 > %v , i32 %a ) {
2737
2750
; CHECK-LABEL: @avx512_psrli_w_512_masked(
2738
- ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 15
2739
- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> [[V:%.*]], i32 [[TMP1]])
2740
- ; CHECK-NEXT: ret <32 x i16> [[TMP2]]
2751
+ ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
2752
+ ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
2753
+ ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> undef, i16 [[TMP2]], i32 0
2754
+ ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> undef, <32 x i32> zeroinitializer
2755
+ ; CHECK-NEXT: [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[DOTSPLAT]]
2756
+ ; CHECK-NEXT: ret <32 x i16> [[TMP3]]
2741
2757
;
2742
2758
%1 = and i32 %a , 15
2743
2759
%2 = tail call <32 x i16 > @llvm.x86.avx512.psrli.w.512 (<32 x i16 > %v , i32 %1 )
@@ -2747,8 +2763,11 @@ define <32 x i16> @avx512_psrli_w_512_masked(<32 x i16> %v, i32 %a) {
2747
2763
define <2 x i64 > @sse2_pslli_q_128_masked (<2 x i64 > %v , i32 %a ) {
2748
2764
; CHECK-LABEL: @sse2_pslli_q_128_masked(
2749
2765
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63
2750
- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> [[V:%.*]], i32 [[TMP1]])
2751
- ; CHECK-NEXT: ret <2 x i64> [[TMP2]]
2766
+ ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
2767
+ ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
2768
+ ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer
2769
+ ; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[DOTSPLAT]]
2770
+ ; CHECK-NEXT: ret <2 x i64> [[TMP3]]
2752
2771
;
2753
2772
%1 = and i32 %a , 63
2754
2773
%2 = tail call <2 x i64 > @llvm.x86.sse2.pslli.q (<2 x i64 > %v , i32 %1 )
@@ -2757,9 +2776,12 @@ define <2 x i64> @sse2_pslli_q_128_masked(<2 x i64> %v, i32 %a) {
2757
2776
2758
2777
define <16 x i16 > @avx2_pslli_w_256_masked (<16 x i16 > %v , i32 %a ) {
2759
2778
; CHECK-LABEL: @avx2_pslli_w_256_masked(
2760
- ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 15
2761
- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> [[V:%.*]], i32 [[TMP1]])
2762
- ; CHECK-NEXT: ret <16 x i16> [[TMP2]]
2779
+ ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
2780
+ ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
2781
+ ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> undef, i16 [[TMP2]], i32 0
2782
+ ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> undef, <16 x i32> zeroinitializer
2783
+ ; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[DOTSPLAT]]
2784
+ ; CHECK-NEXT: ret <16 x i16> [[TMP3]]
2763
2785
;
2764
2786
%1 = and i32 %a , 15
2765
2787
%2 = tail call <16 x i16 > @llvm.x86.avx2.pslli.w (<16 x i16 > %v , i32 %1 )
@@ -2769,7 +2791,9 @@ define <16 x i16> @avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) {
2769
2791
define <16 x i32 > @avx512_pslli_d_512_masked (<16 x i32 > %v , i32 %a ) {
2770
2792
; CHECK-LABEL: @avx512_pslli_d_512_masked(
2771
2793
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31
2772
- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[V:%.*]], i32 [[TMP1]])
2794
+ ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP1]], i32 0
2795
+ ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer
2796
+ ; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i32> [[V:%.*]], [[DOTSPLAT]]
2773
2797
; CHECK-NEXT: ret <16 x i32> [[TMP2]]
2774
2798
;
2775
2799
%1 = and i32 %a , 31
0 commit comments