@@ -267,7 +267,7 @@ define <vscale x 32 x i16> @interleave4_nxv8i16(<vscale x 8 x i16> %vec0, <vscal
; SME2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; SME2-NEXT: zip { z0.h - z3.h }, { z0.h - z3.h }
; SME2-NEXT: ret
- %retval = call <vscale x 32 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3)
+ %retval = call <vscale x 32 x i16> @llvm.vector.interleave4.nxv32i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3)
ret <vscale x 32 x i16> %retval
}
@@ -540,6 +540,172 @@ define <vscale x 4 x i32> @interleave2_nxv2i32(<vscale x 2 x i32> %vec0, <vscale
ret <vscale x 4 x i32> %retval
}

+ define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() {
+ ; SVE-LABEL: interleave2_same_const_splat_nxv4i16:
+ ; SVE: // %bb.0:
+ ; SVE-NEXT: mov z0.d, #3 // =0x3
+ ; SVE-NEXT: zip2 z1.d, z0.d, z0.d
+ ; SVE-NEXT: zip1 z0.d, z0.d, z0.d
+ ; SVE-NEXT: uzp1 z0.s, z0.s, z1.s
+ ; SVE-NEXT: ret
+ ;
+ ; SME2-LABEL: interleave2_same_const_splat_nxv4i16:
+ ; SME2: // %bb.0:
+ ; SME2-NEXT: mov z0.d, #3 // =0x3
+ ; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
+ ; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
+ ; SME2-NEXT: ret
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
+ ret <vscale x 4 x i16> %retval
+ }
+
+ define <vscale x 4 x i16> @interleave2_diff_const_splat_nxv4i16() {
+ ; SVE-LABEL: interleave2_diff_const_splat_nxv4i16:
+ ; SVE: // %bb.0:
+ ; SVE-NEXT: mov z0.d, #4 // =0x4
+ ; SVE-NEXT: mov z1.d, #3 // =0x3
+ ; SVE-NEXT: zip2 z2.d, z1.d, z0.d
+ ; SVE-NEXT: zip1 z0.d, z1.d, z0.d
+ ; SVE-NEXT: uzp1 z0.s, z0.s, z2.s
+ ; SVE-NEXT: ret
+ ;
+ ; SME2-LABEL: interleave2_diff_const_splat_nxv4i16:
+ ; SME2: // %bb.0:
+ ; SME2-NEXT: mov z0.d, #4 // =0x4
+ ; SME2-NEXT: mov z1.d, #3 // =0x3
+ ; SME2-NEXT: zip { z0.d, z1.d }, z1.d, z0.d
+ ; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
+ ; SME2-NEXT: ret
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 4))
+ ret <vscale x 4 x i16> %retval
+ }
+
+ define <vscale x 4 x i16> @interleave2_same_nonconst_splat_nxv4i16(i16 %a) {
+ ; SVE-LABEL: interleave2_same_nonconst_splat_nxv4i16:
+ ; SVE: // %bb.0:
+ ; SVE-NEXT: // kill: def $w0 killed $w0 def $x0
+ ; SVE-NEXT: mov z0.d, x0
+ ; SVE-NEXT: zip2 z1.d, z0.d, z0.d
+ ; SVE-NEXT: zip1 z0.d, z0.d, z0.d
+ ; SVE-NEXT: uzp1 z0.s, z0.s, z1.s
+ ; SVE-NEXT: ret
+ ;
+ ; SME2-LABEL: interleave2_same_nonconst_splat_nxv4i16:
+ ; SME2: // %bb.0:
+ ; SME2-NEXT: // kill: def $w0 killed $w0 def $x0
+ ; SME2-NEXT: mov z0.d, x0
+ ; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
+ ; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
+ ; SME2-NEXT: ret
+ %ins = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
+ %splat = shufflevector <vscale x 2 x i16> %ins, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat, <vscale x 2 x i16> %splat)
+ ret <vscale x 4 x i16> %retval
+ }
+
+ define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %b) {
+ ; SVE-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
+ ; SVE: // %bb.0:
+ ; SVE-NEXT: // kill: def $w1 killed $w1 def $x1
+ ; SVE-NEXT: // kill: def $w0 killed $w0 def $x0
+ ; SVE-NEXT: mov z0.d, x0
+ ; SVE-NEXT: mov z1.d, x1
+ ; SVE-NEXT: zip2 z2.d, z0.d, z1.d
+ ; SVE-NEXT: zip1 z0.d, z0.d, z1.d
+ ; SVE-NEXT: uzp1 z0.s, z0.s, z2.s
+ ; SVE-NEXT: ret
+ ;
+ ; SME2-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
+ ; SME2: // %bb.0:
+ ; SME2-NEXT: // kill: def $w1 killed $w1 def $x1
+ ; SME2-NEXT: // kill: def $w0 killed $w0 def $x0
+ ; SME2-NEXT: mov z0.d, x0
+ ; SME2-NEXT: mov z1.d, x1
+ ; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z1.d
+ ; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
+ ; SME2-NEXT: ret
+ %ins1 = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
+ %splat1 = shufflevector <vscale x 2 x i16> %ins1, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %ins2 = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %splat2 = shufflevector <vscale x 2 x i16> %ins2, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat1, <vscale x 2 x i16> %splat2)
+ ret <vscale x 4 x i16> %retval
+ }
+
+ define <vscale x 8 x i16> @interleave4_same_const_splat_nxv8i16() {
+ ; SVE-LABEL: interleave4_same_const_splat_nxv8i16:
+ ; SVE: // %bb.0:
+ ; SVE-NEXT: mov z0.d, #3 // =0x3
+ ; SVE-NEXT: zip1 z1.d, z0.d, z0.d
+ ; SVE-NEXT: zip1 z2.d, z1.d, z1.d
+ ; SVE-NEXT: zip2 z1.d, z1.d, z1.d
+ ; SVE-NEXT: uzp1 z2.s, z2.s, z0.s
+ ; SVE-NEXT: uzp1 z2.h, z2.h, z0.h
+ ; SVE-NEXT: uunpklo z2.s, z2.h
+ ; SVE-NEXT: uunpklo z2.d, z2.s
+ ; SVE-NEXT: uzp1 z1.s, z2.s, z1.s
+ ; SVE-NEXT: uzp1 z2.h, z1.h, z0.h
+ ; SVE-NEXT: zip2 z0.d, z0.d, z0.d
+ ; SVE-NEXT: uunpkhi z2.s, z2.h
+ ; SVE-NEXT: zip1 z3.d, z0.d, z0.d
+ ; SVE-NEXT: zip2 z0.d, z0.d, z0.d
+ ; SVE-NEXT: uunpkhi z2.d, z2.s
+ ; SVE-NEXT: uzp1 z2.s, z3.s, z2.s
+ ; SVE-NEXT: uzp1 z2.h, z1.h, z2.h
+ ; SVE-NEXT: uunpkhi z2.s, z2.h
+ ; SVE-NEXT: uunpklo z2.d, z2.s
+ ; SVE-NEXT: uzp1 z0.s, z2.s, z0.s
+ ; SVE-NEXT: uzp1 z0.h, z1.h, z0.h
+ ; SVE-NEXT: ret
+ ;
+ ; SME-ALL-LABEL: interleave4_same_const_splat_nxv8i16:
+ ; SME-ALL: // %bb.0:
+ ; SME-ALL-NEXT: mov z0.d, #3 // =0x3
+ ; SME-ALL-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
+ ; SME-ALL-NEXT: zip { z2.d, z3.d }, z0.d, z0.d
+ ; SME-ALL-NEXT: uzp1 z4.s, z2.s, z0.s
+ ; SME-ALL-NEXT: uzp1 z4.h, z4.h, z0.h
+ ; SME-ALL-NEXT: uunpklo z4.s, z4.h
+ ; SME-ALL-NEXT: uunpklo z4.d, z4.s
+ ; SME-ALL-NEXT: uzp1 z2.s, z4.s, z3.s
+ ; SME-ALL-NEXT: uzp1 z3.h, z2.h, z0.h
+ ; SME-ALL-NEXT: zip { z0.d, z1.d }, z1.d, z1.d
+ ; SME-ALL-NEXT: uunpkhi z3.s, z3.h
+ ; SME-ALL-NEXT: uunpkhi z3.d, z3.s
+ ; SME-ALL-NEXT: uzp1 z3.s, z0.s, z3.s
+ ; SME-ALL-NEXT: uzp1 z3.h, z2.h, z3.h
+ ; SME-ALL-NEXT: uunpkhi z3.s, z3.h
+ ; SME-ALL-NEXT: uunpklo z3.d, z3.s
+ ; SME-ALL-NEXT: uzp1 z0.s, z3.s, z1.s
+ ; SME-ALL-NEXT: uzp1 z0.h, z2.h, z0.h
+ ; SME-ALL-NEXT: ret
+ ;
+ ; SME2-256-LABEL: interleave4_same_const_splat_nxv8i16:
+ ; SME2-256: // %bb.0:
+ ; SME2-256-NEXT: mov z0.d, #3 // =0x3
+ ; SME2-256-NEXT: mov z1.d, z0.d
+ ; SME2-256-NEXT: mov z2.d, z0.d
+ ; SME2-256-NEXT: mov z3.d, z0.d
+ ; SME2-256-NEXT: zip { z0.d - z3.d }, { z0.d - z3.d }
+ ; SME2-256-NEXT: uzp1 z4.s, z0.s, z0.s
+ ; SME2-256-NEXT: uzp1 z4.h, z4.h, z0.h
+ ; SME2-256-NEXT: uunpklo z4.s, z4.h
+ ; SME2-256-NEXT: uunpklo z4.d, z4.s
+ ; SME2-256-NEXT: uzp1 z4.s, z4.s, z1.s
+ ; SME2-256-NEXT: uzp1 z5.h, z4.h, z0.h
+ ; SME2-256-NEXT: uunpkhi z5.s, z5.h
+ ; SME2-256-NEXT: uunpkhi z5.d, z5.s
+ ; SME2-256-NEXT: uzp1 z5.s, z2.s, z5.s
+ ; SME2-256-NEXT: uzp1 z5.h, z4.h, z5.h
+ ; SME2-256-NEXT: uunpkhi z5.s, z5.h
+ ; SME2-256-NEXT: uunpklo z5.d, z5.s
+ ; SME2-256-NEXT: uzp1 z0.s, z5.s, z3.s
+ ; SME2-256-NEXT: uzp1 z0.h, z4.h, z0.h
+ ; SME2-256-NEXT: ret
+ %retval = call <vscale x 8 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
+ ret <vscale x 8 x i16> %retval
+ }
+
; Float declarations
declare <vscale x 4 x half> @llvm.vector.interleave2.nxv4f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 8 x half> @llvm.vector.interleave2.nxv8f16(<vscale x 4 x half>, <vscale x 4 x half>)
@@ -567,3 +733,5 @@ declare <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64>,
declare <vscale x 16 x i8> @llvm.vector.interleave2.nxv16i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
declare <vscale x 8 x i16> @llvm.vector.interleave2.nxv8i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
declare <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+ declare <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+ declare <vscale x 8 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i16>)