@@ -295,6 +295,38 @@ entry:
295
295
ret <4 x i32 > %0
296
296
}
297
297
298
; Unpredicated 8-bit lane test for @llvm.arm.mve.vqdmlash.v16i8.
; Expected llc output is a single `vqdmlash.s8 q0, q1, r0` followed by `bx lr`.
; The scalar is declared `i8 signext` but is widened with zext before the call.
define arm_aapcs_vfpcc <16 x i8> @test_vqdmlashq_n_s8(<16 x i8> %m1, <16 x i8> %m2, i8 signext %add) {
; CHECK-LABEL: test_vqdmlashq_n_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqdmlash.s8 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
  ; The intrinsic takes its scalar operand as i32.
  %add.wide = zext i8 %add to i32
  %res = tail call <16 x i8> @llvm.arm.mve.vqdmlash.v16i8(<16 x i8> %m1, <16 x i8> %m2, i32 %add.wide)
  ret <16 x i8> %res
}
308
+
309
; Unpredicated 16-bit lane test for @llvm.arm.mve.vqdmlash.v8i16.
; Expected llc output is a single `vqdmlash.s16 q0, q1, r0` followed by `bx lr`.
; The scalar is declared `i16 signext` but is widened with zext before the call.
define arm_aapcs_vfpcc <8 x i16> @test_vqdmlashq_n_s16(<8 x i16> %m1, <8 x i16> %m2, i16 signext %add) {
; CHECK-LABEL: test_vqdmlashq_n_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqdmlash.s16 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
  ; The intrinsic takes its scalar operand as i32.
  %add.wide = zext i16 %add to i32
  %res = tail call <8 x i16> @llvm.arm.mve.vqdmlash.v8i16(<8 x i16> %m1, <8 x i16> %m2, i32 %add.wide)
  ret <8 x i16> %res
}
319
+
320
; Unpredicated 32-bit lane test for @llvm.arm.mve.vqdmlash.v4i32.
; Expected llc output is a single `vqdmlash.s32 q0, q1, r0` followed by `bx lr`.
; The scalar is already i32, so it is passed to the intrinsic unchanged.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmlashq_n_s32(<4 x i32> %m1, <4 x i32> %m2, i32 %add) {
; CHECK-LABEL: test_vqdmlashq_n_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqdmlash.s32 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vqdmlash.v4i32(<4 x i32> %m1, <4 x i32> %m2, i32 %add)
  ret <4 x i32> %res
}
329
+
298
330
define arm_aapcs_vfpcc <16 x i8 > @test_vqrdmlahq_n_s8 (<16 x i8 > %a , <16 x i8 > %b , i8 signext %c ) {
299
331
; CHECK-LABEL: test_vqrdmlahq_n_s8:
300
332
; CHECK: @ %bb.0: @ %entry
@@ -711,6 +743,50 @@ entry:
711
743
ret <4 x i32 > %2
712
744
}
713
745
746
; Predicated 8-bit lane test for @llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1.
; Expected llc output: `vmsr p0, r1`, `vpst`, `vqdmlasht.s8 q0, q1, r0`, `bx lr`.
; The i16 mask %p is widened to i32 and converted to <16 x i1> via pred.i2v.
define arm_aapcs_vfpcc <16 x i8> @test_vqdmlashq_m_n_s8(<16 x i8> %m1, <16 x i8> %m2, i8 signext %add, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlashq_m_n_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqdmlasht.s8 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
  ; Both the scalar operand and the raw mask are passed on as i32.
  %add.wide = zext i8 %add to i32
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1(<16 x i8> %m1, <16 x i8> %m2, i32 %add.wide, <16 x i1> %mask)
  ret <16 x i8> %res
}
760
+
761
; Predicated 16-bit lane test for @llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1.
; Expected llc output: `vmsr p0, r1`, `vpst`, `vqdmlasht.s16 q0, q1, r0`, `bx lr`.
; The i16 mask %p is widened to i32 and converted to <8 x i1> via pred.i2v.
define arm_aapcs_vfpcc <8 x i16> @test_vqdmlashq_m_n_s16(<8 x i16> %m1, <8 x i16> %m2, i16 signext %add, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlashq_m_n_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqdmlasht.s16 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
  ; Both the scalar operand and the raw mask are passed on as i32.
  %add.wide = zext i16 %add to i32
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1(<8 x i16> %m1, <8 x i16> %m2, i32 %add.wide, <8 x i1> %mask)
  ret <8 x i16> %res
}
775
+
776
; Predicated 32-bit lane test for @llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1.
; Expected llc output: `vmsr p0, r1`, `vpst`, `vqdmlasht.s32 q0, q1, r0`, `bx lr`.
; The scalar is already i32; only the i16 mask %p needs widening before pred.i2v.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmlashq_m_n_s32(<4 x i32> %m1, <4 x i32> %m2, i32 %add, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlashq_m_n_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqdmlasht.s32 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1(<4 x i32> %m1, <4 x i32> %m2, i32 %add, <4 x i1> %mask)
  ret <4 x i32> %res
}
789
+
714
790
define arm_aapcs_vfpcc <16 x i8 > @test_vqrdmlahq_m_n_s8 (<16 x i8 > %a , <16 x i8 > %b , i8 signext %c , i16 zeroext %p ) {
715
791
; CHECK-LABEL: test_vqrdmlahq_m_n_s8:
716
792
; CHECK: @ %bb.0: @ %entry
@@ -816,6 +892,9 @@ declare <4 x i32> @llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32>, <4 x i3
816
892
declare <16 x i8 > @llvm.arm.mve.vqdmlah.v16i8 (<16 x i8 >, <16 x i8 >, i32 )
817
893
declare <8 x i16 > @llvm.arm.mve.vqdmlah.v8i16 (<8 x i16 >, <8 x i16 >, i32 )
818
894
declare <4 x i32 > @llvm.arm.mve.vqdmlah.v4i32 (<4 x i32 >, <4 x i32 >, i32 )
895
+ declare <16 x i8 > @llvm.arm.mve.vqdmlash.v16i8 (<16 x i8 >, <16 x i8 >, i32 )
896
+ declare <8 x i16 > @llvm.arm.mve.vqdmlash.v8i16 (<8 x i16 >, <8 x i16 >, i32 )
897
+ declare <4 x i32 > @llvm.arm.mve.vqdmlash.v4i32 (<4 x i32 >, <4 x i32 >, i32 )
819
898
declare <16 x i8 > @llvm.arm.mve.vqrdmlah.v16i8 (<16 x i8 >, <16 x i8 >, i32 )
820
899
declare <8 x i16 > @llvm.arm.mve.vqrdmlah.v8i16 (<8 x i16 >, <8 x i16 >, i32 )
821
900
declare <4 x i32 > @llvm.arm.mve.vqrdmlah.v4i32 (<4 x i32 >, <4 x i32 >, i32 )
@@ -825,6 +904,9 @@ declare <4 x i32> @llvm.arm.mve.vqrdmlash.v4i32(<4 x i32>, <4 x i32>, i32)
825
904
declare <16 x i8 > @llvm.arm.mve.vqdmlah.predicated.v16i8.v16i1 (<16 x i8 >, <16 x i8 >, i32 , <16 x i1 >)
826
905
declare <8 x i16 > @llvm.arm.mve.vqdmlah.predicated.v8i16.v8i1 (<8 x i16 >, <8 x i16 >, i32 , <8 x i1 >)
827
906
declare <4 x i32 > @llvm.arm.mve.vqdmlah.predicated.v4i32.v4i1 (<4 x i32 >, <4 x i32 >, i32 , <4 x i1 >)
907
+ declare <16 x i8 > @llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1 (<16 x i8 >, <16 x i8 >, i32 , <16 x i1 >)
908
+ declare <8 x i16 > @llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1 (<8 x i16 >, <8 x i16 >, i32 , <8 x i1 >)
909
+ declare <4 x i32 > @llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1 (<4 x i32 >, <4 x i32 >, i32 , <4 x i1 >)
828
910
declare <16 x i8 > @llvm.arm.mve.vqrdmlah.predicated.v16i8.v16i1 (<16 x i8 >, <16 x i8 >, i32 , <16 x i1 >)
829
911
declare <8 x i16 > @llvm.arm.mve.vqrdmlah.predicated.v8i16.v8i1 (<8 x i16 >, <8 x i16 >, i32 , <8 x i1 >)
830
912
declare <4 x i32 > @llvm.arm.mve.vqrdmlah.predicated.v4i32.v4i1 (<4 x i32 >, <4 x i32 >, i32 , <4 x i1 >)
0 commit comments