Skip to content

Commit 8f1651c

Browse files
committed
[ARM,MVE] Add missing tests for vqdmlash intrinsics.
Summary: These tests were accidentally left out of D76123, which added tests for the other three instructions in this small cross-product family (vqdmlah, vqrdmlah, vqrdmlash) but missed vqdmlash.
Reviewers: miyuki
Reviewed By: miyuki
Subscribers: kristof.beyls, dmgreen, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D76714
1 parent 6538b43 commit 8f1651c

File tree

2 files changed

+170
-1
lines changed
  • clang/test/CodeGen/arm-mve-intrinsics
  • llvm/test/CodeGen/Thumb2/mve-intrinsics

2 files changed

+170
-1
lines changed

clang/test/CodeGen/arm-mve-intrinsics/ternary.c

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,47 @@ int32x4_t test_vqdmlahq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
357357
#endif /* POLYMORPHIC */
358358
}
359359

360+
// Exercises vqdmlashq_n_s8 (or the polymorphic vqdmlashq when POLYMORPHIC is
// defined) on int8x16_t vectors with an int8_t scalar `add`. The expected IR
// zero-extends `add` to i32 and passes it, together with both vectors, to
// @llvm.arm.mve.vqdmlash.v16i8.
// CHECK-LABEL: @test_vqdmlashq_n_s8(
361+
// CHECK-NEXT: entry:
362+
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32
363+
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlash.v16i8(<16 x i8> [[M1:%.*]], <16 x i8> [[M2:%.*]], i32 [[TMP0]])
364+
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
365+
//
366+
int8x16_t test_vqdmlashq_n_s8(int8x16_t m1, int8x16_t m2, int8_t add) {
367+
#ifdef POLYMORPHIC
368+
return vqdmlashq(m1, m2, add);
369+
#else /* POLYMORPHIC */
370+
return vqdmlashq_n_s8(m1, m2, add);
371+
#endif /* POLYMORPHIC */
372+
}
373+
374+
// Exercises vqdmlashq_n_s16 (or the polymorphic vqdmlashq when POLYMORPHIC is
// defined) on int16x8_t vectors with an int16_t scalar `add`. The expected IR
// zero-extends `add` to i32 and passes it, together with both vectors, to
// @llvm.arm.mve.vqdmlash.v8i16.
// CHECK-LABEL: @test_vqdmlashq_n_s16(
375+
// CHECK-NEXT: entry:
376+
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[ADD:%.*]] to i32
377+
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlash.v8i16(<8 x i16> [[M1:%.*]], <8 x i16> [[M2:%.*]], i32 [[TMP0]])
378+
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
379+
//
380+
int16x8_t test_vqdmlashq_n_s16(int16x8_t m1, int16x8_t m2, int16_t add) {
381+
#ifdef POLYMORPHIC
382+
return vqdmlashq(m1, m2, add);
383+
#else /* POLYMORPHIC */
384+
return vqdmlashq_n_s16(m1, m2, add);
385+
#endif /* POLYMORPHIC */
386+
}
387+
388+
// Exercises vqdmlashq_n_s32 (or the polymorphic vqdmlashq when POLYMORPHIC is
// defined) on int32x4_t vectors with an int32_t scalar `add`. The expected IR
// passes `add` to @llvm.arm.mve.vqdmlash.v4i32 as an i32 argument with no
// preceding extension instruction.
// CHECK-LABEL: @test_vqdmlashq_n_s32(
389+
// CHECK-NEXT: entry:
390+
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlash.v4i32(<4 x i32> [[M1:%.*]], <4 x i32> [[M2:%.*]], i32 [[ADD:%.*]])
391+
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
392+
//
393+
int32x4_t test_vqdmlashq_n_s32(int32x4_t m1, int32x4_t m2, int32_t add) {
394+
#ifdef POLYMORPHIC
395+
return vqdmlashq(m1, m2, add);
396+
#else /* POLYMORPHIC */
397+
return vqdmlashq_n_s32(m1, m2, add);
398+
#endif /* POLYMORPHIC */
399+
}
400+
360401
// CHECK-LABEL: @test_vqrdmlahq_n_s8(
361402
// CHECK-NEXT: entry:
362403
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
@@ -810,6 +851,53 @@ int32x4_t test_vqdmlahq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, mve_pred16_
810851
#endif /* POLYMORPHIC */
811852
}
812853

854+
// Exercises vqdmlashq_m_n_s8 (or the polymorphic vqdmlashq_m when POLYMORPHIC
// is defined) on int8x16_t vectors with an int8_t scalar `add` and an
// mve_pred16_t predicate `p`. The expected IR zero-extends both `add` and `p`
// to i32, converts the predicate to <16 x i1> via
// @llvm.arm.mve.pred.i2v.v16i1, and calls
// @llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1.
// CHECK-LABEL: @test_vqdmlashq_m_n_s8(
855+
// CHECK-NEXT: entry:
856+
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32
857+
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
858+
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
859+
// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1(<16 x i8> [[M1:%.*]], <16 x i8> [[M2:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
860+
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
861+
//
862+
int8x16_t test_vqdmlashq_m_n_s8(int8x16_t m1, int8x16_t m2, int8_t add, mve_pred16_t p) {
863+
#ifdef POLYMORPHIC
864+
return vqdmlashq_m(m1, m2, add, p);
865+
#else /* POLYMORPHIC */
866+
return vqdmlashq_m_n_s8(m1, m2, add, p);
867+
#endif /* POLYMORPHIC */
868+
}
869+
870+
// Exercises vqdmlashq_m_n_s16 (or the polymorphic vqdmlashq_m when POLYMORPHIC
// is defined) on int16x8_t vectors with an int16_t scalar `add` and an
// mve_pred16_t predicate `p`. The expected IR zero-extends both `add` and `p`
// to i32, converts the predicate to <8 x i1> via
// @llvm.arm.mve.pred.i2v.v8i1, and calls
// @llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1.
// CHECK-LABEL: @test_vqdmlashq_m_n_s16(
871+
// CHECK-NEXT: entry:
872+
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[ADD:%.*]] to i32
873+
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
874+
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
875+
// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1(<8 x i16> [[M1:%.*]], <8 x i16> [[M2:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
876+
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
877+
//
878+
int16x8_t test_vqdmlashq_m_n_s16(int16x8_t m1, int16x8_t m2, int16_t add, mve_pred16_t p) {
879+
#ifdef POLYMORPHIC
880+
return vqdmlashq_m(m1, m2, add, p);
881+
#else /* POLYMORPHIC */
882+
return vqdmlashq_m_n_s16(m1, m2, add, p);
883+
#endif /* POLYMORPHIC */
884+
}
885+
886+
// Exercises vqdmlashq_m_n_s32 (or the polymorphic vqdmlashq_m when POLYMORPHIC
// is defined) on int32x4_t vectors with an int32_t scalar `add` and an
// mve_pred16_t predicate `p`. The expected IR zero-extends only `p` to i32,
// converts it to <4 x i1> via @llvm.arm.mve.pred.i2v.v4i1, and passes `add`
// unextended to @llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1.
// CHECK-LABEL: @test_vqdmlashq_m_n_s32(
887+
// CHECK-NEXT: entry:
888+
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
889+
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
890+
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1(<4 x i32> [[M1:%.*]], <4 x i32> [[M2:%.*]], i32 [[ADD:%.*]], <4 x i1> [[TMP1]])
891+
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
892+
//
893+
int32x4_t test_vqdmlashq_m_n_s32(int32x4_t m1, int32x4_t m2, int32_t add, mve_pred16_t p) {
894+
#ifdef POLYMORPHIC
895+
return vqdmlashq_m(m1, m2, add, p);
896+
#else /* POLYMORPHIC */
897+
return vqdmlashq_m_n_s32(m1, m2, add, p);
898+
#endif /* POLYMORPHIC */
899+
}
900+
813901
// CHECK-LABEL: @test_vqrdmlahq_m_n_s8(
814902
// CHECK-NEXT: entry:
815903
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
@@ -903,4 +991,3 @@ int32x4_t test_vqrdmlashq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, mve_pred1
903991
return vqrdmlashq_m_n_s32(a, b, c, p);
904992
#endif /* POLYMORPHIC */
905993
}
906-

llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,38 @@ entry:
295295
ret <4 x i32> %0
296296
}
297297

298+
; Codegen test for the non-predicated v16i8 vqdmlash intrinsic. The expected
; assembly for the function is `vqdmlash.s8 q0, q1, r0` followed directly by
; `bx lr`.
define arm_aapcs_vfpcc <16 x i8> @test_vqdmlashq_n_s8(<16 x i8> %m1, <16 x i8> %m2, i8 signext %add) {
299+
; CHECK-LABEL: test_vqdmlashq_n_s8:
300+
; CHECK: @ %bb.0: @ %entry
301+
; CHECK-NEXT: vqdmlash.s8 q0, q1, r0
302+
; CHECK-NEXT: bx lr
303+
entry:
304+
%0 = zext i8 %add to i32
305+
%1 = tail call <16 x i8> @llvm.arm.mve.vqdmlash.v16i8(<16 x i8> %m1, <16 x i8> %m2, i32 %0)
306+
ret <16 x i8> %1
307+
}
308+
309+
; Codegen test for the non-predicated v8i16 vqdmlash intrinsic. The expected
; assembly for the function is `vqdmlash.s16 q0, q1, r0` followed directly by
; `bx lr`.
define arm_aapcs_vfpcc <8 x i16> @test_vqdmlashq_n_s16(<8 x i16> %m1, <8 x i16> %m2, i16 signext %add) {
310+
; CHECK-LABEL: test_vqdmlashq_n_s16:
311+
; CHECK: @ %bb.0: @ %entry
312+
; CHECK-NEXT: vqdmlash.s16 q0, q1, r0
313+
; CHECK-NEXT: bx lr
314+
entry:
315+
%0 = zext i16 %add to i32
316+
%1 = tail call <8 x i16> @llvm.arm.mve.vqdmlash.v8i16(<8 x i16> %m1, <8 x i16> %m2, i32 %0)
317+
ret <8 x i16> %1
318+
}
319+
320+
; Codegen test for the non-predicated v4i32 vqdmlash intrinsic; the i32 scalar
; is passed to the intrinsic without an extension. The expected assembly for
; the function is `vqdmlash.s32 q0, q1, r0` followed directly by `bx lr`.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmlashq_n_s32(<4 x i32> %m1, <4 x i32> %m2, i32 %add) {
321+
; CHECK-LABEL: test_vqdmlashq_n_s32:
322+
; CHECK: @ %bb.0: @ %entry
323+
; CHECK-NEXT: vqdmlash.s32 q0, q1, r0
324+
; CHECK-NEXT: bx lr
325+
entry:
326+
%0 = tail call <4 x i32> @llvm.arm.mve.vqdmlash.v4i32(<4 x i32> %m1, <4 x i32> %m2, i32 %add)
327+
ret <4 x i32> %0
328+
}
329+
298330
define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlahq_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c) {
299331
; CHECK-LABEL: test_vqrdmlahq_n_s8:
300332
; CHECK: @ %bb.0: @ %entry
@@ -711,6 +743,50 @@ entry:
711743
ret <4 x i32> %2
712744
}
713745

746+
; Codegen test for the predicated v16i8 vqdmlash intrinsic. The i16 predicate
; %p is zero-extended and converted to <16 x i1> before the call. The expected
; assembly is `vmsr p0, r1`, `vpst`, `vqdmlasht.s8 q0, q1, r0`, then `bx lr`.
define arm_aapcs_vfpcc <16 x i8> @test_vqdmlashq_m_n_s8(<16 x i8> %m1, <16 x i8> %m2, i8 signext %add, i16 zeroext %p) {
747+
; CHECK-LABEL: test_vqdmlashq_m_n_s8:
748+
; CHECK: @ %bb.0: @ %entry
749+
; CHECK-NEXT: vmsr p0, r1
750+
; CHECK-NEXT: vpst
751+
; CHECK-NEXT: vqdmlasht.s8 q0, q1, r0
752+
; CHECK-NEXT: bx lr
753+
entry:
754+
%0 = zext i8 %add to i32
755+
%1 = zext i16 %p to i32
756+
%2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
757+
%3 = tail call <16 x i8> @llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1(<16 x i8> %m1, <16 x i8> %m2, i32 %0, <16 x i1> %2)
758+
ret <16 x i8> %3
759+
}
760+
761+
; Codegen test for the predicated v8i16 vqdmlash intrinsic. The i16 predicate
; %p is zero-extended and converted to <8 x i1> before the call. The expected
; assembly is `vmsr p0, r1`, `vpst`, `vqdmlasht.s16 q0, q1, r0`, then `bx lr`.
define arm_aapcs_vfpcc <8 x i16> @test_vqdmlashq_m_n_s16(<8 x i16> %m1, <8 x i16> %m2, i16 signext %add, i16 zeroext %p) {
762+
; CHECK-LABEL: test_vqdmlashq_m_n_s16:
763+
; CHECK: @ %bb.0: @ %entry
764+
; CHECK-NEXT: vmsr p0, r1
765+
; CHECK-NEXT: vpst
766+
; CHECK-NEXT: vqdmlasht.s16 q0, q1, r0
767+
; CHECK-NEXT: bx lr
768+
entry:
769+
%0 = zext i16 %add to i32
770+
%1 = zext i16 %p to i32
771+
%2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
772+
%3 = tail call <8 x i16> @llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1(<8 x i16> %m1, <8 x i16> %m2, i32 %0, <8 x i1> %2)
773+
ret <8 x i16> %3
774+
}
775+
776+
; Codegen test for the predicated v4i32 vqdmlash intrinsic. The i16 predicate
; %p is zero-extended and converted to <4 x i1>; the i32 scalar %add is passed
; without an extension. The expected assembly is `vmsr p0, r1`, `vpst`,
; `vqdmlasht.s32 q0, q1, r0`, then `bx lr`.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmlashq_m_n_s32(<4 x i32> %m1, <4 x i32> %m2, i32 %add, i16 zeroext %p) {
777+
; CHECK-LABEL: test_vqdmlashq_m_n_s32:
778+
; CHECK: @ %bb.0: @ %entry
779+
; CHECK-NEXT: vmsr p0, r1
780+
; CHECK-NEXT: vpst
781+
; CHECK-NEXT: vqdmlasht.s32 q0, q1, r0
782+
; CHECK-NEXT: bx lr
783+
entry:
784+
%0 = zext i16 %p to i32
785+
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
786+
%2 = tail call <4 x i32> @llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1(<4 x i32> %m1, <4 x i32> %m2, i32 %add, <4 x i1> %1)
787+
ret <4 x i32> %2
788+
}
789+
714790
define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlahq_m_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c, i16 zeroext %p) {
715791
; CHECK-LABEL: test_vqrdmlahq_m_n_s8:
716792
; CHECK: @ %bb.0: @ %entry
@@ -816,6 +892,9 @@ declare <4 x i32> @llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32>, <4 x i3
816892
declare <16 x i8> @llvm.arm.mve.vqdmlah.v16i8(<16 x i8>, <16 x i8>, i32)
817893
declare <8 x i16> @llvm.arm.mve.vqdmlah.v8i16(<8 x i16>, <8 x i16>, i32)
818894
declare <4 x i32> @llvm.arm.mve.vqdmlah.v4i32(<4 x i32>, <4 x i32>, i32)
895+
; Declarations of the non-predicated vqdmlash intrinsics (v16i8, v8i16, v4i32)
; called by the test_vqdmlashq_n_* functions in this file.
declare <16 x i8> @llvm.arm.mve.vqdmlash.v16i8(<16 x i8>, <16 x i8>, i32)
896+
declare <8 x i16> @llvm.arm.mve.vqdmlash.v8i16(<8 x i16>, <8 x i16>, i32)
897+
declare <4 x i32> @llvm.arm.mve.vqdmlash.v4i32(<4 x i32>, <4 x i32>, i32)
819898
declare <16 x i8> @llvm.arm.mve.vqrdmlah.v16i8(<16 x i8>, <16 x i8>, i32)
820899
declare <8 x i16> @llvm.arm.mve.vqrdmlah.v8i16(<8 x i16>, <8 x i16>, i32)
821900
declare <4 x i32> @llvm.arm.mve.vqrdmlah.v4i32(<4 x i32>, <4 x i32>, i32)
@@ -825,6 +904,9 @@ declare <4 x i32> @llvm.arm.mve.vqrdmlash.v4i32(<4 x i32>, <4 x i32>, i32)
825904
declare <16 x i8> @llvm.arm.mve.vqdmlah.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
826905
declare <8 x i16> @llvm.arm.mve.vqdmlah.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
827906
declare <4 x i32> @llvm.arm.mve.vqdmlah.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)
907+
; Declarations of the predicated vqdmlash intrinsics (v16i8, v8i16, v4i32)
; called by the test_vqdmlashq_m_n_* functions in this file; the trailing
; <N x i1> operand is the predicate vector.
declare <16 x i8> @llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
908+
declare <8 x i16> @llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
909+
declare <4 x i32> @llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)
828910
declare <16 x i8> @llvm.arm.mve.vqrdmlah.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
829911
declare <8 x i16> @llvm.arm.mve.vqrdmlah.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
830912
declare <4 x i32> @llvm.arm.mve.vqrdmlah.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)

0 commit comments

Comments
 (0)