Skip to content

Commit c9eaed5

Browse files
committed
[ARM] MVE VMOV.i64
In the original batch of MVE VMOVimm code generation, VMOV.i64 was left out because of the way it was implemented downstream. It turns out to be fairly simple, though. This adds the codegen for it, similar to NEON. Big-endian is technically incorrect in this version; John is fixing that in a NEON patch.
1 parent b469535 commit c9eaed5

File tree

8 files changed

+60
-366
lines changed

8 files changed

+60
-366
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7181,7 +7181,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
71817181
return DAG.getUNDEF(VT);
71827182

71837183
if ((ST->hasNEON() && SplatBitSize <= 64) ||
7184-
(ST->hasMVEIntegerOps() && SplatBitSize <= 32)) {
7184+
(ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
71857185
// Check if an immediate VMOV works.
71867186
EVT VmovVT;
71877187
SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),

llvm/lib/Target/ARM/ARMInstrMVE.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2389,6 +2389,8 @@ let Predicates = [HasMVEInt] in {
23892389
(v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>;
23902390
def : Pat<(v4i32 (ARMvmovImm timm:$simm)),
23912391
(v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>;
2392+
def : Pat<(v2i64 (ARMvmovImm timm:$simm)),
2393+
(v2i64 (MVE_VMOVimmi64 nImmSplatI64:$simm))>;
23922394

23932395
def : Pat<(v8i16 (ARMvmvnImm timm:$simm)),
23942396
(v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>;

llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -251,21 +251,13 @@ define arm_aapcs_vfpcc <2 x i32> @ptr_v2i16_zext(<2 x i16*>* %offptr) {
251251
; CHECK-LABEL: ptr_v2i16_zext:
252252
; CHECK: @ %bb.0: @ %entry
253253
; CHECK-NEXT: ldrd r1, r0, [r0]
254-
; CHECK-NEXT: adr r2, .LCPI9_0
254+
; CHECK-NEXT: vmov.i64 q0, #0xffff
255255
; CHECK-NEXT: ldrh r0, [r0]
256-
; CHECK-NEXT: vldrw.u32 q0, [r2]
257256
; CHECK-NEXT: ldrh r1, [r1]
258257
; CHECK-NEXT: vmov.32 q1[0], r1
259258
; CHECK-NEXT: vmov.32 q1[2], r0
260259
; CHECK-NEXT: vand q0, q1, q0
261260
; CHECK-NEXT: bx lr
262-
; CHECK-NEXT: .p2align 4
263-
; CHECK-NEXT: @ %bb.1:
264-
; CHECK-NEXT: .LCPI9_0:
265-
; CHECK-NEXT: .long 65535 @ 0xffff
266-
; CHECK-NEXT: .long 0 @ 0x0
267-
; CHECK-NEXT: .long 65535 @ 0xffff
268-
; CHECK-NEXT: .long 0 @ 0x0
269261
entry:
270262
%offs = load <2 x i16*>, <2 x i16*>* %offptr, align 4
271263
%gather = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> %offs, i32 2, <2 x i1> <i1 true, i1 true>, <2 x i16> undef)

llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll

Lines changed: 20 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,7 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
408408
; CHECK-LE-NEXT: ldrd lr, r12, [r1]
409409
; CHECK-LE-NEXT: movs r1, #0
410410
; CHECK-LE-NEXT: @ implicit-def: $q1
411+
; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff
411412
; CHECK-LE-NEXT: rsbs.w r3, lr, #0
412413
; CHECK-LE-NEXT: vmov.32 q0[0], lr
413414
; CHECK-LE-NEXT: sbcs.w r3, r1, lr, asr #31
@@ -424,23 +425,21 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
424425
; CHECK-LE-NEXT: bfi r1, lr, #0, #1
425426
; CHECK-LE-NEXT: vmov.32 q0[2], r12
426427
; CHECK-LE-NEXT: and r3, r1, #3
427-
; CHECK-LE-NEXT: adr.w r12, .LCPI7_0
428+
; CHECK-LE-NEXT: mov.w r12, #0
428429
; CHECK-LE-NEXT: lsls r1, r1, #31
429430
; CHECK-LE-NEXT: itt ne
430431
; CHECK-LE-NEXT: ldrne r1, [r2]
431432
; CHECK-LE-NEXT: vmovne.32 q1[0], r1
432433
; CHECK-LE-NEXT: lsls r1, r3, #30
433-
; CHECK-LE-NEXT: vmov r3, s0
434434
; CHECK-LE-NEXT: itt mi
435435
; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
436436
; CHECK-LE-NEXT: vmovmi.32 q1[2], r1
437+
; CHECK-LE-NEXT: vmov r1, s0
437438
; CHECK-LE-NEXT: movs r2, #0
438-
; CHECK-LE-NEXT: vldrw.u32 q2, [r12]
439-
; CHECK-LE-NEXT: mov.w r12, #0
440439
; CHECK-LE-NEXT: vand q1, q1, q2
441-
; CHECK-LE-NEXT: rsbs r1, r3, #0
442-
; CHECK-LE-NEXT: sbcs.w r1, r2, r3, asr #31
440+
; CHECK-LE-NEXT: rsbs r3, r1, #0
443441
; CHECK-LE-NEXT: vmov r3, s2
442+
; CHECK-LE-NEXT: sbcs.w r1, r2, r1, asr #31
444443
; CHECK-LE-NEXT: it lt
445444
; CHECK-LE-NEXT: movlt.w r12, #1
446445
; CHECK-LE-NEXT: rsbs r1, r3, #0
@@ -460,13 +459,6 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
460459
; CHECK-LE-NEXT: vstrmi d3, [r0, #8]
461460
; CHECK-LE-NEXT: add sp, #4
462461
; CHECK-LE-NEXT: pop {r7, pc}
463-
; CHECK-LE-NEXT: .p2align 4
464-
; CHECK-LE-NEXT: @ %bb.1:
465-
; CHECK-LE-NEXT: .LCPI7_0:
466-
; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff
467-
; CHECK-LE-NEXT: .long 0 @ 0x0
468-
; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff
469-
; CHECK-LE-NEXT: .long 0 @ 0x0
470462
;
471463
; CHECK-BE-LABEL: foo_zext_v2i64_v2i32:
472464
; CHECK-BE: @ %bb.0: @ %entry
@@ -511,15 +503,13 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
511503
; CHECK-BE-NEXT: .LBB7_4: @ %else2
512504
; CHECK-BE-NEXT: vrev64.32 q3, q2
513505
; CHECK-BE-NEXT: movs r2, #0
514-
; CHECK-BE-NEXT: vmov r3, s15
515-
; CHECK-BE-NEXT: adr.w r12, .LCPI7_0
516-
; CHECK-BE-NEXT: vldrb.u8 q0, [r12]
506+
; CHECK-BE-NEXT: vmov r1, s15
517507
; CHECK-BE-NEXT: mov.w r12, #0
518-
; CHECK-BE-NEXT: vrev64.8 q2, q0
519-
; CHECK-BE-NEXT: vand q0, q1, q2
520-
; CHECK-BE-NEXT: rsbs r1, r3, #0
521-
; CHECK-BE-NEXT: sbcs.w r1, r2, r3, asr #31
508+
; CHECK-BE-NEXT: vmov.i64 q0, #0xffffffff
509+
; CHECK-BE-NEXT: vand q0, q1, q0
510+
; CHECK-BE-NEXT: rsbs r3, r1, #0
522511
; CHECK-BE-NEXT: vmov r3, s13
512+
; CHECK-BE-NEXT: sbcs.w r1, r2, r1, asr #31
523513
; CHECK-BE-NEXT: it lt
524514
; CHECK-BE-NEXT: movlt.w r12, #1
525515
; CHECK-BE-NEXT: rsbs r1, r3, #0
@@ -539,13 +529,6 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
539529
; CHECK-BE-NEXT: vstrmi d1, [r0, #8]
540530
; CHECK-BE-NEXT: add sp, #4
541531
; CHECK-BE-NEXT: pop {r7, pc}
542-
; CHECK-BE-NEXT: .p2align 4
543-
; CHECK-BE-NEXT: @ %bb.5:
544-
; CHECK-BE-NEXT: .LCPI7_0:
545-
; CHECK-BE-NEXT: .long 0 @ 0x0
546-
; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff
547-
; CHECK-BE-NEXT: .long 0 @ 0x0
548-
; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff
549532
entry:
550533
%0 = load <2 x i32>, <2 x i32>* %mask, align 4
551534
%1 = icmp sgt <2 x i32> %0, zeroinitializer
@@ -565,6 +548,7 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
565548
; CHECK-LE-NEXT: ldrd lr, r12, [r1]
566549
; CHECK-LE-NEXT: movs r1, #0
567550
; CHECK-LE-NEXT: @ implicit-def: $q1
551+
; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff
568552
; CHECK-LE-NEXT: rsbs.w r3, lr, #0
569553
; CHECK-LE-NEXT: vmov.32 q0[0], lr
570554
; CHECK-LE-NEXT: sbcs.w r3, r1, lr, asr #31
@@ -581,23 +565,21 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
581565
; CHECK-LE-NEXT: bfi r1, lr, #0, #1
582566
; CHECK-LE-NEXT: vmov.32 q0[2], r12
583567
; CHECK-LE-NEXT: and r3, r1, #3
584-
; CHECK-LE-NEXT: adr.w r12, .LCPI8_0
568+
; CHECK-LE-NEXT: mov.w r12, #0
585569
; CHECK-LE-NEXT: lsls r1, r1, #31
586570
; CHECK-LE-NEXT: itt ne
587571
; CHECK-LE-NEXT: ldrne r1, [r2]
588572
; CHECK-LE-NEXT: vmovne.32 q1[0], r1
589573
; CHECK-LE-NEXT: lsls r1, r3, #30
590-
; CHECK-LE-NEXT: vmov r3, s0
591574
; CHECK-LE-NEXT: itt mi
592575
; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
593576
; CHECK-LE-NEXT: vmovmi.32 q1[2], r1
577+
; CHECK-LE-NEXT: vmov r1, s0
594578
; CHECK-LE-NEXT: movs r2, #0
595-
; CHECK-LE-NEXT: vldrw.u32 q2, [r12]
596-
; CHECK-LE-NEXT: mov.w r12, #0
597579
; CHECK-LE-NEXT: vand q1, q1, q2
598-
; CHECK-LE-NEXT: rsbs r1, r3, #0
599-
; CHECK-LE-NEXT: sbcs.w r1, r2, r3, asr #31
580+
; CHECK-LE-NEXT: rsbs r3, r1, #0
600581
; CHECK-LE-NEXT: vmov r3, s2
582+
; CHECK-LE-NEXT: sbcs.w r1, r2, r1, asr #31
601583
; CHECK-LE-NEXT: it lt
602584
; CHECK-LE-NEXT: movlt.w r12, #1
603585
; CHECK-LE-NEXT: rsbs r1, r3, #0
@@ -619,13 +601,6 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
619601
; CHECK-LE-NEXT: strdmi r1, r2, [r0, #8]
620602
; CHECK-LE-NEXT: add sp, #4
621603
; CHECK-LE-NEXT: pop {r7, pc}
622-
; CHECK-LE-NEXT: .p2align 4
623-
; CHECK-LE-NEXT: @ %bb.1:
624-
; CHECK-LE-NEXT: .LCPI8_0:
625-
; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff
626-
; CHECK-LE-NEXT: .long 0 @ 0x0
627-
; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff
628-
; CHECK-LE-NEXT: .long 0 @ 0x0
629604
;
630605
; CHECK-BE-LABEL: foo_zext_v2i64_v2i32_unaligned:
631606
; CHECK-BE: @ %bb.0: @ %entry
@@ -670,15 +645,13 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
670645
; CHECK-BE-NEXT: .LBB8_4: @ %else2
671646
; CHECK-BE-NEXT: vrev64.32 q3, q2
672647
; CHECK-BE-NEXT: movs r2, #0
673-
; CHECK-BE-NEXT: vmov r3, s15
674-
; CHECK-BE-NEXT: adr.w r12, .LCPI8_0
675-
; CHECK-BE-NEXT: vldrb.u8 q0, [r12]
648+
; CHECK-BE-NEXT: vmov r1, s15
676649
; CHECK-BE-NEXT: mov.w r12, #0
677-
; CHECK-BE-NEXT: vrev64.8 q2, q0
678-
; CHECK-BE-NEXT: vand q0, q1, q2
679-
; CHECK-BE-NEXT: rsbs r1, r3, #0
680-
; CHECK-BE-NEXT: sbcs.w r1, r2, r3, asr #31
650+
; CHECK-BE-NEXT: vmov.i64 q0, #0xffffffff
651+
; CHECK-BE-NEXT: vand q0, q1, q0
652+
; CHECK-BE-NEXT: rsbs r3, r1, #0
681653
; CHECK-BE-NEXT: vmov r3, s13
654+
; CHECK-BE-NEXT: sbcs.w r1, r2, r1, asr #31
682655
; CHECK-BE-NEXT: it lt
683656
; CHECK-BE-NEXT: movlt.w r12, #1
684657
; CHECK-BE-NEXT: rsbs r1, r3, #0
@@ -700,13 +673,6 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
700673
; CHECK-BE-NEXT: strdmi r2, r1, [r0, #8]
701674
; CHECK-BE-NEXT: add sp, #4
702675
; CHECK-BE-NEXT: pop {r7, pc}
703-
; CHECK-BE-NEXT: .p2align 4
704-
; CHECK-BE-NEXT: @ %bb.5:
705-
; CHECK-BE-NEXT: .LCPI8_0:
706-
; CHECK-BE-NEXT: .long 0 @ 0x0
707-
; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff
708-
; CHECK-BE-NEXT: .long 0 @ 0x0
709-
; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff
710676
entry:
711677
%0 = load <2 x i32>, <2 x i32>* %mask, align 4
712678
%1 = icmp sgt <2 x i32> %0, zeroinitializer

llvm/test/CodeGen/Thumb2/mve-sext.ll

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -430,17 +430,9 @@ entry:
430430
define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) {
431431
; CHECK-LABEL: zext_v2i32_v2i64:
432432
; CHECK: @ %bb.0: @ %entry
433-
; CHECK-NEXT: adr r0, .LCPI20_0
434-
; CHECK-NEXT: vldrw.u32 q1, [r0]
433+
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
435434
; CHECK-NEXT: vand q0, q0, q1
436435
; CHECK-NEXT: bx lr
437-
; CHECK-NEXT: .p2align 4
438-
; CHECK-NEXT: @ %bb.1:
439-
; CHECK-NEXT: .LCPI20_0:
440-
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
441-
; CHECK-NEXT: .long 0 @ 0x0
442-
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
443-
; CHECK-NEXT: .long 0 @ 0x0
444436
entry:
445437
%0 = zext <2 x i32> %src to <2 x i64>
446438
ret <2 x i64> %0

0 commit comments

Comments
 (0)