Skip to content

Commit 94b195f

Browse files
committed
[ARM][LowOverheadLoops] Add horizontal reduction support
Add a bit more logic into the 'FalseLaneZeros' tracking to enable horizontal reductions and also make the VADDV variants validForTailPredication. Differential Revision: https://reviews.llvm.org/D76708
1 parent f757ecb commit 94b195f

File tree

4 files changed

+76
-109
lines changed

4 files changed

+76
-109
lines changed

llvm/lib/Target/ARM/ARMInstrMVE.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,7 @@ class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
607607
let Inst{3-1} = Qm{2-0};
608608
let Inst{0} = 0b0;
609609
let horizontalReduction = 1;
610+
let validForTailPredication = 1;
610611
}
611612

612613
def ARMVADDVs : SDNode<"ARMISD::VADDVs", SDTVecReduce>;

llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,12 @@ static bool producesDoubleWidthResult(const MachineInstr &MI) {
538538
return (Flags & ARMII::DoubleWidthResult) != 0;
539539
}
540540

541+
// Returns true if the HorizontalReduction bit is set in the TSFlags of MI's
// instruction descriptor.
static bool isHorizontalReduction(const MachineInstr &MI) {
542+
const MCInstrDesc &MCID = MI.getDesc();
543+
uint64_t Flags = MCID.TSFlags;
544+
return (Flags & ARMII::HorizontalReduction) != 0;
545+
}
546+
541547
// Can this instruction generate a non-zero result when given only zeroed
542548
// operands? This allows us to know that, given operands with false bytes
543549
// zeroed by masked loads, that the result will also contain zeros in those
@@ -569,20 +575,24 @@ static bool canGenerateNonZeros(const MachineInstr &MI) {
569575

570576
// Look at its register uses to see if it can only receive zeros
571577
// into its false lanes which would then produce zeros. Also check that
572-
// the output register is also defined by an FalseLaneZeros instruction
578+
// the output register is also defined by a FalseLanesZero instruction
573579
// so that if tail-predication happens, the lanes that aren't updated will
574580
// still be zeros.
575-
static bool producesFalseLaneZeros(MachineInstr &MI,
581+
static bool producesFalseLanesZero(MachineInstr &MI,
576582
const TargetRegisterClass *QPRs,
577583
const ReachingDefAnalysis &RDA,
578-
InstSet &FalseLaneZeros) {
584+
InstSet &FalseLanesZero) {
579585
if (canGenerateNonZeros(MI))
580586
return false;
587+
588+
bool AllowScalars = isHorizontalReduction(MI);
581589
for (auto &MO : MI.operands()) {
582590
if (!MO.isReg() || !MO.getReg())
583591
continue;
592+
if (!isRegInClass(MO, QPRs) && AllowScalars)
593+
continue;
584594
if (auto *OpDef = RDA.getMIOperand(&MI, MO))
585-
if (FalseLaneZeros.count(OpDef))
595+
if (FalseLanesZero.count(OpDef))
586596
continue;
587597
return false;
588598
}
@@ -613,8 +623,8 @@ bool LowOverheadLoop::ValidateLiveOuts() const {
613623
// loads, stores and other predicated instructions into our Predicated
614624
// set and build from there.
615625
const TargetRegisterClass *QPRs = TRI.getRegClass(ARM::MQPRRegClassID);
616-
SetVector<MachineInstr *> Unknown;
617-
SmallPtrSet<MachineInstr *, 4> FalseLaneZeros;
626+
SetVector<MachineInstr *> FalseLanesUnknown;
627+
SmallPtrSet<MachineInstr *, 4> FalseLanesZero;
618628
SmallPtrSet<MachineInstr *, 4> Predicated;
619629
MachineBasicBlock *MBB = ML.getHeader();
620630

@@ -624,22 +634,31 @@ bool LowOverheadLoop::ValidateLiveOuts() const {
624634
if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE)
625635
continue;
626636

637+
if (isVCTP(&MI) || MI.getOpcode() == ARM::MVE_VPST)
638+
continue;
639+
640+
// Predicated loads will write zeros to the falsely predicated bytes of the
641+
// destination register.
627642
if (isVectorPredicated(&MI)) {
628643
if (MI.mayLoad())
629-
FalseLaneZeros.insert(&MI);
644+
FalseLanesZero.insert(&MI);
630645
Predicated.insert(&MI);
631646
continue;
632647
}
633648

634649
if (MI.getNumDefs() == 0)
635650
continue;
636651

637-
if (producesFalseLaneZeros(MI, QPRs, RDA, FalseLaneZeros))
638-
FalseLaneZeros.insert(&MI);
639-
else if (retainsPreviousHalfElement(MI))
640-
return false;
641-
else
642-
Unknown.insert(&MI);
652+
if (!producesFalseLanesZero(MI, QPRs, RDA, FalseLanesZero)) {
653+
// We require retaining and horizontal operations to operate upon zero'd
654+
// false lanes to ensure the conversion doesn't change the output.
655+
if (retainsPreviousHalfElement(MI) || isHorizontalReduction(MI))
656+
return false;
657+
// Otherwise we need to evaluate this instruction later to see whether
658+
// unknown false lanes will get masked away by their user(s).
659+
FalseLanesUnknown.insert(&MI);
660+
} else if (!isHorizontalReduction(MI))
661+
FalseLanesZero.insert(&MI);
643662
}
644663

645664
auto HasPredicatedUsers = [this](MachineInstr *MI, const MachineOperand &MO,
@@ -655,8 +674,9 @@ bool LowOverheadLoop::ValidateLiveOuts() const {
655674

656675
// Visit the unknowns in reverse so that we can start at the values being
657676
// stored and then we can work towards the leaves, hopefully adding more
658-
// instructions to Predicated.
659-
for (auto *MI : reverse(Unknown)) {
677+
// instructions to Predicated. Successfully terminating the loop means that
678+
// all the unknown values have been found to be masked by predicated user(s).
679+
for (auto *MI : reverse(FalseLanesUnknown)) {
660680
for (auto &MO : MI->operands()) {
661681
if (!isRegInClass(MO, QPRs) || !MO.isDef())
662682
continue;

llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir

Lines changed: 28 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -853,25 +853,14 @@ body: |
853853
; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
854854
; CHECK: t2IT 0, 8, implicit-def $itstate
855855
; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
856-
; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg
857-
; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
858-
; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
859-
; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
860-
; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
861-
; CHECK: dead $lr = t2DLS renamable $r12
862-
; CHECK: $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
856+
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2
863857
; CHECK: bb.1.vector.body:
864858
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
865-
; CHECK: liveins: $r0, $r1, $r2, $r3
866-
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
867-
; CHECK: MVE_VPST 8, implicit $vpr
868-
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2)
859+
; CHECK: liveins: $lr, $r0, $r1
860+
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, killed $noreg :: (load 8 from %ir.lsr.iv17, align 2)
869861
; CHECK: renamable $r12 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg
870-
; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg
871862
; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr)
872-
; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg
873-
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
874-
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1
863+
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1
875864
; CHECK: bb.2.exit:
876865
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
877866
bb.0.entry:
@@ -955,25 +944,14 @@ body: |
955944
; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
956945
; CHECK: t2IT 0, 8, implicit-def $itstate
957946
; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
958-
; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg
959-
; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
960-
; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
961-
; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
962-
; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
963-
; CHECK: dead $lr = t2DLS renamable $r12
964-
; CHECK: $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
947+
; CHECK: $lr = MVE_DLSTP_16 killed renamable $r2
965948
; CHECK: bb.1.vector.body:
966949
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
967-
; CHECK: liveins: $r0, $r1, $r2, $r3
968-
; CHECK: renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg
969-
; CHECK: MVE_VPST 8, implicit $vpr
970-
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 2)
950+
; CHECK: liveins: $lr, $r0, $r1
951+
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHU16_post killed renamable $r0, 16, 0, killed $noreg :: (load 16 from %ir.lsr.iv17, align 2)
971952
; CHECK: renamable $r12 = MVE_VADDVs16no_acc killed renamable $q0, 0, $noreg
972-
; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg
973953
; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr)
974-
; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg
975-
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg
976-
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1
954+
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1
977955
; CHECK: bb.2.exit:
978956
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
979957
bb.0.entry:
@@ -1057,25 +1035,14 @@ body: |
10571035
; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
10581036
; CHECK: t2IT 0, 8, implicit-def $itstate
10591037
; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
1060-
; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 7, 14 /* CC::al */, $noreg
1061-
; CHECK: renamable $r3 = t2BICri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg
1062-
; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg
1063-
; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
1064-
; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg
1065-
; CHECK: dead $lr = t2DLS renamable $r12
1066-
; CHECK: $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
1038+
; CHECK: $lr = MVE_DLSTP_8 killed renamable $r2
10671039
; CHECK: bb.1.vector.body:
10681040
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
1069-
; CHECK: liveins: $r0, $r1, $r2, $r3
1070-
; CHECK: renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg
1071-
; CHECK: MVE_VPST 8, implicit $vpr
1072-
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 1)
1041+
; CHECK: liveins: $lr, $r0, $r1
1042+
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRBU8_post killed renamable $r0, 16, 0, killed $noreg :: (load 16 from %ir.lsr.iv17, align 1)
10731043
; CHECK: renamable $r12 = MVE_VADDVs8no_acc killed renamable $q0, 0, $noreg
1074-
; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg
10751044
; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr)
1076-
; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg
1077-
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg
1078-
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1
1045+
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1
10791046
; CHECK: bb.2.exit:
10801047
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
10811048
bb.0.entry:
@@ -1159,25 +1126,14 @@ body: |
11591126
; CHECK: bb.1.vector.ph:
11601127
; CHECK: successors: %bb.2(0x80000000)
11611128
; CHECK: liveins: $r0, $r1
1162-
; CHECK: renamable $r2, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg
1163-
; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
1164-
; CHECK: renamable $r2 = t2BICri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
1165-
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
1166-
; CHECK: renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg
1167-
; CHECK: dead $lr = t2DLS renamable $r2
1168-
; CHECK: $r3 = tMOVr killed $r2, 14 /* CC::al */, $noreg
1129+
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1
11691130
; CHECK: renamable $r2, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
11701131
; CHECK: bb.2.vector.body:
11711132
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
1172-
; CHECK: liveins: $r0, $r1, $r2, $r3
1173-
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg
1174-
; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg
1175-
; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg
1176-
; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg
1177-
; CHECK: MVE_VPST 8, implicit $vpr
1178-
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2)
1133+
; CHECK: liveins: $lr, $r0, $r2
1134+
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, killed $noreg :: (load 8 from %ir.lsr.iv17, align 2)
11791135
; CHECK: renamable $r2 = MVE_VADDVu32acc killed renamable $r2, killed renamable $q0, 0, $noreg
1180-
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2
1136+
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
11811137
; CHECK: bb.3.exit:
11821138
; CHECK: liveins: $r2
11831139
; CHECK: $r0 = tMOVr killed $r2, 14 /* CC::al */, $noreg
@@ -2705,32 +2661,21 @@ body: |
27052661
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
27062662
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
27072663
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
2708-
; CHECK: tCMPi8 renamable $r2, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
2709-
; CHECK: $r3 = tMOVr $r2, 14 /* CC::al */, $noreg
2710-
; CHECK: t2IT 10, 8, implicit-def $itstate
2711-
; CHECK: renamable $r3 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate
27122664
; CHECK: tCMPi8 renamable $r2, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
27132665
; CHECK: tBcc %bb.4, 11 /* CC::lt */, killed $cpsr
27142666
; CHECK: bb.1.while.body.preheader:
27152667
; CHECK: successors: %bb.2(0x80000000)
2716-
; CHECK: liveins: $r0, $r1, $r2, $r3
2717-
; CHECK: renamable $r3, dead $cpsr = tSUBrr renamable $r2, killed renamable $r3, 14 /* CC::al */, $noreg
2718-
; CHECK: renamable $r12 = t2ADDri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
2719-
; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
2720-
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
2668+
; CHECK: liveins: $r0, $r1, $r2
27212669
; CHECK: renamable $r12 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
2722-
; CHECK: $lr = t2DLS killed renamable $lr
2670+
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2
27232671
; CHECK: bb.2.while.body:
27242672
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
2725-
; CHECK: liveins: $lr, $r0, $r1, $r2, $r12
2726-
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
2727-
; CHECK: MVE_VPST 4, implicit $vpr
2728-
; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.tmp3, align 2)
2729-
; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.tmp1, align 2)
2673+
; CHECK: liveins: $lr, $r0, $r1, $r12
2674+
; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU32_post killed renamable $r1, 8, 0, $noreg :: (load 8 from %ir.tmp3, align 2)
2675+
; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU32_post killed renamable $r0, 8, 0, killed $noreg :: (load 8 from %ir.tmp1, align 2)
27302676
; CHECK: renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
2731-
; CHECK: renamable $r2, dead $cpsr = nsw tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
27322677
; CHECK: renamable $r12 = MVE_VADDVu32acc killed renamable $r12, killed renamable $q0, 0, $noreg
2733-
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
2678+
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
27342679
; CHECK: bb.3.while.end:
27352680
; CHECK: liveins: $r12
27362681
; CHECK: $r0 = tMOVr killed $r12, 14 /* CC::al */, $noreg
@@ -2831,33 +2776,22 @@ body: |
28312776
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
28322777
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
28332778
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
2834-
; CHECK: tCMPi8 renamable $r2, 8, 14 /* CC::al */, $noreg, implicit-def $cpsr
2835-
; CHECK: $r3 = tMOVr $r2, 14 /* CC::al */, $noreg
2836-
; CHECK: t2IT 10, 8, implicit-def $itstate
2837-
; CHECK: renamable $r3 = tMOVi8 $noreg, 8, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate
28382779
; CHECK: tCMPi8 renamable $r2, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
28392780
; CHECK: tBcc %bb.4, 11 /* CC::lt */, killed $cpsr
28402781
; CHECK: bb.1.while.body.preheader:
28412782
; CHECK: successors: %bb.2(0x80000000)
2842-
; CHECK: liveins: $r0, $r1, $r2, $r3
2843-
; CHECK: renamable $r3, dead $cpsr = tSUBrr renamable $r2, killed renamable $r3, 14 /* CC::al */, $noreg
2844-
; CHECK: renamable $r12 = t2ADDri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg
2845-
; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
2846-
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg
2783+
; CHECK: liveins: $r0, $r1, $r2
28472784
; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
2848-
; CHECK: $lr = t2DLS killed renamable $lr
2785+
; CHECK: $lr = MVE_DLSTP_16 killed renamable $r2
28492786
; CHECK: bb.2.while.body:
28502787
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
2851-
; CHECK: liveins: $lr, $r0, $r1, $r2, $r3
2852-
; CHECK: renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg
2853-
; CHECK: MVE_VPST 4, implicit $vpr
2854-
; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.tmp3, align 2)
2855-
; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.tmp1, align 2)
2788+
; CHECK: liveins: $lr, $r0, $r1, $r3
2789+
; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.tmp3, align 2)
2790+
; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 0, killed $noreg :: (load 16 from %ir.tmp1, align 2)
28562791
; CHECK: renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
2857-
; CHECK: renamable $r2, dead $cpsr = nsw tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg
28582792
; CHECK: renamable $r12 = MVE_VADDVu16no_acc killed renamable $q0, 0, $noreg
28592793
; CHECK: renamable $r3 = t2UXTAH killed renamable $r3, killed renamable $r12, 0, 14 /* CC::al */, $noreg
2860-
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
2794+
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
28612795
; CHECK: bb.3.while.end:
28622796
; CHECK: liveins: $r3
28632797
; CHECK: $r0 = tMOVr killed $r3, 14 /* CC::al */, $noreg

llvm/unittests/Target/ARM/MachineInstrTest.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,18 @@ TEST(MachineInstrValidTailPredication, IsCorrect) {
407407
case MVE_VADD_qr_i16:
408408
case MVE_VADD_qr_i32:
409409
case MVE_VADD_qr_i8:
410+
case MVE_VADDVs16acc:
411+
case MVE_VADDVs16no_acc:
412+
case MVE_VADDVs32acc:
413+
case MVE_VADDVs32no_acc:
414+
case MVE_VADDVs8acc:
415+
case MVE_VADDVs8no_acc:
416+
case MVE_VADDVu16acc:
417+
case MVE_VADDVu16no_acc:
418+
case MVE_VADDVu32acc:
419+
case MVE_VADDVu32no_acc:
420+
case MVE_VADDVu8acc:
421+
case MVE_VADDVu8no_acc:
410422
case MVE_VADDf16:
411423
case MVE_VADDf32:
412424
case MVE_VADDi16:

0 commit comments

Comments
 (0)