@@ -61,6 +61,8 @@ using namespace llvm;
61
61
62
62
namespace {
63
63
64
+ using InstSet = SmallPtrSetImpl<MachineInstr *>;
65
+
64
66
class PostOrderLoopTraversal {
65
67
MachineLoop &ML;
66
68
MachineLoopInfo &MLI;
@@ -518,6 +520,59 @@ static bool isRegInClass(const MachineOperand &MO,
518
520
return MO.isReg () && MO.getReg () && Class->contains (MO.getReg ());
519
521
}
520
522
523
+ // Can this instruction generate a non-zero result when given only zeroed
524
+ // operands? This allows us to know that, given operands with false bytes
525
+ // zeroed by masked loads, that the result will also contain zeros in those
526
+ // bytes.
527
+ static bool canGenerateNonZeros (const MachineInstr &MI) {
528
+ switch (MI.getOpcode ()) {
529
+ default :
530
+ break ;
531
+ // FIXME: FP minus 0?
532
+ // case ARM::MVE_VNEGf16:
533
+ // case ARM::MVE_VNEGf32:
534
+ case ARM::MVE_VMVN:
535
+ case ARM::MVE_VORN:
536
+ case ARM::MVE_VCLZs8:
537
+ case ARM::MVE_VCLZs16:
538
+ case ARM::MVE_VCLZs32:
539
+ return true ;
540
+ }
541
+ return false ;
542
+ }
543
+
544
+ // MVE 'narrowing' operate on half a lane, reading from half and writing
545
+ // to half, which are referred to has the top and bottom half. The other
546
+ // half retains its previous value.
547
+ static bool retainsPreviousHalfElement (const MachineInstr &MI) {
548
+ const MCInstrDesc &MCID = MI.getDesc ();
549
+ uint64_t Flags = MCID.TSFlags ;
550
+ return (Flags & ARMII::RetainsPreviousHalfElement) != 0 ;
551
+ }
552
+
553
+ // Look at its register uses to see if it only can only receive zeros
554
+ // into its false lanes which would then produce zeros. Also check that
555
+ // the output register is also defined by an FalseLaneZeros instruction
556
+ // so that if tail-predication happens, the lanes that aren't updated will
557
+ // still be zeros.
558
+ static bool producesFalseLaneZeros (MachineInstr &MI,
559
+ const TargetRegisterClass *QPRs,
560
+ const ReachingDefAnalysis &RDA,
561
+ InstSet &FalseLaneZeros) {
562
+ if (canGenerateNonZeros (MI))
563
+ return false ;
564
+ for (auto &MO : MI.operands ()) {
565
+ if (!MO.isReg () || !MO.getReg ())
566
+ continue ;
567
+ if (auto *OpDef = RDA.getMIOperand (&MI, MO))
568
+ if (FalseLaneZeros.count (OpDef))
569
+ continue ;
570
+ return false ;
571
+ }
572
+ LLVM_DEBUG (dbgs () << " ARM Loops: Always False Zeros: " << MI);
573
+ return true ;
574
+ }
575
+
521
576
bool LowOverheadLoop::ValidateLiveOuts () const {
522
577
// We want to find out if the tail-predicated version of this loop will
523
578
// produce the same values as the loop in its original form. For this to
@@ -538,76 +593,64 @@ bool LowOverheadLoop::ValidateLiveOuts() const {
538
593
// operands, or stored results are equivalent already. Other explicitly
539
594
// predicated instructions will perform the same operation in the original
540
595
// loop and the tail-predicated form too. Because of this, we can insert
541
- // loads, stores and other predicated instructions into our KnownFalseZeros
596
+ // loads, stores and other predicated instructions into our Predicated
542
597
// set and build from there.
543
598
const TargetRegisterClass *QPRs = TRI.getRegClass (ARM::MQPRRegClassID);
544
- SetVector<MachineInstr *> UnknownFalseLanes;
545
- SmallPtrSet<MachineInstr *, 4 > KnownFalseZeros;
599
+ SetVector<MachineInstr *> Unknown;
600
+ SmallPtrSet<MachineInstr *, 4 > FalseLaneZeros;
601
+ SmallPtrSet<MachineInstr *, 4 > Predicated;
546
602
MachineBasicBlock *MBB = ML.getHeader ();
603
+
547
604
for (auto &MI : *MBB) {
548
605
const MCInstrDesc &MCID = MI.getDesc ();
549
606
uint64_t Flags = MCID.TSFlags ;
550
607
if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE)
551
608
continue ;
552
609
553
610
if (isVectorPredicated (&MI)) {
554
- KnownFalseZeros.insert (&MI);
611
+ if (MI.mayLoad ())
612
+ FalseLaneZeros.insert (&MI);
613
+ Predicated.insert (&MI);
555
614
continue ;
556
615
}
557
616
558
617
if (MI.getNumDefs () == 0 )
559
618
continue ;
560
619
561
- // Only evaluate instructions which produce a single value.
562
- assert ((MI.getNumDefs () == 1 && MI.defs ().begin ()->isReg ()) &&
563
- " Expected no more than one register def" );
564
-
565
- Register DefReg = MI.defs ().begin ()->getReg ();
566
- for (auto &MO : MI.operands ()) {
567
- if (!isRegInClass (MO, QPRs) || !MO.isUse () || MO.getReg () != DefReg)
568
- continue ;
569
-
570
- // If this instruction overwrites one of its operands, and that register
571
- // has known lanes, then this instruction also has known predicated false
572
- // lanes.
573
- if (auto *OpDef = RDA.getMIOperand (&MI, MO)) {
574
- if (KnownFalseZeros.count (OpDef)) {
575
- KnownFalseZeros.insert (&MI);
576
- break ;
577
- }
578
- }
579
- }
580
- if (!KnownFalseZeros.count (&MI))
581
- UnknownFalseLanes.insert (&MI);
620
+ if (producesFalseLaneZeros (MI, QPRs, RDA, FalseLaneZeros))
621
+ FalseLaneZeros.insert (&MI);
622
+ else if (retainsPreviousHalfElement (MI))
623
+ return false ;
624
+ else
625
+ Unknown.insert (&MI);
582
626
}
583
627
584
- auto HasKnownUsers = [this ](MachineInstr *MI, const MachineOperand &MO,
585
- SmallPtrSetImpl<MachineInstr *> &Knowns ) {
628
+ auto HasPredicatedUsers = [this ](MachineInstr *MI, const MachineOperand &MO,
629
+ SmallPtrSetImpl<MachineInstr *> &Predicated ) {
586
630
SmallPtrSet<MachineInstr *, 2 > Uses;
587
631
RDA.getGlobalUses (MI, MO.getReg (), Uses);
588
632
for (auto *Use : Uses) {
589
- if (Use != MI && !Knowns .count (Use))
633
+ if (Use != MI && !Predicated .count (Use))
590
634
return false ;
591
635
}
592
636
return true ;
593
637
};
594
638
595
- // Now for all the unknown values, see if they're only consumed by known
596
- // instructions. Visit in reverse so that we can start at the values being
639
+ // Visit the unknowns in reverse so that we can start at the values being
597
640
// stored and then we can work towards the leaves, hopefully adding more
598
- // instructions to KnownFalseZeros .
599
- for (auto *MI : reverse (UnknownFalseLanes )) {
641
+ // instructions to Predicated .
642
+ for (auto *MI : reverse (Unknown )) {
600
643
for (auto &MO : MI->operands ()) {
601
644
if (!isRegInClass (MO, QPRs) || !MO.isDef ())
602
645
continue ;
603
- if (!HasKnownUsers (MI, MO, KnownFalseZeros )) {
646
+ if (!HasPredicatedUsers (MI, MO, Predicated )) {
604
647
LLVM_DEBUG (dbgs () << " ARM Loops: Found an unknown def of : "
605
648
<< TRI.getRegAsmName (MO.getReg ()) << " at " << *MI);
606
649
return false ;
607
650
}
608
651
}
609
652
// Any unknown false lanes have been masked away by the user(s).
610
- KnownFalseZeros .insert (MI);
653
+ Predicated .insert (MI);
611
654
}
612
655
613
656
// Collect Q-regs that are live in the exit blocks. We don't collect scalars
0 commit comments