|
51 | 51 | #include "AMDGPUSubtarget.h"
|
52 | 52 | #include "SIInstrInfo.h"
|
53 | 53 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
| 54 | +#include "llvm/ADT/SetVector.h" |
54 | 55 | #include "llvm/ADT/SmallSet.h"
|
55 | 56 | #include "llvm/ADT/SmallVector.h"
|
56 | 57 | #include "llvm/ADT/StringRef.h"
|
@@ -86,7 +87,7 @@ class SILowerControlFlow : public MachineFunctionPass {
|
86 | 87 | const SIInstrInfo *TII = nullptr;
|
87 | 88 | LiveIntervals *LIS = nullptr;
|
88 | 89 | MachineRegisterInfo *MRI = nullptr;
|
89 |
| - DenseSet<const MachineInstr*> LoweredEndCf; |
| 90 | + SetVector<MachineInstr*> LoweredEndCf; |
90 | 91 | DenseSet<Register> LoweredIf;
|
91 | 92 |
|
92 | 93 | const TargetRegisterClass *BoolRC = nullptr;
|
@@ -117,6 +118,9 @@ class SILowerControlFlow : public MachineFunctionPass {
|
117 | 118 | skipIgnoreExecInstsTrivialSucc(MachineBasicBlock &MBB,
|
118 | 119 | MachineBasicBlock::iterator It) const;
|
119 | 120 |
|
| 121 | + // Remove redundant SI_END_CF instructions. |
| 122 | + void optimizeEndCf(); |
| 123 | + |
120 | 124 | public:
|
121 | 125 | static char ID;
|
122 | 126 |
|
@@ -448,29 +452,6 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
|
448 | 452 | MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);
|
449 | 453 | const DebugLoc &DL = MI.getDebugLoc();
|
450 | 454 |
|
451 |
| - // If the only instruction immediately following this END_CF is an another |
452 |
| - // END_CF in the only successor we can avoid emitting exec mask restore here. |
453 |
| - if (RemoveRedundantEndcf) { |
454 |
| - auto Next = |
455 |
| - skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI.getIterator())); |
456 |
| - if (Next != MBB.end() && (Next->getOpcode() == AMDGPU::SI_END_CF || |
457 |
| - LoweredEndCf.count(&*Next))) { |
458 |
| - // Only skip inner END_CF if outer ENDCF belongs to SI_IF. |
459 |
| - // If that belongs to SI_ELSE then saved mask has an inverted value. |
460 |
| - Register SavedExec = Next->getOperand(0).getReg(); |
461 |
| - const MachineInstr *Def = MRI.getUniqueVRegDef(SavedExec); |
462 |
| - // A lowered SI_IF turns definition into COPY of exec. |
463 |
| - if (Def && (Def->getOpcode() == AMDGPU::SI_IF || |
464 |
| - LoweredIf.count(SavedExec))) { |
465 |
| - LLVM_DEBUG(dbgs() << "Skip redundant "; MI.dump()); |
466 |
| - if (LIS) |
467 |
| - LIS->RemoveMachineInstrFromMaps(MI); |
468 |
| - MI.eraseFromParent(); |
469 |
| - return; |
470 |
| - } |
471 |
| - } |
472 |
| - } |
473 |
| - |
474 | 455 | MachineBasicBlock::iterator InsPt =
|
475 | 456 | Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def))
|
476 | 457 | : MBB.begin();
|
@@ -544,6 +525,34 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) {
|
544 | 525 | MRI->getUniqueVRegDef(Reg)->eraseFromParent();
|
545 | 526 | }
|
546 | 527 |
|
/// Remove redundant SI_END_CF instructions after all control flow has been
/// lowered.
///
/// If the only instruction immediately following a lowered END_CF is another
/// lowered END_CF in the only successor, the inner exec-mask restore is
/// redundant and the instruction can be erased.  Runs as a post-pass over
/// every END_CF recorded in LoweredEndCf during lowering; gated on the
/// RemoveRedundantEndcf option.
void SILowerControlFlow::optimizeEndCf() {
  // If the only instruction immediately following this END_CF is another
  // END_CF in the only successor we can avoid emitting exec mask restore here.
  if (!RemoveRedundantEndcf)
    return;

  for (MachineInstr *MI : LoweredEndCf) {
    MachineBasicBlock &MBB = *MI->getParent();
    // Walk past instructions (and through trivial successors) that don't
    // read exec to find the next relevant instruction.
    auto Next =
      skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator()));
    // Only a pair of lowered END_CFs back to back is a candidate.
    if (Next == MBB.end() || !LoweredEndCf.count(&*Next))
      continue;
    // Only skip inner END_CF if outer ENDCF belongs to SI_IF.
    // If that belongs to SI_ELSE then saved mask has an inverted value.
    Register SavedExec
        = TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
    assert(SavedExec.isVirtual() && "Expected saved exec to be src1!");

    // LoweredIf records registers whose END_CF came from a lowered SI_IF
    // (whose definition is a plain COPY of exec), so the saved mask is not
    // inverted and the inner restore is safe to drop.
    const MachineInstr *Def = MRI->getUniqueVRegDef(SavedExec);
    if (Def && LoweredIf.count(SavedExec)) {
      LLVM_DEBUG(dbgs() << "Skip redundant "; MI->dump());
      // Keep LiveIntervals consistent before deleting the instruction.
      if (LIS)
        LIS->RemoveMachineInstrFromMaps(*MI);
      // NOTE(review): MI stays in the LoweredEndCf SetVector after erasure;
      // the remaining iterations only compare addresses of live instructions
      // reached via block iteration, so the stale pointer is never
      // dereferenced — confirm if this pass is extended.
      MI->eraseFromParent();
    }
  }
}
| 555 | + |
547 | 556 | bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
|
548 | 557 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
549 | 558 | TII = ST.getInstrInfo();
|
@@ -626,6 +635,8 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
|
626 | 635 | }
|
627 | 636 | }
|
628 | 637 |
|
| 638 | + optimizeEndCf(); |
| 639 | + |
629 | 640 | LoweredEndCf.clear();
|
630 | 641 | LoweredIf.clear();
|
631 | 642 |
|
|
0 commit comments