Skip to content

Commit 12a3243

Browse files
committed
[AMDGPU] Limit endcf-collapse to simple if
We can only collapse adjacent SI_END_CF instructions if the outer one belongs to a simple SI_IF; otherwise the correct mask is not in the register we expect, but is an argument of an S_XOR instruction. Even if an SI_IF is simple, it might be lowered using S_XOR because lowering depends on the basic block layout. An SI_IF is not considered simple if the instruction consuming its output is not an SI_END_CF. Since that SI_END_CF might have already been lowered to an S_OR, the isSimpleIf() check may return false. This situation is an opportunity for a further optimization of SI_IF lowering, but that is a separate optimization. In the meantime, move the SI_END_CF collapsing to after the lowering, when we already know how the rest of the CFG was lowered, since the non-simple SI_IF case still needs to be handled. Differential Revision: https://reviews.llvm.org/D77610
1 parent 03c825c commit 12a3243

File tree

2 files changed

+95
-24
lines changed

2 files changed

+95
-24
lines changed

llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include "AMDGPUSubtarget.h"
5252
#include "SIInstrInfo.h"
5353
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
54+
#include "llvm/ADT/SetVector.h"
5455
#include "llvm/ADT/SmallSet.h"
5556
#include "llvm/ADT/SmallVector.h"
5657
#include "llvm/ADT/StringRef.h"
@@ -86,7 +87,7 @@ class SILowerControlFlow : public MachineFunctionPass {
8687
const SIInstrInfo *TII = nullptr;
8788
LiveIntervals *LIS = nullptr;
8889
MachineRegisterInfo *MRI = nullptr;
89-
DenseSet<const MachineInstr*> LoweredEndCf;
90+
SetVector<MachineInstr*> LoweredEndCf;
9091
DenseSet<Register> LoweredIf;
9192

9293
const TargetRegisterClass *BoolRC = nullptr;
@@ -117,6 +118,9 @@ class SILowerControlFlow : public MachineFunctionPass {
117118
skipIgnoreExecInstsTrivialSucc(MachineBasicBlock &MBB,
118119
MachineBasicBlock::iterator It) const;
119120

121+
// Remove redundant SI_END_CF instructions.
122+
void optimizeEndCf();
123+
120124
public:
121125
static char ID;
122126

@@ -448,29 +452,6 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
448452
MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);
449453
const DebugLoc &DL = MI.getDebugLoc();
450454

451-
// If the only instruction immediately following this END_CF is an another
452-
// END_CF in the only successor we can avoid emitting exec mask restore here.
453-
if (RemoveRedundantEndcf) {
454-
auto Next =
455-
skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI.getIterator()));
456-
if (Next != MBB.end() && (Next->getOpcode() == AMDGPU::SI_END_CF ||
457-
LoweredEndCf.count(&*Next))) {
458-
// Only skip inner END_CF if outer ENDCF belongs to SI_IF.
459-
// If that belongs to SI_ELSE then saved mask has an inverted value.
460-
Register SavedExec = Next->getOperand(0).getReg();
461-
const MachineInstr *Def = MRI.getUniqueVRegDef(SavedExec);
462-
// A lowered SI_IF turns definition into COPY of exec.
463-
if (Def && (Def->getOpcode() == AMDGPU::SI_IF ||
464-
LoweredIf.count(SavedExec))) {
465-
LLVM_DEBUG(dbgs() << "Skip redundant "; MI.dump());
466-
if (LIS)
467-
LIS->RemoveMachineInstrFromMaps(MI);
468-
MI.eraseFromParent();
469-
return;
470-
}
471-
}
472-
}
473-
474455
MachineBasicBlock::iterator InsPt =
475456
Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def))
476457
: MBB.begin();
@@ -544,6 +525,34 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) {
544525
MRI->getUniqueVRegDef(Reg)->eraseFromParent();
545526
}
546527

528+
// Walks every instruction recorded in LoweredEndCf and erases those that are
// directly followed (per skipIgnoreExecInstsTrivialSucc) by another recorded
// END_CF whose src1 register is recorded in LoweredIf. Does nothing when
// RemoveRedundantEndcf is false.
void SILowerControlFlow::optimizeEndCf() {
529+
// If the only instruction immediately following this END_CF is another
530+
// END_CF in the only successor, we can avoid emitting the exec mask restore here.
531+
if (!RemoveRedundantEndcf)
532+
return;
533+
534+
for (MachineInstr *MI : LoweredEndCf) {
535+
MachineBasicBlock &MBB = *MI->getParent();
536+
auto Next =
537+
skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator()));
538+
// Keep MI unless the instruction found after it is itself a recorded END_CF.
// NOTE(review): presumably the helper returns MBB.end() when nothing
// suitable follows -- confirm against its definition.
if (Next == MBB.end() || !LoweredEndCf.count(&*Next))
539+
continue;
540+
// Only skip the inner END_CF if the outer END_CF belongs to an SI_IF.
541+
// If it belongs to an SI_ELSE then the saved mask has an inverted value.
542+
Register SavedExec
543+
= TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
544+
assert(SavedExec.isVirtual() && "Expected saved exec to be src1!");
545+
546+
const MachineInstr *Def = MRI->getUniqueVRegDef(SavedExec);
547+
// Requires a unique definition of the saved mask and that the register was
// recorded in LoweredIf.
if (Def && LoweredIf.count(SavedExec)) {
548+
LLVM_DEBUG(dbgs() << "Skip redundant "; MI->dump());
549+
if (LIS)
550+
LIS->RemoveMachineInstrFromMaps(*MI);
551+
// NOTE(review): MI remains in LoweredEndCf after being erased; the set is
// only cleared by the caller afterwards -- confirm no later lookup can
// match the stale pointer.
MI->eraseFromParent();
552+
}
553+
}
554+
}
555+
547556
bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
548557
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
549558
TII = ST.getInstrInfo();
@@ -626,6 +635,8 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
626635
}
627636
}
628637

638+
optimizeEndCf();
639+
629640
LoweredEndCf.clear();
630641
LoweredIf.clear();
631642

llvm/test/CodeGen/AMDGPU/collapse-endcf.mir

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,3 +533,63 @@ body: |
533533
S_ENDPGM 0
534534
535535
...
536+
537+
---
538+
name: if_inside_loop
539+
tracksRegLiveness: true
540+
machineFunctionInfo:
541+
isEntryFunction: true
542+
body: |
543+
; GCN-LABEL: name: if_inside_loop
544+
; GCN: bb.0:
545+
; GCN: successors: %bb.6(0x80000000)
546+
; GCN: S_BRANCH %bb.6
547+
; GCN: bb.1:
548+
; GCN: successors: %bb.3(0x40000000), %bb.2(0x40000000)
549+
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
550+
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc
551+
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
552+
; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec
553+
; GCN: bb.2:
554+
; GCN: successors: %bb.6(0x80000000)
555+
; GCN: S_BRANCH %bb.6
556+
; GCN: bb.3:
557+
; GCN: successors: %bb.4(0x80000000)
558+
; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc
559+
; GCN: bb.4:
560+
; GCN: successors: %bb.5(0x80000000)
561+
; GCN: $exec = S_OR_B64 $exec, %2, implicit-def $scc
562+
; GCN: bb.5:
563+
; GCN: successors: %bb.6(0x80000000)
564+
; GCN: bb.6:
565+
; GCN: successors: %bb.4(0x40000000), %bb.0(0x40000000)
566+
; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
567+
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc
568+
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_1]], [[COPY1]], implicit-def dead $scc
569+
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]]
570+
; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec
571+
; GCN: S_BRANCH %bb.0
572+
; GCN: S_ENDPGM 0
573+
bb.0:
574+
S_BRANCH %bb.6
575+
576+
bb.1:
577+
%0:sreg_64 = SI_IF undef %1:sreg_64, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
578+
579+
bb.2:
580+
S_BRANCH %bb.6
581+
582+
bb.3:
583+
SI_END_CF %0:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
584+
585+
bb.4:
586+
SI_END_CF %2:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
587+
588+
bb.5:
589+
590+
bb.6:
591+
%2:sreg_64 = SI_IF undef %3:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
592+
S_BRANCH %bb.0
593+
S_ENDPGM 0
594+
595+
...

0 commit comments

Comments
 (0)