Skip to content

Commit bae8f13

Browse files
Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" (#134408)
This tries to reland #123632 (previously reverted by commit 6b1db79) This PR aims to fix coalescing of SUBREG_TO_REG when sub-register liveness tracking is enabled and this is now the so-manieth reincarnation of this effort :) This change is needed in order to enable subreg liveness tracking for AArch64, because without the implicit-def, Machine Copy Propagation would remove a 'redundant' copy because it doesn't realise that the top 32-bits of the register are zeroed, which subsequent instructions rely on. Changes compared to previous PR: * Rather than updating all instructions that define the source register (SrcReg) of the SUBREG_TO_REG, this new approach only updates instructions that define SrcReg when they dominate the SUBREG_TO_REG. The live-ranges are updated accordingly.
1 parent 17c1921 commit bae8f13

24 files changed

+1408
-62
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 164 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,12 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
306306
/// number if it is not zero. If DstReg is a physical register and the
307307
/// existing subregister number of the def / use being updated is not zero,
308308
/// make sure to set it to the correct physical subregister.
309-
void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
309+
///
310+
/// If \p SubregToRegSrcInst is not empty, we are coalescing a
311+
/// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
312+
/// implicit-def of DstReg on instructions that define SrcReg.
313+
void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
314+
ArrayRef<MachineInstr *> SubregToRegSrcInst = {});
310315

311316
/// If the given machine operand reads only undefined lanes add an undef
312317
/// flag.
@@ -1443,6 +1448,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
14431448

14441449
// CopyMI may have implicit operands, save them so that we can transfer them
14451450
// over to the newly materialized instruction after CopyMI is removed.
1451+
LaneBitmask NewMIImplicitOpsMask;
14461452
SmallVector<MachineOperand, 4> ImplicitOps;
14471453
ImplicitOps.reserve(CopyMI->getNumOperands() -
14481454
CopyMI->getDesc().getNumOperands());
@@ -1457,6 +1463,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
14571463
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
14581464
"unexpected implicit virtual register def");
14591465
ImplicitOps.push_back(MO);
1466+
if (MO.isDef() && MO.getReg().isVirtual() &&
1467+
MRI->shouldTrackSubRegLiveness(DstReg))
1468+
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
14601469
}
14611470
}
14621471

@@ -1499,14 +1508,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
14991508
} else {
15001509
assert(MO.getReg() == NewMI.getOperand(0).getReg());
15011510

1502-
// We're only expecting another def of the main output, so the range
1503-
// should get updated with the regular output range.
1504-
//
1505-
// FIXME: The range updating below probably needs updating to look at
1506-
// the super register if subranges are tracked.
1507-
assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
1508-
"subrange update for implicit-def of super register may not be "
1509-
"properly handled");
1511+
// If lanemasks need to be tracked, compile the lanemask of the NewMI
1512+
// implicit def operands to avoid subranges for the super-regs from
1513+
// being removed by code later on in this function.
1514+
if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
1515+
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
15101516
}
15111517
}
15121518
}
@@ -1606,7 +1612,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
16061612
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
16071613
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
16081614
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1609-
if ((SR.LaneMask & DstMask).none()) {
1615+
if ((SR.LaneMask & DstMask).none() &&
1616+
(SR.LaneMask & NewMIImplicitOpsMask).none()) {
16101617
LLVM_DEBUG(dbgs()
16111618
<< "Removing undefined SubRange "
16121619
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
@@ -1870,11 +1877,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
18701877
}
18711878
}
18721879

1873-
void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
1874-
unsigned SubIdx) {
1880+
void RegisterCoalescer::updateRegDefsUses(
1881+
Register SrcReg, Register DstReg, unsigned SubIdx,
1882+
ArrayRef<MachineInstr *> SubregToRegSrcInsts) {
18751883
bool DstIsPhys = DstReg.isPhysical();
18761884
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
18771885

1886+
// Coalescing a COPY may expose reads of 'undef' subregisters.
1887+
// If so, then explicitly propagate 'undef' to those operands.
18781888
if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
18791889
for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
18801890
if (MO.isUndef())
@@ -1891,6 +1901,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
18911901
}
18921902
}
18931903

1904+
// If DstInt already has a subrange for the unused lanes, then we shouldn't
1905+
// create duplicate subranges when we update the interval for unused lanes.
1906+
LaneBitmask DstIntLaneMask;
1907+
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
1908+
for (LiveInterval::SubRange &SR : DstInt->subranges())
1909+
DstIntLaneMask |= SR.LaneMask;
1910+
}
1911+
1912+
// Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
18941913
SmallPtrSet<MachineInstr *, 8> Visited;
18951914
for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
18961915
E = MRI->reg_instr_end();
@@ -1914,6 +1933,80 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19141933
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
19151934
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
19161935

1936+
bool RequiresImplicitRedef = false;
1937+
if (!SubregToRegSrcInsts.empty()) {
1938+
// We can only add an implicit-def and undef if the sub registers match,
1939+
// e.g.
1940+
// %0:gr32 = INSTX
1941+
// %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined
1942+
// %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32
1943+
//
1944+
// This cannot be transformed into:
1945+
// %1.sub32:gr64 = INSTX
1946+
// undef %1.sub8:gr64 = INSTY , implicit-def %1
1947+
//
1948+
// Because that would thrash the top 24 bits of %1.sub32.
1949+
if (is_contained(SubregToRegSrcInsts, UseMI) &&
1950+
all_of(UseMI->defs(),
1951+
[&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
1952+
if (MO.getReg() != SrcReg || !MO.getSubReg() || MO.isUndef())
1953+
return true;
1954+
return SubIdx == MO.getSubReg();
1955+
})) {
1956+
// Add implicit-def of super-register to express that the whole
1957+
// register is defined by the instruction.
1958+
MachineInstrBuilder MIB(*MF, UseMI);
1959+
MIB.addReg(DstReg, RegState::ImplicitDefine);
1960+
RequiresImplicitRedef = true;
1961+
}
1962+
1963+
// If the coalesed instruction doesn't fully define the register, we need
1964+
// to preserve the original super register liveness for SUBREG_TO_REG.
1965+
//
1966+
// We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
1967+
// but it introduces liveness for other subregisters. Downstream users may
1968+
// have been relying on those bits, so we need to ensure their liveness is
1969+
// captured with a def of other lanes.
1970+
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
1971+
// First check if there is sufficient granularity in terms of subranges.
1972+
LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
1973+
LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
1974+
LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
1975+
if ((UnusedLanes & ~DstIntLaneMask).any()) {
1976+
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
1977+
DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
1978+
DstIntLaneMask |= UnusedLanes;
1979+
}
1980+
1981+
// After duplicating the live ranges for the low/hi bits, we
1982+
// need to update the subranges of the DstReg interval such that
1983+
// for a case like this:
1984+
//
1985+
// entry:
1986+
// 16B %1:gpr32 = INSTRUCTION (<=> UseMI)
1987+
// :
1988+
// if.then:
1989+
// 32B %1:gpr32 = MOVIMM32 ..
1990+
// 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
1991+
//
1992+
// Only the MOVIMM32 require a def of the top lanes and any intervals
1993+
// for the top 32-bits of the def at 16B should be removed.
1994+
for (LiveInterval::SubRange &SR : DstInt->subranges()) {
1995+
if (!Writes || RequiresImplicitRedef ||
1996+
(SR.LaneMask & UnusedLanes).none())
1997+
continue;
1998+
1999+
assert((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
2000+
"Unexpected lanemask. Subrange needs finer granularity");
2001+
2002+
SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(false);
2003+
auto SegmentI = SR.find(UseIdx);
2004+
if (SegmentI != SR.end())
2005+
SR.removeSegment(SegmentI, true);
2006+
}
2007+
}
2008+
}
2009+
19172010
// Replace SrcReg with DstReg in all UseMI operands.
19182011
for (unsigned Op : Ops) {
19192012
MachineOperand &MO = UseMI->getOperand(Op);
@@ -1922,7 +2015,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19222015
// turn a full def into a read-modify-write sub-register def and vice
19232016
// versa.
19242017
if (SubIdx && MO.isDef())
1925-
MO.setIsUndef(!Reads);
2018+
MO.setIsUndef(!Reads || RequiresImplicitRedef);
19262019

19272020
// A subreg use of a partially undef (super) register may be a complete
19282021
// undef use now and then has to be marked that way.
@@ -2025,6 +2118,30 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
20252118
LIS->shrinkToUses(&LI);
20262119
}
20272120

2121+
/// For a given use of value \p Idx, it returns the def in the current block,
2122+
/// or otherwise all possible defs in preceding blocks.
2123+
static bool FindDefInBlock(SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks,
2124+
SmallVector<MachineInstr *> &Instrs,
2125+
LiveIntervals *LIS, LiveInterval &SrcInt,
2126+
MachineBasicBlock *MBB, VNInfo *Idx) {
2127+
if (!Idx->isPHIDef()) {
2128+
MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def);
2129+
assert(Def && "Unable to find a def for SUBREG_TO_REG source operand");
2130+
Instrs.push_back(Def);
2131+
return true;
2132+
}
2133+
2134+
bool Any = false;
2135+
if (VisitedBlocks.count(MBB))
2136+
return false;
2137+
VisitedBlocks.insert(MBB);
2138+
for (MachineBasicBlock *Pred : MBB->predecessors()) {
2139+
Any |= FindDefInBlock(VisitedBlocks, Instrs, LIS, SrcInt, Pred,
2140+
SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(Pred)));
2141+
}
2142+
return Any;
2143+
}
2144+
20282145
bool RegisterCoalescer::joinCopy(
20292146
MachineInstr *CopyMI, bool &Again,
20302147
SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
@@ -2156,6 +2273,35 @@ bool RegisterCoalescer::joinCopy(
21562273
});
21572274
}
21582275

2276+
SmallVector<MachineInstr *> SubregToRegSrcInsts;
2277+
if (CopyMI->isSubregToReg()) {
2278+
// For the case where the copy instruction is a SUBREG_TO_REG, e.g.
2279+
//
2280+
// %0:gpr32 = movimm32 ..
2281+
// %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
2282+
// ...
2283+
// %0:gpr32 = COPY <something>
2284+
//
2285+
// After joining liveranges, the original `movimm32` will need an
2286+
// implicit-def to make it explicit that the entire register is written,
2287+
// i.e.
2288+
//
2289+
// undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
2290+
// ...
2291+
// undef %0.sub32:gpr64 = COPY <something> // Note that this does not
2292+
// // require an implicit-def,
2293+
// // because it has nothing to
2294+
// // do with the SUBREG_TO_REG.
2295+
LiveInterval &SrcInt =
2296+
LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
2297+
SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI);
2298+
SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks;
2299+
if (!FindDefInBlock(VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt,
2300+
CopyMI->getParent(),
2301+
SrcInt.Query(SubregToRegSlotIdx).valueIn()))
2302+
llvm_unreachable("SUBREG_TO_REG src requires a def");
2303+
}
2304+
21592305
ShrinkMask = LaneBitmask::getNone();
21602306
ShrinkMainRange = false;
21612307

@@ -2225,9 +2371,12 @@ bool RegisterCoalescer::joinCopy(
22252371

22262372
// Rewrite all SrcReg operands to DstReg.
22272373
// Also update DstReg operands to include DstIdx if it is set.
2228-
if (CP.getDstIdx())
2374+
if (CP.getDstIdx()) {
2375+
assert(SubregToRegSrcInsts.empty() && "can this happen?");
22292376
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
2230-
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
2377+
}
2378+
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
2379+
SubregToRegSrcInsts);
22312380

22322381
// Shrink subregister ranges if necessary.
22332382
if (ShrinkMask.any()) {

0 commit comments

Comments
 (0)