@@ -306,7 +306,12 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
306
306
// / number if it is not zero. If DstReg is a physical register and the
307
307
// / existing subregister number of the def / use being updated is not zero,
308
308
// / make sure to set it to the correct physical subregister.
309
- void updateRegDefsUses (Register SrcReg, Register DstReg, unsigned SubIdx);
309
+ // /
310
+ // / If \p SubregToRegSrcInst is not empty, we are coalescing a
311
+ // / `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
312
+ // / implicit-def of DstReg on instructions that define SrcReg.
313
+ void updateRegDefsUses (Register SrcReg, Register DstReg, unsigned SubIdx,
314
+ ArrayRef<MachineInstr *> SubregToRegSrcInst = {});
310
315
311
316
// / If the given machine operand reads only undefined lanes add an undef
312
317
// / flag.
@@ -1443,6 +1448,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1443
1448
1444
1449
// CopyMI may have implicit operands, save them so that we can transfer them
1445
1450
// over to the newly materialized instruction after CopyMI is removed.
1451
+ LaneBitmask NewMIImplicitOpsMask;
1446
1452
SmallVector<MachineOperand, 4 > ImplicitOps;
1447
1453
ImplicitOps.reserve (CopyMI->getNumOperands () -
1448
1454
CopyMI->getDesc ().getNumOperands ());
@@ -1457,6 +1463,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1457
1463
(MO.getSubReg () == 0 && MO.getReg () == DstOperand.getReg ())) &&
1458
1464
" unexpected implicit virtual register def" );
1459
1465
ImplicitOps.push_back (MO);
1466
+ if (MO.isDef () && MO.getReg ().isVirtual () &&
1467
+ MRI->shouldTrackSubRegLiveness (DstReg))
1468
+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg (MO.getReg ());
1460
1469
}
1461
1470
}
1462
1471
@@ -1499,14 +1508,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1499
1508
} else {
1500
1509
assert (MO.getReg () == NewMI.getOperand (0 ).getReg ());
1501
1510
1502
- // We're only expecting another def of the main output, so the range
1503
- // should get updated with the regular output range.
1504
- //
1505
- // FIXME: The range updating below probably needs updating to look at
1506
- // the super register if subranges are tracked.
1507
- assert (!MRI->shouldTrackSubRegLiveness (DstReg) &&
1508
- " subrange update for implicit-def of super register may not be "
1509
- " properly handled" );
1511
+ // If lanemasks need to be tracked, compile the lanemask of the NewMI
1512
+ // implicit def operands to avoid subranges for the super-regs from
1513
+ // being removed by code later on in this function.
1514
+ if (MRI->shouldTrackSubRegLiveness (MO.getReg ()))
1515
+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg (MO.getReg ());
1510
1516
}
1511
1517
}
1512
1518
}
@@ -1606,7 +1612,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1606
1612
CurrIdx.getRegSlot (NewMI.getOperand (0 ).isEarlyClobber ());
1607
1613
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator ();
1608
1614
for (LiveInterval::SubRange &SR : DstInt.subranges ()) {
1609
- if ((SR.LaneMask & DstMask).none ()) {
1615
+ if ((SR.LaneMask & DstMask).none () &&
1616
+ (SR.LaneMask & NewMIImplicitOpsMask).none ()) {
1610
1617
LLVM_DEBUG (dbgs ()
1611
1618
<< " Removing undefined SubRange "
1612
1619
<< PrintLaneMask (SR.LaneMask ) << " : " << SR << " \n " );
@@ -1870,11 +1877,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
1870
1877
}
1871
1878
}
1872
1879
1873
- void RegisterCoalescer::updateRegDefsUses (Register SrcReg, Register DstReg,
1874
- unsigned SubIdx) {
1880
+ void RegisterCoalescer::updateRegDefsUses (
1881
+ Register SrcReg, Register DstReg, unsigned SubIdx,
1882
+ ArrayRef<MachineInstr *> SubregToRegSrcInsts) {
1875
1883
bool DstIsPhys = DstReg.isPhysical ();
1876
1884
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval (DstReg);
1877
1885
1886
+ // Coalescing a COPY may expose reads of 'undef' subregisters.
1887
+ // If so, then explicitly propagate 'undef' to those operands.
1878
1888
if (DstInt && DstInt->hasSubRanges () && DstReg != SrcReg) {
1879
1889
for (MachineOperand &MO : MRI->reg_operands (DstReg)) {
1880
1890
if (MO.isUndef ())
@@ -1891,6 +1901,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
1891
1901
}
1892
1902
}
1893
1903
1904
+ // If DstInt already has a subrange for the unused lanes, then we shouldn't
1905
+ // create duplicate subranges when we update the interval for unused lanes.
1906
+ LaneBitmask DstIntLaneMask;
1907
+ if (DstInt && MRI->shouldTrackSubRegLiveness (DstReg)) {
1908
+ for (LiveInterval::SubRange &SR : DstInt->subranges ())
1909
+ DstIntLaneMask |= SR.LaneMask ;
1910
+ }
1911
+
1912
+ // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
1894
1913
SmallPtrSet<MachineInstr *, 8 > Visited;
1895
1914
for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin (SrcReg),
1896
1915
E = MRI->reg_instr_end ();
@@ -1914,6 +1933,80 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
1914
1933
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr ())
1915
1934
Reads = DstInt->liveAt (LIS->getInstructionIndex (*UseMI));
1916
1935
1936
+ bool RequiresImplicitRedef = false ;
1937
+ if (!SubregToRegSrcInsts.empty ()) {
1938
+ // We can only add an implicit-def and undef if the sub registers match,
1939
+ // e.g.
1940
+ // %0:gr32 = INSTX
1941
+ // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined
1942
+ // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32
1943
+ //
1944
+ // This cannot be transformed into:
1945
+ // %1.sub32:gr64 = INSTX
1946
+ // undef %1.sub8:gr64 = INSTY , implicit-def %1
1947
+ //
1948
+ // Because that would thrash the top 24 bits of %1.sub32.
1949
+ if (is_contained (SubregToRegSrcInsts, UseMI) &&
1950
+ all_of (UseMI->defs (),
1951
+ [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
1952
+ if (MO.getReg () != SrcReg || !MO.getSubReg () || MO.isUndef ())
1953
+ return true ;
1954
+ return SubIdx == MO.getSubReg ();
1955
+ })) {
1956
+ // Add implicit-def of super-register to express that the whole
1957
+ // register is defined by the instruction.
1958
+ MachineInstrBuilder MIB (*MF, UseMI);
1959
+ MIB.addReg (DstReg, RegState::ImplicitDefine);
1960
+ RequiresImplicitRedef = true ;
1961
+ }
1962
+
1963
+ // If the coalesed instruction doesn't fully define the register, we need
1964
+ // to preserve the original super register liveness for SUBREG_TO_REG.
1965
+ //
1966
+ // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
1967
+ // but it introduces liveness for other subregisters. Downstream users may
1968
+ // have been relying on those bits, so we need to ensure their liveness is
1969
+ // captured with a def of other lanes.
1970
+ if (DstInt && MRI->shouldTrackSubRegLiveness (DstReg)) {
1971
+ // First check if there is sufficient granularity in terms of subranges.
1972
+ LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg (DstInt->reg ());
1973
+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask (SubIdx);
1974
+ LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
1975
+ if ((UnusedLanes & ~DstIntLaneMask).any ()) {
1976
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator ();
1977
+ DstInt->createSubRangeFrom (Allocator, UnusedLanes, *DstInt);
1978
+ DstIntLaneMask |= UnusedLanes;
1979
+ }
1980
+
1981
+ // After duplicating the live ranges for the low/hi bits, we
1982
+ // need to update the subranges of the DstReg interval such that
1983
+ // for a case like this:
1984
+ //
1985
+ // entry:
1986
+ // 16B %1:gpr32 = INSTRUCTION (<=> UseMI)
1987
+ // :
1988
+ // if.then:
1989
+ // 32B %1:gpr32 = MOVIMM32 ..
1990
+ // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
1991
+ //
1992
+ // Only the MOVIMM32 require a def of the top lanes and any intervals
1993
+ // for the top 32-bits of the def at 16B should be removed.
1994
+ for (LiveInterval::SubRange &SR : DstInt->subranges ()) {
1995
+ if (!Writes || RequiresImplicitRedef ||
1996
+ (SR.LaneMask & UnusedLanes).none ())
1997
+ continue ;
1998
+
1999
+ assert ((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
2000
+ " Unexpected lanemask. Subrange needs finer granularity" );
2001
+
2002
+ SlotIndex UseIdx = LIS->getInstructionIndex (*UseMI).getRegSlot (false );
2003
+ auto SegmentI = SR.find (UseIdx);
2004
+ if (SegmentI != SR.end ())
2005
+ SR.removeSegment (SegmentI, true );
2006
+ }
2007
+ }
2008
+ }
2009
+
1917
2010
// Replace SrcReg with DstReg in all UseMI operands.
1918
2011
for (unsigned Op : Ops) {
1919
2012
MachineOperand &MO = UseMI->getOperand (Op);
@@ -1922,7 +2015,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
1922
2015
// turn a full def into a read-modify-write sub-register def and vice
1923
2016
// versa.
1924
2017
if (SubIdx && MO.isDef ())
1925
- MO.setIsUndef (!Reads);
2018
+ MO.setIsUndef (!Reads || RequiresImplicitRedef );
1926
2019
1927
2020
// A subreg use of a partially undef (super) register may be a complete
1928
2021
// undef use now and then has to be marked that way.
@@ -2025,6 +2118,30 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
2025
2118
LIS->shrinkToUses (&LI);
2026
2119
}
2027
2120
2121
+ // / For a given use of value \p Idx, it returns the def in the current block,
2122
+ // / or otherwise all possible defs in preceding blocks.
2123
+ static bool FindDefInBlock (SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks,
2124
+ SmallVector<MachineInstr *> &Instrs,
2125
+ LiveIntervals *LIS, LiveInterval &SrcInt,
2126
+ MachineBasicBlock *MBB, VNInfo *Idx) {
2127
+ if (!Idx->isPHIDef ()) {
2128
+ MachineInstr *Def = LIS->getInstructionFromIndex (Idx->def );
2129
+ assert (Def && " Unable to find a def for SUBREG_TO_REG source operand" );
2130
+ Instrs.push_back (Def);
2131
+ return true ;
2132
+ }
2133
+
2134
+ bool Any = false ;
2135
+ if (VisitedBlocks.count (MBB))
2136
+ return false ;
2137
+ VisitedBlocks.insert (MBB);
2138
+ for (MachineBasicBlock *Pred : MBB->predecessors ()) {
2139
+ Any |= FindDefInBlock (VisitedBlocks, Instrs, LIS, SrcInt, Pred,
2140
+ SrcInt.getVNInfoBefore (LIS->getMBBEndIdx (Pred)));
2141
+ }
2142
+ return Any;
2143
+ }
2144
+
2028
2145
bool RegisterCoalescer::joinCopy (
2029
2146
MachineInstr *CopyMI, bool &Again,
2030
2147
SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
@@ -2156,6 +2273,35 @@ bool RegisterCoalescer::joinCopy(
2156
2273
});
2157
2274
}
2158
2275
2276
+ SmallVector<MachineInstr *> SubregToRegSrcInsts;
2277
+ if (CopyMI->isSubregToReg ()) {
2278
+ // For the case where the copy instruction is a SUBREG_TO_REG, e.g.
2279
+ //
2280
+ // %0:gpr32 = movimm32 ..
2281
+ // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
2282
+ // ...
2283
+ // %0:gpr32 = COPY <something>
2284
+ //
2285
+ // After joining liveranges, the original `movimm32` will need an
2286
+ // implicit-def to make it explicit that the entire register is written,
2287
+ // i.e.
2288
+ //
2289
+ // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
2290
+ // ...
2291
+ // undef %0.sub32:gpr64 = COPY <something> // Note that this does not
2292
+ // // require an implicit-def,
2293
+ // // because it has nothing to
2294
+ // // do with the SUBREG_TO_REG.
2295
+ LiveInterval &SrcInt =
2296
+ LIS->getInterval (CP.isFlipped () ? CP.getDstReg () : CP.getSrcReg ());
2297
+ SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex (*CopyMI);
2298
+ SmallPtrSet<MachineBasicBlock *, 8 > VisitedBlocks;
2299
+ if (!FindDefInBlock (VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt,
2300
+ CopyMI->getParent (),
2301
+ SrcInt.Query (SubregToRegSlotIdx).valueIn ()))
2302
+ llvm_unreachable (" SUBREG_TO_REG src requires a def" );
2303
+ }
2304
+
2159
2305
ShrinkMask = LaneBitmask::getNone ();
2160
2306
ShrinkMainRange = false ;
2161
2307
@@ -2225,9 +2371,12 @@ bool RegisterCoalescer::joinCopy(
2225
2371
2226
2372
// Rewrite all SrcReg operands to DstReg.
2227
2373
// Also update DstReg operands to include DstIdx if it is set.
2228
- if (CP.getDstIdx ())
2374
+ if (CP.getDstIdx ()) {
2375
+ assert (SubregToRegSrcInsts.empty () && " can this happen?" );
2229
2376
updateRegDefsUses (CP.getDstReg (), CP.getDstReg (), CP.getDstIdx ());
2230
- updateRegDefsUses (CP.getSrcReg (), CP.getDstReg (), CP.getSrcIdx ());
2377
+ }
2378
+ updateRegDefsUses (CP.getSrcReg (), CP.getDstReg (), CP.getSrcIdx (),
2379
+ SubregToRegSrcInsts);
2231
2380
2232
2381
// Shrink subregister ranges if necessary.
2233
2382
if (ShrinkMask.any ()) {
0 commit comments