Skip to content

Commit 62ff996

Browse files
committed
[SystemZ] Improve foldMemoryOperandImpl().
Swap the compare operands if the LHS is spilled, while updating the CCMasks of the CC users. This is relatively straightforward since the live-in lists for the CC register can be assumed to be correct during register allocation (thanks to 659efa2). Also fold a spilled operand of an LOCR/SELR into an LOC(G). Review: Ulrich Weigand Differential Revision: https://reviews.llvm.org/D67437
1 parent 14219aa commit 62ff996

File tree

7 files changed

+585
-45
lines changed

7 files changed

+585
-45
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2190,15 +2190,6 @@ static bool shouldSwapCmpOperands(const Comparison &C) {
21902190
return false;
21912191
}
21922192

2193-
// Return a version of comparison CC mask CCMask in which the LT and GT
2194-
// actions are swapped.
2195-
static unsigned reverseCCMask(unsigned CCMask) {
2196-
return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
2197-
(CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
2198-
(CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
2199-
(CCMask & SystemZ::CCMASK_CMP_UO));
2200-
}
2201-
22022193
// Check whether C tests for equality between X and Y and whether X - Y
22032194
// or Y - X is also computed. In that case it's better to compare the
22042195
// result of the subtraction against zero.
@@ -2234,7 +2225,7 @@ static void adjustForFNeg(Comparison &C) {
22342225
SDNode *N = *I;
22352226
if (N->getOpcode() == ISD::FNEG) {
22362227
C.Op0 = SDValue(N, 0);
2237-
C.CCMask = reverseCCMask(C.CCMask);
2228+
C.CCMask = SystemZ::reverseCCMask(C.CCMask);
22382229
return;
22392230
}
22402231
}
@@ -2601,7 +2592,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
26012592

26022593
if (shouldSwapCmpOperands(C)) {
26032594
std::swap(C.Op0, C.Op1);
2604-
C.CCMask = reverseCCMask(C.CCMask);
2595+
C.CCMask = SystemZ::reverseCCMask(C.CCMask);
26052596
}
26062597

26072598
adjustForTestUnderMask(DAG, DL, C);
@@ -6277,15 +6268,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
62776268
return false;
62786269

62796270
// Compute the effective CC mask for the new branch or select.
6280-
switch (CCMask) {
6281-
case SystemZ::CCMASK_CMP_EQ: break;
6282-
case SystemZ::CCMASK_CMP_NE: break;
6283-
case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
6284-
case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
6285-
case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
6286-
case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
6287-
default: return false;
6288-
}
6271+
CCMask = SystemZ::reverseCCMask(CCMask);
62896272

62906273
// Return the updated CCReg link.
62916274
CCReg = IPM->getOperand(0);

llvm/lib/Target/SystemZ/SystemZInstrFormats.td

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2807,6 +2807,10 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
28072807
let mayLoad = 1;
28082808
let AccessBytes = bytes;
28092809
let CCMaskLast = 1;
2810+
let OpKey = mnemonic#"r"#cls;
2811+
let OpType = "mem";
2812+
let MemKey = mnemonic#cls;
2813+
let MemType = "target";
28102814
}
28112815

28122816
// Like CondUnaryRSY, but used for the raw assembly form. The condition-code
@@ -3211,6 +3215,8 @@ class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
32113215
let CCMaskLast = 1;
32123216
let NumOpsKey = !subst("loc", "sel", mnemonic);
32133217
let NumOpsValue = "2";
3218+
let OpKey = mnemonic#cls1;
3219+
let OpType = "reg";
32143220
}
32153221

32163222
// Like CondBinaryRRF, but used for the raw assembly form. The condition-code
@@ -3252,6 +3258,8 @@ class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
32523258
let CCMaskLast = 1;
32533259
let NumOpsKey = mnemonic;
32543260
let NumOpsValue = "3";
3261+
let OpKey = mnemonic#cls1;
3262+
let OpType = "reg";
32553263
}
32563264

32573265
// Like CondBinaryRRFa, but used for the raw assembly form. The condition-code
@@ -4775,6 +4783,20 @@ class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
47754783
let hasNoSchedulingInfo = 1;
47764784
}
47774785

4786+
// Same as MemFoldPseudo but for Load On Condition with CC operands.
4787+
class MemFoldPseudo_CondMove<string mnemonic, RegisterOperand cls, bits<5> bytes,
4788+
AddressingMode mode>
4789+
: Pseudo<(outs cls:$R1),
4790+
(ins cls:$R2, mode:$XBD2, cond4:$valid, cond4:$M3), []> {
4791+
let OpKey = !subst("loc", "sel", mnemonic)#"r"#cls;
4792+
let OpType = "mem";
4793+
let MemKey = mnemonic#cls;
4794+
let MemType = "pseudo";
4795+
let mayLoad = 1;
4796+
let AccessBytes = bytes;
4797+
let hasNoSchedulingInfo = 1;
4798+
}
4799+
47784800
// Like CompareRI, but expanded after RA depending on the choice of register.
47794801
class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
47804802
ImmOpWithPattern imm>
@@ -4813,6 +4835,8 @@ class CondBinaryRRFPseudo<string mnemonic, RegisterOperand cls1,
48134835
let CCMaskLast = 1;
48144836
let NumOpsKey = !subst("loc", "sel", mnemonic);
48154837
let NumOpsValue = "2";
4838+
let OpKey = mnemonic#cls1;
4839+
let OpType = "reg";
48164840
}
48174841

48184842
// Like CondBinaryRRFa, but expanded after RA depending on the choice of
@@ -4826,6 +4850,8 @@ class CondBinaryRRFaPseudo<string mnemonic, RegisterOperand cls1,
48264850
let CCMaskLast = 1;
48274851
let NumOpsKey = mnemonic;
48284852
let NumOpsValue = "3";
4853+
let OpKey = mnemonic#cls1;
4854+
let OpType = "reg";
48294855
}
48304856

48314857
// Like CondBinaryRIE, but expanded after RA depending on the choice of
@@ -4842,8 +4868,9 @@ class CondBinaryRIEPseudo<RegisterOperand cls, ImmOpWithPattern imm>
48424868

48434869
// Like CondUnaryRSY, but expanded after RA depending on the choice of
48444870
// register.
4845-
class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
4846-
bits<5> bytes, AddressingMode mode = bdaddr20only>
4871+
class CondUnaryRSYPseudo<string mnemonic, SDPatternOperator operator,
4872+
RegisterOperand cls, bits<5> bytes,
4873+
AddressingMode mode = bdaddr20only>
48474874
: Pseudo<(outs cls:$R1),
48484875
(ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),
48494876
[(set cls:$R1,
@@ -4854,6 +4881,10 @@ class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
48544881
let mayLoad = 1;
48554882
let AccessBytes = bytes;
48564883
let CCMaskLast = 1;
4884+
let OpKey = mnemonic#"r"#cls;
4885+
let OpType = "mem";
4886+
let MemKey = mnemonic#cls;
4887+
let MemType = "target";
48574888
}
48584889

48594890
// Like CondStoreRSY, but expanded after RA depending on the choice of
@@ -5066,6 +5097,22 @@ multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode,
50665097
def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, bdxaddr12pair>;
50675098
}
50685099

5100+
multiclass CondUnaryRSYPairAndMemFold<string mnemonic, bits<16> opcode,
5101+
SDPatternOperator operator,
5102+
RegisterOperand cls, bits<5> bytes,
5103+
AddressingMode mode = bdaddr20only> {
5104+
defm "" : CondUnaryRSYPair<mnemonic, opcode, operator, cls, bytes, mode>;
5105+
def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>;
5106+
}
5107+
5108+
multiclass CondUnaryRSYPseudoAndMemFold<string mnemonic,
5109+
SDPatternOperator operator,
5110+
RegisterOperand cls, bits<5> bytes,
5111+
AddressingMode mode = bdaddr20only> {
5112+
def "" : CondUnaryRSYPseudo<mnemonic, operator, cls, bytes, mode>;
5113+
def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>;
5114+
}
5115+
50695116
// Define an instruction that operates on two fixed-length blocks of memory,
50705117
// and associated pseudo instructions for operating on blocks of any size.
50715118
// The Sequence form uses a straight-line sequence of instructions and

llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp

Lines changed: 96 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,14 +1150,31 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
11501150
// commutable, try to change <INSN>R into <INSN>.
11511151
unsigned NumOps = MI.getNumExplicitOperands();
11521152
int MemOpcode = SystemZ::getMemOpcode(Opcode);
1153+
if (MemOpcode == -1)
1154+
return nullptr;
1155+
1156+
// Try to swap compare operands if possible.
1157+
bool NeedsCommute = false;
1158+
if ((MI.getOpcode() == SystemZ::CR || MI.getOpcode() == SystemZ::CGR ||
1159+
MI.getOpcode() == SystemZ::CLR || MI.getOpcode() == SystemZ::CLGR) &&
1160+
OpNum == 0 && prepareCompareSwapOperands(MI))
1161+
NeedsCommute = true;
1162+
1163+
bool CCOperands = false;
1164+
if (MI.getOpcode() == SystemZ::LOCRMux || MI.getOpcode() == SystemZ::LOCGR ||
1165+
MI.getOpcode() == SystemZ::SELRMux || MI.getOpcode() == SystemZ::SELGR) {
1166+
assert(MI.getNumOperands() == 6 && NumOps == 5 &&
1167+
"LOCR/SELR instruction operands corrupt?");
1168+
NumOps -= 2;
1169+
CCOperands = true;
1170+
}
11531171

11541172
// See if this is a 3-address instruction that is convertible to 2-address
11551173
// and suitable for folding below. Only try this with virtual registers
11561174
// and a provided VRM (during regalloc).
1157-
bool NeedsCommute = false;
1158-
if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) {
1175+
if (SystemZ::getTwoOperandOpcode(Opcode) != -1) {
11591176
if (VRM == nullptr)
1160-
MemOpcode = -1;
1177+
return nullptr;
11611178
else {
11621179
assert(NumOps == 3 && "Expected two source registers.");
11631180
Register DstReg = MI.getOperand(0).getReg();
@@ -1172,32 +1189,42 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
11721189
DstPhys == VRM->getPhys(SrcReg))
11731190
NeedsCommute = (OpNum == 1);
11741191
else
1175-
MemOpcode = -1;
1192+
return nullptr;
11761193
}
11771194
}
11781195

1179-
if (MemOpcode >= 0) {
1180-
if ((OpNum == NumOps - 1) || NeedsCommute) {
1181-
const MCInstrDesc &MemDesc = get(MemOpcode);
1182-
uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
1183-
assert(AccessBytes != 0 && "Size of access should be known");
1184-
assert(AccessBytes <= Size && "Access outside the frame index");
1185-
uint64_t Offset = Size - AccessBytes;
1186-
MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
1187-
MI.getDebugLoc(), get(MemOpcode));
1196+
if ((OpNum == NumOps - 1) || NeedsCommute) {
1197+
const MCInstrDesc &MemDesc = get(MemOpcode);
1198+
uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
1199+
assert(AccessBytes != 0 && "Size of access should be known");
1200+
assert(AccessBytes <= Size && "Access outside the frame index");
1201+
uint64_t Offset = Size - AccessBytes;
1202+
MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
1203+
MI.getDebugLoc(), get(MemOpcode));
1204+
if (MI.isCompare()) {
1205+
assert(NumOps == 2 && "Expected 2 register operands for a compare.");
1206+
MIB.add(MI.getOperand(NeedsCommute ? 1 : 0));
1207+
}
1208+
else {
11881209
MIB.add(MI.getOperand(0));
11891210
if (NeedsCommute)
11901211
MIB.add(MI.getOperand(2));
11911212
else
11921213
for (unsigned I = 1; I < OpNum; ++I)
11931214
MIB.add(MI.getOperand(I));
1194-
MIB.addFrameIndex(FrameIndex).addImm(Offset);
1195-
if (MemDesc.TSFlags & SystemZII::HasIndex)
1196-
MIB.addReg(0);
1197-
transferDeadCC(&MI, MIB);
1198-
transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
1199-
return MIB;
12001215
}
1216+
MIB.addFrameIndex(FrameIndex).addImm(Offset);
1217+
if (MemDesc.TSFlags & SystemZII::HasIndex)
1218+
MIB.addReg(0);
1219+
if (CCOperands) {
1220+
unsigned CCValid = MI.getOperand(NumOps).getImm();
1221+
unsigned CCMask = MI.getOperand(NumOps + 1).getImm();
1222+
MIB.addImm(CCValid);
1223+
MIB.addImm(NeedsCommute ? CCMask ^ CCValid : CCMask);
1224+
}
1225+
transferDeadCC(&MI, MIB);
1226+
transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
1227+
return MIB;
12011228
}
12021229

12031230
return nullptr;
@@ -1706,6 +1733,56 @@ unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
17061733
return 0;
17071734
}
17081735

1736+
bool SystemZInstrInfo::
1737+
prepareCompareSwapOperands(MachineBasicBlock::iterator const MBBI) const {
1738+
assert(MBBI->isCompare() && MBBI->getOperand(0).isReg() &&
1739+
MBBI->getOperand(1).isReg() && !MBBI->mayLoad() &&
1740+
"Not a compare reg/reg.");
1741+
1742+
MachineBasicBlock *MBB = MBBI->getParent();
1743+
bool CCLive = true;
1744+
SmallVector<MachineInstr *, 4> CCUsers;
1745+
for (MachineBasicBlock::iterator Itr = std::next(MBBI);
1746+
Itr != MBB->end(); ++Itr) {
1747+
if (Itr->readsRegister(SystemZ::CC)) {
1748+
unsigned Flags = Itr->getDesc().TSFlags;
1749+
if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast))
1750+
CCUsers.push_back(&*Itr);
1751+
else
1752+
return false;
1753+
}
1754+
if (Itr->definesRegister(SystemZ::CC)) {
1755+
CCLive = false;
1756+
break;
1757+
}
1758+
}
1759+
if (CCLive) {
1760+
LivePhysRegs LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo());
1761+
LiveRegs.addLiveOuts(*MBB);
1762+
if (LiveRegs.contains(SystemZ::CC))
1763+
return false;
1764+
}
1765+
1766+
// Update all CC users.
1767+
for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) {
1768+
unsigned Flags = CCUsers[Idx]->getDesc().TSFlags;
1769+
unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ?
1770+
0 : CCUsers[Idx]->getNumExplicitOperands() - 2);
1771+
MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1);
1772+
unsigned NewCCMask = SystemZ::reverseCCMask(CCMaskMO.getImm());
1773+
CCMaskMO.setImm(NewCCMask);
1774+
}
1775+
1776+
return true;
1777+
}
1778+
1779+
unsigned SystemZ::reverseCCMask(unsigned CCMask) {
1780+
return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
1781+
(CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
1782+
(CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
1783+
(CCMask & SystemZ::CCMASK_CMP_UO));
1784+
}
1785+
17091786
unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
17101787
if (!STI.hasLoadAndTrap())
17111788
return 0;

llvm/lib/Target/SystemZ/SystemZInstrInfo.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,10 @@ enum FusedCompareType {
155155
namespace SystemZ {
156156
int getTwoOperandOpcode(uint16_t Opcode);
157157
int getTargetMemOpcode(uint16_t Opcode);
158+
159+
// Return a version of comparison CC mask CCMask in which the LT and GT
160+
// actions are swapped.
161+
unsigned reverseCCMask(unsigned CCMask);
158162
}
159163

160164
class SystemZInstrInfo : public SystemZGenInstrInfo {
@@ -314,6 +318,12 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
314318
SystemZII::FusedCompareType Type,
315319
const MachineInstr *MI = nullptr) const;
316320

321+
// Try to find all CC users of the compare instruction (MBBI) and update
322+
// all of them to maintain equivalent behavior after swapping the compare
323+
// operands. Return false if not all users can be conclusively found and
324+
// handled. The compare instruction is *not* changed.
325+
bool prepareCompareSwapOperands(MachineBasicBlock::iterator MBBI) const;
326+
317327
// If Opcode is a LOAD opcode for which an associated LOAD AND TRAP
318328
// operation exists, return the opcode for the latter, otherwise return 0.
319329
unsigned getLoadAndTrap(unsigned Opcode) const;

llvm/lib/Target/SystemZ/SystemZInstrInfo.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {
492492
let isCommutable = 1 in {
493493
// Expands to SELR or SELFHR or a branch-and-move sequence,
494494
// depending on the choice of registers.
495-
def SELRMux : CondBinaryRRFaPseudo<"selrmux", GRX32, GRX32, GRX32>;
495+
def SELRMux : CondBinaryRRFaPseudo<"MUXselr", GRX32, GRX32, GRX32>;
496496
defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;
497497
defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>;
498498
defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>;
@@ -525,13 +525,13 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
525525
let isCommutable = 1 in {
526526
// Expands to LOCR or LOCFHR or a branch-and-move sequence,
527527
// depending on the choice of registers.
528-
def LOCRMux : CondBinaryRRFPseudo<"locrmux", GRX32, GRX32>;
528+
def LOCRMux : CondBinaryRRFPseudo<"MUXlocr", GRX32, GRX32>;
529529
defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>;
530530
}
531531

532532
// Load on condition. Matched via DAG pattern.
533533
// Expands to LOC or LOCFH, depending on the choice of register.
534-
def LOCMux : CondUnaryRSYPseudo<simple_load, GRX32, 4>;
534+
defm LOCMux : CondUnaryRSYPseudoAndMemFold<"MUXloc", simple_load, GRX32, 4>;
535535
defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, simple_load, GRH32, 4>;
536536

537537
// Store on condition. Expanded from CondStore* pseudos.
@@ -564,7 +564,7 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {
564564

565565
// Load on condition. Matched via DAG pattern.
566566
defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, simple_load, GR32, 4>;
567-
defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, simple_load, GR64, 8>;
567+
defm LOCG : CondUnaryRSYPairAndMemFold<"locg", 0xEBE2, simple_load, GR64, 8>;
568568

569569
// Store on condition. Expanded from CondStore* pseudos.
570570
defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>;

0 commit comments

Comments
 (0)