Skip to content

Commit 48988de

Browse files
committed
Allow commuting cmn
Commuting the operands requires swapping the output condition code (OutCC), so OutCC is now passed by reference to the comparison emitters.
1 parent a719091 commit 48988de

File tree

5 files changed

+86
-103
lines changed

5 files changed

+86
-103
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3540,7 +3540,8 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
35403540
}
35413541

35423542
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3543-
const SDLoc &DL, SelectionDAG &DAG) {
3543+
AArch64CC::CondCode &OutCC, const SDLoc &DL,
3544+
SelectionDAG &DAG) {
35443545
EVT VT = LHS.getValueType();
35453546
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
35463547

@@ -3563,12 +3564,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
35633564
// Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction ?
35643565
Opcode = AArch64ISD::ADDS;
35653566
RHS = RHS.getOperand(1);
3566-
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3567-
isIntEqualitySetCC(CC)) {
3567+
} else if (isCMN(LHS, CC, DAG)) {
35683568
// The operands can be commuted if the condition code is swapped too ; can
35693569
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
35703570
Opcode = AArch64ISD::ADDS;
35713571
LHS = LHS.getOperand(1);
3572+
OutCC = getSwappedCondition(OutCC);
35723573
} else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
35733574
if (LHS.getOpcode() == ISD::AND) {
35743575
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
@@ -3646,7 +3647,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
36463647
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
36473648
ISD::CondCode CC, SDValue CCOp,
36483649
AArch64CC::CondCode Predicate,
3649-
AArch64CC::CondCode OutCC,
3650+
AArch64CC::CondCode &OutCC,
36503651
const SDLoc &DL, SelectionDAG &DAG) {
36513652
unsigned Opcode = 0;
36523653
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
@@ -3668,12 +3669,11 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
36683669
} else if (isCMN(RHS, CC, DAG)) {
36693670
Opcode = AArch64ISD::CCMN;
36703671
RHS = RHS.getOperand(1);
3671-
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3672-
isIntEqualitySetCC(CC)) {
3673-
// As we are looking for EQ/NE compares, the operands can be commuted ; can
3674-
// we combine a (CCMP (sub 0, op1), op2) into a CCMN instruction ?
3672+
} else if (isCMN(LHS, CC, DAG)) {
3673+
// Can we combine a (CCMP (sub 0, op1), op2) into a CCMN instruction ?
36753674
Opcode = AArch64ISD::CCMN;
36763675
LHS = LHS.getOperand(1);
3676+
OutCC = getSwappedCondition(OutCC);
36773677
}
36783678
if (Opcode == 0)
36793679
Opcode = AArch64ISD::CCMP;
@@ -3786,7 +3786,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
37863786
if (ExtraCC != AArch64CC::AL) {
37873787
SDValue ExtraCmp;
37883788
if (!CCOp.getNode())
3789-
ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
3789+
ExtraCmp = emitComparison(LHS, RHS, CC, ExtraCC, DL, DAG);
37903790
else
37913791
ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
37923792
ExtraCC, DL, DAG);
@@ -3797,7 +3797,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
37973797

37983798
// Produce a normal comparison if we are first in the chain
37993799
if (!CCOp)
3800-
return emitComparison(LHS, RHS, CC, DL, DAG);
3800+
return emitComparison(LHS, RHS, CC, OutCC, DL, DAG);
38013801
// Otherwise produce a ccmp.
38023802
return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
38033803
DAG);
@@ -4014,13 +4014,11 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
40144014
// can be turned into:
40154015
// cmp w12, w11, lsl #1
40164016
if (!isa<ConstantSDNode>(RHS) || !isLegalCmpImmed(RHS->getAsAPIntVal())) {
4017-
bool LHSIsCMN = isCMN(LHS, CC, DAG);
4018-
bool RHSIsCMN = isCMN(RHS, CC, DAG);
4019-
SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
4020-
SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
4017+
SDValue TheLHS = isCMN(LHS, CC, DAG) ? LHS.getOperand(1) : LHS;
4018+
SDValue TheRHS = isCMN(RHS, CC, DAG) ? RHS.getOperand(1) : RHS;
40214019

4022-
if (getCmpOperandFoldingProfit(TheLHS) + (LHSIsCMN ? 1 : 0) >
4023-
getCmpOperandFoldingProfit(TheRHS) + (RHSIsCMN ? 1 : 0)) {
4020+
if (getCmpOperandFoldingProfit(TheLHS) >
4021+
getCmpOperandFoldingProfit(TheRHS)) {
40244022
std::swap(LHS, RHS);
40254023
CC = ISD::getSetCCSwappedOperands(CC);
40264024
}
@@ -4056,10 +4054,11 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
40564054
SDValue SExt =
40574055
DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(), LHS,
40584056
DAG.getValueType(MVT::i16));
4057+
4058+
AArch64CC = changeIntCCToAArch64CC(CC);
40594059
Cmp = emitComparison(
40604060
SExt, DAG.getSignedConstant(ValueofRHS, DL, RHS.getValueType()), CC,
4061-
DL, DAG);
4062-
AArch64CC = changeIntCCToAArch64CC(CC);
4061+
AArch64CC, DL, DAG);
40634062
}
40644063
}
40654064

@@ -4072,8 +4071,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
40724071
}
40734072

40744073
if (!Cmp) {
4075-
Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
40764074
AArch64CC = changeIntCCToAArch64CC(CC);
4075+
Cmp = emitComparison(LHS, RHS, CC, AArch64CC, DL, DAG);
40774076
}
40784077
AArch64cc = DAG.getConstant(AArch64CC, DL, MVT_CC);
40794078
return Cmp;
@@ -10664,8 +10663,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
1066410663

1066510664
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
1066610665
// clean. Some of them require two branches to implement.
10667-
SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
10668-
AArch64CC::CondCode CC1, CC2;
10666+
AArch64CC::CondCode CC1 = AArch64CC::AL, CC2;
10667+
SDValue Cmp = emitComparison(LHS, RHS, CC, CC1, DL, DAG);
1066910668
changeFPCCToAArch64CC(CC, CC1, CC2);
1067010669
SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32);
1067110670
SDValue BR1 =
@@ -11149,12 +11148,12 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1114911148
// If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
1115011149
// and do the comparison.
1115111150
SDValue Cmp;
11151+
AArch64CC::CondCode CC1 = AArch64CC::AL, CC2;
1115211152
if (IsStrict)
1115311153
Cmp = emitStrictFPComparison(LHS, RHS, DL, DAG, Chain, IsSignaling);
1115411154
else
11155-
Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
11155+
Cmp = emitComparison(LHS, RHS, CC, CC1, DL, DAG);
1115611156

11157-
AArch64CC::CondCode CC1, CC2;
1115811157
changeFPCCToAArch64CC(CC, CC1, CC2);
1115911158
SDValue Res;
1116011159
if (CC2 == AArch64CC::AL) {
@@ -11550,12 +11549,11 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
1155011549
if (VectorCmp)
1155111550
return VectorCmp;
1155211551
}
11553-
11554-
SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
11552+
AArch64CC::CondCode CC1 = AArch64CC::AL, CC2;
11553+
SDValue Cmp = emitComparison(LHS, RHS, CC, CC1, DL, DAG);
1155511554

1155611555
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
1155711556
// clean. Some of them require two CSELs to implement.
11558-
AArch64CC::CondCode CC1, CC2;
1155911557
changeFPCCToAArch64CC(CC, CC1, CC2);
1156011558

1156111559
if (Flags.hasNoSignedZeros()) {

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ class AArch64InstructionSelector : public InstructionSelector {
352352
MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
353353
CmpInst::Predicate CC,
354354
AArch64CC::CondCode Predicate,
355-
AArch64CC::CondCode OutCC,
355+
AArch64CC::CondCode &OutCC,
356356
MachineIRBuilder &MIB) const;
357357
MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
358358
bool Negate, Register CCOp,
@@ -4869,7 +4869,7 @@ static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
48694869

48704870
MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
48714871
Register LHS, Register RHS, CmpInst::Predicate CC,
4872-
AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4872+
AArch64CC::CondCode Predicate, AArch64CC::CondCode &OutCC,
48734873
MachineIRBuilder &MIB) const {
48744874
auto &MRI = *MIB.getMRI();
48754875
LLT OpTy = MRI.getType(LHS);
@@ -4878,7 +4878,25 @@ MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
48784878
if (CmpInst::isIntPredicate(CC)) {
48794879
assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
48804880
C = getIConstantVRegValWithLookThrough(RHS, MRI);
4881-
if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4881+
if (!C) {
4882+
MachineInstr *Def = getDefIgnoringCopies(RHS, MRI);
4883+
if (isCMN(Def, CC, MRI)) {
4884+
RHS = Def->getOperand(2).getReg();
4885+
CCmpOpc =
4886+
OpTy.getSizeInBits() == 32 ? AArch64::CCMNWr : AArch64::CCMNXr;
4887+
} else {
4888+
Def = getDefIgnoringCopies(LHS, MRI);
4889+
if (isCMN(Def, CC, MRI)) {
4890+
LHS = Def->getOperand(2).getReg();
4891+
OutCC = getSwappedCondition(OutCC);
4892+
CCmpOpc =
4893+
OpTy.getSizeInBits() == 32 ? AArch64::CCMNWr : AArch64::CCMNXr;
4894+
} else {
4895+
CCmpOpc =
4896+
OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4897+
}
4898+
}
4899+
} else if (C->Value.sgt(31) || C->Value.slt(-31))
48824900
CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
48834901
else if (C->Value.ule(31))
48844902
CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
@@ -4904,8 +4922,7 @@ MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
49044922
}
49054923
AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
49064924
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4907-
auto CCmp =
4908-
MIB.buildInstr(CCmpOpc, {}, {LHS});
4925+
auto CCmp = MIB.buildInstr(CCmpOpc, {}, {LHS});
49094926
if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
49104927
CCmp.addImm(C->Value.getZExtValue());
49114928
else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)

llvm/test/CodeGen/AArch64/cmp-chains.ll

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -270,14 +270,13 @@ define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
270270
;
271271
; CHECK-GI-LABEL: neg_range_int_comp:
272272
; CHECK-GI: // %bb.0:
273-
; CHECK-GI-NEXT: orr w8, w3, #0x1
274273
; CHECK-GI-NEXT: cmp w0, w2
275-
; CHECK-GI-NEXT: neg w8, w8
276-
; CHECK-GI-NEXT: ccmp w1, w8, #4, lt
274+
; CHECK-GI-NEXT: orr w8, w3, #0x1
275+
; CHECK-GI-NEXT: ccmn w1, w8, #4, lt
277276
; CHECK-GI-NEXT: csel w0, w1, w0, gt
278277
; CHECK-GI-NEXT: ret
279278
%dor = or i32 %d, 1
280-
%negd = sub i32 0, %dor
279+
%negd = sub nsw i32 0, %dor
281280
%cmp = icmp sgt i32 %b, %negd
282281
%cmp1 = icmp slt i32 %a, %c
283282
%or.cond = and i1 %cmp, %cmp1
@@ -373,14 +372,13 @@ define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {
373372
;
374373
; CHECK-GI-LABEL: neg_range_int_comp2:
375374
; CHECK-GI: // %bb.0:
376-
; CHECK-GI-NEXT: orr w8, w3, #0x1
377375
; CHECK-GI-NEXT: cmp w0, w2
378-
; CHECK-GI-NEXT: neg w8, w8
379-
; CHECK-GI-NEXT: ccmp w1, w8, #0, ge
376+
; CHECK-GI-NEXT: orr w8, w3, #0x1
377+
; CHECK-GI-NEXT: ccmn w1, w8, #0, ge
380378
; CHECK-GI-NEXT: csel w0, w1, w0, lt
381379
; CHECK-GI-NEXT: ret
382380
%dor = or i32 %d, 1
383-
%negd = sub i32 0, %dor
381+
%negd = sub nsw i32 0, %dor
384382
%cmp = icmp slt i32 %b, %negd
385383
%cmp1 = icmp sge i32 %a, %c
386384
%or.cond = and i1 %cmp, %cmp1
@@ -407,7 +405,7 @@ define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {
407405
; CHECK-GI-NEXT: csel w0, w1, w0, lo
408406
; CHECK-GI-NEXT: ret
409407
%dor = or i32 %d, 1
410-
%negd = sub i32 0, %dor
408+
%negd = sub nsw i32 0, %dor
411409
%cmp = icmp ult i32 %b, %negd
412410
%cmp1 = icmp sgt i32 %a, %c
413411
%or.cond = and i1 %cmp, %cmp1

llvm/test/CodeGen/AArch64/cmp-select-sign.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ define i32 @or_neg(i32 %x, i32 %y) {
261261
; CHECK-LABEL: or_neg:
262262
; CHECK: // %bb.0:
263263
; CHECK-NEXT: orr w8, w0, #0x1
264-
; CHECK-NEXT: cmn w1, w8
264+
; CHECK-NEXT: cmn w8, w1
265265
; CHECK-NEXT: cset w0, lt
266266
; CHECK-NEXT: ret
267267
%3 = or i32 %x, 1
@@ -275,7 +275,7 @@ define i32 @or_neg_ugt(i32 %x, i32 %y) {
275275
; CHECK-LABEL: or_neg_ugt:
276276
; CHECK: // %bb.0:
277277
; CHECK-NEXT: orr w8, w0, #0x1
278-
; CHECK-NEXT: cmn w1, w8
278+
; CHECK-NEXT: cmn w8, w1
279279
; CHECK-NEXT: cset w0, lo
280280
; CHECK-NEXT: ret
281281
%3 = or i32 %x, 1
@@ -319,7 +319,7 @@ define i32 @or_neg_no_smin_but_zero(i32 %x, i32 %y) {
319319
; CHECK-LABEL: or_neg_no_smin_but_zero:
320320
; CHECK: // %bb.0:
321321
; CHECK-NEXT: bic w8, w0, w0, asr #31
322-
; CHECK-NEXT: cmn w1, w8
322+
; CHECK-NEXT: cmn w8, w1
323323
; CHECK-NEXT: cset w0, lt
324324
; CHECK-NEXT: ret
325325
%3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -350,7 +350,7 @@ define i32 @or_neg2(i32 %x, i32 %y) {
350350
; CHECK-LABEL: or_neg2:
351351
; CHECK: // %bb.0:
352352
; CHECK-NEXT: orr w8, w0, #0x1
353-
; CHECK-NEXT: cmn w1, w8
353+
; CHECK-NEXT: cmn w8, w1
354354
; CHECK-NEXT: cset w0, le
355355
; CHECK-NEXT: ret
356356
%3 = or i32 %x, 1
@@ -364,7 +364,7 @@ define i32 @or_neg3(i32 %x, i32 %y) {
364364
; CHECK-LABEL: or_neg3:
365365
; CHECK: // %bb.0:
366366
; CHECK-NEXT: orr w8, w0, #0x1
367-
; CHECK-NEXT: cmn w1, w8
367+
; CHECK-NEXT: cmn w8, w1
368368
; CHECK-NEXT: cset w0, gt
369369
; CHECK-NEXT: ret
370370
%3 = or i32 %x, 1
@@ -378,7 +378,7 @@ define i32 @or_neg4(i32 %x, i32 %y) {
378378
; CHECK-LABEL: or_neg4:
379379
; CHECK: // %bb.0:
380380
; CHECK-NEXT: orr w8, w0, #0x1
381-
; CHECK-NEXT: cmn w1, w8
381+
; CHECK-NEXT: cmn w8, w1
382382
; CHECK-NEXT: cset w0, ge
383383
; CHECK-NEXT: ret
384384
%3 = or i32 %x, 1
@@ -392,7 +392,7 @@ define i32 @or_neg_ult(i32 %x, i32 %y) {
392392
; CHECK-LABEL: or_neg_ult:
393393
; CHECK: // %bb.0:
394394
; CHECK-NEXT: orr w8, w0, #0x1
395-
; CHECK-NEXT: cmn w1, w8
395+
; CHECK-NEXT: cmn w8, w1
396396
; CHECK-NEXT: cset w0, lo
397397
; CHECK-NEXT: ret
398398
%3 = or i32 %x, 1
@@ -434,7 +434,7 @@ define i32 @or_neg_no_smin_but_zero2(i32 %x, i32 %y) {
434434
; CHECK-LABEL: or_neg_no_smin_but_zero2:
435435
; CHECK: // %bb.0:
436436
; CHECK-NEXT: bic w8, w0, w0, asr #31
437-
; CHECK-NEXT: cmn w1, w8
437+
; CHECK-NEXT: cmn w8, w1
438438
; CHECK-NEXT: cset w0, ge
439439
; CHECK-NEXT: ret
440440
%3 = call i32 @llvm.smax.i32(i32 %x, i32 0)

0 commit comments

Comments
 (0)