Allow commuting cmn

AZero13 · AZero13 · commit 48988dedd801 · 2025-07-31T09:59:53.000-04:00
This will require modifying the outcc, so I had to make that change.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3540,7 +3540,8 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
 }
 
 static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
-                              const SDLoc &DL, SelectionDAG &DAG) {
+                              AArch64CC::CondCode &OutCC, const SDLoc &DL,
+                              SelectionDAG &DAG) {
   EVT VT = LHS.getValueType();
   const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
 
@@ -3563,12 +3564,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
     // Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ?
     Opcode = AArch64ISD::ADDS;
     RHS = RHS.getOperand(1);
-  } else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
-             isIntEqualitySetCC(CC)) {
+  } else if (isCMN(LHS, CC, DAG)) {
     // As we are looking for EQ/NE compares, the operands can be commuted ; can
     // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
     Opcode = AArch64ISD::ADDS;
     LHS = LHS.getOperand(1);
+    OutCC = getSwappedCondition(OutCC);
   } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
     if (LHS.getOpcode() == ISD::AND) {
       // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
@@ -3646,7 +3647,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
 static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
                                          ISD::CondCode CC, SDValue CCOp,
                                          AArch64CC::CondCode Predicate,
-                                         AArch64CC::CondCode OutCC,
+                                         AArch64CC::CondCode &OutCC,
                                          const SDLoc &DL, SelectionDAG &DAG) {
   unsigned Opcode = 0;
   const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
@@ -3668,12 +3669,11 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
   } else if (isCMN(RHS, CC, DAG)) {
     Opcode = AArch64ISD::CCMN;
     RHS = RHS.getOperand(1);
-  } else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
-             isIntEqualitySetCC(CC)) {
-    // As we are looking for EQ/NE compares, the operands can be commuted ; can
-    // we combine a (CCMP (sub 0, op1), op2) into a CCMN instruction ?
+  } else if (isCMN(LHS, CC, DAG)) {
+    // Can we combine a (CCMP (sub 0, op1), op2) into a CCMN instruction ?
     Opcode = AArch64ISD::CCMN;
     LHS = LHS.getOperand(1);
+    OutCC = getSwappedCondition(OutCC);
   }
   if (Opcode == 0)
     Opcode = AArch64ISD::CCMP;
@@ -3786,7 +3786,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
       if (ExtraCC != AArch64CC::AL) {
         SDValue ExtraCmp;
         if (!CCOp.getNode())
-          ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
+          ExtraCmp = emitComparison(LHS, RHS, CC, ExtraCC, DL, DAG);
         else
           ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
                                                ExtraCC, DL, DAG);
@@ -3797,7 +3797,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
 
     // Produce a normal comparison if we are first in the chain
     if (!CCOp)
-      return emitComparison(LHS, RHS, CC, DL, DAG);
+      return emitComparison(LHS, RHS, CC, OutCC, DL, DAG);
     // Otherwise produce a ccmp.
     return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
                                      DAG);
@@ -4014,13 +4014,11 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   // can be turned into:
   //    cmp     w12, w11, lsl #1
   if (!isa<ConstantSDNode>(RHS) || !isLegalCmpImmed(RHS->getAsAPIntVal())) {
-    bool LHSIsCMN = isCMN(LHS, CC, DAG);
-    bool RHSIsCMN = isCMN(RHS, CC, DAG);
-    SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
-    SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
+    SDValue TheLHS = isCMN(LHS, CC, DAG) ? LHS.getOperand(1) : LHS;
+    SDValue TheRHS = isCMN(RHS, CC, DAG) ? RHS.getOperand(1) : RHS;
 
-    if (getCmpOperandFoldingProfit(TheLHS) + (LHSIsCMN ? 1 : 0) >
-        getCmpOperandFoldingProfit(TheRHS) + (RHSIsCMN ? 1 : 0)) {
+    if (getCmpOperandFoldingProfit(TheLHS) >
+        getCmpOperandFoldingProfit(TheRHS)) {
       std::swap(LHS, RHS);
       CC = ISD::getSetCCSwappedOperands(CC);
     }
@@ -4056,10 +4054,11 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
         SDValue SExt =
             DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(), LHS,
                         DAG.getValueType(MVT::i16));
+
+        AArch64CC = changeIntCCToAArch64CC(CC);
         Cmp = emitComparison(
             SExt, DAG.getSignedConstant(ValueofRHS, DL, RHS.getValueType()), CC,
-            DL, DAG);
-        AArch64CC = changeIntCCToAArch64CC(CC);
+            AArch64CC, DL, DAG);
       }
     }
 
@@ -4072,8 +4071,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   }
 
   if (!Cmp) {
-    Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
     AArch64CC = changeIntCCToAArch64CC(CC);
+    Cmp = emitComparison(LHS, RHS, CC, AArch64CC, DL, DAG);
   }
   AArch64cc = DAG.getConstant(AArch64CC, DL, MVT_CC);
   return Cmp;
@@ -10664,8 +10663,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
 
   // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
   // clean.  Some of them require two branches to implement.
-  SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
-  AArch64CC::CondCode CC1, CC2;
+  AArch64CC::CondCode CC1 = AArch64CC::AL, CC2;
+  SDValue Cmp = emitComparison(LHS, RHS, CC, CC1, DL, DAG);
   changeFPCCToAArch64CC(CC, CC1, CC2);
   SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32);
   SDValue BR1 =
@@ -11149,12 +11148,12 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   // If that fails, we'll need to perform an FCMP + CSEL sequence.  Go ahead
   // and do the comparison.
   SDValue Cmp;
+  AArch64CC::CondCode CC1 = AArch64CC::AL, CC2;
   if (IsStrict)
     Cmp = emitStrictFPComparison(LHS, RHS, DL, DAG, Chain, IsSignaling);
   else
-    Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
+    Cmp = emitComparison(LHS, RHS, CC, CC1, DL, DAG);
 
-  AArch64CC::CondCode CC1, CC2;
   changeFPCCToAArch64CC(CC, CC1, CC2);
   SDValue Res;
   if (CC2 == AArch64CC::AL) {
@@ -11550,12 +11549,11 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
     if (VectorCmp)
       return VectorCmp;
   }
-
-  SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
+  AArch64CC::CondCode CC1 = AArch64CC::AL, CC2;
+  SDValue Cmp = emitComparison(LHS, RHS, CC, CC1, DL, DAG);
 
   // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
   // clean.  Some of them require two CSELs to implement.
-  AArch64CC::CondCode CC1, CC2;
   changeFPCCToAArch64CC(CC, CC1, CC2);
 
   if (Flags.hasNoSignedZeros()) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -352,7 +352,7 @@ class AArch64InstructionSelector : public InstructionSelector {
   MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
                                           CmpInst::Predicate CC,
                                           AArch64CC::CondCode Predicate,
-                                          AArch64CC::CondCode OutCC,
+                                          AArch64CC::CondCode &OutCC,
                                           MachineIRBuilder &MIB) const;
   MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
                                    bool Negate, Register CCOp,
@@ -4869,7 +4869,7 @@ static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
 
 MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
     Register LHS, Register RHS, CmpInst::Predicate CC,
-    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
+    AArch64CC::CondCode Predicate, AArch64CC::CondCode &OutCC,
     MachineIRBuilder &MIB) const {
   auto &MRI = *MIB.getMRI();
   LLT OpTy = MRI.getType(LHS);
@@ -4878,7 +4878,25 @@ MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
   if (CmpInst::isIntPredicate(CC)) {
     assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
     C = getIConstantVRegValWithLookThrough(RHS, MRI);
-    if (!C || C->Value.sgt(31) || C->Value.slt(-31))
+    if (!C) {
+      MachineInstr *Def = getDefIgnoringCopies(RHS, MRI);
+      if (isCMN(Def, CC, MRI)) {
+        RHS = Def->getOperand(2).getReg();
+        CCmpOpc =
+            OpTy.getSizeInBits() == 32 ? AArch64::CCMNWr : AArch64::CCMNXr;
+      } else {
+        Def = getDefIgnoringCopies(LHS, MRI);
+        if (isCMN(Def, CC, MRI)) {
+          LHS = Def->getOperand(2).getReg();
+          OutCC = getSwappedCondition(OutCC);
+          CCmpOpc =
+              OpTy.getSizeInBits() == 32 ? AArch64::CCMNWr : AArch64::CCMNXr;
+        } else {
+          CCmpOpc =
+              OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
+        }
+      }
+    } else if (C->Value.sgt(31) || C->Value.slt(-31))
       CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
     else if (C->Value.ule(31))
       CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
@@ -4904,8 +4922,7 @@ MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
   }
   AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
   unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
-  auto CCmp =
-      MIB.buildInstr(CCmpOpc, {}, {LHS});
+  auto CCmp = MIB.buildInstr(CCmpOpc, {}, {LHS});
   if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
     CCmp.addImm(C->Value.getZExtValue());
   else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -270,14 +270,13 @@ define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
 ;
 ; CHECK-GI-LABEL: neg_range_int_comp:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    orr w8, w3, #0x1
 ; CHECK-GI-NEXT:    cmp w0, w2
-; CHECK-GI-NEXT:    neg w8, w8
-; CHECK-GI-NEXT:    ccmp w1, w8, #4, lt
+; CHECK-GI-NEXT:    orr w8, w3, #0x1
+; CHECK-GI-NEXT:    ccmn w1, w8, #4, lt
 ; CHECK-GI-NEXT:    csel w0, w1, w0, gt
 ; CHECK-GI-NEXT:    ret
   %dor = or i32 %d, 1
-  %negd = sub i32 0, %dor
+  %negd = sub nsw i32 0, %dor
   %cmp = icmp sgt i32 %b, %negd
   %cmp1 = icmp slt i32 %a, %c
   %or.cond = and i1 %cmp, %cmp1
@@ -373,14 +372,13 @@ define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {
 ;
 ; CHECK-GI-LABEL: neg_range_int_comp2:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    orr w8, w3, #0x1
 ; CHECK-GI-NEXT:    cmp w0, w2
-; CHECK-GI-NEXT:    neg w8, w8
-; CHECK-GI-NEXT:    ccmp w1, w8, #0, ge
+; CHECK-GI-NEXT:    orr w8, w3, #0x1
+; CHECK-GI-NEXT:    ccmn w1, w8, #0, ge
 ; CHECK-GI-NEXT:    csel w0, w1, w0, lt
 ; CHECK-GI-NEXT:    ret
   %dor = or i32 %d, 1
-  %negd = sub i32 0, %dor
+  %negd = sub nsw i32 0, %dor
   %cmp = icmp slt i32 %b, %negd
   %cmp1 = icmp sge i32 %a, %c
   %or.cond = and i1 %cmp, %cmp1
@@ -407,7 +405,7 @@ define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-GI-NEXT:    csel w0, w1, w0, lo
 ; CHECK-GI-NEXT:    ret
   %dor = or i32 %d, 1
-  %negd = sub i32 0, %dor
+  %negd = sub nsw i32 0, %dor
   %cmp = icmp ult i32 %b, %negd
   %cmp1 = icmp sgt i32 %a, %c
   %or.cond = and i1 %cmp, %cmp1
diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
@@ -261,7 +261,7 @@ define i32 @or_neg(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_neg:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, #0x1
-; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cmn w8, w1
 ; CHECK-NEXT:    cset w0, lt
 ; CHECK-NEXT:    ret
   %3 = or i32 %x, 1
@@ -275,7 +275,7 @@ define i32 @or_neg_ugt(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_neg_ugt:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, #0x1
-; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cmn w8, w1
 ; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
   %3 = or i32 %x, 1
@@ -319,7 +319,7 @@ define i32 @or_neg_no_smin_but_zero(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_neg_no_smin_but_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bic w8, w0, w0, asr #31
-; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cmn w8, w1
 ; CHECK-NEXT:    cset w0, lt
 ; CHECK-NEXT:    ret
   %3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -350,7 +350,7 @@ define i32 @or_neg2(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_neg2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, #0x1
-; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cmn w8, w1
 ; CHECK-NEXT:    cset w0, le
 ; CHECK-NEXT:    ret
   %3 = or i32 %x, 1
@@ -364,7 +364,7 @@ define i32 @or_neg3(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_neg3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, #0x1
-; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cmn w8, w1
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
   %3 = or i32 %x, 1
@@ -378,7 +378,7 @@ define i32 @or_neg4(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_neg4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, #0x1
-; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cmn w8, w1
 ; CHECK-NEXT:    cset w0, ge
 ; CHECK-NEXT:    ret
   %3 = or i32 %x, 1
@@ -392,7 +392,7 @@ define i32 @or_neg_ult(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_neg_ult:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, #0x1
-; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cmn w8, w1
 ; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
   %3 = or i32 %x, 1
@@ -434,7 +434,7 @@ define i32 @or_neg_no_smin_but_zero2(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_neg_no_smin_but_zero2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bic w8, w0, w0, asr #31
-; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cmn w8, w1
 ; CHECK-NEXT:    cset w0, ge
 ; CHECK-NEXT:    ret
   %3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
diff --git a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll