[PowerPC] use inst-level fast-math-flags to drive MachineCombiner

Chen Zheng · Chen Zheng · commit 45d92806eaf8 · 2020-04-28T03:31:12.000-04:00
Currently, the PowerPC target uses the function-scope UnsafeFPMath option to drive the Machine Combiner pass. This is inaccurate in two ways: 1: The scope is too coarse. The Machine Combiner pass only requires instruction-level flags, not a function-scope option. 2: The floating-point flag is too broad. The Machine Combiner pass only requires the floating-point flags reassoc and nsz. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D78183
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -225,6 +225,26 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   return Latency;
 }
 
+/// Architecture-specific helper of reassociateOps. After reassociation, both
+/// new instructions get the flags shared by the two original instructions.
+void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
+                                         MachineInstr &OldMI2,
+                                         MachineInstr &NewMI1,
+                                         MachineInstr &NewMI2) const {
+  // Keep only the flags set on both originals; this propagates the FP flags.
+  // But clear poison-generating flags because those may not be valid now.
+  uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+  NewMI1.setFlags(IntersectedFlags);
+  NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
+  NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
+  NewMI1.clearFlag(MachineInstr::MIFlag::IsExact);
+
+  NewMI2.setFlags(IntersectedFlags);
+  NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap);
+  NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap);
+  NewMI2.clearFlag(MachineInstr::MIFlag::IsExact);
+}
+
 // This function does not list all associative and commutative operations, but
 // only those worth feeding through the machine combiner in an attempt to
 // reduce the critical path. Mostly, this means floating-point operations,
@@ -258,7 +278,8 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
   case PPC::QVFMUL:
   case PPC::QVFMULS:
   case PPC::QVFMULSs:
-    return true;
+    return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+           Inst.getFlag(MachineInstr::MIFlag::FmNsz);
   default:
     return false;
   }
@@ -272,10 +293,6 @@ bool PPCInstrInfo::getMachineCombinerPatterns(
   if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
     return false;
 
-  // FP reassociation is only legal when we don't need strict IEEE semantics.
-  if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
-    return false;
-
   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -248,6 +248,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
 
   bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
 
+  void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
+                             MachineInstr &NewMI1,
+                             MachineInstr &NewMI2) const override;
+
   bool isCoalescableExtInstr(const MachineInstr &MI,
                              Register &SrcReg, Register &DstReg,
                              unsigned &SubIdx) const override;
diff --git a/llvm/test/CodeGen/PowerPC/machine-combiner.ll b/llvm/test/CodeGen/PowerPC/machine-combiner.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck  %s -check-prefix=CHECK -check-prefix=CHECK-PWR
-; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q -enable-unsafe-fp-math < %s | FileCheck  %s -check-prefix=CHECK -check-prefix=CHECK-QPX
+; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 < %s | FileCheck  %s -check-prefix=CHECK -check-prefix=CHECK-PWR
+; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q < %s | FileCheck  %s -check-prefix=CHECK -check-prefix=CHECK-QPX
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -14,9 +14,9 @@ define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
 ; CHECK:       fadds 1, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %t0, %x2
-  %t2 = fadd float %t1, %x3
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %t0, %x2
+  %t2 = fadd reassoc nsz float %t1, %x3
   ret float %t2
 }
 
@@ -28,9 +28,9 @@ define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
 ; CHECK:       fadds 1, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %x2, %t0
-  %t2 = fadd float %t1, %x3
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %x2, %t0
+  %t2 = fadd reassoc nsz float %t1, %x3
   ret float %t2
 }
 
@@ -42,9 +42,9 @@ define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
 ; CHECK:       fadds 1, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %t0, %x2
-  %t2 = fadd float %x3, %t1
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %t0, %x2
+  %t2 = fadd reassoc nsz float %x3, %t1
   ret float %t2
 }
 
@@ -56,9 +56,9 @@ define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
 ; CHECK:       fadds 1, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %x2, %t0
-  %t2 = fadd float %x3, %t1
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %x2, %t0
+  %t2 = fadd reassoc nsz float %x3, %t1
   ret float %t2
 }
 
@@ -77,13 +77,13 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa
 ; CHECK:       fadds 1, [[REG2]], 8
 ; CHECK-NEXT:    blr
 
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %t0, %x2
-  %t2 = fadd float %t1, %x3
-  %t3 = fadd float %t2, %x4
-  %t4 = fadd float %t3, %x5
-  %t5 = fadd float %t4, %x6
-  %t6 = fadd float %t5, %x7
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %t0, %x2
+  %t2 = fadd reassoc nsz float %t1, %x3
+  %t3 = fadd reassoc nsz float %t2, %x4
+  %t4 = fadd reassoc nsz float %t3, %x5
+  %t5 = fadd reassoc nsz float %t4, %x6
+  %t6 = fadd reassoc nsz float %t5, %x7
   ret float %t6
 }
 
@@ -100,9 +100,9 @@ define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <
 ; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd <4 x float> %x0, %x1
-  %t1 = fadd <4 x float> %t0, %x2
-  %t2 = fadd <4 x float> %t1, %x3
+  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+  %t1 = fadd reassoc nsz <4 x float> %t0, %x2
+  %t2 = fadd reassoc nsz <4 x float> %t1, %x3
   ret <4 x float> %t2
 }
 
@@ -117,9 +117,9 @@ define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <
 ; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd <4 x float> %x0, %x1
-  %t1 = fadd <4 x float> %x2, %t0
-  %t2 = fadd <4 x float> %t1, %x3
+  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+  %t1 = fadd reassoc nsz <4 x float> %x2, %t0
+  %t2 = fadd reassoc nsz <4 x float> %t1, %x3
   ret <4 x float> %t2
 }
 
@@ -134,9 +134,9 @@ define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <
 ; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd <4 x float> %x0, %x1
-  %t1 = fadd <4 x float> %t0, %x2
-  %t2 = fadd <4 x float> %x3, %t1
+  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+  %t1 = fadd reassoc nsz <4 x float> %t0, %x2
+  %t2 = fadd reassoc nsz <4 x float> %x3, %t1
   ret <4 x float> %t2
 }
 
@@ -151,9 +151,9 @@ define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <
 ; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd <4 x float> %x0, %x1
-  %t1 = fadd <4 x float> %x2, %t0
-  %t2 = fadd <4 x float> %x3, %t1
+  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+  %t1 = fadd reassoc nsz <4 x float> %x2, %t0
+  %t2 = fadd reassoc nsz <4 x float> %x3, %t1
   ret <4 x float> %t2
 }