Skip to content

Commit 45d9280

Browse files
author
Chen Zheng
committed
[PowerPC] use inst-level fast-math-flags to drive MachineCombiner
Currently, the PowerPC target uses the function-scope UnsafeFPMath option to drive the Machine Combiner pass. This is inaccurate in two ways: 1: the scope is too coarse. The Machine Combiner pass only requires instruction-level flags, not a function-scope option. 2: the floating-point flags are too broad. The Machine Combiner pass only requires the floating-point flags reassoc and nsz. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D78183
1 parent b73290b commit 45d9280

File tree

3 files changed

+59
-38
lines changed

3 files changed

+59
-38
lines changed

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,26 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
225225
return Latency;
226226
}
227227

228+
/// This is an architecture-specific helper function of reassociateOps.
229+
/// Set special operand attributes for new instructions after reassociation.
230+
void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
231+
MachineInstr &OldMI2,
232+
MachineInstr &NewMI1,
233+
MachineInstr &NewMI2) const {
234+
// Propagate FP flags from the original instructions.
235+
// But clear poison-generating flags because those may not be valid now.
236+
uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
237+
NewMI1.setFlags(IntersectedFlags);
238+
NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
239+
NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
240+
NewMI1.clearFlag(MachineInstr::MIFlag::IsExact);
241+
242+
NewMI2.setFlags(IntersectedFlags);
243+
NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap);
244+
NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap);
245+
NewMI2.clearFlag(MachineInstr::MIFlag::IsExact);
246+
}
247+
228248
// This function does not list all associative and commutative operations, but
229249
// only those worth feeding through the machine combiner in an attempt to
230250
// reduce the critical path. Mostly, this means floating-point operations,
@@ -258,7 +278,8 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
258278
case PPC::QVFMUL:
259279
case PPC::QVFMULS:
260280
case PPC::QVFMULSs:
261-
return true;
281+
return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
282+
Inst.getFlag(MachineInstr::MIFlag::FmNsz);
262283
default:
263284
return false;
264285
}
@@ -272,10 +293,6 @@ bool PPCInstrInfo::getMachineCombinerPatterns(
272293
if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
273294
return false;
274295

275-
// FP reassociation is only legal when we don't need strict IEEE semantics.
276-
if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
277-
return false;
278-
279296
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
280297
}
281298

llvm/lib/Target/PowerPC/PPCInstrInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
248248

249249
bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
250250

251+
void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
252+
MachineInstr &NewMI1,
253+
MachineInstr &NewMI2) const override;
254+
251255
bool isCoalescableExtInstr(const MachineInstr &MI,
252256
Register &SrcReg, Register &DstReg,
253257
unsigned &SubIdx) const override;

llvm/test/CodeGen/PowerPC/machine-combiner.ll

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
2-
; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX
1+
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
2+
; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX
33
target datalayout = "E-m:e-i64:64-n32:64"
44
target triple = "powerpc64-unknown-linux-gnu"
55

@@ -14,9 +14,9 @@ define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
1414
; CHECK: fadds 1, [[REG0]], [[REG1]]
1515
; CHECK-NEXT: blr
1616

17-
%t0 = fadd float %x0, %x1
18-
%t1 = fadd float %t0, %x2
19-
%t2 = fadd float %t1, %x3
17+
%t0 = fadd reassoc nsz float %x0, %x1
18+
%t1 = fadd reassoc nsz float %t0, %x2
19+
%t2 = fadd reassoc nsz float %t1, %x3
2020
ret float %t2
2121
}
2222

@@ -28,9 +28,9 @@ define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
2828
; CHECK: fadds 1, [[REG0]], [[REG1]]
2929
; CHECK-NEXT: blr
3030

31-
%t0 = fadd float %x0, %x1
32-
%t1 = fadd float %x2, %t0
33-
%t2 = fadd float %t1, %x3
31+
%t0 = fadd reassoc nsz float %x0, %x1
32+
%t1 = fadd reassoc nsz float %x2, %t0
33+
%t2 = fadd reassoc nsz float %t1, %x3
3434
ret float %t2
3535
}
3636

@@ -42,9 +42,9 @@ define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
4242
; CHECK: fadds 1, [[REG0]], [[REG1]]
4343
; CHECK-NEXT: blr
4444

45-
%t0 = fadd float %x0, %x1
46-
%t1 = fadd float %t0, %x2
47-
%t2 = fadd float %x3, %t1
45+
%t0 = fadd reassoc nsz float %x0, %x1
46+
%t1 = fadd reassoc nsz float %t0, %x2
47+
%t2 = fadd reassoc nsz float %x3, %t1
4848
ret float %t2
4949
}
5050

@@ -56,9 +56,9 @@ define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
5656
; CHECK: fadds 1, [[REG0]], [[REG1]]
5757
; CHECK-NEXT: blr
5858

59-
%t0 = fadd float %x0, %x1
60-
%t1 = fadd float %x2, %t0
61-
%t2 = fadd float %x3, %t1
59+
%t0 = fadd reassoc nsz float %x0, %x1
60+
%t1 = fadd reassoc nsz float %x2, %t0
61+
%t2 = fadd reassoc nsz float %x3, %t1
6262
ret float %t2
6363
}
6464

@@ -77,13 +77,13 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa
7777
; CHECK: fadds 1, [[REG2]], 8
7878
; CHECK-NEXT: blr
7979

80-
%t0 = fadd float %x0, %x1
81-
%t1 = fadd float %t0, %x2
82-
%t2 = fadd float %t1, %x3
83-
%t3 = fadd float %t2, %x4
84-
%t4 = fadd float %t3, %x5
85-
%t5 = fadd float %t4, %x6
86-
%t6 = fadd float %t5, %x7
80+
%t0 = fadd reassoc nsz float %x0, %x1
81+
%t1 = fadd reassoc nsz float %t0, %x2
82+
%t2 = fadd reassoc nsz float %t1, %x3
83+
%t3 = fadd reassoc nsz float %t2, %x4
84+
%t4 = fadd reassoc nsz float %t3, %x5
85+
%t5 = fadd reassoc nsz float %t4, %x6
86+
%t6 = fadd reassoc nsz float %t5, %x7
8787
ret float %t6
8888
}
8989

@@ -100,9 +100,9 @@ define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <
100100
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
101101
; CHECK-NEXT: blr
102102

103-
%t0 = fadd <4 x float> %x0, %x1
104-
%t1 = fadd <4 x float> %t0, %x2
105-
%t2 = fadd <4 x float> %t1, %x3
103+
%t0 = fadd reassoc nsz <4 x float> %x0, %x1
104+
%t1 = fadd reassoc nsz <4 x float> %t0, %x2
105+
%t2 = fadd reassoc nsz <4 x float> %t1, %x3
106106
ret <4 x float> %t2
107107
}
108108

@@ -117,9 +117,9 @@ define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <
117117
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
118118
; CHECK-NEXT: blr
119119

120-
%t0 = fadd <4 x float> %x0, %x1
121-
%t1 = fadd <4 x float> %x2, %t0
122-
%t2 = fadd <4 x float> %t1, %x3
120+
%t0 = fadd reassoc nsz <4 x float> %x0, %x1
121+
%t1 = fadd reassoc nsz <4 x float> %x2, %t0
122+
%t2 = fadd reassoc nsz <4 x float> %t1, %x3
123123
ret <4 x float> %t2
124124
}
125125

@@ -134,9 +134,9 @@ define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <
134134
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
135135
; CHECK-NEXT: blr
136136

137-
%t0 = fadd <4 x float> %x0, %x1
138-
%t1 = fadd <4 x float> %t0, %x2
139-
%t2 = fadd <4 x float> %x3, %t1
137+
%t0 = fadd reassoc nsz <4 x float> %x0, %x1
138+
%t1 = fadd reassoc nsz <4 x float> %t0, %x2
139+
%t2 = fadd reassoc nsz <4 x float> %x3, %t1
140140
ret <4 x float> %t2
141141
}
142142

@@ -151,9 +151,9 @@ define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <
151151
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
152152
; CHECK-NEXT: blr
153153

154-
%t0 = fadd <4 x float> %x0, %x1
155-
%t1 = fadd <4 x float> %x2, %t0
156-
%t2 = fadd <4 x float> %x3, %t1
154+
%t0 = fadd reassoc nsz <4 x float> %x0, %x1
155+
%t1 = fadd reassoc nsz <4 x float> %x2, %t0
156+
%t2 = fadd reassoc nsz <4 x float> %x3, %t1
157157
ret <4 x float> %t2
158158
}
159159

0 commit comments

Comments
 (0)