Skip to content

Commit adcad6a

Browse files
authored
[AArch64] Remove UnsafeFPMath (#150876)
We should always use fast-math flags; remove these global flags incrementally. See also https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast/80797
1 parent 769b0e6 commit adcad6a

11 files changed

+640
-492
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11325,7 +11325,7 @@ static SDValue emitFloatCompareMask(SDValue LHS, SDValue RHS, SDValue TVal,
1132511325

1132611326
SDValue AArch64TargetLowering::LowerSELECT_CC(
1132711327
ISD::CondCode CC, SDValue LHS, SDValue RHS, SDValue TVal, SDValue FVal,
11328-
iterator_range<SDNode::user_iterator> Users, bool HasNoNaNs,
11328+
iterator_range<SDNode::user_iterator> Users, SDNodeFlags Flags,
1132911329
const SDLoc &DL, SelectionDAG &DAG) const {
1133011330
// Handle f128 first, because it will result in a comparison of some RTLIB
1133111331
// call result against zero.
@@ -11523,7 +11523,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
1152311523
return true;
1152411524
}
1152511525
})) {
11526-
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || HasNoNaNs;
11526+
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Flags.hasNoNaNs();
1152711527
SDValue VectorCmp =
1152811528
emitFloatCompareMask(LHS, RHS, TVal, FVal, CC, NoNaNs, DL, DAG);
1152911529
if (VectorCmp)
@@ -11537,7 +11537,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
1153711537
AArch64CC::CondCode CC1, CC2;
1153811538
changeFPCCToAArch64CC(CC, CC1, CC2);
1153911539

11540-
if (DAG.getTarget().Options.UnsafeFPMath) {
11540+
if (Flags.hasNoSignedZeros()) {
1154111541
// Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
1154211542
// "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
1154311543
ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
@@ -11616,18 +11616,16 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
1161611616
SDValue RHS = Op.getOperand(1);
1161711617
SDValue TVal = Op.getOperand(2);
1161811618
SDValue FVal = Op.getOperand(3);
11619-
bool HasNoNans = Op->getFlags().hasNoNaNs();
11619+
SDNodeFlags Flags = Op->getFlags();
1162011620
SDLoc DL(Op);
11621-
return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(), HasNoNans, DL,
11622-
DAG);
11621+
return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(), Flags, DL, DAG);
1162311622
}
1162411623

1162511624
SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
1162611625
SelectionDAG &DAG) const {
1162711626
SDValue CCVal = Op->getOperand(0);
1162811627
SDValue TVal = Op->getOperand(1);
1162911628
SDValue FVal = Op->getOperand(2);
11630-
bool HasNoNans = Op->getFlags().hasNoNaNs();
1163111629
SDLoc DL(Op);
1163211630

1163311631
EVT Ty = Op.getValueType();
@@ -11694,8 +11692,8 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
1169411692
DAG.getUNDEF(MVT::f32), FVal);
1169511693
}
1169611694

11697-
SDValue Res =
11698-
LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(), HasNoNans, DL, DAG);
11695+
SDValue Res = LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(),
11696+
Op->getFlags(), DL, DAG);
1169911697

1170011698
if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
1170111699
return DAG.getTargetExtractSubreg(AArch64::hsub, DL, Ty, Res);
@@ -12292,7 +12290,9 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
1229212290
SDLoc DL(Operand);
1229312291
EVT VT = Operand.getValueType();
1229412292

12295-
SDNodeFlags Flags = SDNodeFlags::AllowReassociation;
12293+
// Ensure nodes can be recognized by isAssociativeAndCommutative.
12294+
SDNodeFlags Flags =
12295+
SDNodeFlags::AllowReassociation | SDNodeFlags::NoSignedZeros;
1229612296

1229712297
// Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
1229812298
// AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
@@ -16674,7 +16674,7 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
1667416674
return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
1667516675
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
1667616676
(Options.AllowFPOpFusion == FPOpFusion::Fast ||
16677-
Options.UnsafeFPMath));
16677+
I->getFastMathFlags().allowContract()));
1667816678
}
1667916679

1668016680
// All 32-bit GPR operations implicitly zero the high-half of the corresponding

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,7 @@ class AArch64TargetLowering : public TargetLowering {
662662
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
663663
SDValue TVal, SDValue FVal,
664664
iterator_range<SDNode::user_iterator> Users,
665-
bool HasNoNans, const SDLoc &dl,
665+
SDNodeFlags Flags, const SDLoc &dl,
666666
SelectionDAG &DAG) const;
667667
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
668668
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6574,10 +6574,8 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
65746574
TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
65756575
// We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
65766576
// the target options or if FADD/FSUB has the contract fast-math flag.
6577-
return Options.UnsafeFPMath ||
6578-
Options.AllowFPOpFusion == FPOpFusion::Fast ||
6577+
return Options.AllowFPOpFusion == FPOpFusion::Fast ||
65796578
Inst.getFlag(MachineInstr::FmContract);
6580-
return true;
65816579
}
65826580
return false;
65836581
}
@@ -6680,9 +6678,8 @@ bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
66806678
case AArch64::FMUL_ZZZ_H:
66816679
case AArch64::FMUL_ZZZ_S:
66826680
case AArch64::FMUL_ZZZ_D:
6683-
return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
6684-
(Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
6685-
Inst.getFlag(MachineInstr::MIFlag::FmNsz));
6681+
return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
6682+
Inst.getFlag(MachineInstr::MIFlag::FmNsz);
66866683

66876684
// == Integer types ==
66886685
// -- Base instructions --

llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s
2-
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s
3-
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
4-
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
5-
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
1+
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s
2+
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s
3+
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
4+
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
5+
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
66
#
77
name: f1_2s
88
registers:
@@ -16,18 +16,18 @@ body: |
1616
%2:fpr64 = COPY $d2
1717
%1:fpr64 = COPY $d1
1818
%0:fpr64 = COPY $d0
19-
%3:fpr64 = FMULv2f32 %0, %1, implicit $fpcr
20-
%4:fpr64 = FSUBv2f32 killed %3, %2, implicit $fpcr
19+
%3:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr
20+
%4:fpr64 = contract FSUBv2f32 killed %3, %2, implicit $fpcr
2121
$d0 = COPY %4
2222
RET_ReallyLR implicit $d0
2323
2424
...
2525
# UNPROFITABLE-LABEL: name: f1_2s
26-
# UNPROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2
26+
# UNPROFITABLE: [[R1:%[0-9]+]]:fpr64 = contract FNEGv2f32 %2
2727
# UNPROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr
2828
#
2929
# PROFITABLE-LABEL: name: f1_2s
30-
# PROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2
30+
# PROFITABLE: [[R1:%[0-9]+]]:fpr64 = contract FNEGv2f32 %2
3131
# PROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr
3232
---
3333
name: f1_4s
@@ -42,18 +42,18 @@ body: |
4242
%2:fpr128 = COPY $q2
4343
%1:fpr128 = COPY $q1
4444
%0:fpr128 = COPY $q0
45-
%3:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
46-
%4:fpr128 = FSUBv4f32 killed %3, %2, implicit $fpcr
45+
%3:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
46+
%4:fpr128 = contract FSUBv4f32 killed %3, %2, implicit $fpcr
4747
$q0 = COPY %4
4848
RET_ReallyLR implicit $q0
4949
5050
...
5151
# UNPROFITABLE-LABEL: name: f1_4s
52-
# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
52+
# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
5353
# UNPROFITABLE-NEXT: FSUBv4f32 killed [[R1]], %2, implicit $fpcr
5454
#
5555
# PROFITABLE-LABEL: name: f1_4s
56-
# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv4f32 %2
56+
# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FNEGv4f32 %2
5757
# PROFITABLE-NEXT: FMLAv4f32 killed [[R1]], %0, %1, implicit $fpcr
5858
---
5959
name: f1_2d
@@ -68,18 +68,18 @@ body: |
6868
%2:fpr128 = COPY $q2
6969
%1:fpr128 = COPY $q1
7070
%0:fpr128 = COPY $q0
71-
%3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
72-
%4:fpr128 = FSUBv2f64 killed %3, %2, implicit $fpcr
71+
%3:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
72+
%4:fpr128 = contract FSUBv2f64 killed %3, %2, implicit $fpcr
7373
$q0 = COPY %4
7474
RET_ReallyLR implicit $q0
7575
7676
...
7777
# UNPROFITABLE-LABEL: name: f1_2d
78-
# UNPROFITABLE: %3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
78+
# UNPROFITABLE: %3:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
7979
# UNPROFITABLE-NEXT: FSUBv2f64 killed %3, %2, implicit $fpcr
8080
#
8181
# PROFITABLE-LABEL: name: f1_2d
82-
# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv2f64 %2
82+
# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FNEGv2f64 %2
8383
# PROFITABLE-NEXT: FMLAv2f64 killed [[R1]], %0, %1, implicit $fpcr
8484
---
8585
name: f1_both_fmul_2s
@@ -97,15 +97,15 @@ body: |
9797
%2:fpr64 = COPY $q2
9898
%1:fpr64 = COPY $q1
9999
%0:fpr64 = COPY $q0
100-
%4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr
101-
%5:fpr64 = FMULv2f32 %2, %3, implicit $fpcr
102-
%6:fpr64 = FSUBv2f32 killed %4, %5, implicit $fpcr
100+
%4:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr
101+
%5:fpr64 = contract FMULv2f32 %2, %3, implicit $fpcr
102+
%6:fpr64 = contract FSUBv2f32 killed %4, %5, implicit $fpcr
103103
$q0 = COPY %6
104104
RET_ReallyLR implicit $q0
105105
106106
...
107107
# ALL-LABEL: name: f1_both_fmul_2s
108-
# ALL: %4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr
108+
# ALL: %4:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr
109109
# ALL-NEXT: FMLSv2f32 killed %4, %2, %3, implicit $fpcr
110110
---
111111
name: f1_both_fmul_4s
@@ -123,15 +123,15 @@ body: |
123123
%2:fpr128 = COPY $q2
124124
%1:fpr128 = COPY $q1
125125
%0:fpr128 = COPY $q0
126-
%4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
127-
%5:fpr128 = FMULv4f32 %2, %3, implicit $fpcr
128-
%6:fpr128 = FSUBv4f32 killed %4, %5, implicit $fpcr
126+
%4:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
127+
%5:fpr128 = contract FMULv4f32 %2, %3, implicit $fpcr
128+
%6:fpr128 = contract FSUBv4f32 killed %4, %5, implicit $fpcr
129129
$q0 = COPY %6
130130
RET_ReallyLR implicit $q0
131131
132132
...
133133
# ALL-LABEL: name: f1_both_fmul_4s
134-
# ALL: %4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
134+
# ALL: %4:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
135135
# ALL-NEXT: FMLSv4f32 killed %4, %2, %3, implicit $fpcr
136136
---
137137
name: f1_both_fmul_2d
@@ -149,14 +149,14 @@ body: |
149149
%2:fpr128 = COPY $q2
150150
%1:fpr128 = COPY $q1
151151
%0:fpr128 = COPY $q0
152-
%4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
153-
%5:fpr128 = FMULv2f64 %2, %3, implicit $fpcr
154-
%6:fpr128 = FSUBv2f64 killed %4, %5, implicit $fpcr
152+
%4:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
153+
%5:fpr128 = contract FMULv2f64 %2, %3, implicit $fpcr
154+
%6:fpr128 = contract FSUBv2f64 killed %4, %5, implicit $fpcr
155155
$q0 = COPY %6
156156
RET_ReallyLR implicit $q0
157157
158158
...
159159
# ALL-LABEL: name: f1_both_fmul_2d
160-
# ALL: %4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
160+
# ALL: %4:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
161161
# ALL-NEXT: FMLSv2f64 killed %4, %2, %3, implicit $fpcr
162162

llvm/test/CodeGen/AArch64/arm64-fml-combines.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -enable-unsafe-fp-math -mattr=+fullfp16 | FileCheck %s
1+
; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -mattr=+fullfp16 | FileCheck %s
22
; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -fp-contract=fast -mattr=+fullfp16 | FileCheck %s
33

44
define void @foo_2d(ptr %src) {
@@ -130,29 +130,29 @@ for.end: ; preds = %for.body
130130
; CHECK: fnmadd h0, h0, h1, h2
131131
define half @test0(half %a, half %b, half %c) {
132132
entry:
133-
%0 = fmul half %a, %b
134-
%mul = fsub half -0.000000e+00, %0
135-
%sub1 = fsub half %mul, %c
133+
%0 = fmul contract half %a, %b
134+
%mul = fsub contract half -0.000000e+00, %0
135+
%sub1 = fsub contract half %mul, %c
136136
ret half %sub1
137137
}
138138

139139
; CHECK-LABEL: test1:
140140
; CHECK: fnmadd s0, s0, s1, s2
141141
define float @test1(float %a, float %b, float %c) {
142142
entry:
143-
%0 = fmul float %a, %b
144-
%mul = fsub float -0.000000e+00, %0
145-
%sub1 = fsub float %mul, %c
143+
%0 = fmul contract float %a, %b
144+
%mul = fsub contract float -0.000000e+00, %0
145+
%sub1 = fsub contract float %mul, %c
146146
ret float %sub1
147147
}
148148

149149
; CHECK-LABEL: test2:
150150
; CHECK: fnmadd d0, d0, d1, d2
151151
define double @test2(double %a, double %b, double %c) {
152152
entry:
153-
%0 = fmul double %a, %b
154-
%mul = fsub double -0.000000e+00, %0
155-
%sub1 = fsub double %mul, %c
153+
%0 = fmul contract double %a, %b
154+
%mul = fsub contract double -0.000000e+00, %0
155+
%sub1 = fsub contract double %mul, %c
156156
ret double %sub1
157157
}
158158

llvm/test/CodeGen/AArch64/fcsel-zero.ll

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,17 @@
22

33
; RUN: llc -mtriple=aarch64-linux-gnu -o - < %s | FileCheck %s
44

5-
define float @foeq(float %a, float %b) #0 {
6-
%t = fcmp oeq float %a, 0.0
5+
define float @foeq(float %a, float %b) {
6+
%t = fcmp nsz oeq float %a, 0.0
77
%v = select i1 %t, float 0.0, float %b
88
ret float %v
99
; CHECK-LABEL: foeq
1010
; CHECK: fcmp [[R:s[0-9]+]], #0.0
1111
; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, eq
1212
}
1313

14-
define float @fueq(float %a, float %b) #0 {
15-
%t = fcmp ueq float %a, 0.0
14+
define float @fueq(float %a, float %b) {
15+
%t = fcmp nsz ueq float %a, 0.0
1616
%v = select i1 %t, float 0.0, float %b
1717
ret float %v
1818
; CHECK-LABEL: fueq
@@ -21,8 +21,8 @@ define float @fueq(float %a, float %b) #0 {
2121
; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, vs
2222
}
2323

24-
define float @fone(float %a, float %b) #0 {
25-
%t = fcmp one float %a, 0.0
24+
define float @fone(float %a, float %b) {
25+
%t = fcmp nsz one float %a, 0.0
2626
%v = select i1 %t, float %b, float 0.0
2727
ret float %v
2828
; CHECK-LABEL: fone
@@ -31,26 +31,26 @@ define float @fone(float %a, float %b) #0 {
3131
; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], gt
3232
}
3333

34-
define float @fune(float %a, float %b) #0 {
35-
%t = fcmp une float %a, 0.0
34+
define float @fune(float %a, float %b) {
35+
%t = fcmp nsz une float %a, 0.0
3636
%v = select i1 %t, float %b, float 0.0
3737
ret float %v
3838
; CHECK-LABEL: fune
3939
; CHECK: fcmp [[R:s[0-9]+]], #0.0
4040
; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], ne
4141
}
4242

43-
define double @doeq(double %a, double %b) #0 {
44-
%t = fcmp oeq double %a, 0.0
43+
define double @doeq(double %a, double %b) {
44+
%t = fcmp nsz oeq double %a, 0.0
4545
%v = select i1 %t, double 0.0, double %b
4646
ret double %v
4747
; CHECK-LABEL: doeq
4848
; CHECK: fcmp [[R:d[0-9]+]], #0.0
4949
; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, eq
5050
}
5151

52-
define double @dueq(double %a, double %b) #0 {
53-
%t = fcmp ueq double %a, 0.0
52+
define double @dueq(double %a, double %b) {
53+
%t = fcmp nsz ueq double %a, 0.0
5454
%v = select i1 %t, double 0.0, double %b
5555
ret double %v
5656
; CHECK-LABEL: dueq
@@ -59,8 +59,8 @@ define double @dueq(double %a, double %b) #0 {
5959
; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, vs
6060
}
6161

62-
define double @done(double %a, double %b) #0 {
63-
%t = fcmp one double %a, 0.0
62+
define double @done(double %a, double %b) {
63+
%t = fcmp nsz one double %a, 0.0
6464
%v = select i1 %t, double %b, double 0.0
6565
ret double %v
6666
; CHECK-LABEL: done
@@ -69,14 +69,11 @@ define double @done(double %a, double %b) #0 {
6969
; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], gt
7070
}
7171

72-
define double @dune(double %a, double %b) #0 {
73-
%t = fcmp une double %a, 0.0
72+
define double @dune(double %a, double %b) {
73+
%t = fcmp nsz une double %a, 0.0
7474
%v = select i1 %t, double %b, double 0.0
7575
ret double %v
7676
; CHECK-LABEL: dune
7777
; CHECK: fcmp [[R:d[0-9]+]], #0.0
7878
; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], ne
7979
}
80-
81-
attributes #0 = { nounwind "unsafe-fp-math"="true" }
82-

0 commit comments

Comments
 (0)