[NVPTX] Remove UnsafeFPMath uses #151479
Conversation
@llvm/pr-subscribers-backend-nvptx
Author: None (paperchalice)
Changes: Remove UnsafeFPMath uses (full description below).
Patch is 63.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151479.diff
13 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 95abcded46485..cf9758c80c5c2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -82,11 +82,6 @@ bool NVPTXDAGToDAGISel::allowFMA() const {
return TL->allowFMA(*MF, OptLevel);
}
-bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
- const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
- return TL->allowUnsafeFPMath(*MF);
-}
-
bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }
/// Select - Select instructions not customized! Used for
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 9e0f88e544980..dd05c4df8a3ee 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -44,7 +44,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool usePrecSqrtF32(const SDNode *N) const;
bool useF32FTZ() const;
bool allowFMA() const;
- bool allowUnsafeFPMath() const;
bool doRsqrtOpt() const;
NVPTXScopes Scopes{};
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 4fd362303b6e5..16a5cf71f44cd 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -125,10 +125,6 @@ NVPTXTargetLowering::getDivF32Level(const MachineFunction &MF,
if (UsePrecDivF32.getNumOccurrences() > 0)
return UsePrecDivF32;
- // Otherwise, use div.approx if fast math is enabled
- if (allowUnsafeFPMath(MF))
- return NVPTX::DivPrecisionLevel::Approx;
-
const SDNodeFlags Flags = N.getFlags();
if (Flags.hasApproximateFuncs())
return NVPTX::DivPrecisionLevel::Approx;
@@ -142,10 +138,6 @@ bool NVPTXTargetLowering::usePrecSqrtF32(const MachineFunction &MF,
if (UsePrecSqrtF32.getNumOccurrences() > 0)
return UsePrecSqrtF32;
- // Otherwise, use sqrt.approx if fast math is enabled
- if (allowUnsafeFPMath(MF))
- return false;
-
if (N) {
const SDNodeFlags Flags = N->getFlags();
if (Flags.hasApproximateFuncs())
@@ -2687,8 +2679,7 @@ static SDValue lowerROT(SDValue Op, SelectionDAG &DAG) {
SDLoc(Op), Opcode, DAG);
}
-static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
- bool AllowUnsafeFPMath) {
+static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG) {
// Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)),
// i.e. "poor man's fmod()". When y is infinite, x is returned. This matches
// the semantics of LLVM's frem.
@@ -2705,7 +2696,7 @@ static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
SDValue Sub = DAG.getNode(ISD::FSUB, DL, Ty, X, Mul,
Flags | SDNodeFlags::AllowContract);
- if (AllowUnsafeFPMath || Flags.hasNoInfs())
+ if (Flags.hasNoInfs())
return Sub;
// If Y is infinite, return X
@@ -2845,7 +2836,7 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ:
return lowerCTLZCTPOP(Op, DAG);
case ISD::FREM:
- return lowerFREM(Op, DAG, allowUnsafeFPMath(DAG.getMachineFunction()));
+ return lowerFREM(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
@@ -4718,17 +4709,7 @@ bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
return true;
- return allowUnsafeFPMath(MF);
-}
-
-bool NVPTXTargetLowering::allowUnsafeFPMath(const MachineFunction &MF) const {
- // Honor TargetOptions flags that explicitly say unsafe math is okay.
- if (MF.getTarget().Options.UnsafeFPMath)
- return true;
-
- // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
- const Function &F = MF.getFunction();
- return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
+ return false;
}
static bool isConstZero(const SDValue &Operand) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index cf72a1e6db89c..71c15695b1988 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -215,7 +215,6 @@ class NVPTXTargetLowering : public TargetLowering {
unsigned combineRepeatedFPDivisors() const override { return 2; }
bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const;
- bool allowUnsafeFPMath(const MachineFunction &MF) const;
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT) const override {
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 6000b40694763..020d42f217f75 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1181,9 +1181,8 @@ defm FMA_F64 : FMA<F64RT, allow_ftz = false>;
// sin/cos/tanh
class UnaryOpAllowsApproxFn<SDPatternOperator operator>
- : PatFrag<(ops node:$A),
- (operator node:$A), [{
- return allowUnsafeFPMath() || N->getFlags().hasApproximateFuncs();
+ : PatFrag<(ops node:$A), (operator node:$A), [{
+ return N->getFlags().hasApproximateFuncs();
}]>;
def SIN_APPROX_f32 :
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
index 80627a03354a0..e1d4ef1073a78 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | FileCheck --check-prefixes=CHECK %s
-; RUN: %if ptxas-11.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | %ptxas-verify -arch=sm_80 %}
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 | FileCheck --check-prefixes=CHECK %s
+; RUN: %if ptxas-11.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 | %ptxas-verify -arch=sm_80 %}
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -22,7 +22,7 @@ define <2 x bfloat> @test_sin(<2 x bfloat> %a) #0 #1 {
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r5, %r4, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
- %r = call <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a)
+ %r = call afn <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a)
ret <2 x bfloat> %r
}
@@ -41,7 +41,7 @@ define <2 x bfloat> @test_cos(<2 x bfloat> %a) #0 #1 {
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r5, %r4, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
- %r = call <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a)
+ %r = call afn <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a)
ret <2 x bfloat> %r
}
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index 2b7e4184670c7..d4aec4f16f1ab 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -886,8 +886,8 @@ define half @test_sqrt(half %a) #0 {
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
-define half @test_sin(half %a) #0 #1 {
- %r = call half @llvm.sin.f16(half %a)
+define half @test_sin(half %a) #0 {
+ %r = call afn half @llvm.sin.f16(half %a)
ret half %r
}
@@ -900,8 +900,8 @@ define half @test_sin(half %a) #0 #1 {
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
-define half @test_cos(half %a) #0 #1 {
- %r = call half @llvm.cos.f16(half %a)
+define half @test_cos(half %a) #0 {
+ %r = call afn half @llvm.cos.f16(half %a)
ret half %r
}
@@ -1183,4 +1183,3 @@ define <2 x half> @test_neg_f16x2(<2 x half> noundef %arg) #0 {
}
attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index d4fcea320f3ad..991311f9492b9 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -1674,7 +1674,7 @@ define <2 x half> @test_sqrt(<2 x half> %a) #0 {
; ret <2 x half> %r
;}
-define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
+define <2 x half> @test_sin(<2 x half> %a) #0 {
; CHECK-LABEL: test_sin(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
@@ -1692,11 +1692,11 @@ define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
; CHECK-NEXT: mov.b32 %r6, {%rs4, %rs3};
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
- %r = call <2 x half> @llvm.sin.f16(<2 x half> %a)
+ %r = call afn <2 x half> @llvm.sin.f16(<2 x half> %a)
ret <2 x half> %r
}
-define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
+define <2 x half> @test_cos(<2 x half> %a) #0 {
; CHECK-LABEL: test_cos(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
@@ -1714,7 +1714,7 @@ define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
; CHECK-NEXT: mov.b32 %r6, {%rs4, %rs3};
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
- %r = call <2 x half> @llvm.cos.f16(<2 x half> %a)
+ %r = call afn <2 x half> @llvm.cos.f16(<2 x half> %a)
ret <2 x half> %r
}
@@ -2330,4 +2330,3 @@ define void @test_store_2xhalf(ptr %p1, ptr %p2, <2 x half> %v) {
attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
index b84a0ec7155e2..1a73b22e83be7 100644
--- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
@@ -1627,7 +1627,7 @@ define <2 x float> @test_sqrt(<2 x float> %a) #0 {
; ret <2 x float> %r
;}
-define <2 x float> @test_sin(<2 x float> %a) #0 #1 {
+define <2 x float> @test_sin(<2 x float> %a) #0 {
; CHECK-LABEL: test_sin(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -1640,11 +1640,11 @@ define <2 x float> @test_sin(<2 x float> %a) #0 #1 {
; CHECK-NEXT: sin.approx.f32 %r4, %r1;
; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3};
; CHECK-NEXT: ret;
- %r = call <2 x float> @llvm.sin(<2 x float> %a)
+ %r = call afn <2 x float> @llvm.sin(<2 x float> %a)
ret <2 x float> %r
}
-define <2 x float> @test_cos(<2 x float> %a) #0 #1 {
+define <2 x float> @test_cos(<2 x float> %a) #0 {
; CHECK-LABEL: test_cos(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -1657,7 +1657,7 @@ define <2 x float> @test_cos(<2 x float> %a) #0 #1 {
; CHECK-NEXT: cos.approx.f32 %r4, %r1;
; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3};
; CHECK-NEXT: ret;
- %r = call <2 x float> @llvm.cos(<2 x float> %a)
+ %r = call afn <2 x float> @llvm.cos(<2 x float> %a)
ret <2 x float> %r
}
@@ -2146,5 +2146,4 @@ define void @test_trunc_to_v2f16(<2 x float> %a, ptr %p) {
attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
attributes #2 = { "denormal-fp-math"="preserve-sign" }
diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll
index 5eda3a1e2dda1..8561c60a46948 100644
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -22,7 +22,7 @@ define float @sqrt_div(float %a, float %b) {
ret float %t2
}
-define float @sqrt_div_fast(float %a, float %b) #0 {
+define float @sqrt_div_fast(float %a, float %b) {
; CHECK-LABEL: sqrt_div_fast(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -34,29 +34,25 @@ define float @sqrt_div_fast(float %a, float %b) #0 {
; CHECK-NEXT: div.approx.f32 %r4, %r2, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
- %t1 = tail call float @llvm.sqrt.f32(float %a)
- %t2 = fdiv float %t1, %b
+ %t1 = tail call afn float @llvm.sqrt.f32(float %a)
+ %t2 = fdiv afn float %t1, %b
ret float %t2
}
-define float @sqrt_div_fast_ninf(float %a, float %b) #0 {
+define float @sqrt_div_fast_ninf(float %a, float %b) {
; CHECK-LABEL: sqrt_div_fast_ninf(
; CHECK: {
-; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b32 %r<7>;
+; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [sqrt_div_fast_ninf_param_0];
; CHECK-NEXT: sqrt.approx.f32 %r2, %r1;
-; CHECK-NEXT: abs.f32 %r3, %r1;
-; CHECK-NEXT: setp.lt.f32 %p1, %r3, 0f00800000;
-; CHECK-NEXT: selp.f32 %r4, 0f00000000, %r2, %p1;
-; CHECK-NEXT: ld.param.b32 %r5, [sqrt_div_fast_ninf_param_1];
-; CHECK-NEXT: div.approx.f32 %r6, %r4, %r5;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
+; CHECK-NEXT: ld.param.b32 %r3, [sqrt_div_fast_ninf_param_1];
+; CHECK-NEXT: div.approx.f32 %r4, %r2, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
%t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
- %t2 = fdiv float %t1, %b
+ %t2 = fdiv afn float %t1, %b
ret float %t2
}
@@ -77,7 +73,7 @@ define float @sqrt_div_ftz(float %a, float %b) #1 {
ret float %t2
}
-define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
+define float @sqrt_div_fast_ftz(float %a, float %b) #1 {
; CHECK-LABEL: sqrt_div_fast_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -89,35 +85,32 @@ define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
; CHECK-NEXT: div.approx.ftz.f32 %r4, %r2, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
- %t1 = tail call float @llvm.sqrt.f32(float %a)
- %t2 = fdiv float %t1, %b
+ %t1 = tail call afn float @llvm.sqrt.f32(float %a)
+ %t2 = fdiv afn float %t1, %b
ret float %t2
}
-define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #0 #1 {
+define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #1 {
; CHECK-LABEL: sqrt_div_fast_ftz_ninf(
; CHECK: {
-; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b32 %r<6>;
+; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [sqrt_div_fast_ftz_ninf_param_0];
-; CHECK-NEXT: setp.eq.ftz.f32 %p1, %r1, 0f00000000;
; CHECK-NEXT: sqrt.approx.ftz.f32 %r2, %r1;
-; CHECK-NEXT: selp.f32 %r3, 0f00000000, %r2, %p1;
-; CHECK-NEXT: ld.param.b32 %r4, [sqrt_div_fast_ftz_ninf_param_1];
-; CHECK-NEXT: div.approx.ftz.f32 %r5, %r3, %r4;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: ld.param.b32 %r3, [sqrt_div_fast_ftz_ninf_param_1];
+; CHECK-NEXT: div.approx.ftz.f32 %r4, %r2, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
%t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
- %t2 = fdiv float %t1, %b
+ %t2 = fdiv afn float %t1, %b
ret float %t2
}
; There are no fast-math or ftz versions of sqrt and div for f64. We use
; reciprocal(rsqrt(x)) for sqrt(x), and emit a vanilla divide.
-define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
+define double @sqrt_div_fast_ftz_f64(double %a, double %b) #1 {
; CHECK-LABEL: sqrt_div_fast_ftz_f64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<5>;
@@ -134,22 +127,17 @@ define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
ret double %t2
}
-define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #0 #1 {
+define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #1 {
; CHECK-LABEL: sqrt_div_fast_ftz_f64_ninf(
; CHECK: {
-; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [sqrt_div_fast_ftz_f64_ninf_param_0];
-; CHECK-NEXT: abs.f64 %rd2, %rd1;
-; CHECK-NEXT: setp.lt.f64 %p1, %rd2, 0d0010000000000000;
-; CHECK-NEXT: rsqrt.approx.f64 %rd3, %rd1;
-; CHECK-NEXT: rcp.approx.ftz.f64 %rd4, %rd3;
-; CHECK-NEXT: selp.f64 %rd5, 0d0000000000000000, %rd4, %p1;
-; CHECK-NEXT: ld.param.b64 %rd6, [sqrt_div_fast_ftz_f64_ninf_param_1];
-; CHECK-NEXT: div.rn.f64 %rd7, %rd5, %rd6;
-; CHECK-NEXT: st.param.b64 [func_retval0], %rd7;
+; CHECK-NEXT: sqrt.rn.f64 %rd2, %rd1;
+; CHECK-NEXT: ld.param.b64 %rd3, [sqrt_div_fast_ftz_f64_ninf_param_1];
+; CHECK-NEXT: div.rn.f64 %rd4, %rd2, %rd3;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
%t1 = tail call ninf afn double @llvm.sqrt.f64(double %a)
%t2 = fdiv double %t1, %b
@@ -172,7 +160,7 @@ define float @rsqrt(float %a) {
ret float %ret
}
-define float @rsqrt_fast(float %a) #0 {
+define float @rsqrt_fast(float %a) {
; CHECK-LABEL: rsqrt_fast(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -182,12 +170,12 @@ define float @rsqrt_fast(float %a) #0 {
; CHECK-NEXT: rsqrt.approx.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %b = tail call float @llvm.sqrt.f32(float %a)
- %ret = fdiv float 1.0, %b
+ %b = tail call afn float @llvm.sqrt.f32(float %a)
+ %ret = fdiv afn float 1.0, %b
ret float %ret
}
-define float @rsqrt_fast_ftz(float %a) #0 #1 {
+define float @rsqrt_fast_ftz(float %a) #1 {
; CHECK-LABEL: rsqrt_fast_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -197,8 +185,8 @@ define float @rsqrt_fast_ftz(float %a) #0 #1 {
; CHECK-NEXT: rsqrt.approx.ftz.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %b = tail call float @llvm.sqrt.f32(float %a)
- %ret = fdiv float 1.0, %b
+ %b = tail call afn float @llvm.sqrt.f32(float %a)
+ %ret = fdiv afn float 1.0, %b
ret float %ret
}
@@ -263,35 +251,7 @@ define float @fcos_approx_afn(float %a) {
ret float %r
}
-define float @fsin_approx(float %a) #0 {
-; CHECK-LABEL: fsin_approx(
-; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [fsin_approx_param_0];
-; CHECK-NEXT: sin.approx.f32 %r2, %r1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
-; CHECK-NEXT: ret;
- %r = tail call float @llvm.sin.f32(float %a)
- ret float %r
-}
-
-define float @fcos_approx(float %a) #0 {
-; CHECK-LABEL: fcos_approx(
-; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [fcos_approx_param_0];
-; CHECK-NEXT: cos.approx.f32 %r2, %r1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
-; CHECK-NEXT: ret;
- %r = tail call float @llvm.cos.f32(float %a)
- ret float %r
-}
-
-define float @fsin_approx_ftz(float %a) #0 #1 {
+define float @fsin_approx_ftz(float %a) #1 {
; CHECK-LABEL: fsin_approx_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -301,11 +261,11 @@ define float @fsin_approx_ftz(float %a) #0 #1 {
; CHECK-NEXT: sin.approx.ftz.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %r = tail call float @llvm.sin.f32(float %a)
+ %r = tail call afn float @llvm.sin.f32(float %a)
ret float %r
}
-define float @fcos_approx_ftz(float %a) #0 #1 {
+define float @fcos_approx_ftz(float %a) #1 {
; CHECK-LABEL: fcos_approx_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -315,7 +275,7 @@ define float @fcos_approx_ftz(float %a) #0 #1 {
; CHECK-NEXT: cos.approx.ftz.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %r = tail call float @llvm.cos.f32(float %a)
+ %r = tail call afn float @llvm.cos.f32(float %a)
ret float %r
}
@@ -423,7 +383,7 @@ define float @repeated_div_recip_allowed_ftz_sel(i1 %pred, float %a, float %b, f
ret float %w
}
-define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
+define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) {
; CHECK-LABEL: repeated_div_fast(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -444,14 +404,14 @@ define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0
; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
; CHECK-NEXT: ret;
- %x = fdiv float %a, %divisor
- %y = fdiv float %b, %divisor
- %z = fmul float %x, %y
+ %x = fdiv afn arcp float %a, %divisor
+ %y = fdiv afn arcp contract float %b, %divisor
+ %z = fmul cont...
[truncated]
Remove UnsafeFPMath in the NVPTX part. It blocks some bugfixes related to clang, and the ultimate goal is to remove the resetTargetOptions method in TargetMachine; see the FIXME in resetTargetOptions.
See also:
https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast
https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract
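As a minimal IR sketch of the new contract (not part of the patch; the function name here is made up): with UnsafeFPMath gone, approximate NVPTX lowerings are requested per instruction through fast-math flags such as afn, as in the updated fast-math.ll checks above, rather than through the unsafe-fp-math function attribute or --enable-unsafe-fp-math.

; Minimal sketch, assuming the afn-based selection added in this patch.
define float @sin_approx_sketch(float %x) {
  ; The afn flag lets UnaryOpAllowsApproxFn match, so this is expected to
  ; select sin.approx.f32, mirroring the fsin_approx_afn test above.
  %r = call afn float @llvm.sin.f32(float %x)
  ret float %r
}

declare float @llvm.sin.f32(float)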