From 9695804bc2c58a0fdc97ac31f61bb35a0ec1361d Mon Sep 17 00:00:00 2001 From: PaperChalice Date: Thu, 31 Jul 2025 17:27:13 +0800 Subject: [PATCH 1/2] Remove UnsafeFPMath uses --- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 5 ---- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1 - llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 27 +++------------------ llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 1 - llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 5 ++-- 5 files changed, 6 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 95abcded46485..cf9758c80c5c2 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -82,11 +82,6 @@ bool NVPTXDAGToDAGISel::allowFMA() const { return TL->allowFMA(*MF, OptLevel); } -bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const { - const NVPTXTargetLowering *TL = Subtarget->getTargetLowering(); - return TL->allowUnsafeFPMath(*MF); -} - bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; } /// Select - Select instructions not customized! Used for diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 9e0f88e544980..dd05c4df8a3ee 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -44,7 +44,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { bool usePrecSqrtF32(const SDNode *N) const; bool useF32FTZ() const; bool allowFMA() const; - bool allowUnsafeFPMath() const; bool doRsqrtOpt() const; NVPTXScopes Scopes{}; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 4fd362303b6e5..16a5cf71f44cd 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -125,10 +125,6 @@ NVPTXTargetLowering::getDivF32Level(const MachineFunction &MF, if (UsePrecDivF32.getNumOccurrences() > 0) return UsePrecDivF32; - // Otherwise, use div.approx if fast math is enabled - if (allowUnsafeFPMath(MF)) - return NVPTX::DivPrecisionLevel::Approx; - const SDNodeFlags Flags = N.getFlags(); if (Flags.hasApproximateFuncs()) return NVPTX::DivPrecisionLevel::Approx; @@ -142,10 +138,6 @@ bool NVPTXTargetLowering::usePrecSqrtF32(const MachineFunction &MF, if (UsePrecSqrtF32.getNumOccurrences() > 0) return UsePrecSqrtF32; - // Otherwise, use sqrt.approx if fast math is enabled - if (allowUnsafeFPMath(MF)) - return false; - if (N) { const SDNodeFlags Flags = N->getFlags(); if (Flags.hasApproximateFuncs()) @@ -2687,8 +2679,7 @@ static SDValue lowerROT(SDValue Op, SelectionDAG &DAG) { SDLoc(Op), Opcode, DAG); } -static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG, - bool AllowUnsafeFPMath) { +static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG) { // Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)), // i.e. "poor man's fmod()". When y is infinite, x is returned. This matches // the semantics of LLVM's frem. @@ -2705,7 +2696,7 @@ static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG, SDValue Sub = DAG.getNode(ISD::FSUB, DL, Ty, X, Mul, Flags | SDNodeFlags::AllowContract); - if (AllowUnsafeFPMath || Flags.hasNoInfs()) + if (Flags.hasNoInfs()) return Sub; // If Y is infinite, return X @@ -2845,7 +2836,7 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CTLZ: return lowerCTLZCTPOP(Op, DAG); case ISD::FREM: - return lowerFREM(Op, DAG, allowUnsafeFPMath(DAG.getMachineFunction())); + return lowerFREM(Op, DAG); default: llvm_unreachable("Custom lowering not defined for operation"); @@ -4718,17 +4709,7 @@ bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast) return true; - return allowUnsafeFPMath(MF); -} - -bool NVPTXTargetLowering::allowUnsafeFPMath(const MachineFunction &MF) const { - // Honor TargetOptions flags that explicitly say unsafe math is okay. - if (MF.getTarget().Options.UnsafeFPMath) - return true; - - // Allow unsafe math if unsafe-fp-math attribute explicitly says so. - const Function &F = MF.getFunction(); - return F.getFnAttribute("unsafe-fp-math").getValueAsBool(); + return false; } static bool isConstZero(const SDValue &Operand) { diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index cf72a1e6db89c..71c15695b1988 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -215,7 +215,6 @@ class NVPTXTargetLowering : public TargetLowering { unsigned combineRepeatedFPDivisors() const override { return 2; } bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const; - bool allowUnsafeFPMath(const MachineFunction &MF) const; bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const override { diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 6000b40694763..020d42f217f75 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1181,9 +1181,8 @@ defm FMA_F64 : FMA; // sin/cos/tanh class UnaryOpAllowsApproxFn - : PatFrag<(ops node:$A), - (operator node:$A), [{ - return allowUnsafeFPMath() || N->getFlags().hasApproximateFuncs(); + : PatFrag<(ops node:$A), (operator node:$A), [{ + return N->getFlags().hasApproximateFuncs(); }]>; def SIN_APPROX_f32 : From ecbaee77ad5d361cb17801cb9a6ee48c3737493c Mon Sep 17 00:00:00 2001 From: PaperChalice Date: Fri, 1 Aug 2025 15:45:24 +0800 Subject: [PATCH 2/2] fix tests --- .../NVPTX/bf16x2-instructions-approx.ll | 8 +- llvm/test/CodeGen/NVPTX/f16-instructions.ll | 9 +- llvm/test/CodeGen/NVPTX/f16x2-instructions.ll | 9 +- llvm/test/CodeGen/NVPTX/f32x2-instructions.ll | 9 +- llvm/test/CodeGen/NVPTX/fast-math.ll | 151 ++--- .../CodeGen/NVPTX/fma-relu-fma-intrinsic.ll | 77 ++- llvm/test/CodeGen/NVPTX/frem.ll | 533 +++++++++--------- llvm/test/CodeGen/NVPTX/sqrt-approx.ll | 61 +- 8 files changed, 406 insertions(+), 451 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll index 80627a03354a0..e1d4ef1073a78 100644 --- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll +++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | FileCheck --check-prefixes=CHECK %s -; RUN: %if ptxas-11.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | %ptxas-verify -arch=sm_80 %} +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 | FileCheck --check-prefixes=CHECK %s +; RUN: %if ptxas-11.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 | %ptxas-verify -arch=sm_80 %} target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -22,7 +22,7 @@ define <2 x bfloat> @test_sin(<2 x bfloat> %a) #0 #1 { ; CHECK-NEXT: cvt.rn.bf16x2.f32 %r5, %r4, %r2; ; CHECK-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NEXT: ret; - %r = call <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a) + %r = call afn <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a) ret <2 x bfloat> %r } @@ -41,7 +41,7 @@ define <2 x bfloat> @test_cos(<2 x bfloat> %a) #0 #1 { ; CHECK-NEXT: cvt.rn.bf16x2.f32 %r5, %r4, %r2; ; CHECK-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NEXT: ret; - %r = call <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a) + %r = call afn <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a) ret <2 x bfloat> %r } diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll index 2b7e4184670c7..d4aec4f16f1ab 100644 --- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll @@ -886,8 +886,8 @@ define half @test_sqrt(half %a) #0 { ; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; -define half @test_sin(half %a) #0 #1 { - %r = call half @llvm.sin.f16(half %a) +define half @test_sin(half %a) #0 { + %r = call afn half @llvm.sin.f16(half %a) ret half %r } @@ -900,8 +900,8 @@ define half @test_sin(half %a) #0 #1 { ; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; -define half @test_cos(half %a) #0 #1 { - %r = call half @llvm.cos.f16(half %a) +define half @test_cos(half %a) #0 { + %r = call afn half @llvm.cos.f16(half %a) ret half %r } @@ -1183,4 +1183,3 @@ define <2 x half> @test_neg_f16x2(<2 x half> noundef %arg) #0 { } attributes #0 = { nounwind } -attributes #1 = { "unsafe-fp-math" = "true" } diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index d4fcea320f3ad..991311f9492b9 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -1674,7 +1674,7 @@ define <2 x half> @test_sqrt(<2 x half> %a) #0 { ; ret <2 x half> %r ;} -define <2 x half> @test_sin(<2 x half> %a) #0 #1 { +define <2 x half> @test_sin(<2 x half> %a) #0 { ; CHECK-LABEL: test_sin( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<5>; @@ -1692,11 +1692,11 @@ define <2 x half> @test_sin(<2 x half> %a) #0 #1 { ; CHECK-NEXT: mov.b32 %r6, {%rs4, %rs3}; ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NEXT: ret; - %r = call <2 x half> @llvm.sin.f16(<2 x half> %a) + %r = call afn <2 x half> @llvm.sin.f16(<2 x half> %a) ret <2 x half> %r } -define <2 x half> @test_cos(<2 x half> %a) #0 #1 { +define <2 x half> @test_cos(<2 x half> %a) #0 { ; CHECK-LABEL: test_cos( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<5>; @@ -1714,7 +1714,7 @@ define <2 x half> @test_cos(<2 x half> %a) #0 #1 { ; CHECK-NEXT: mov.b32 %r6, {%rs4, %rs3}; ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NEXT: ret; - %r = call <2 x half> @llvm.cos.f16(<2 x half> %a) + %r = call afn <2 x half> @llvm.cos.f16(<2 x half> %a) ret <2 x half> %r } @@ -2330,4 +2330,3 @@ define void @test_store_2xhalf(ptr %p1, ptr %p2, <2 x half> %v) { attributes #0 = { nounwind } -attributes #1 = { "unsafe-fp-math" = "true" } diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll index b84a0ec7155e2..1a73b22e83be7 100644 --- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll @@ -1627,7 +1627,7 @@ define <2 x float> @test_sqrt(<2 x float> %a) #0 { ; ret <2 x float> %r ;} -define <2 x float> @test_sin(<2 x float> %a) #0 #1 { +define <2 x float> @test_sin(<2 x float> %a) #0 { ; CHECK-LABEL: test_sin( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<5>; @@ -1640,11 +1640,11 @@ define <2 x float> @test_sin(<2 x float> %a) #0 #1 { ; CHECK-NEXT: sin.approx.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; ; CHECK-NEXT: ret; - %r = call <2 x float> @llvm.sin(<2 x float> %a) + %r = call afn <2 x float> @llvm.sin(<2 x float> %a) ret <2 x float> %r } -define <2 x float> @test_cos(<2 x float> %a) #0 #1 { +define <2 x float> @test_cos(<2 x float> %a) #0 { ; CHECK-LABEL: test_cos( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<5>; @@ -1657,7 +1657,7 @@ define <2 x float> @test_cos(<2 x float> %a) #0 #1 { ; CHECK-NEXT: cos.approx.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; ; CHECK-NEXT: ret; - %r = call <2 x float> @llvm.cos(<2 x float> %a) + %r = call afn <2 x float> @llvm.cos(<2 x float> %a) ret <2 x float> %r } @@ -2146,5 +2146,4 @@ define void @test_trunc_to_v2f16(<2 x float> %a, ptr %p) { attributes #0 = { nounwind } -attributes #1 = { "unsafe-fp-math" = "true" } attributes #2 = { "denormal-fp-math"="preserve-sign" } diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll index 5eda3a1e2dda1..8561c60a46948 100644 --- a/llvm/test/CodeGen/NVPTX/fast-math.ll +++ b/llvm/test/CodeGen/NVPTX/fast-math.ll @@ -22,7 +22,7 @@ define float @sqrt_div(float %a, float %b) { ret float %t2 } -define float @sqrt_div_fast(float %a, float %b) #0 { +define float @sqrt_div_fast(float %a, float %b) { ; CHECK-LABEL: sqrt_div_fast( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<5>; @@ -34,29 +34,25 @@ define float @sqrt_div_fast(float %a, float %b) #0 { ; CHECK-NEXT: div.approx.f32 %r4, %r2, %r3; ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; - %t1 = tail call float @llvm.sqrt.f32(float %a) - %t2 = fdiv float %t1, %b + %t1 = tail call afn float @llvm.sqrt.f32(float %a) + %t2 = fdiv afn float %t1, %b ret float %t2 } -define float @sqrt_div_fast_ninf(float %a, float %b) #0 { +define float @sqrt_div_fast_ninf(float %a, float %b) { ; CHECK-LABEL: sqrt_div_fast_ninf( ; CHECK: { -; CHECK-NEXT: .reg .pred %p<2>; -; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b32 %r<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [sqrt_div_fast_ninf_param_0]; ; CHECK-NEXT: sqrt.approx.f32 %r2, %r1; -; CHECK-NEXT: abs.f32 %r3, %r1; -; CHECK-NEXT: setp.lt.f32 %p1, %r3, 0f00800000; -; CHECK-NEXT: selp.f32 %r4, 0f00000000, %r2, %p1; -; CHECK-NEXT: ld.param.b32 %r5, [sqrt_div_fast_ninf_param_1]; -; CHECK-NEXT: div.approx.f32 %r6, %r4, %r5; -; CHECK-NEXT: st.param.b32 [func_retval0], %r6; +; CHECK-NEXT: ld.param.b32 %r3, [sqrt_div_fast_ninf_param_1]; +; CHECK-NEXT: div.approx.f32 %r4, %r2, %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %t1 = tail call ninf afn float @llvm.sqrt.f32(float %a) - %t2 = fdiv float %t1, %b + %t2 = fdiv afn float %t1, %b ret float %t2 } @@ -77,7 +73,7 @@ define float @sqrt_div_ftz(float %a, float %b) #1 { ret float %t2 } -define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 { +define float @sqrt_div_fast_ftz(float %a, float %b) #1 { ; CHECK-LABEL: sqrt_div_fast_ftz( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<5>; @@ -89,35 +85,32 @@ define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 { ; CHECK-NEXT: div.approx.ftz.f32 %r4, %r2, %r3; ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; - %t1 = tail call float @llvm.sqrt.f32(float %a) - %t2 = fdiv float %t1, %b + %t1 = tail call afn float @llvm.sqrt.f32(float %a) + %t2 = fdiv afn float %t1, %b ret float %t2 } -define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #0 #1 { +define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #1 { ; CHECK-LABEL: sqrt_div_fast_ftz_ninf( ; CHECK: { -; CHECK-NEXT: .reg .pred %p<2>; -; CHECK-NEXT: .reg .b32 %r<6>; +; CHECK-NEXT: .reg .b32 %r<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [sqrt_div_fast_ftz_ninf_param_0]; -; CHECK-NEXT: setp.eq.ftz.f32 %p1, %r1, 0f00000000; ; CHECK-NEXT: sqrt.approx.ftz.f32 %r2, %r1; -; CHECK-NEXT: selp.f32 %r3, 0f00000000, %r2, %p1; -; CHECK-NEXT: ld.param.b32 %r4, [sqrt_div_fast_ftz_ninf_param_1]; -; CHECK-NEXT: div.approx.ftz.f32 %r5, %r3, %r4; -; CHECK-NEXT: st.param.b32 [func_retval0], %r5; +; CHECK-NEXT: ld.param.b32 %r3, [sqrt_div_fast_ftz_ninf_param_1]; +; CHECK-NEXT: div.approx.ftz.f32 %r4, %r2, %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %t1 = tail call ninf afn float @llvm.sqrt.f32(float %a) - %t2 = fdiv float %t1, %b + %t2 = fdiv afn float %t1, %b ret float %t2 } ; There are no fast-math or ftz versions of sqrt and div for f64. We use ; reciprocal(rsqrt(x)) for sqrt(x), and emit a vanilla divide. -define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 { +define double @sqrt_div_fast_ftz_f64(double %a, double %b) #1 { ; CHECK-LABEL: sqrt_div_fast_ftz_f64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<5>; @@ -134,22 +127,17 @@ define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 { ret double %t2 } -define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #0 #1 { +define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #1 { ; CHECK-LABEL: sqrt_div_fast_ftz_f64_ninf( ; CHECK: { -; CHECK-NEXT: .reg .pred %p<2>; -; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [sqrt_div_fast_ftz_f64_ninf_param_0]; -; CHECK-NEXT: abs.f64 %rd2, %rd1; -; CHECK-NEXT: setp.lt.f64 %p1, %rd2, 0d0010000000000000; -; CHECK-NEXT: rsqrt.approx.f64 %rd3, %rd1; -; CHECK-NEXT: rcp.approx.ftz.f64 %rd4, %rd3; -; CHECK-NEXT: selp.f64 %rd5, 0d0000000000000000, %rd4, %p1; -; CHECK-NEXT: ld.param.b64 %rd6, [sqrt_div_fast_ftz_f64_ninf_param_1]; -; CHECK-NEXT: div.rn.f64 %rd7, %rd5, %rd6; -; CHECK-NEXT: st.param.b64 [func_retval0], %rd7; +; CHECK-NEXT: sqrt.rn.f64 %rd2, %rd1; +; CHECK-NEXT: ld.param.b64 %rd3, [sqrt_div_fast_ftz_f64_ninf_param_1]; +; CHECK-NEXT: div.rn.f64 %rd4, %rd2, %rd3; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd4; ; CHECK-NEXT: ret; %t1 = tail call ninf afn double @llvm.sqrt.f64(double %a) %t2 = fdiv double %t1, %b @@ -172,7 +160,7 @@ define float @rsqrt(float %a) { ret float %ret } -define float @rsqrt_fast(float %a) #0 { +define float @rsqrt_fast(float %a) { ; CHECK-LABEL: rsqrt_fast( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -182,12 +170,12 @@ define float @rsqrt_fast(float %a) #0 { ; CHECK-NEXT: rsqrt.approx.f32 %r2, %r1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; - %b = tail call float @llvm.sqrt.f32(float %a) - %ret = fdiv float 1.0, %b + %b = tail call afn float @llvm.sqrt.f32(float %a) + %ret = fdiv afn float 1.0, %b ret float %ret } -define float @rsqrt_fast_ftz(float %a) #0 #1 { +define float @rsqrt_fast_ftz(float %a) #1 { ; CHECK-LABEL: rsqrt_fast_ftz( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -197,8 +185,8 @@ define float @rsqrt_fast_ftz(float %a) #0 #1 { ; CHECK-NEXT: rsqrt.approx.ftz.f32 %r2, %r1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; - %b = tail call float @llvm.sqrt.f32(float %a) - %ret = fdiv float 1.0, %b + %b = tail call afn float @llvm.sqrt.f32(float %a) + %ret = fdiv afn float 1.0, %b ret float %ret } @@ -263,35 +251,7 @@ define float @fcos_approx_afn(float %a) { ret float %r } -define float @fsin_approx(float %a) #0 { -; CHECK-LABEL: fsin_approx( -; CHECK: { -; CHECK-NEXT: .reg .b32 %r<3>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [fsin_approx_param_0]; -; CHECK-NEXT: sin.approx.f32 %r2, %r1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r2; -; CHECK-NEXT: ret; - %r = tail call float @llvm.sin.f32(float %a) - ret float %r -} - -define float @fcos_approx(float %a) #0 { -; CHECK-LABEL: fcos_approx( -; CHECK: { -; CHECK-NEXT: .reg .b32 %r<3>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [fcos_approx_param_0]; -; CHECK-NEXT: cos.approx.f32 %r2, %r1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r2; -; CHECK-NEXT: ret; - %r = tail call float @llvm.cos.f32(float %a) - ret float %r -} - -define float @fsin_approx_ftz(float %a) #0 #1 { +define float @fsin_approx_ftz(float %a) #1 { ; CHECK-LABEL: fsin_approx_ftz( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -301,11 +261,11 @@ define float @fsin_approx_ftz(float %a) #0 #1 { ; CHECK-NEXT: sin.approx.ftz.f32 %r2, %r1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; - %r = tail call float @llvm.sin.f32(float %a) + %r = tail call afn float @llvm.sin.f32(float %a) ret float %r } -define float @fcos_approx_ftz(float %a) #0 #1 { +define float @fcos_approx_ftz(float %a) #1 { ; CHECK-LABEL: fcos_approx_ftz( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -315,7 +275,7 @@ define float @fcos_approx_ftz(float %a) #0 #1 { ; CHECK-NEXT: cos.approx.ftz.f32 %r2, %r1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; - %r = tail call float @llvm.cos.f32(float %a) + %r = tail call afn float @llvm.cos.f32(float %a) ret float %r } @@ -423,7 +383,7 @@ define float @repeated_div_recip_allowed_ftz_sel(i1 %pred, float %a, float %b, f ret float %w } -define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 { +define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) { ; CHECK-LABEL: repeated_div_fast( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -444,14 +404,14 @@ define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 ; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r8; ; CHECK-NEXT: ret; - %x = fdiv float %a, %divisor - %y = fdiv float %b, %divisor - %z = fmul float %x, %y + %x = fdiv afn arcp float %a, %divisor + %y = fdiv afn arcp contract float %b, %divisor + %z = fmul contract float %x, %y %w = select i1 %pred, float %z, float %y ret float %w } -define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor) #0 { +define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor) { ; CHECK-LABEL: repeated_div_fast_sel( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -469,13 +429,13 @@ define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor ; CHECK-NEXT: div.approx.f32 %r5, %r3, %r4; ; CHECK-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NEXT: ret; - %x = fdiv float %a, %divisor - %y = fdiv float %b, %divisor + %x = fdiv afn float %a, %divisor + %y = fdiv afn float %b, %divisor %w = select i1 %pred, float %x, float %y ret float %w } -define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 { +define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #1 { ; CHECK-LABEL: repeated_div_fast_ftz( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -496,14 +456,14 @@ define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor ; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r8; ; CHECK-NEXT: ret; - %x = fdiv float %a, %divisor - %y = fdiv float %b, %divisor - %z = fmul float %x, %y + %x = fdiv afn arcp float %a, %divisor + %y = fdiv afn arcp contract float %b, %divisor + %z = fmul contract float %x, %y %w = select i1 %pred, float %z, float %y ret float %w } -define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #0 #1 { +define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #1 { ; CHECK-LABEL: repeated_div_fast_ftz_sel( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -521,13 +481,13 @@ define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %div ; CHECK-NEXT: div.approx.ftz.f32 %r5, %r3, %r4; ; CHECK-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NEXT: ret; - %x = fdiv float %a, %divisor - %y = fdiv float %b, %divisor + %x = fdiv afn float %a, %divisor + %y = fdiv afn float %b, %divisor %w = select i1 %pred, float %x, float %y ret float %w } -define float @frem(float %a, float %b) #0 { +define float @frem(float %a, float %b) { ; CHECK-LABEL: frem( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<7>; @@ -541,11 +501,11 @@ define float @frem(float %a, float %b) #0 { ; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NEXT: ret; - %rem = frem float %a, %b + %rem = frem afn arcp contract ninf float %a, %b ret float %rem } -define float @frem_ftz(float %a, float %b) #0 #1 { +define float @frem_ftz(float %a, float %b) #1 { ; CHECK-LABEL: frem_ftz( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<7>; @@ -559,11 +519,11 @@ define float @frem_ftz(float %a, float %b) #0 #1 { ; CHECK-NEXT: fma.rn.ftz.f32 %r6, %r5, %r2, %r1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NEXT: ret; - %rem = frem float %a, %b + %rem = frem afn contract ninf float %a, %b ret float %rem } -define double @frem_f64(double %a, double %b) #0 { +define double @frem_f64(double %a, double %b) { ; CHECK-LABEL: frem_f64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<7>; @@ -577,9 +537,8 @@ define double @frem_f64(double %a, double %b) #0 { ; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1; ; CHECK-NEXT: st.param.b64 [func_retval0], %rd6; ; CHECK-NEXT: ret; - %rem = frem double %a, %b + %rem = frem ninf double %a, %b ret double %rem } -attributes #0 = { "unsafe-fp-math" = "true" } attributes #1 = { "denormal-fp-math-f32" = "preserve-sign" } diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll b/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll index 2f1d7d6321438..6d983ba6bf0ff 100644 --- a/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll +++ b/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll @@ -9,7 +9,7 @@ ; SM < 80 or (which needs PTX version >= 70) should not emit fma{.ftz}.relu ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 | FileCheck %s --check-prefixes=CHECK-SM70 -define half @fma_f16_no_nans(half %a, half %b, half %c) #0 { +define half @fma_f16_no_nans(half %a, half %b, half %c) { ; CHECK-LABEL: fma_f16_no_nans( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<5>; @@ -49,14 +49,14 @@ define half @fma_f16_no_nans(half %a, half %b, half %c) #0 { ; CHECK-SM70-NEXT: selp.b16 %rs6, %rs4, 0x0000, %p1; ; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs6; ; CHECK-SM70-NEXT: ret; - %1 = call half @llvm.fma.f16(half %a, half %b, half %c) + %1 = call nnan half @llvm.fma.f16(half %a, half %b, half %c) %2 = fcmp ogt half %1, 0.0 - %3 = select i1 %2, half %1, half 0.0 + %3 = select nsz i1 %2, half %1, half 0.0 ret half %3 } ; FMA relu shouldn't be selected if the FMA operation has multiple uses -define half @fma_f16_no_nans_multiple_uses_of_fma(half %a, half %b, half %c) #0 { +define half @fma_f16_no_nans_multiple_uses_of_fma(half %a, half %b, half %c) { ; CHECK-LABEL: fma_f16_no_nans_multiple_uses_of_fma( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<8>; @@ -103,13 +103,13 @@ define half @fma_f16_no_nans_multiple_uses_of_fma(half %a, half %b, half %c) #0 ; CHECK-SM70-NEXT: ret; %1 = call half @llvm.fma.f16(half %a, half %b, half %c) %2 = fcmp ogt half %1, 0.0 - %3 = select i1 %2, half %1, half 0.0 - %4 = fadd half %1, 7.0 - %5 = fadd half %4, %1 + %3 = select i1 %2, half %1, half 0.0 + %4 = fadd contract half %1, 7.0 + %5 = fadd contract half %4, %1 ret half %5 } -define half @fma_f16_maxnum_no_nans(half %a, half %b, half %c) #0 { +define half @fma_f16_maxnum_no_nans(half %a, half %b, half %c) { ; CHECK-LABEL: fma_f16_maxnum_no_nans( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<5>; @@ -149,12 +149,12 @@ define half @fma_f16_maxnum_no_nans(half %a, half %b, half %c) #0 { ; CHECK-SM70-NEXT: cvt.rn.f16.f32 %rs5, %r2; ; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs5; ; CHECK-SM70-NEXT: ret; - %1 = call half @llvm.fma.f16(half %a, half %b, half %c) - %2 = call half @llvm.maxnum.f16(half %1, half 0.0) + %1 = call nnan half @llvm.fma.f16(half %a, half %b, half %c) + %2 = call nsz half @llvm.maxnum.f16(half %1, half 0.0) ret half %2 } -define bfloat @fma_bf16_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 { +define bfloat @fma_bf16_no_nans(bfloat %a, bfloat %b, bfloat %c) { ; CHECK-LABEL: fma_bf16_no_nans( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<5>; @@ -205,14 +205,14 @@ define bfloat @fma_bf16_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 { ; CHECK-SM70-NEXT: selp.b16 %rs2, %rs1, 0x0000, %p2; ; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs2; ; CHECK-SM70-NEXT: ret; - %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c) + %1 = call nnan bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c) %2 = fcmp ogt bfloat %1, 0.0 - %3 = select i1 %2, bfloat %1, bfloat 0.0 + %3 = select nsz i1 %2, bfloat %1, bfloat 0.0 ret bfloat %3 } ; FMA_relu shouldn't be selected if the FMA operation has multiple uses -define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloat %c) #0 { +define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloat %c) { ; CHECK-LABEL: fma_bf16_no_nans_multiple_uses_of_fma( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<9>; @@ -291,12 +291,12 @@ define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloa %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c) %2 = fcmp ogt bfloat %1, 0.0 %3 = select i1 %2, bfloat %1, bfloat 0.0 - %4 = fadd bfloat %1, 7.0 - %5 = fadd bfloat %4, %1 + %4 = fadd contract bfloat %1, 7.0 + %5 = fadd contract bfloat %4, %1 ret bfloat %5 } -define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 { +define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) { ; CHECK-LABEL: fma_bf16_maxnum_no_nans( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<5>; @@ -351,12 +351,12 @@ define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 { ; CHECK-SM70-NEXT: shr.u32 %r20, %r19, 16; ; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r20; ; CHECK-SM70-NEXT: ret; - %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c) - %2 = call bfloat @llvm.maxnum.bf16(bfloat %1, bfloat 0.0) + %1 = call nnan bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c) + %2 = call nsz bfloat @llvm.maxnum.bf16(bfloat %1, bfloat 0.0) ret bfloat %2 } -define <2 x half> @fma_f16x2_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 { +define <2 x half> @fma_f16x2_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) { ; CHECK-LABEL: fma_f16x2_no_nans( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<5>; @@ -399,14 +399,14 @@ define <2 x half> @fma_f16x2_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c ; CHECK-SM70-NEXT: selp.b16 %rs4, %rs1, 0x0000, %p1; ; CHECK-SM70-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; ; CHECK-SM70-NEXT: ret; - %1 = call <2 x half> @llvm.fma.f16x2(<2 x half> %a, <2 x half> %b, <2 x half> %c) + %1 = call nnan <2 x half> @llvm.fma.f16x2(<2 x half> %a, <2 x half> %b, <2 x half> %c) %2 = fcmp ogt <2 x half> %1, - %3 = select <2 x i1> %2, <2 x half> %1, <2 x half> + %3 = select nsz <2 x i1> %2, <2 x half> %1, <2 x half> ret <2 x half> %3 } ; FMA relu shouldn't be selected if the FMA operation has multiple uses -define <2 x half> @fma_f16x2_no_nans_multiple_uses_of_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 { +define <2 x half> @fma_f16x2_no_nans_multiple_uses_of_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) { ; CHECK-LABEL: fma_f16x2_no_nans_multiple_uses_of_fma( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<8>; @@ -454,12 +454,12 @@ define <2 x half> @fma_f16x2_no_nans_multiple_uses_of_fma(<2 x half> %a, <2 x ha %1 = call <2 x half> @llvm.fma.f16x2(<2 x half> %a, <2 x half> %b, <2 x half> %c) %2 = fcmp ogt <2 x half> %1, %3 = select <2 x i1> %2, <2 x half> %1, <2 x half> - %4 = fadd <2 x half> %1, - %5 = fadd <2 x half> %4, %1 + %4 = fadd contract <2 x half> %1, + %5 = fadd contract <2 x half> %4, %1 ret <2 x half> %5 } -define <2 x half> @fma_f16x2_maxnum_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 { +define <2 x half> @fma_f16x2_maxnum_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) { ; CHECK-LABEL: fma_f16x2_maxnum_no_nans( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<5>; @@ -504,12 +504,12 @@ define <2 x half> @fma_f16x2_maxnum_no_nans(<2 x half> %a, <2 x half> %b, <2 x h ; CHECK-SM70-NEXT: mov.b32 %r9, {%rs4, %rs3}; ; CHECK-SM70-NEXT: st.param.b32 [func_retval0], %r9; ; CHECK-SM70-NEXT: ret; - %1 = call <2 x half> @llvm.fma.f16x2(<2 x half> %a, <2 x half> %b, <2 x half> %c) - %2 = call <2 x half> @llvm.maxnum.f16x2(<2 x half> %1, <2 x half> ) + %1 = call nnan <2 x half> @llvm.fma.f16x2(<2 x half> %a, <2 x half> %b, <2 x half> %c) + %2 = call nsz <2 x half> @llvm.maxnum.f16x2(<2 x half> %1, <2 x half> ) ret <2 x half> %2 } -define <2 x bfloat> @fma_bf16x2_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) #0 { +define <2 x bfloat> @fma_bf16x2_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) { ; CHECK-LABEL: fma_bf16x2_no_nans( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<5>; @@ -580,14 +580,14 @@ define <2 x bfloat> @fma_bf16x2_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x b ; CHECK-SM70-NEXT: selp.b16 %rs10, %rs7, 0x0000, %p3; ; CHECK-SM70-NEXT: st.param.v2.b16 [func_retval0], {%rs10, %rs9}; ; CHECK-SM70-NEXT: ret; - %1 = call <2 x bfloat> @llvm.fma.bf16x2(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) + %1 = call nnan <2 x bfloat> @llvm.fma.bf16x2(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) %2 = fcmp ogt <2 x bfloat> %1, - %3 = select <2 x i1> %2, <2 x bfloat> %1, <2 x bfloat> + %3 = select nsz <2 x i1> %2, <2 x bfloat> %1, <2 x bfloat> ret <2 x bfloat> %3 } ; FMA_relu shouldn't be selected if the FMA operation has multiple uses -define <2 x bfloat> @fma_bf16x2_no_nans_multiple_uses_of_fma(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) #0 { +define <2 x bfloat> @fma_bf16x2_no_nans_multiple_uses_of_fma(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) { ; CHECK-LABEL: fma_bf16x2_no_nans_multiple_uses_of_fma( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<9>; @@ -707,12 +707,12 @@ define <2 x bfloat> @fma_bf16x2_no_nans_multiple_uses_of_fma(<2 x bfloat> %a, <2 %1 = call <2 x bfloat> @llvm.fma.bf16x2(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) %2 = fcmp ogt <2 x bfloat> %1, %3 = select <2 x i1> %2, <2 x bfloat> %1, <2 x bfloat> - %4 = fadd <2 x bfloat> %1, - %5 = fadd <2 x bfloat> %4, %1 + %4 = fadd contract <2 x bfloat> %1, + %5 = fadd contract <2 x bfloat> %4, %1 ret <2 x bfloat> %5 } -define <2 x bfloat> @fma_bf16x2_maxnum_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) #0 { +define <2 x bfloat> @fma_bf16x2_maxnum_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) { ; CHECK-LABEL: fma_bf16x2_maxnum_no_nans( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<5>; @@ -792,10 +792,7 @@ define <2 x bfloat> @fma_bf16x2_maxnum_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, ; CHECK-SM70-NEXT: prmt.b32 %r39, %r38, %r31, 0x7632U; ; CHECK-SM70-NEXT: st.param.b32 [func_retval0], %r39; ; CHECK-SM70-NEXT: ret; - %1 = call <2 x bfloat> @llvm.fma.bf16x2(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) - %2 = call <2 x bfloat> @llvm.maxnum.bf16x2(<2 x bfloat> %1, <2 x bfloat> ) + %1 = call nnan <2 x bfloat> @llvm.fma.bf16x2(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) + %2 = call nsz <2 x bfloat> @llvm.maxnum.bf16x2(<2 x bfloat> %1, <2 x bfloat> ) ret <2 x bfloat> %2 } - -attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "unsafe-fp-math"="true" } -attributes #1 = { "unsafe-fp-math"="true" } diff --git a/llvm/test/CodeGen/NVPTX/frem.ll b/llvm/test/CodeGen/NVPTX/frem.ll index 5805aed1bebe6..d30c72cef83d5 100644 --- a/llvm/test/CodeGen/NVPTX/frem.ll +++ b/llvm/test/CodeGen/NVPTX/frem.ll @@ -1,313 +1,316 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s --enable-unsafe-fp-math -mcpu=sm_60 | FileCheck %s --check-prefixes=FAST -; RUN: llc < %s -mcpu=sm_60 | FileCheck %s --check-prefixes=NORMAL +; RUN: llc < %s -mcpu=sm_60 | FileCheck %s target triple = "nvptx64-unknown-cuda" define half @frem_f16(half %a, half %b) { -; FAST-LABEL: frem_f16( -; FAST: { -; FAST-NEXT: .reg .b16 %rs<4>; -; FAST-NEXT: .reg .b32 %r<7>; -; FAST-EMPTY: -; FAST-NEXT: // %bb.0: -; FAST-NEXT: ld.param.b16 %rs1, [frem_f16_param_0]; -; FAST-NEXT: ld.param.b16 %rs2, [frem_f16_param_1]; -; FAST-NEXT: cvt.f32.f16 %r1, %rs2; -; FAST-NEXT: cvt.f32.f16 %r2, %rs1; -; FAST-NEXT: div.approx.f32 %r3, %r2, %r1; -; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3; -; FAST-NEXT: neg.f32 %r5, %r4; -; FAST-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2; -; FAST-NEXT: cvt.rn.f16.f32 %rs3, %r6; -; FAST-NEXT: st.param.b16 [func_retval0], %rs3; -; FAST-NEXT: ret; -; -; NORMAL-LABEL: frem_f16( -; NORMAL: { -; NORMAL-NEXT: .reg .pred %p<2>; -; NORMAL-NEXT: .reg .b16 %rs<4>; -; NORMAL-NEXT: .reg .b32 %r<8>; -; NORMAL-EMPTY: -; NORMAL-NEXT: // %bb.0: -; NORMAL-NEXT: ld.param.b16 %rs1, [frem_f16_param_0]; -; NORMAL-NEXT: ld.param.b16 %rs2, [frem_f16_param_1]; -; NORMAL-NEXT: cvt.f32.f16 %r1, %rs2; -; NORMAL-NEXT: cvt.f32.f16 %r2, %rs1; -; NORMAL-NEXT: div.rn.f32 %r3, %r2, %r1; -; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3; -; NORMAL-NEXT: neg.f32 %r5, %r4; -; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2; -; NORMAL-NEXT: testp.infinite.f32 %p1, %r1; -; NORMAL-NEXT: selp.f32 %r7, %r2, %r6, %p1; -; NORMAL-NEXT: cvt.rn.f16.f32 %rs3, %r7; -; NORMAL-NEXT: st.param.b16 [func_retval0], %rs3; -; NORMAL-NEXT: ret; +; CHECK-LABEL: frem_f16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [frem_f16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [frem_f16_param_1]; +; CHECK-NEXT: cvt.f32.f16 %r1, %rs2; +; CHECK-NEXT: cvt.f32.f16 %r2, %rs1; +; CHECK-NEXT: div.rn.f32 %r3, %r2, %r1; +; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3; +; CHECK-NEXT: neg.f32 %r5, %r4; +; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2; +; CHECK-NEXT: testp.infinite.f32 %p1, %r1; +; CHECK-NEXT: selp.f32 %r7, %r2, %r6, %p1; +; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r7; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; +; CHECK-NEXT: ret; %r = frem half %a, %b ret half %r } +define half @frem_f16_fast(half %a, half %b) { +; CHECK-LABEL: frem_f16_fast( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [frem_f16_fast_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [frem_f16_fast_param_1]; +; CHECK-NEXT: cvt.f32.f16 %r1, %rs2; +; CHECK-NEXT: cvt.f32.f16 %r2, %rs1; +; CHECK-NEXT: div.approx.f32 %r3, %r2, %r1; +; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3; +; CHECK-NEXT: neg.f32 %r5, %r4; +; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2; +; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r6; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; +; CHECK-NEXT: ret; + %r = frem afn ninf half %a, %b + ret half %r +} + define float @frem_f32(float %a, float %b) { -; FAST-LABEL: frem_f32( -; FAST: { -; FAST-NEXT: .reg .b32 %r<7>; -; FAST-EMPTY: -; FAST-NEXT: // %bb.0: -; FAST-NEXT: ld.param.b32 %r1, [frem_f32_param_0]; -; FAST-NEXT: ld.param.b32 %r2, [frem_f32_param_1]; -; FAST-NEXT: div.approx.f32 %r3, %r1, %r2; -; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3; -; FAST-NEXT: neg.f32 %r5, %r4; -; FAST-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1; -; FAST-NEXT: st.param.b32 [func_retval0], %r6; -; FAST-NEXT: ret; -; -; NORMAL-LABEL: frem_f32( -; NORMAL: { -; NORMAL-NEXT: .reg .pred %p<2>; -; NORMAL-NEXT: .reg .b32 %r<8>; -; NORMAL-EMPTY: -; NORMAL-NEXT: // %bb.0: -; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_param_0]; -; NORMAL-NEXT: ld.param.b32 %r2, [frem_f32_param_1]; -; NORMAL-NEXT: div.rn.f32 %r3, %r1, %r2; -; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3; -; NORMAL-NEXT: neg.f32 %r5, %r4; -; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1; -; NORMAL-NEXT: testp.infinite.f32 %p1, %r2; -; NORMAL-NEXT: selp.f32 %r7, %r1, %r6, %p1; -; NORMAL-NEXT: st.param.b32 [func_retval0], %r7; -; NORMAL-NEXT: ret; +; CHECK-LABEL: frem_f32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [frem_f32_param_1]; +; CHECK-NEXT: div.rn.f32 %r3, %r1, %r2; +; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3; +; CHECK-NEXT: neg.f32 %r5, %r4; +; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1; +; CHECK-NEXT: testp.infinite.f32 %p1, %r2; +; CHECK-NEXT: selp.f32 %r7, %r1, %r6, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r7; +; CHECK-NEXT: ret; %r = frem float %a, %b ret float %r } +define float @frem_f32_fast(float %a, float %b) { +; CHECK-LABEL: frem_f32_fast( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_fast_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [frem_f32_fast_param_1]; +; CHECK-NEXT: div.approx.f32 %r3, %r1, %r2; +; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3; +; CHECK-NEXT: neg.f32 %r5, %r4; +; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r6; +; CHECK-NEXT: ret; + %r = frem afn ninf float %a, %b + ret float %r +} + define double @frem_f64(double %a, double %b) { -; FAST-LABEL: frem_f64( -; FAST: { -; FAST-NEXT: .reg .b64 %rd<7>; -; FAST-EMPTY: -; FAST-NEXT: // %bb.0: -; FAST-NEXT: ld.param.b64 %rd1, [frem_f64_param_0]; -; FAST-NEXT: ld.param.b64 %rd2, [frem_f64_param_1]; -; FAST-NEXT: div.rn.f64 %rd3, %rd1, %rd2; -; FAST-NEXT: cvt.rzi.f64.f64 %rd4, %rd3; -; FAST-NEXT: neg.f64 %rd5, %rd4; -; FAST-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1; -; FAST-NEXT: st.param.b64 [func_retval0], %rd6; -; FAST-NEXT: ret; -; -; NORMAL-LABEL: frem_f64( -; NORMAL: { -; NORMAL-NEXT: .reg .pred %p<2>; -; NORMAL-NEXT: .reg .b64 %rd<8>; -; NORMAL-EMPTY: -; NORMAL-NEXT: // %bb.0: -; NORMAL-NEXT: ld.param.b64 %rd1, [frem_f64_param_0]; -; NORMAL-NEXT: ld.param.b64 %rd2, [frem_f64_param_1]; -; NORMAL-NEXT: div.rn.f64 %rd3, %rd1, %rd2; -; NORMAL-NEXT: cvt.rzi.f64.f64 %rd4, %rd3; -; NORMAL-NEXT: neg.f64 %rd5, %rd4; -; NORMAL-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1; -; NORMAL-NEXT: testp.infinite.f64 %p1, %rd2; -; NORMAL-NEXT: selp.f64 %rd7, %rd1, %rd6, %p1; -; NORMAL-NEXT: st.param.b64 [func_retval0], %rd7; -; NORMAL-NEXT: ret; +; CHECK-LABEL: frem_f64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [frem_f64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [frem_f64_param_1]; +; CHECK-NEXT: div.rn.f64 %rd3, %rd1, %rd2; +; CHECK-NEXT: cvt.rzi.f64.f64 %rd4, %rd3; +; CHECK-NEXT: neg.f64 %rd5, %rd4; +; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1; +; CHECK-NEXT: testp.infinite.f64 %p1, %rd2; +; CHECK-NEXT: selp.f64 %rd7, %rd1, %rd6, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd7; +; CHECK-NEXT: ret; %r = frem double %a, %b ret double %r } +define double @frem_f64_fast(double %a, double %b) { +; CHECK-LABEL: frem_f64_fast( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [frem_f64_fast_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [frem_f64_fast_param_1]; +; CHECK-NEXT: div.rn.f64 %rd3, %rd1, %rd2; +; CHECK-NEXT: cvt.rzi.f64.f64 %rd4, %rd3; +; CHECK-NEXT: neg.f64 %rd5, %rd4; +; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd6; +; CHECK-NEXT: ret; + %r = frem afn ninf double %a, %b + ret double %r +} + define half @frem_f16_ninf(half %a, half %b) { -; FAST-LABEL: frem_f16_ninf( -; FAST: { -; FAST-NEXT: .reg .b16 %rs<4>; -; FAST-NEXT: .reg .b32 %r<7>; -; FAST-EMPTY: -; FAST-NEXT: // %bb.0: -; FAST-NEXT: ld.param.b16 %rs1, [frem_f16_ninf_param_0]; -; FAST-NEXT: ld.param.b16 %rs2, [frem_f16_ninf_param_1]; -; FAST-NEXT: cvt.f32.f16 %r1, %rs2; -; FAST-NEXT: cvt.f32.f16 %r2, %rs1; -; FAST-NEXT: div.approx.f32 %r3, %r2, %r1; -; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3; -; FAST-NEXT: neg.f32 %r5, %r4; -; FAST-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2; -; FAST-NEXT: cvt.rn.f16.f32 %rs3, %r6; -; FAST-NEXT: st.param.b16 [func_retval0], %rs3; -; FAST-NEXT: ret; -; -; NORMAL-LABEL: frem_f16_ninf( -; NORMAL: { -; NORMAL-NEXT: .reg .b16 %rs<4>; -; NORMAL-NEXT: .reg .b32 %r<7>; -; NORMAL-EMPTY: -; NORMAL-NEXT: // %bb.0: -; NORMAL-NEXT: ld.param.b16 %rs1, [frem_f16_ninf_param_0]; -; NORMAL-NEXT: ld.param.b16 %rs2, [frem_f16_ninf_param_1]; -; NORMAL-NEXT: cvt.f32.f16 %r1, %rs2; -; NORMAL-NEXT: cvt.f32.f16 %r2, %rs1; -; NORMAL-NEXT: div.rn.f32 %r3, %r2, %r1; -; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3; -; NORMAL-NEXT: neg.f32 %r5, %r4; -; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2; -; NORMAL-NEXT: cvt.rn.f16.f32 %rs3, %r6; -; NORMAL-NEXT: st.param.b16 [func_retval0], %rs3; -; NORMAL-NEXT: ret; +; CHECK-LABEL: frem_f16_ninf( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [frem_f16_ninf_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [frem_f16_ninf_param_1]; +; CHECK-NEXT: cvt.f32.f16 %r1, %rs2; +; CHECK-NEXT: cvt.f32.f16 %r2, %rs1; +; CHECK-NEXT: div.rn.f32 %r3, %r2, %r1; +; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3; +; CHECK-NEXT: neg.f32 %r5, %r4; +; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2; +; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r6; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; +; CHECK-NEXT: ret; %r = frem ninf half %a, %b ret half %r } +define half @frem_f16_ninf_fast(half %a, half %b) { +; CHECK-LABEL: frem_f16_ninf_fast( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [frem_f16_ninf_fast_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [frem_f16_ninf_fast_param_1]; +; CHECK-NEXT: cvt.f32.f16 %r1, %rs2; +; CHECK-NEXT: cvt.f32.f16 %r2, %rs1; +; CHECK-NEXT: div.approx.f32 %r3, %r2, %r1; +; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3; +; CHECK-NEXT: neg.f32 %r5, %r4; +; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2; +; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r6; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; +; CHECK-NEXT: ret; + %r = frem afn ninf half %a, %b + ret half %r +} + define float @frem_f32_ninf(float %a, float %b) { -; FAST-LABEL: frem_f32_ninf( -; FAST: { -; FAST-NEXT: .reg .b32 %r<7>; -; FAST-EMPTY: -; FAST-NEXT: // %bb.0: -; FAST-NEXT: ld.param.b32 %r1, [frem_f32_ninf_param_0]; -; FAST-NEXT: ld.param.b32 %r2, [frem_f32_ninf_param_1]; -; FAST-NEXT: div.approx.f32 %r3, %r1, %r2; -; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3; -; FAST-NEXT: neg.f32 %r5, %r4; -; FAST-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1; -; FAST-NEXT: st.param.b32 [func_retval0], %r6; -; FAST-NEXT: ret; -; -; NORMAL-LABEL: frem_f32_ninf( -; NORMAL: { -; NORMAL-NEXT: .reg .b32 %r<7>; -; NORMAL-EMPTY: -; NORMAL-NEXT: // %bb.0: -; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_ninf_param_0]; -; NORMAL-NEXT: ld.param.b32 %r2, [frem_f32_ninf_param_1]; -; NORMAL-NEXT: div.rn.f32 %r3, %r1, %r2; -; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3; -; NORMAL-NEXT: neg.f32 %r5, %r4; -; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1; -; NORMAL-NEXT: st.param.b32 [func_retval0], %r6; -; NORMAL-NEXT: ret; +; CHECK-LABEL: frem_f32_ninf( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_ninf_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [frem_f32_ninf_param_1]; +; CHECK-NEXT: div.rn.f32 %r3, %r1, %r2; +; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3; +; CHECK-NEXT: neg.f32 %r5, %r4; +; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r6; +; CHECK-NEXT: ret; %r = frem ninf float %a, %b ret float %r } +define float @frem_f32_ninf_fast(float %a, float %b) { +; CHECK-LABEL: frem_f32_ninf_fast( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_ninf_fast_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [frem_f32_ninf_fast_param_1]; +; CHECK-NEXT: div.approx.f32 %r3, %r1, %r2; +; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3; +; CHECK-NEXT: neg.f32 %r5, %r4; +; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r6; +; CHECK-NEXT: ret; + %r = frem afn ninf float %a, %b + ret float %r +} + define double @frem_f64_ninf(double %a, double %b) { -; FAST-LABEL: frem_f64_ninf( -; FAST: { -; FAST-NEXT: .reg .b64 %rd<7>; -; FAST-EMPTY: -; FAST-NEXT: // %bb.0: -; FAST-NEXT: ld.param.b64 %rd1, [frem_f64_ninf_param_0]; -; FAST-NEXT: ld.param.b64 %rd2, [frem_f64_ninf_param_1]; -; FAST-NEXT: div.rn.f64 %rd3, %rd1, %rd2; -; FAST-NEXT: cvt.rzi.f64.f64 %rd4, %rd3; -; FAST-NEXT: neg.f64 %rd5, %rd4; -; FAST-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1; -; FAST-NEXT: st.param.b64 [func_retval0], %rd6; -; FAST-NEXT: ret; -; -; NORMAL-LABEL: frem_f64_ninf( -; NORMAL: { -; NORMAL-NEXT: .reg .b64 %rd<7>; -; NORMAL-EMPTY: -; NORMAL-NEXT: // %bb.0: -; NORMAL-NEXT: ld.param.b64 %rd1, [frem_f64_ninf_param_0]; -; NORMAL-NEXT: ld.param.b64 %rd2, [frem_f64_ninf_param_1]; -; NORMAL-NEXT: div.rn.f64 %rd3, %rd1, %rd2; -; NORMAL-NEXT: cvt.rzi.f64.f64 %rd4, %rd3; -; NORMAL-NEXT: neg.f64 %rd5, %rd4; -; NORMAL-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1; -; NORMAL-NEXT: st.param.b64 [func_retval0], %rd6; -; NORMAL-NEXT: ret; +; CHECK-LABEL: frem_f64_ninf( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [frem_f64_ninf_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [frem_f64_ninf_param_1]; +; CHECK-NEXT: div.rn.f64 %rd3, %rd1, %rd2; +; CHECK-NEXT: cvt.rzi.f64.f64 %rd4, %rd3; +; CHECK-NEXT: neg.f64 %rd5, %rd4; +; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd6; +; CHECK-NEXT: ret; %r = frem ninf double %a, %b ret double %r } +define double @frem_f64_ninf_fast(double %a, double %b) { +; CHECK-LABEL: frem_f64_ninf_fast( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [frem_f64_ninf_fast_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [frem_f64_ninf_fast_param_1]; +; CHECK-NEXT: div.rn.f64 %rd3, %rd1, %rd2; +; CHECK-NEXT: cvt.rzi.f64.f64 %rd4, %rd3; +; CHECK-NEXT: neg.f64 %rd5, %rd4; +; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd6; +; CHECK-NEXT: ret; + %r = frem afn ninf double %a, %b + ret double %r +} + define float @frem_f32_imm1_fast(float %a) { -; FAST-LABEL: frem_f32_imm1_fast( -; FAST: { -; FAST-NEXT: .reg .b32 %r<5>; -; FAST-EMPTY: -; FAST-NEXT: // %bb.0: -; FAST-NEXT: ld.param.b32 %r1, [frem_f32_imm1_fast_param_0]; -; FAST-NEXT: mul.f32 %r2, %r1, 0f3E124925; -; FAST-NEXT: cvt.rzi.f32.f32 %r3, %r2; -; FAST-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1; -; FAST-NEXT: st.param.b32 [func_retval0], %r4; -; FAST-NEXT: ret; -; -; NORMAL-LABEL: frem_f32_imm1_fast( -; NORMAL: { -; NORMAL-NEXT: .reg .b32 %r<5>; -; NORMAL-EMPTY: -; NORMAL-NEXT: // %bb.0: -; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_imm1_fast_param_0]; -; NORMAL-NEXT: mul.rn.f32 %r2, %r1, 0f3E124925; -; NORMAL-NEXT: cvt.rzi.f32.f32 %r3, %r2; -; NORMAL-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1; -; NORMAL-NEXT: st.param.b32 [func_retval0], %r4; -; NORMAL-NEXT: ret; +; CHECK-LABEL: frem_f32_imm1_fast( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_imm1_fast_param_0]; +; CHECK-NEXT: mul.rn.f32 %r2, %r1, 0f3E124925; +; CHECK-NEXT: cvt.rzi.f32.f32 %r3, %r2; +; CHECK-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NEXT: ret; %r = frem arcp float %a, 7.0 ret float %r } define float @frem_f32_imm1_normal(float %a) { -; FAST-LABEL: frem_f32_imm1_normal( -; FAST: { -; FAST-NEXT: .reg .b32 %r<5>; -; FAST-EMPTY: -; FAST-NEXT: // %bb.0: -; FAST-NEXT: ld.param.b32 %r1, [frem_f32_imm1_normal_param_0]; -; FAST-NEXT: div.approx.f32 %r2, %r1, 0f40E00000; -; FAST-NEXT: cvt.rzi.f32.f32 %r3, %r2; -; FAST-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1; -; FAST-NEXT: st.param.b32 [func_retval0], %r4; -; FAST-NEXT: ret; -; -; NORMAL-LABEL: frem_f32_imm1_normal( -; NORMAL: { -; NORMAL-NEXT: .reg .b32 %r<5>; -; NORMAL-EMPTY: -; NORMAL-NEXT: // %bb.0: -; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_imm1_normal_param_0]; -; NORMAL-NEXT: div.rn.f32 %r2, %r1, 0f40E00000; -; NORMAL-NEXT: cvt.rzi.f32.f32 %r3, %r2; -; NORMAL-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1; -; NORMAL-NEXT: st.param.b32 [func_retval0], %r4; -; NORMAL-NEXT: ret; +; CHECK-LABEL: frem_f32_imm1_normal( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_imm1_normal_param_0]; +; CHECK-NEXT: div.rn.f32 %r2, %r1, 0f40E00000; +; CHECK-NEXT: cvt.rzi.f32.f32 %r3, %r2; +; CHECK-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NEXT: ret; %r = frem float %a, 7.0 ret float %r } define float @frem_f32_imm2(float %a) { -; FAST-LABEL: frem_f32_imm2( -; FAST: { -; FAST-NEXT: .reg .b32 %r<7>; -; FAST-EMPTY: -; FAST-NEXT: // %bb.0: -; FAST-NEXT: ld.param.b32 %r1, [frem_f32_imm2_param_0]; -; FAST-NEXT: mov.b32 %r2, 0f40E00000; -; FAST-NEXT: div.approx.f32 %r3, %r2, %r1; -; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3; -; FAST-NEXT: neg.f32 %r5, %r4; -; FAST-NEXT: fma.rn.f32 %r6, %r5, %r1, 0f40E00000; -; FAST-NEXT: st.param.b32 [func_retval0], %r6; -; FAST-NEXT: ret; -; -; NORMAL-LABEL: frem_f32_imm2( -; NORMAL: { -; NORMAL-NEXT: .reg .pred %p<2>; -; NORMAL-NEXT: .reg .b32 %r<8>; -; NORMAL-EMPTY: -; NORMAL-NEXT: // %bb.0: -; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_imm2_param_0]; -; NORMAL-NEXT: mov.b32 %r2, 0f40E00000; -; NORMAL-NEXT: div.rn.f32 %r3, %r2, %r1; -; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3; -; NORMAL-NEXT: neg.f32 %r5, %r4; -; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r1, 0f40E00000; -; NORMAL-NEXT: testp.infinite.f32 %p1, %r1; -; NORMAL-NEXT: selp.f32 %r7, 0f40E00000, %r6, %p1; -; NORMAL-NEXT: st.param.b32 [func_retval0], %r7; -; NORMAL-NEXT: ret; +; CHECK-LABEL: frem_f32_imm2( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_imm2_param_0]; +; CHECK-NEXT: mov.b32 %r2, 0f40E00000; +; CHECK-NEXT: div.rn.f32 %r3, %r2, %r1; +; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3; +; CHECK-NEXT: neg.f32 %r5, %r4; +; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, 0f40E00000; +; CHECK-NEXT: testp.infinite.f32 %p1, %r1; +; CHECK-NEXT: selp.f32 %r7, 0f40E00000, %r6, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r7; +; CHECK-NEXT: ret; %r = frem float 7.0, %a ret float %r } + +define float @frem_f32_imm2_fast(float %a) { +; CHECK-LABEL: frem_f32_imm2_fast( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_imm2_fast_param_0]; +; CHECK-NEXT: mov.b32 %r2, 0f40E00000; +; CHECK-NEXT: div.approx.f32 %r3, %r2, %r1; +; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3; +; CHECK-NEXT: neg.f32 %r5, %r4; +; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, 0f40E00000; +; CHECK-NEXT: st.param.b32 [func_retval0], %r6; +; CHECK-NEXT: ret; + %r = frem afn ninf float 7.0, %a + ret float %r +} diff --git a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll index 3989c8e32e458..7e4e701af4cd1 100644 --- a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll +++ b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll @@ -13,7 +13,7 @@ declare double @llvm.sqrt.f64(double) ; -- reciprocal sqrt -- -define float @test_rsqrt32(float %a) #0 { +define float @test_rsqrt32(float %a) { ; CHECK-LABEL: test_rsqrt32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -28,7 +28,7 @@ define float @test_rsqrt32(float %a) #0 { ret float %ret } -define float @test_rsqrt_ftz(float %a) #0 #1 { +define float @test_rsqrt_ftz(float %a) #1 { ; CHECK-LABEL: test_rsqrt_ftz( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -76,7 +76,7 @@ define double @test_rsqrt64_ftz(double %a) #1 { ; -- sqrt -- -define float @test_sqrt32(float %a) #0 { +define float @test_sqrt32(float %a) { ; CHECK-LABEL: test_sqrt32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -90,7 +90,7 @@ define float @test_sqrt32(float %a) #0 { ret float %ret } -define float @test_sqrt32_ninf(float %a) #0 { +define float @test_sqrt32_ninf(float %a) { ; CHECK-LABEL: test_sqrt32_ninf( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -108,7 +108,7 @@ define float @test_sqrt32_ninf(float %a) #0 { ret float %ret } -define float @test_sqrt_ftz(float %a) #0 #1 { +define float @test_sqrt_ftz(float %a) #1 { ; CHECK-LABEL: test_sqrt_ftz( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -122,7 +122,7 @@ define float @test_sqrt_ftz(float %a) #0 #1 { ret float %ret } -define float @test_sqrt_ftz_ninf(float %a) #0 #1 { +define float @test_sqrt_ftz_ninf(float %a) #1 { ; CHECK-LABEL: test_sqrt_ftz_ninf( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -139,7 +139,7 @@ define float @test_sqrt_ftz_ninf(float %a) #0 #1 { ret float %ret } -define double @test_sqrt64(double %a) #0 { +define double @test_sqrt64(double %a) { ; CHECK-LABEL: test_sqrt64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<3>; @@ -156,7 +156,7 @@ define double @test_sqrt64(double %a) #0 { ; There's no sqrt.approx.f64 instruction; we emit ; reciprocal(rsqrt.approx.f64(x)). There's no non-ftz approximate reciprocal, ; so we just use the ftz version. -define double @test_sqrt64_ninf(double %a) #0 { +define double @test_sqrt64_ninf(double %a) { ; CHECK-LABEL: test_sqrt64_ninf( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -175,7 +175,7 @@ define double @test_sqrt64_ninf(double %a) #0 { ret double %ret } -define double @test_sqrt64_ftz(double %a) #0 #1 { +define double @test_sqrt64_ftz(double %a) #1 { ; CHECK-LABEL: test_sqrt64_ftz( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<3>; @@ -190,7 +190,7 @@ define double @test_sqrt64_ftz(double %a) #0 #1 { } ; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version. -define double @test_sqrt64_ftz_ninf(double %a) #0 #1 { +define double @test_sqrt64_ftz_ninf(double %a) #1 { ; CHECK-LABEL: test_sqrt64_ftz_ninf( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -214,7 +214,7 @@ define double @test_sqrt64_ftz_ninf(double %a) #0 #1 { ; The sqrt and rsqrt refinement algorithms both emit an rsqrt.approx, followed ; by some math. -define float @test_rsqrt32_refined(float %a) #0 #2 { +define float @test_rsqrt32_refined(float %a) #2 { ; CHECK-LABEL: test_rsqrt32_refined( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<7>; @@ -229,11 +229,11 @@ define float @test_rsqrt32_refined(float %a) #0 #2 { ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NEXT: ret; %val = tail call float @llvm.sqrt.f32(float %a) - %ret = fdiv arcp float 1.0, %val + %ret = fdiv arcp contract float 1.0, %val ret float %ret } -define float @test_sqrt32_refined(float %a) #0 #2 { +define float @test_sqrt32_refined(float %a) #2 { ; CHECK-LABEL: test_sqrt32_refined( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -247,7 +247,7 @@ define float @test_sqrt32_refined(float %a) #0 #2 { ret float %ret } -define float @test_sqrt32_refined_ninf(float %a) #0 #2 { +define float @test_sqrt32_refined_ninf(float %a) #2 { ; CHECK-LABEL: test_sqrt32_refined_ninf( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -265,11 +265,11 @@ define float @test_sqrt32_refined_ninf(float %a) #0 #2 { ; CHECK-NEXT: selp.f32 %r8, 0f00000000, %r6, %p1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r8; ; CHECK-NEXT: ret; - %ret = tail call ninf afn float @llvm.sqrt.f32(float %a) + %ret = tail call ninf afn contract float @llvm.sqrt.f32(float %a) ret float %ret } -define double @test_rsqrt64_refined(double %a) #0 #2 { +define double @test_rsqrt64_refined(double %a) #2 { ; CHECK-LABEL: test_rsqrt64_refined( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<7>; @@ -284,11 +284,11 @@ define double @test_rsqrt64_refined(double %a) #0 #2 { ; CHECK-NEXT: st.param.b64 [func_retval0], %rd6; ; CHECK-NEXT: ret; %val = tail call double @llvm.sqrt.f64(double %a) - %ret = fdiv arcp double 1.0, %val + %ret = fdiv arcp contract double 1.0, %val ret double %ret } -define double @test_sqrt64_refined(double %a) #0 #2 { +define double @test_sqrt64_refined(double %a) #2 { ; CHECK-LABEL: test_sqrt64_refined( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<3>; @@ -302,7 +302,7 @@ define double @test_sqrt64_refined(double %a) #0 #2 { ret double %ret } -define double @test_sqrt64_refined_ninf(double %a) #0 #2 { +define double @test_sqrt64_refined_ninf(double %a) #2 { ; CHECK-LABEL: test_sqrt64_refined_ninf( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -320,13 +320,13 @@ define double @test_sqrt64_refined_ninf(double %a) #0 #2 { ; CHECK-NEXT: selp.f64 %rd8, 0d0000000000000000, %rd6, %p1; ; CHECK-NEXT: st.param.b64 [func_retval0], %rd8; ; CHECK-NEXT: ret; - %ret = tail call ninf afn double @llvm.sqrt.f64(double %a) + %ret = tail call ninf afn contract double @llvm.sqrt.f64(double %a) ret double %ret } ; -- refined sqrt and rsqrt with ftz enabled -- -define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 { +define float @test_rsqrt32_refined_ftz(float %a) #1 #2 { ; CHECK-LABEL: test_rsqrt32_refined_ftz( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<7>; @@ -341,11 +341,11 @@ define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 { ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NEXT: ret; %val = tail call float @llvm.sqrt.f32(float %a) - %ret = fdiv arcp float 1.0, %val + %ret = fdiv arcp contract float 1.0, %val ret float %ret } -define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 { +define float @test_sqrt32_refined_ftz(float %a) #1 #2 { ; CHECK-LABEL: test_sqrt32_refined_ftz( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -359,7 +359,7 @@ define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 { ret float %ret } -define float @test_sqrt32_refined_ftz_ninf(float %a) #0 #1 #2 { +define float @test_sqrt32_refined_ftz_ninf(float %a) #1 #2 { ; CHECK-LABEL: test_sqrt32_refined_ftz_ninf( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -376,12 +376,12 @@ define float @test_sqrt32_refined_ftz_ninf(float %a) #0 #1 #2 { ; CHECK-NEXT: selp.f32 %r7, 0f00000000, %r6, %p1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r7; ; CHECK-NEXT: ret; - %ret = tail call ninf afn float @llvm.sqrt.f32(float %a) + %ret = tail call ninf afn contract float @llvm.sqrt.f32(float %a) ret float %ret } ; There's no rsqrt.approx.ftz.f64, so we just use the non-ftz version. -define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 { +define double @test_rsqrt64_refined_ftz(double %a) #1 #2 { ; CHECK-LABEL: test_rsqrt64_refined_ftz( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<7>; @@ -396,11 +396,11 @@ define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 { ; CHECK-NEXT: st.param.b64 [func_retval0], %rd6; ; CHECK-NEXT: ret; %val = tail call double @llvm.sqrt.f64(double %a) - %ret = fdiv arcp double 1.0, %val + %ret = fdiv arcp contract double 1.0, %val ret double %ret } -define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 { +define double @test_sqrt64_refined_ftz(double %a) #1 #2 { ; CHECK-LABEL: test_sqrt64_refined_ftz( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<3>; @@ -414,7 +414,7 @@ define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 { ret double %ret } -define double @test_sqrt64_refined_ftz_ninf(double %a) #0 #1 #2 { +define double @test_sqrt64_refined_ftz_ninf(double %a) #1 #2 { ; CHECK-LABEL: test_sqrt64_refined_ftz_ninf( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; @@ -432,10 +432,9 @@ define double @test_sqrt64_refined_ftz_ninf(double %a) #0 #1 #2 { ; CHECK-NEXT: selp.f64 %rd8, 0d0000000000000000, %rd6, %p1; ; CHECK-NEXT: st.param.b64 [func_retval0], %rd8; ; CHECK-NEXT: ret; - %ret = tail call ninf afn double @llvm.sqrt.f64(double %a) + %ret = tail call ninf afn contract double @llvm.sqrt.f64(double %a) ret double %ret } -attributes #0 = { "unsafe-fp-math" = "true" } attributes #1 = { "denormal-fp-math-f32" = "preserve-sign,preserve-sign" } attributes #2 = { "reciprocal-estimates" = "rsqrtf:1,rsqrtd:1,sqrtf:1,sqrtd:1" }