diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d3df43473013e..9d1a6fe8c01c0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19279,13 +19279,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // MachineBasicBlock CFG, which is awkward. // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal - // on the target. + // on the target, also copy fast math flags. if (N1.getOpcode() == ISD::SETCC && TLI.isOperationLegalOrCustom(ISD::BR_CC, N1.getOperand(0).getValueType())) { - return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, - Chain, N1.getOperand(2), - N1.getOperand(0), N1.getOperand(1), N2); + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain, + N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2, + N1->getFlags()); } if (N1.hasOneUse()) { diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 1c42f44765abf..0666b9e984af9 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -629,6 +629,21 @@ static bool checkDenormalAttributeConsistency(const Module &M, }); } +// Returns true if any function's denormal mode differs from the first's.
+static bool checkDenormalAttributeInconsistency(const Module &M) { + if (M.functions().empty()) + return false; + DenormalMode Value = + parseDenormalFPAttribute(M.functions() + .begin() + ->getFnAttribute("denormal-fp-math") + .getValueAsString()); + return any_of(M, [&](const Function &F) { + StringRef AttrVal = F.getFnAttribute("denormal-fp-math").getValueAsString(); + return parseDenormalFPAttribute(AttrVal) != Value; + }); +} + void ARMAsmPrinter::emitAttributes() { MCTargetStreamer &TS = *OutStreamer->getTargetStreamer(); ARMTargetStreamer &ATS = static_cast(TS); @@ -695,7 +710,9 @@ void ARMAsmPrinter::emitAttributes() { DenormalMode::getPositiveZero())) ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::PositiveZero); - else if (!TM.Options.UnsafeFPMath) + else if (checkDenormalAttributeInconsistency(*MMI->getModule()) || + checkDenormalAttributeConsistency( + *MMI->getModule(), "denormal-fp-math", DenormalMode::getIEEE())) ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::IEEEDenormals); else { @@ -730,7 +747,7 @@ void ARMAsmPrinter::emitAttributes() { TM.Options.NoTrappingFPMath) ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Not_Allowed); - else if (!TM.Options.UnsafeFPMath) { + else { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Allowed); // If the user has permitted this code to choose the IEEE 754 diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 066b392213fa9..c9a8eab7b63cf 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -5736,7 +5736,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, llvm_unreachable("Unknown VFP cmp argument!"); } -/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some +/// OptimizeVFPBrcond - With nnan, it's legal to optimize some /// f32 and even f64 comparisons to integer ones. 
SDValue ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { @@ -5878,9 +5878,9 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp); } - if (getTargetMachine().Options.UnsafeFPMath && - (CC == ISD::SETEQ || CC == ISD::SETOEQ || - CC == ISD::SETNE || CC == ISD::SETUNE)) { + if (SDNodeFlags Flags = Op->getFlags(); + Flags.hasNoNaNs() && (CC == ISD::SETEQ || CC == ISD::SETOEQ || + CC == ISD::SETNE || CC == ISD::SETUNE)) { if (SDValue Result = OptimizeVFPBrcond(Op, DAG)) return Result; } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 9f600e0c685ab..3329beab63ddf 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -88,18 +88,16 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU, ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle, - bool MinSize) + bool MinSize, DenormalMode DM) : ARMGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), UseMulOps(UseFusedMulOps), CPUString(CPU), OptMinSize(MinSize), - IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM), + IsLittle(IsLittle), DM(DM), TargetTriple(TT), Options(TM.Options), TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so // we can query directly. - InstrInfo(isThumb1Only() - ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) - : !isThumb() - ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this) - : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), + InstrInfo(isThumb1Only() ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) + : !isThumb() ? 
(ARMBaseInstrInfo *)new ARMInstrInfo(*this) + : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), TLInfo(TM, *this) { CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering())); @@ -224,7 +222,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. const FeatureBitset &Bits = getFeatureBits(); if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters - (Options.UnsafeFPMath || isTargetDarwin())) + (isTargetDarwin() || DM == DenormalMode::getPreserveSign())) HasNEONForFP = true; if (isRWPI()) diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 637eb4560e0f1..2395e2dc9bded 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -186,6 +186,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo { /// IsLittle - The target is Little Endian bool IsLittle; + /// DM - Denormal mode + DenormalMode DM; + /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; @@ -206,7 +209,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { /// ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle, - bool MinSize = false); + bool MinSize = false, DenormalMode DM = DenormalMode::getIEEE()); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. 
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index e8d0d35080775..71928b5e59e20 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -285,6 +285,11 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { if (F.hasMinSize()) Key += "+minsize"; + DenormalMode DM = parseDenormalFPAttribute( + F.getFnAttribute("denormal-fp-math").getValueAsString()); + if (DM != DenormalMode::getIEEE()) + Key += "denormal-fp-math=" + DM.str(); + auto &I = SubtargetMap[Key]; if (!I) { // This needs to be done before we create a new subtarget since any @@ -292,7 +297,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { // function that reside in TargetOptions. resetTargetOptions(F); I = std::make_unique(TargetTriple, CPU, FS, *this, isLittle, - F.hasMinSize()); + F.hasMinSize(), DM); if (!I->isThumb() && !I->hasARMOps()) F.getContext().emitError("Function '" + F.getName() + "' uses ARM " diff --git a/llvm/test/CodeGen/ARM/fadds.ll b/llvm/test/CodeGen/ARM/fadds.ll index b5d3bdae1f9d3..191d5b3c13d26 100644 --- a/llvm/test/CodeGen/ARM/fadds.ll +++ b/llvm/test/CodeGen/ARM/fadds.ll @@ -7,7 +7,7 @@ ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \ ; RUN: | FileCheck %s -check-prefix=CORTEXA8 -; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \ +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --denormal-fp-math=preserve-sign %s -o - \ ; RUN: | FileCheck %s -check-prefix=CORTEXA8U ; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \ diff --git a/llvm/test/CodeGen/ARM/fmuls.ll b/llvm/test/CodeGen/ARM/fmuls.ll index b24d867a7e866..a390a242e5918 100644 --- a/llvm/test/CodeGen/ARM/fmuls.ll +++ b/llvm/test/CodeGen/ARM/fmuls.ll @@ -7,7 +7,7 @@ ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \ ; RUN: | FileCheck %s -check-prefix=CORTEXA8 -; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \ +; RUN: llc 
-mtriple=arm-eabi -mcpu=cortex-a8 --denormal-fp-math=preserve-sign %s -o - \ ; RUN: | FileCheck %s -check-prefix=CORTEXA8U ; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \ diff --git a/llvm/test/CodeGen/ARM/fnegs.ll b/llvm/test/CodeGen/ARM/fnegs.ll index 435a600822e4d..94b4d38b2a414 100644 --- a/llvm/test/CodeGen/ARM/fnegs.ll +++ b/llvm/test/CodeGen/ARM/fnegs.ll @@ -10,7 +10,7 @@ ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \ ; RUN: | FileCheck %s -check-prefix=CORTEXA8 -; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \ +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --denormal-fp-math=preserve-sign %s -o - \ ; RUN: | FileCheck %s -check-prefix=CORTEXA8U ; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \ diff --git a/llvm/test/CodeGen/ARM/fnmscs.ll b/llvm/test/CodeGen/ARM/fnmscs.ll index 0fa878c0c2f49..65720ccba3b59 100644 --- a/llvm/test/CodeGen/ARM/fnmscs.ll +++ b/llvm/test/CodeGen/ARM/fnmscs.ll @@ -13,7 +13,7 @@ ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic %s -o - \ ; RUN: | FileCheck %s -check-prefix=A8 -; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \ +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \ ; RUN: | FileCheck %s -check-prefix=A8U ; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \ diff --git a/llvm/test/CodeGen/ARM/fp_convert.ll b/llvm/test/CodeGen/ARM/fp_convert.ll index 6f4707573fb50..0b749bf1c7ad4 100644 --- a/llvm/test/CodeGen/ARM/fp_convert.ll +++ b/llvm/test/CodeGen/ARM/fp_convert.ll @@ -7,7 +7,7 @@ ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \ ; RUN: | FileCheck %s -check-prefix=VFP2 -; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \ +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --denormal-fp-math=preserve-sign %s -o - \ ; RUN: | FileCheck %s -check-prefix=NEON ; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \ diff --git a/llvm/test/CodeGen/ARM/fpcmp-opt.ll 
b/llvm/test/CodeGen/ARM/fpcmp-opt.ll index 447e470b2363a..a40fd4244af17 100644 --- a/llvm/test/CodeGen/ARM/fpcmp-opt.ll +++ b/llvm/test/CodeGen/ARM/fpcmp-opt.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math %s -o - \ +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -mattr=+vfp2 %s -o - \ ; RUN: | FileCheck %s ; rdar://7461510 @@ -42,7 +42,7 @@ entry: ; CHECK-NOT: vmrs ; CHECK: bne %0 = load double, ptr %a - %1 = fcmp oeq double %0, 0.000000e+00 + %1 = fcmp nnan oeq double %0, 0.000000e+00 br i1 %1, label %bb1, label %bb2 bb1: @@ -65,7 +65,7 @@ entry: ; CHECK-NOT: vmrs ; CHECK: bne %0 = load float, ptr %a - %1 = fcmp oeq float %0, 0.000000e+00 + %1 = fcmp nnan oeq float %0, 0.000000e+00 br i1 %1, label %bb1, label %bb2 bb1: diff --git a/llvm/test/CodeGen/ARM/fsubs.ll b/llvm/test/CodeGen/ARM/fsubs.ll index baff34ab31fcf..7170f04ea0dd3 100644 --- a/llvm/test/CodeGen/ARM/fsubs.ll +++ b/llvm/test/CodeGen/ARM/fsubs.ll @@ -4,7 +4,7 @@ ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \ ; RUN: | FileCheck %s -check-prefix=NFP1 -; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \ +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --denormal-fp-math=preserve-sign %s -o - \ ; RUN: | FileCheck %s -check-prefix=NFP1U ; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \ diff --git a/llvm/test/CodeGen/ARM/neon-spfp.ll b/llvm/test/CodeGen/ARM/neon-spfp.ll index cbf25965a2fac..bb6d47b908341 100644 --- a/llvm/test/CodeGen/ARM/neon-spfp.ll +++ b/llvm/test/CodeGen/ARM/neon-spfp.ll @@ -4,11 +4,11 @@ ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=CHECK-LINUXA15 ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=CHECK-LINUXSWIFT -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA5 -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf 
-mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA8 -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA9 -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA15 -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFESWIFT +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --denormal-fp-math=preserve-sign | FileCheck %s -check-prefix=CHECK-UNSAFEA5 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --denormal-fp-math=preserve-sign | FileCheck %s -check-prefix=CHECK-UNSAFEA8 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --denormal-fp-math=preserve-sign | FileCheck %s -check-prefix=CHECK-UNSAFEA9 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --denormal-fp-math=preserve-sign | FileCheck %s -check-prefix=CHECK-UNSAFEA15 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --denormal-fp-math=preserve-sign | FileCheck %s -check-prefix=CHECK-UNSAFESWIFT ; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=CHECK-DARWINA5 ; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-DARWINA8