10 files changed, 151 insertions, 74 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 13e0729604f..735b022e309 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -25,6 +25,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/DataTypes.h"
@@ -518,11 +519,11 @@ public:
 
   /// \returns The cost of Intrinsic instructions. Types analysis only.
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Type *> Tys) const;
+                            ArrayRef<Type *> Tys, FastMathFlags FMF) const;
 
   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Value *> Args) const;
+                            ArrayRef<Value *> Args, FastMathFlags FMF) const;
 
   /// \returns The cost of Call instructions.
   int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
@@ -664,9 +665,11 @@ public:
   virtual int getReductionCost(unsigned Opcode, Type *Ty,
                                bool IsPairwiseForm) = 0;
   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                    ArrayRef<Type *> Tys) = 0;
+                                    ArrayRef<Type *> Tys,
+                                    FastMathFlags FMF) = 0;
   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                    ArrayRef<Value *> Args) = 0;
+                                    ArrayRef<Value *> Args,
+                                    FastMathFlags FMF) = 0;
   virtual int getCallInstrCost(Function *F, Type *RetTy,
                                ArrayRef<Type *> Tys) = 0;
   virtual unsigned getNumberOfParts(Type *Tp) = 0;
@@ -861,13 +864,14 @@ public:
                        bool IsPairwiseForm) override {
     return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
   }
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Type *> Tys) override {
-    return Impl.getIntrinsicInstrCost(ID, RetTy, Tys);
+  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
+                            FastMathFlags FMF) override {
+    return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
   }
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Value *> Args) override {
-    return Impl.getIntrinsicInstrCost(ID, RetTy, Args);
+                            ArrayRef<Value *> Args,
+                            FastMathFlags FMF) override {
+    return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF);
   }
   int getCallInstrCost(Function *F, Type *RetTy,
                        ArrayRef<Type *> Tys) override {
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 39eba4d6e58..487de0892ff 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -324,11 +324,11 @@ public:
   }
 
   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                 ArrayRef<Type *> Tys) {
+                                 ArrayRef<Type *> Tys, FastMathFlags FMF) {
     return 1;
   }
   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                 ArrayRef<Value *> Args) {
+                                 ArrayRef<Value *> Args, FastMathFlags FMF) {
     return 1;
   }
 
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 476f0d5fb53..cfe6a25f915 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -587,13 +587,14 @@ public:
 
   /// Get intrinsic cost based on arguments  
   unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                                 ArrayRef<Value *> Args) {
+                                 ArrayRef<Value *> Args, FastMathFlags FMF) {
     switch (IID) {
     default: {
       SmallVector<Type *, 4> Types;
       for (Value *Op : Args)
         Types.push_back(Op->getType());
-      return static_cast<T *>(this)->getIntrinsicInstrCost(IID, RetTy, Types);
+      return static_cast<T *>(this)->getIntrinsicInstrCost(IID, RetTy, Types,
+                                                           FMF);
     }
     case Intrinsic::masked_scatter: {
       Value *Mask = Args[3];
@@ -619,8 +620,8 @@ public:
   
   /// Get intrinsic cost based on argument types
   unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                                 ArrayRef<Type *> Tys) {
-    unsigned ISD = 0;
+                                 ArrayRef<Type *> Tys, FastMathFlags FMF) {
+    SmallVector<unsigned, 2> ISDs;
     unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
     switch (IID) {
     default: {
@@ -647,74 +648,78 @@ public:
         return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
 
       unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
-          IID, ScalarRetTy, ScalarTys);
+          IID, ScalarRetTy, ScalarTys, FMF);
 
       return ScalarCalls * ScalarCost + ScalarizationCost;
     }
     // Look for intrinsics that can be lowered directly or turned into a scalar
     // intrinsic call.
     case Intrinsic::sqrt:
-      ISD = ISD::FSQRT;
+      ISDs.push_back(ISD::FSQRT);
       break;
     case Intrinsic::sin:
-      ISD = ISD::FSIN;
+      ISDs.push_back(ISD::FSIN);
       break;
     case Intrinsic::cos:
-      ISD = ISD::FCOS;
+      ISDs.push_back(ISD::FCOS);
       break;
     case Intrinsic::exp:
-      ISD = ISD::FEXP;
+      ISDs.push_back(ISD::FEXP);
       break;
     case Intrinsic::exp2:
-      ISD = ISD::FEXP2;
+      ISDs.push_back(ISD::FEXP2);
       break;
     case Intrinsic::log:
-      ISD = ISD::FLOG;
+      ISDs.push_back(ISD::FLOG);
       break;
     case Intrinsic::log10:
-      ISD = ISD::FLOG10;
+      ISDs.push_back(ISD::FLOG10);
       break;
     case Intrinsic::log2:
-      ISD = ISD::FLOG2;
+      ISDs.push_back(ISD::FLOG2);
       break;
     case Intrinsic::fabs:
-      ISD = ISD::FABS;
+      ISDs.push_back(ISD::FABS);
       break;
     case Intrinsic::minnum:
-      ISD = ISD::FMINNUM;
+      ISDs.push_back(ISD::FMINNUM);
+      if (FMF.noNaNs())
+        ISDs.push_back(ISD::FMINNAN);
       break;
     case Intrinsic::maxnum:
-      ISD = ISD::FMAXNUM;
+      ISDs.push_back(ISD::FMAXNUM);
+      if (FMF.noNaNs())
+        ISDs.push_back(ISD::FMAXNAN);
       break;
     case Intrinsic::copysign:
-      ISD = ISD::FCOPYSIGN;
+      ISDs.push_back(ISD::FCOPYSIGN);
       break;
     case Intrinsic::floor:
-      ISD = ISD::FFLOOR;
+      ISDs.push_back(ISD::FFLOOR);
       break;
     case Intrinsic::ceil:
-      ISD = ISD::FCEIL;
+      ISDs.push_back(ISD::FCEIL);
       break;
     case Intrinsic::trunc:
-      ISD = ISD::FTRUNC;
+      ISDs.push_back(ISD::FTRUNC);
       break;
     case Intrinsic::nearbyint:
-      ISD = ISD::FNEARBYINT;
+      ISDs.push_back(ISD::FNEARBYINT);
       break;
     case Intrinsic::rint:
-      ISD = ISD::FRINT;
+      ISDs.push_back(ISD::FRINT);
       break;
     case Intrinsic::round:
-      ISD = ISD::FROUND;
+      ISDs.push_back(ISD::FROUND);
       break;
     case Intrinsic::pow:
-      ISD = ISD::FPOW;
+      ISDs.push_back(ISD::FPOW);
       break;
     case Intrinsic::fma:
-      ISD = ISD::FMA;
+      ISDs.push_back(ISD::FMA);
       break;
     case Intrinsic::fmuladd:
-      ISD = ISD::FMA;
+      ISDs.push_back(ISD::FMA);
       break;
     // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
     case Intrinsic::lifetime_start:
@@ -727,7 +732,7 @@ public:
       return static_cast<T *>(this)
           ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
     case Intrinsic::ctpop:
-      ISD = ISD::CTPOP;
+      ISDs.push_back(ISD::CTPOP);
       // In case of legalization use TCC_Expensive. This is cheaper than a
       // library call but still not a cheap instruction.
       SingleCallCost = TargetTransformInfo::TCC_Expensive;
@@ -738,26 +743,36 @@ public:
     const TargetLoweringBase *TLI = getTLI();
     std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
 
-    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
-      if (IID == Intrinsic::fabs &&
-          TLI->isFAbsFree(LT.second)) {
-        return 0;
-      }
+    SmallVector<unsigned, 2> LegalCost;
+    SmallVector<unsigned, 2> CustomCost;
+    for (unsigned ISD : ISDs) {
+      if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+        if (IID == Intrinsic::fabs && TLI->isFAbsFree(LT.second)) {
+          return 0;
+        }
 
-      // The operation is legal. Assume it costs 1.
-      // If the type is split to multiple registers, assume that there is some
-      // overhead to this.
-      // TODO: Once we have extract/insert subvector cost we need to use them.
-      if (LT.first > 1)
-        return LT.first * 2;
-      return LT.first * 1;
+        // The operation is legal. Assume it costs 1.
+        // If the type is split to multiple registers, assume that there is some
+        // overhead to this.
+        // TODO: Once we have extract/insert subvector cost we need to use them.
+        if (LT.first > 1)
+          LegalCost.push_back(LT.first * 2);
+        else
+          LegalCost.push_back(LT.first * 1);
+      } else if (!TLI->isOperationExpand(ISD, LT.second)) {
+        // If the operation is custom lowered then assume
+        // that the code is twice as expensive.
+        CustomCost.push_back(LT.first * 2);
+      }
     }
 
-    if (!TLI->isOperationExpand(ISD, LT.second)) {
-      // If the operation is custom lowered then assume
-      // thare the code is twice as expensive.
-      return LT.first * 2;
-    }
+    auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
+    if (MinLegalCostI != LegalCost.end())
+      return *MinLegalCostI;
+
+    auto MinCustomCostI = std::min_element(CustomCost.begin(), CustomCost.end());
+    if (MinCustomCostI != CustomCost.end())
+      return *MinCustomCostI;
 
     // If we can't lower fmuladd into an FMA estimate the cost as a floating
     // point mul followed by an add.
@@ -781,7 +796,7 @@ public:
         ScalarTys.push_back(Ty);
       }
       unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
-          IID, RetTy->getScalarType(), ScalarTys);
+          IID, RetTy->getScalarType(), ScalarTys, FMF);
       for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
         if (Tys[i]->isVectorTy()) {
           ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp
index 0383cbfbbe4..36a1db664e1 100644
--- a/llvm/lib/Analysis/CostModel.cpp
+++ b/llvm/lib/Analysis/CostModel.cpp
@@ -504,8 +504,12 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
       for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
         Args.push_back(II->getArgOperand(J));
 
+      FastMathFlags FMF;
+      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+        FMF = FPMO->getFastMathFlags();
+
       return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
-                                        Args);
+                                        Args, FMF);
     }
     return -1;
   default:
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index b64d4133420..48e441bac69 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -315,15 +315,17 @@ int TargetTransformInfo::getInterleavedMemoryOpCost(
 }
 
 int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                               ArrayRef<Type *> Tys) const {
-  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys);
+                                               ArrayRef<Type *> Tys,
+                                               FastMathFlags FMF) const {
+  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 
 int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                               ArrayRef<Value *> Args) const {
-  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args);
+                                               ArrayRef<Value *> Args,
+                                               FastMathFlags FMF) const {
+  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3a4a3301377..ed193198667 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5143,18 +5143,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                              getValue(I.getArgOperand(0))));
     return nullptr;
   }
-  case Intrinsic::minnum:
-    setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
-                             getValue(I.getArgOperand(0)).getValueType(),
+  case Intrinsic::minnum: {
+    auto VT = getValue(I.getArgOperand(0)).getValueType();
+    unsigned Opc =
+        I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)
+            ? ISD::FMINNAN
+            : ISD::FMINNUM;
+    setValue(&I, DAG.getNode(Opc, sdl, VT,
                              getValue(I.getArgOperand(0)),
                              getValue(I.getArgOperand(1))));
     return nullptr;
-  case Intrinsic::maxnum:
-    setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
-                             getValue(I.getArgOperand(0)).getValueType(),
+  }
+  case Intrinsic::maxnum: {
+    auto VT = getValue(I.getArgOperand(0)).getValueType();
+    unsigned Opc =
+        I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)
+            ? ISD::FMAXNAN
+            : ISD::FMAXNUM;
+    setValue(&I, DAG.getNode(Opc, sdl, VT,
                              getValue(I.getArgOperand(0)),
                              getValue(I.getArgOperand(1))));
     return nullptr;
+  }
   case Intrinsic::copysign:
     setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
                              getValue(I.getArgOperand(0)).getValueType(),
diff --git a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index 5d3af4590d8..b43951d3f40 100644
--- a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -1117,16 +1117,25 @@ namespace {
       }
 
       if (IID && TTI) {
+        FastMathFlags FMFCI;
+        if (auto *FPMOCI = dyn_cast<FPMathOperator>(CI))
+          FMFCI = FPMOCI->getFastMathFlags();
+
         SmallVector<Type*, 4> Tys;
         for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
           Tys.push_back(CI->getArgOperand(i)->getType());
-        unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys);
+        unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys, FMFCI);
 
         Tys.clear();
         CallInst *CJ = cast<CallInst>(J);
+
+        FastMathFlags FMFCJ;
+        if (auto *FPMOCJ = dyn_cast<FPMathOperator>(CJ))
+          FMFCJ = FPMOCJ->getFastMathFlags();
+
         for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i)
           Tys.push_back(CJ->getArgOperand(i)->getType());
-        unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys);
+        unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys, FMFCJ);
 
         Tys.clear();
         assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
@@ -1140,8 +1149,10 @@ namespace {
                                             CJ->getArgOperand(i)->getType()));
         }
 
+        FastMathFlags FMFV = FMFCI;
+        FMFV &= FMFCJ;
         Type *RetTy = getVecTypeForPair(IT1, JT1);
-        unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys);
+        unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV);
 
         if (VCost > ICost + JCost)
           return false;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 72e96cdf46d..2c5fec64c18 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3302,7 +3302,11 @@ static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
   for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
     Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
 
-  return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
+  FastMathFlags FMF;
+  if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
+    FMF = FPMO->getFastMathFlags();
+
+  return TTI.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
 }
 
 static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
@@ -4269,7 +4273,13 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
           }
         }
         assert(VectorF && "Can't create vector function.");
-        Entry[Part] = Builder.CreateCall(VectorF, Args);
+
+        CallInst *V = Builder.CreateCall(VectorF, Args);
+
+        if (isa<FPMathOperator>(V))
+          V->copyFastMathFlags(CI);
+
+        Entry[Part] = V;
       }
 
       addMetadata(Entry, &*it);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0254ef47225..fc060f19eb6 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1659,10 +1659,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
                                          VecTy->getNumElements()));
       }
 
+      FastMathFlags FMF;
+      if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
+        FMF = FPMO->getFastMathFlags();
+
       int ScalarCallCost = VecTy->getNumElements() *
-          TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys);
+          TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
 
-      int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys);
+      int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys, FMF);
 
       DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
             << " (" << VecCallCost  << "-" <<  ScalarCallCost << ")"
diff --git a/llvm/test/CodeGen/ARM/vminmax.ll b/llvm/test/CodeGen/ARM/vminmax.ll
index 011bfd7ff88..e4f30faa917 100644
--- a/llvm/test/CodeGen/ARM/vminmax.ll
+++ b/llvm/test/CodeGen/ARM/vminmax.ll
@@ -291,3 +291,20 @@ declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind read
 declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 
 declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare float @llvm.maxnum.f32(float %a, float %b)
+declare float @llvm.minnum.f32(float %a, float %b)
+
+define float @maxnum(float %a, float %b) {
+;CHECK-LABEL: maxnum:
+;CHECK: vmax.f32
+  %r = call nnan float @llvm.maxnum.f32(float %a, float %b)
+  ret float %r
+}
+
+define float @minnum(float %a, float %b) {
+;CHECK-LABEL: minnum:
+;CHECK: vmin.f32
+  %r = call nnan float @llvm.minnum.f32(float %a, float %b)
+  ret float %r
+}