[CodeGen] Teach LLVM how to lower @llvm.{min,max}num to {MIN,MAX}NAN

The behavior of {MIN,MAX}NAN differs from that of {MIN,MAX}NUM when only one of the inputs is NaN: -NUM returns the non-NaN argument while -NAN returns NaN. It is desirable to lower @llvm.{min,max}num to -NAN on targets that don't have a native instruction for -NUM. Notably, ARMv7 NEON's vmin has the -NAN semantics. N.B. Of course, it is only safe to do this if the intrinsic call is marked nnan. llvm-svn: 266279
author: David Majnemer <david.majnemer@gmail.com> 2016-04-14 07:13:24 +0000
committer: David Majnemer <david.majnemer@gmail.com> 2016-04-14 07:13:24 +0000
commit: 0f26b0aeb4a91418d4c273bb25ab22f3b416a960 (patch)
tree: f4a1cfadf86bfb6a4a6aa20c9df02cea75ebfbef /llvm/lib/Transforms
parent: d871531687b062862234a3346b50f1824e27ed3b (diff)
download: bcm5719-llvm-0f26b0aeb4a91418d4c273bb25ab22f3b416a960.tar.gz
bcm5719-llvm-0f26b0aeb4a91418d4c273bb25ab22f3b416a960.zip
3 files changed, 32 insertions, 7 deletions
diff --git a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index 5d3af4590d8..b43951d3f40 100644
--- a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -1117,16 +1117,25 @@ namespace {
       }
 
       if (IID && TTI) {
+        FastMathFlags FMFCI;
+        if (auto *FPMOCI = dyn_cast<FPMathOperator>(CI))
+          FMFCI = FPMOCI->getFastMathFlags();
+
         SmallVector<Type*, 4> Tys;
         for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
           Tys.push_back(CI->getArgOperand(i)->getType());
-        unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys);
+        unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys, FMFCI);
 
         Tys.clear();
         CallInst *CJ = cast<CallInst>(J);
+
+        FastMathFlags FMFCJ;
+        if (auto *FPMOCJ = dyn_cast<FPMathOperator>(CJ))
+          FMFCJ = FPMOCJ->getFastMathFlags();
+
         for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i)
           Tys.push_back(CJ->getArgOperand(i)->getType());
-        unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys);
+        unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys, FMFCJ);
 
         Tys.clear();
         assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
@@ -1140,8 +1149,10 @@ namespace {
                                             CJ->getArgOperand(i)->getType()));
         }
 
+        FastMathFlags FMFV = FMFCI;
+        FMFV &= FMFCJ;
         Type *RetTy = getVecTypeForPair(IT1, JT1);
-        unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys);
+        unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV);
 
         if (VCost > ICost + JCost)
           return false;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 72e96cdf46d..2c5fec64c18 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3302,7 +3302,11 @@ static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
   for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
     Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
 
-  return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
+  FastMathFlags FMF;
+  if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
+    FMF = FPMO->getFastMathFlags();
+
+  return TTI.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
 }
 
 static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
@@ -4269,7 +4273,13 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
           }
         }
         assert(VectorF && "Can't create vector function.");
-        Entry[Part] = Builder.CreateCall(VectorF, Args);
+
+        CallInst *V = Builder.CreateCall(VectorF, Args);
+
+        if (isa<FPMathOperator>(V))
+          V->copyFastMathFlags(CI);
+
+        Entry[Part] = V;
       }
 
       addMetadata(Entry, &*it);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0254ef47225..fc060f19eb6 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1659,10 +1659,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
                                          VecTy->getNumElements()));
       }
 
+      FastMathFlags FMF;
+      if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
+        FMF = FPMO->getFastMathFlags();
+
       int ScalarCallCost = VecTy->getNumElements() *
-          TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys);
+          TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
 
-      int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys);
+      int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys, FMF);
 
       DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
             << " (" << VecCallCost  << "-" <<  ScalarCallCost << ")"
author	David Majnemer <david.majnemer@gmail.com>	2016-04-14 07:13:24 +0000
committer	David Majnemer <david.majnemer@gmail.com>	2016-04-14 07:13:24 +0000
commit	0f26b0aeb4a91418d4c273bb25ab22f3b416a960 (patch)
tree	f4a1cfadf86bfb6a4a6aa20c9df02cea75ebfbef /llvm/lib/Transforms
parent	d871531687b062862234a3346b50f1824e27ed3b (diff)
download	bcm5719-llvm-0f26b0aeb4a91418d4c273bb25ab22f3b416a960.tar.gz bcm5719-llvm-0f26b0aeb4a91418d4c273bb25ab22f3b416a960.zip