summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
diff options
context:
space:
mode:
author David Majnemer <david.majnemer@gmail.com> 2016-04-14 07:13:24 +0000
committer David Majnemer <david.majnemer@gmail.com> 2016-04-14 07:13:24 +0000
commit0f26b0aeb4a91418d4c273bb25ab22f3b416a960 (patch)
treef4a1cfadf86bfb6a4a6aa20c9df02cea75ebfbef /llvm/lib/Transforms/Vectorize/BBVectorize.cpp
parentd871531687b062862234a3346b50f1824e27ed3b (diff)
downloadbcm5719-llvm-0f26b0aeb4a91418d4c273bb25ab22f3b416a960.tar.gz
bcm5719-llvm-0f26b0aeb4a91418d4c273bb25ab22f3b416a960.zip
[CodeGen] Teach LLVM how to lower @llvm.{min,max}num to {MIN,MAX}NAN
The behavior of {MIN,MAX}NAN differs from that of {MIN,MAX}NUM when only one of the inputs is NaN: -NUM returns the non-NaN argument, while -NAN returns NaN. It is desirable to lower @llvm.{min,max}num to -NAN on targets that don't have a native instruction for -NUM. Notably, ARMv7 NEON's vmin has the -NAN semantics. N.B. Of course, it is only safe to do this if the intrinsic call is marked nnan. llvm-svn: 266279
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/BBVectorize.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/BBVectorize.cpp17
1 files changed, 14 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index 5d3af4590d8..b43951d3f40 100644
--- a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -1117,16 +1117,25 @@ namespace {
}
if (IID && TTI) {
+ FastMathFlags FMFCI;
+ if (auto *FPMOCI = dyn_cast<FPMathOperator>(CI))
+ FMFCI = FPMOCI->getFastMathFlags();
+
SmallVector<Type*, 4> Tys;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
Tys.push_back(CI->getArgOperand(i)->getType());
- unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys);
+ unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys, FMFCI);
Tys.clear();
CallInst *CJ = cast<CallInst>(J);
+
+ FastMathFlags FMFCJ;
+ if (auto *FPMOCJ = dyn_cast<FPMathOperator>(CJ))
+ FMFCJ = FPMOCJ->getFastMathFlags();
+
for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i)
Tys.push_back(CJ->getArgOperand(i)->getType());
- unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys);
+ unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys, FMFCJ);
Tys.clear();
assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
@@ -1140,8 +1149,10 @@ namespace {
CJ->getArgOperand(i)->getType()));
}
+ FastMathFlags FMFV = FMFCI;
+ FMFV &= FMFCJ;
Type *RetTy = getVecTypeForPair(IT1, JT1);
- unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys);
+ unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV);
if (VCost > ICost + JCost)
return false;
OpenPOWER on IntegriCloud