diff options
author | David Majnemer <david.majnemer@gmail.com> | 2016-04-14 07:13:24 +0000 |
---|---|---|
committer | David Majnemer <david.majnemer@gmail.com> | 2016-04-14 07:13:24 +0000 |
commit | 0f26b0aeb4a91418d4c273bb25ab22f3b416a960 (patch) | |
tree | f4a1cfadf86bfb6a4a6aa20c9df02cea75ebfbef /llvm/lib/Transforms | |
parent | d871531687b062862234a3346b50f1824e27ed3b (diff) | |
download | bcm5719-llvm-0f26b0aeb4a91418d4c273bb25ab22f3b416a960.tar.gz bcm5719-llvm-0f26b0aeb4a91418d4c273bb25ab22f3b416a960.zip |
[CodeGen] Teach LLVM how to lower @llvm.{min,max}num to {MIN,MAX}NAN
The behavior of {MIN,MAX}NAN differs from that of {MIN,MAX}NUM when only
one of the inputs is NaN: -NUM returns the non-NaN argument, while
-NAN returns NaN.
It is desirable to lower @llvm.{min,max}num to -NAN on targets that don't
have a native instruction for -NUM. Notably, ARMv7 NEON's vmin has the
-NAN semantics.
N.B. Of course, it is only safe to do this if the intrinsic call is
marked nnan.
llvm-svn: 266279
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/BBVectorize.cpp | 17 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 8 |
3 files changed, 32 insertions, 7 deletions
diff --git a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp index 5d3af4590d8..b43951d3f40 100644 --- a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp @@ -1117,16 +1117,25 @@ namespace { } if (IID && TTI) { + FastMathFlags FMFCI; + if (auto *FPMOCI = dyn_cast<FPMathOperator>(CI)) + FMFCI = FPMOCI->getFastMathFlags(); + SmallVector<Type*, 4> Tys; for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) Tys.push_back(CI->getArgOperand(i)->getType()); - unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys); + unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys, FMFCI); Tys.clear(); CallInst *CJ = cast<CallInst>(J); + + FastMathFlags FMFCJ; + if (auto *FPMOCJ = dyn_cast<FPMathOperator>(CJ)) + FMFCJ = FPMOCJ->getFastMathFlags(); + for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i) Tys.push_back(CJ->getArgOperand(i)->getType()); - unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys); + unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys, FMFCJ); Tys.clear(); assert(CI->getNumArgOperands() == CJ->getNumArgOperands() && @@ -1140,8 +1149,10 @@ namespace { CJ->getArgOperand(i)->getType())); } + FastMathFlags FMFV = FMFCI; + FMFV &= FMFCJ; Type *RetTy = getVecTypeForPair(IT1, JT1); - unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys); + unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV); if (VCost > ICost + JCost) return false; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 72e96cdf46d..2c5fec64c18 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3302,7 +3302,11 @@ static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF, for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF)); - return 
TTI.getIntrinsicInstrCost(ID, RetTy, Tys); + FastMathFlags FMF; + if (auto *FPMO = dyn_cast<FPMathOperator>(CI)) + FMF = FPMO->getFastMathFlags(); + + return TTI.getIntrinsicInstrCost(ID, RetTy, Tys, FMF); } static Type *smallestIntegerVectorType(Type *T1, Type *T2) { @@ -4269,7 +4273,13 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { } } assert(VectorF && "Can't create vector function."); - Entry[Part] = Builder.CreateCall(VectorF, Args); + + CallInst *V = Builder.CreateCall(VectorF, Args); + + if (isa<FPMathOperator>(V)) + V->copyFastMathFlags(CI); + + Entry[Part] = V; } addMetadata(Entry, &*it); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0254ef47225..fc060f19eb6 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1659,10 +1659,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { VecTy->getNumElements())); } + FastMathFlags FMF; + if (auto *FPMO = dyn_cast<FPMathOperator>(CI)) + FMF = FPMO->getFastMathFlags(); + int ScalarCallCost = VecTy->getNumElements() * - TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys); + TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF); - int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys); + int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys, FMF); DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost << " (" << VecCallCost << "-" << ScalarCallCost << ")" |