diff options
author | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2017-03-14 06:35:36 +0000 |
---|---|---|
committer | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2017-03-14 06:35:36 +0000 |
commit | a48ea231c06e97ce518cace0fb9b1c1da71977dc (patch) | |
tree | 07214834bd1b8c17d6263f9a1eaeb1e072b09c1c /llvm/lib/Transforms/Vectorize/BBVectorize.cpp | |
parent | 9d50e187cd4f0f74010eddea7d2fb6b77139b2b1 (diff) | |
download | bcm5719-llvm-a48ea231c06e97ce518cace0fb9b1c1da71977dc.tar.gz bcm5719-llvm-a48ea231c06e97ce518cace0fb9b1c1da71977dc.zip |
[TargetTransformInfo] getIntrinsicInstrCost() scalarization estimation improved
getIntrinsicInstrCost() used to only compute scalarization cost based on types.
This patch improves this so that the actual arguments are checked when they are
available, in order to handle only unique non-constant operands.
Tests updates:
Analysis/CostModel/X86/arith-fp.ll
Transforms/LoopVectorize/AArch64/interleaved_cost.ll
Transforms/LoopVectorize/ARM/interleaved_cost.ll
The improvement in getOperandsScalarizationOverhead() to differentiate on
constants made it necessary to update the interleaved_cost.ll tests even
though they do not relate to intrinsics.
Review: Hal Finkel
https://reviews.llvm.org/D29540
llvm-svn: 297705
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/BBVectorize.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/BBVectorize.cpp | 40 |
1 files changed, 26 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp index c01740b27d5..705e1533275 100644 --- a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp @@ -1127,39 +1127,51 @@ namespace { FastMathFlags FMFCI; if (auto *FPMOCI = dyn_cast<FPMathOperator>(CI)) FMFCI = FPMOCI->getFastMathFlags(); + SmallVector<Value *, 4> IArgs(CI->arg_operands()); + unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, IArgs, FMFCI); - SmallVector<Type*, 4> Tys; - for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) - Tys.push_back(CI->getArgOperand(i)->getType()); - unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys, FMFCI); - - Tys.clear(); CallInst *CJ = cast<CallInst>(J); FastMathFlags FMFCJ; if (auto *FPMOCJ = dyn_cast<FPMathOperator>(CJ)) FMFCJ = FPMOCJ->getFastMathFlags(); - for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i) - Tys.push_back(CJ->getArgOperand(i)->getType()); - unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys, FMFCJ); + SmallVector<Value *, 4> JArgs(CJ->arg_operands()); + unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, JArgs, FMFCJ); - Tys.clear(); assert(CI->getNumArgOperands() == CJ->getNumArgOperands() && "Intrinsic argument counts differ"); + SmallVector<Type*, 4> Tys; + SmallVector<Value *, 4> VecArgs; for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz || - IID == Intrinsic::cttz) && i == 1) + IID == Intrinsic::cttz) && i == 1) { Tys.push_back(CI->getArgOperand(i)->getType()); - else + VecArgs.push_back(CI->getArgOperand(i)); + } + else { Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(), CJ->getArgOperand(i)->getType())); + // Add both operands, and then count their scalarization overhead + // with VF 1. + VecArgs.push_back(CI->getArgOperand(i)); + VecArgs.push_back(CJ->getArgOperand(i)); + } } + // Compute the scalarization cost here with the original operands (to + // check for uniqueness etc), and then call getIntrinsicInstrCost() + // with the constructed vector types. + Type *RetTy = getVecTypeForPair(IT1, JT1); + unsigned ScalarizationCost = 0; + if (!RetTy->isVoidTy()) + ScalarizationCost += TTI->getScalarizationOverhead(RetTy, true, false); + ScalarizationCost += TTI->getOperandsScalarizationOverhead(VecArgs, 1); + FastMathFlags FMFV = FMFCI; FMFV &= FMFCJ; - Type *RetTy = getVecTypeForPair(IT1, JT1); - unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV); + unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV, + ScalarizationCost); if (VCost > ICost + JCost) return false; |