From a48ea231c06e97ce518cace0fb9b1c1da71977dc Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Tue, 14 Mar 2017 06:35:36 +0000 Subject: [TargetTransformInfo] getIntrinsicInstrCost() scalarization estimation improved getIntrinsicInstrCost() used to only compute scalarization cost based on types. This patch improves this so that the actual arguments are checked when they are available, in order to handle only unique non-constant operands. Test updates: Analysis/CostModel/X86/arith-fp.ll Transforms/LoopVectorize/AArch64/interleaved_cost.ll Transforms/LoopVectorize/ARM/interleaved_cost.ll The improvement in getOperandsScalarizationOverhead() to differentiate on constants made it necessary to update the interleaved_cost.ll tests even though they do not relate to intrinsics. Review: Hal Finkel https://reviews.llvm.org/D29540 llvm-svn: 297705 --- llvm/lib/Transforms/Vectorize/BBVectorize.cpp | 40 +++++++++++++++++---------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'llvm/lib/Transforms/Vectorize/BBVectorize.cpp') diff --git a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp index c01740b27d5..705e1533275 100644 --- a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp @@ -1127,39 +1127,51 @@ namespace { FastMathFlags FMFCI; if (auto *FPMOCI = dyn_cast(CI)) FMFCI = FPMOCI->getFastMathFlags(); + SmallVector IArgs(CI->arg_operands()); + unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, IArgs, FMFCI); - SmallVector Tys; - for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) - Tys.push_back(CI->getArgOperand(i)->getType()); - unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys, FMFCI); - - Tys.clear(); CallInst *CJ = cast(J); FastMathFlags FMFCJ; if (auto *FPMOCJ = dyn_cast(CJ)) FMFCJ = FPMOCJ->getFastMathFlags(); - for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i) - Tys.push_back(CJ->getArgOperand(i)->getType()); - unsigned 
JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys, FMFCJ); + SmallVector JArgs(CJ->arg_operands()); + unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, JArgs, FMFCJ); - Tys.clear(); assert(CI->getNumArgOperands() == CJ->getNumArgOperands() && "Intrinsic argument counts differ"); + SmallVector Tys; + SmallVector VecArgs; for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz || - IID == Intrinsic::cttz) && i == 1) + IID == Intrinsic::cttz) && i == 1) { Tys.push_back(CI->getArgOperand(i)->getType()); - else + VecArgs.push_back(CI->getArgOperand(i)); + } + else { Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(), CJ->getArgOperand(i)->getType())); + // Add both operands, and then count their scalarization overhead + // with VF 1. + VecArgs.push_back(CI->getArgOperand(i)); + VecArgs.push_back(CJ->getArgOperand(i)); + } } + // Compute the scalarization cost here with the original operands (to + // check for uniqueness etc), and then call getIntrinsicInstrCost() + // with the constructed vector types. + Type *RetTy = getVecTypeForPair(IT1, JT1); + unsigned ScalarizationCost = 0; + if (!RetTy->isVoidTy()) + ScalarizationCost += TTI->getScalarizationOverhead(RetTy, true, false); + ScalarizationCost += TTI->getOperandsScalarizationOverhead(VecArgs, 1); + FastMathFlags FMFV = FMFCI; FMFV &= FMFCJ; - Type *RetTy = getVecTypeForPair(IT1, JT1); - unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV); + unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV, + ScalarizationCost); if (VCost > ICost + JCost) return false; -- cgit v1.2.3