| author | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2017-03-14 06:35:36 +0000 |
|---|---|---|
| committer | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2017-03-14 06:35:36 +0000 |
| commit | a48ea231c06e97ce518cace0fb9b1c1da71977dc (patch) | |
| tree | 07214834bd1b8c17d6263f9a1eaeb1e072b09c1c /llvm/lib | |
| parent | 9d50e187cd4f0f74010eddea7d2fb6b77139b2b1 (diff) | |
[TargetTransformInfo] getIntrinsicInstrCost() scalarization estimation improved
getIntrinsicInstrCost() used to compute the scalarization cost based only on types.
This patch improves on that by checking the actual arguments when they are
available, so that only unique, non-constant operands are counted.
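
As a minimal sketch of the idea (not the LLVM implementation): when a call is scalarized, only unique, non-constant operands should contribute insert/extract overhead. The helper name and the flat one-unit-per-operand cost below are illustrative assumptions.

```cpp
#include <unordered_set>
#include <vector>

struct Value { bool IsConstant = false; };

// Hypothetical helper: each distinct non-constant operand contributes one
// unit of scalarization overhead; constants and repeated operands are free.
unsigned countUniqueNonConstantOperands(const std::vector<const Value *> &Args) {
  std::unordered_set<const Value *> Seen;
  unsigned Cost = 0;
  for (const Value *V : Args) {
    if (V->IsConstant)
      continue;                 // constants need no extract/insert
    if (Seen.insert(V).second)
      ++Cost;                   // count each distinct operand only once
  }
  return Cost;
}
```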
Test updates:
Analysis/CostModel/X86/arith-fp.ll
Transforms/LoopVectorize/AArch64/interleaved_cost.ll
Transforms/LoopVectorize/ARM/interleaved_cost.ll
The improvement in getOperandsScalarizationOverhead() to differentiate between
constant and non-constant operands made it necessary to update the
interleaved_cost.ll tests even though they do not relate to intrinsics.
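
For callers, the practical difference is that the vector cost query can now be made with the scalar call's actual operands plus a vectorization factor, instead of pre-widened vector types. A rough sketch of the call-site pattern, modeled on the LoopVectorize change below (the wrapper function name is illustrative):

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"

using namespace llvm;

static unsigned getVectorIntrinsicCostSketch(const TargetTransformInfo &TTI,
                                              CallInst *CI, Intrinsic::ID ID,
                                              unsigned VF) {
  FastMathFlags FMF;
  if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
    FMF = FPMO->getFastMathFlags();

  // Pass the scalar operands and VF; the target can then skip scalarization
  // overhead for constant or repeated operands.
  SmallVector<Value *, 4> Operands(CI->arg_operands());
  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF);
}
```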
Review: Hal Finkel
https://reviews.llvm.org/D29540
llvm-svn: 297705
Diffstat (limited to 'llvm/lib')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Analysis/CostModel.cpp | 4 |
| -rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 12 |
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 9 |
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.h | 6 |
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/BBVectorize.cpp | 40 |
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 8 |
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 11 |

7 files changed, 49 insertions(+), 41 deletions(-)
```diff
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp
index 6b77397956c..757a1e50284 100644
--- a/llvm/lib/Analysis/CostModel.cpp
+++ b/llvm/lib/Analysis/CostModel.cpp
@@ -542,9 +542,7 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
   }
   case Instruction::Call:
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-      SmallVector<Value *, 4> Args;
-      for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
-        Args.push_back(II->getArgOperand(J));
+      SmallVector<Value *, 4> Args(II->arg_operands());
 
       FastMathFlags FMF;
       if (auto *FPMO = dyn_cast<FPMathOperator>(II))
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 3e1c1457b6d..4249b0cbe85 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -378,17 +378,17 @@ int TargetTransformInfo::getInterleavedMemoryOpCost(
 }
 
 int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                               ArrayRef<Type *> Tys,
-                                               FastMathFlags FMF) const {
-  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
+                                               ArrayRef<Type *> Tys, FastMathFlags FMF,
+                                               unsigned ScalarizationCostPassed) const {
+  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
+                                            ScalarizationCostPassed);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 
 int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                               ArrayRef<Value *> Args,
-                                               FastMathFlags FMF) const {
-  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF);
+    ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
+  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 7629e0c95c6..7a92ddff253 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1370,7 +1370,8 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
 }
 
 int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                                      ArrayRef<Type *> Tys, FastMathFlags FMF) {
+                                      ArrayRef<Type *> Tys, FastMathFlags FMF,
+                                      unsigned ScalarizationCostPassed) {
   // Costs should match the codegen from:
   // BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
   // BSWAP: llvm\test\CodeGen\X86\bswap-vector.ll
@@ -1551,12 +1552,12 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
   if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
     return LT.first * Entry->Cost;
 
-  return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF);
+  return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF, ScalarizationCostPassed);
 }
 
 int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                                      ArrayRef<Value *> Args, FastMathFlags FMF) {
-  return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF);
+                     ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
+  return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF);
 }
 
 int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 63a1493002f..0622fcf2815 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -74,9 +74,11 @@ public:
                                  const SCEV *Ptr);
 
   int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> Tys, FastMathFlags FMF);
+                            ArrayRef<Type *> Tys, FastMathFlags FMF,
+                            unsigned ScalarizationCostPassed = UINT_MAX);
   int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Value *> Args, FastMathFlags FMF);
+                            ArrayRef<Value *> Args, FastMathFlags FMF,
+                            unsigned VF = 1);
 
   int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm);
diff --git a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index c01740b27d5..705e1533275 100644
--- a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -1127,39 +1127,51 @@ namespace {
         FastMathFlags FMFCI;
         if (auto *FPMOCI = dyn_cast<FPMathOperator>(CI))
           FMFCI = FPMOCI->getFastMathFlags();
+        SmallVector<Value *, 4> IArgs(CI->arg_operands());
+        unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, IArgs, FMFCI);
 
-        SmallVector<Type*, 4> Tys;
-        for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
-          Tys.push_back(CI->getArgOperand(i)->getType());
-        unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys, FMFCI);
-
-        Tys.clear();
         CallInst *CJ = cast<CallInst>(J);
 
         FastMathFlags FMFCJ;
         if (auto *FPMOCJ = dyn_cast<FPMathOperator>(CJ))
           FMFCJ = FPMOCJ->getFastMathFlags();
 
-        for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i)
-          Tys.push_back(CJ->getArgOperand(i)->getType());
-        unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys, FMFCJ);
+        SmallVector<Value *, 4> JArgs(CJ->arg_operands());
+        unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, JArgs, FMFCJ);
 
-        Tys.clear();
         assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
                "Intrinsic argument counts differ");
+        SmallVector<Type*, 4> Tys;
+        SmallVector<Value *, 4> VecArgs;
         for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
           if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
-               IID == Intrinsic::cttz) && i == 1)
+               IID == Intrinsic::cttz) && i == 1) {
             Tys.push_back(CI->getArgOperand(i)->getType());
-          else
+            VecArgs.push_back(CI->getArgOperand(i));
+          }
+          else {
             Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
                                             CJ->getArgOperand(i)->getType()));
+            // Add both operands, and then count their scalarization overhead
+            // with VF 1.
+            VecArgs.push_back(CI->getArgOperand(i));
+            VecArgs.push_back(CJ->getArgOperand(i));
+          }
         }
 
+        // Compute the scalarization cost here with the original operands (to
+        // check for uniqueness etc), and then call getIntrinsicInstrCost()
+        // with the constructed vector types.
+        Type *RetTy = getVecTypeForPair(IT1, JT1);
+        unsigned ScalarizationCost = 0;
+        if (!RetTy->isVoidTy())
+          ScalarizationCost += TTI->getScalarizationOverhead(RetTy, true, false);
+        ScalarizationCost += TTI->getOperandsScalarizationOverhead(VecArgs, 1);
+
         FastMathFlags FMFV = FMFCI;
         FMFV &= FMFCJ;
-        Type *RetTy = getVecTypeForPair(IT1, JT1);
-        unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV);
+        unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV,
+                                                    ScalarizationCost);
 
         if (VCost > ICost + JCost)
           return false;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 43dca02f65f..c13bcea737e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3784,16 +3784,12 @@ static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
   assert(ID && "Expected intrinsic call!");
 
-  Type *RetTy = ToVectorTy(CI->getType(), VF);
-  SmallVector<Type *, 4> Tys;
-  for (Value *ArgOperand : CI->arg_operands())
-    Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
-
   FastMathFlags FMF;
   if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
     FMF = FPMO->getFastMathFlags();
 
-  return TTI.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
+  SmallVector<Value *, 4> Operands(CI->arg_operands());
+  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF);
 }
 
 static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 40adf2e79be..b9df89e3eec 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1877,12 +1877,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
 
       // Calculate the cost of the scalar and vector calls.
-      SmallVector<Type*, 4> ScalarTys, VecTys;
-      for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op) {
+      SmallVector<Type*, 4> ScalarTys;
+      for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op)
         ScalarTys.push_back(CI->getArgOperand(op)->getType());
-        VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(),
-                                         VecTy->getNumElements()));
-      }
 
       FastMathFlags FMF;
       if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
@@ -1891,7 +1888,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
 
       int ScalarCallCost = VecTy->getNumElements() *
           TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
-      int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys, FMF);
+      SmallVector<Value *, 4> Args(CI->arg_operands());
+      int VecCallCost = TTI->getIntrinsicInstrCost(ID, CI->getType(), Args, FMF,
+                                                   VecTy->getNumElements());
 
       DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
             << " (" << VecCallCost << "-" << ScalarCallCost << ")"
```