summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Jonas Paulsson <paulsson@linux.vnet.ibm.com> 2017-03-14 06:35:36 +0000
committer: Jonas Paulsson <paulsson@linux.vnet.ibm.com> 2017-03-14 06:35:36 +0000
commit: a48ea231c06e97ce518cace0fb9b1c1da71977dc (patch)
tree: 07214834bd1b8c17d6263f9a1eaeb1e072b09c1c /llvm/lib
parent: 9d50e187cd4f0f74010eddea7d2fb6b77139b2b1 (diff)
download: bcm5719-llvm-a48ea231c06e97ce518cace0fb9b1c1da71977dc.tar.gz
          bcm5719-llvm-a48ea231c06e97ce518cace0fb9b1c1da71977dc.zip
[TargetTransformInfo] getIntrinsicInstrCost() scalarization estimation improved
getIntrinsicInstrCost() used to compute the scalarization cost based only on types. This patch improves this so that the actual arguments are checked when they are available, in order to handle only unique non-constant operands.

Test updates:
  Analysis/CostModel/X86/arith-fp.ll
  Transforms/LoopVectorize/AArch64/interleaved_cost.ll
  Transforms/LoopVectorize/ARM/interleaved_cost.ll

The improvement in getOperandsScalarizationOverhead() to differentiate on constants made it necessary to update the interleaved_cost.ll tests even though they do not relate to intrinsics.

Review: Hal Finkel
https://reviews.llvm.org/D29540
llvm-svn: 297705
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/CostModel.cpp                   4
-rw-r--r--  llvm/lib/Analysis/TargetTransformInfo.cpp        12
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp    9
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.h      6
-rw-r--r--  llvm/lib/Transforms/Vectorize/BBVectorize.cpp    40
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp   8
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp  11
7 files changed, 49 insertions, 41 deletions
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp
index 6b77397956c..757a1e50284 100644
--- a/llvm/lib/Analysis/CostModel.cpp
+++ b/llvm/lib/Analysis/CostModel.cpp
@@ -542,9 +542,7 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
}
case Instruction::Call:
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- SmallVector<Value *, 4> Args;
- for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
- Args.push_back(II->getArgOperand(J));
+ SmallVector<Value *, 4> Args(II->arg_operands());
FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(II))
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 3e1c1457b6d..4249b0cbe85 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -378,17 +378,17 @@ int TargetTransformInfo::getInterleavedMemoryOpCost(
}
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys,
- FastMathFlags FMF) const {
- int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args,
- FastMathFlags FMF) const {
- int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 7629e0c95c6..7a92ddff253 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1370,7 +1370,8 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
}
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF) {
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
// Costs should match the codegen from:
// BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
// BSWAP: llvm\test\CodeGen\X86\bswap-vector.ll
@@ -1551,12 +1552,12 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
- return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF);
+ return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF, ScalarizationCostPassed);
}
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF) {
- return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
+ return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF);
}
int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 63a1493002f..0622fcf2815 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -74,9 +74,11 @@ public:
const SCEV *Ptr);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF);
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF = 1);
int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm);
diff --git a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index c01740b27d5..705e1533275 100644
--- a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -1127,39 +1127,51 @@ namespace {
FastMathFlags FMFCI;
if (auto *FPMOCI = dyn_cast<FPMathOperator>(CI))
FMFCI = FPMOCI->getFastMathFlags();
+ SmallVector<Value *, 4> IArgs(CI->arg_operands());
+ unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, IArgs, FMFCI);
- SmallVector<Type*, 4> Tys;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
- Tys.push_back(CI->getArgOperand(i)->getType());
- unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys, FMFCI);
-
- Tys.clear();
CallInst *CJ = cast<CallInst>(J);
FastMathFlags FMFCJ;
if (auto *FPMOCJ = dyn_cast<FPMathOperator>(CJ))
FMFCJ = FPMOCJ->getFastMathFlags();
- for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i)
- Tys.push_back(CJ->getArgOperand(i)->getType());
- unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys, FMFCJ);
+ SmallVector<Value *, 4> JArgs(CJ->arg_operands());
+ unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, JArgs, FMFCJ);
- Tys.clear();
assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
"Intrinsic argument counts differ");
+ SmallVector<Type*, 4> Tys;
+ SmallVector<Value *, 4> VecArgs;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
- IID == Intrinsic::cttz) && i == 1)
+ IID == Intrinsic::cttz) && i == 1) {
Tys.push_back(CI->getArgOperand(i)->getType());
- else
+ VecArgs.push_back(CI->getArgOperand(i));
+ }
+ else {
Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
CJ->getArgOperand(i)->getType()));
+ // Add both operands, and then count their scalarization overhead
+ // with VF 1.
+ VecArgs.push_back(CI->getArgOperand(i));
+ VecArgs.push_back(CJ->getArgOperand(i));
+ }
}
+ // Compute the scalarization cost here with the original operands (to
+ // check for uniqueness etc), and then call getIntrinsicInstrCost()
+ // with the constructed vector types.
+ Type *RetTy = getVecTypeForPair(IT1, JT1);
+ unsigned ScalarizationCost = 0;
+ if (!RetTy->isVoidTy())
+ ScalarizationCost += TTI->getScalarizationOverhead(RetTy, true, false);
+ ScalarizationCost += TTI->getOperandsScalarizationOverhead(VecArgs, 1);
+
FastMathFlags FMFV = FMFCI;
FMFV &= FMFCJ;
- Type *RetTy = getVecTypeForPair(IT1, JT1);
- unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV);
+ unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV,
+ ScalarizationCost);
if (VCost > ICost + JCost)
return false;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 43dca02f65f..c13bcea737e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3784,16 +3784,12 @@ static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
assert(ID && "Expected intrinsic call!");
- Type *RetTy = ToVectorTy(CI->getType(), VF);
- SmallVector<Type *, 4> Tys;
- for (Value *ArgOperand : CI->arg_operands())
- Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
-
FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
FMF = FPMO->getFastMathFlags();
- return TTI.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
+ SmallVector<Value *, 4> Operands(CI->arg_operands());
+ return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF);
}
static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 40adf2e79be..b9df89e3eec 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1877,12 +1877,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
// Calculate the cost of the scalar and vector calls.
- SmallVector<Type*, 4> ScalarTys, VecTys;
- for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op) {
+ SmallVector<Type*, 4> ScalarTys;
+ for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op)
ScalarTys.push_back(CI->getArgOperand(op)->getType());
- VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(),
- VecTy->getNumElements()));
- }
FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
@@ -1891,7 +1888,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
int ScalarCallCost = VecTy->getNumElements() *
TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
- int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys, FMF);
+ SmallVector<Value *, 4> Args(CI->arg_operands());
+ int VecCallCost = TTI->getIntrinsicInstrCost(ID, CI->getType(), Args, FMF,
+ VecTy->getNumElements());
DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
<< " (" << VecCallCost << "-" << ScalarCallCost << ")"
OpenPOWER on IntegriCloud