summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/CostModel.cpp  11
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp  35
-rw-r--r--  llvm/lib/Transforms/Vectorize/BBVectorize.cpp  55
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp  13
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp  20
5 files changed, 122 insertions, 12 deletions
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp
index 543977a376f..898da8d0e8d 100644
--- a/llvm/lib/Analysis/CostModel.cpp
+++ b/llvm/lib/Analysis/CostModel.cpp
@@ -98,15 +98,20 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
TargetTransformInfo::OperandValueKind OpInfo =
TargetTransformInfo::OK_AnyValue;
- // Check for a splat of a constant.
+ // Check for a splat of a constant or for a non uniform vector of constants.
ConstantDataVector *CDV = 0;
- if ((CDV = dyn_cast<ConstantDataVector>(V)))
+ if ((CDV = dyn_cast<ConstantDataVector>(V))) {
+ OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
if (CDV->getSplatValue() != NULL)
OpInfo = TargetTransformInfo::OK_UniformConstantValue;
+ }
+
ConstantVector *CV = 0;
- if ((CV = dyn_cast<ConstantVector>(V)))
+ if ((CV = dyn_cast<ConstantVector>(V))) {
+ OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
if (CV->getSplatValue() != NULL)
OpInfo = TargetTransformInfo::OK_UniformConstantValue;
+ }
return OpInfo;
}
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 207a7685c59..d50bab99ff3 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -225,6 +225,13 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// Look for AVX2 lowering tricks.
if (ST->hasAVX2()) {
+ if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
+ (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
+ // On AVX2, a packed v16i16 shift left by a constant build_vector
+ // is lowered into a vector multiply (vpmullw).
+ return LT.first;
+
int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second);
if (Idx != -1)
return LT.first * AVX2CostTable[Idx].Cost;
@@ -257,6 +264,20 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return LT.first * SSE2UniformConstCostTable[Idx].Cost;
}
+ if (ISD == ISD::SHL &&
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
+ EVT VT = LT.second;
+ if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
+ (VT == MVT::v4i32 && ST->hasSSE41()))
+ // Vector shift left by non uniform constant can be lowered
+ // into vector multiply (pmullw/pmulld).
+ return LT.first;
+ if (VT == MVT::v4i32 && ST->hasSSE2())
+ // A vector shift left by non uniform constant is converted
+ // into a vector multiply; the new multiply is eventually
+ // lowered into a sequence of shuffles and 2 x pmuludq.
+ ISD = ISD::MUL;
+ }
static const CostTblEntry<MVT::SimpleValueType> SSE2CostTable[] = {
// We don't correctly identify costs of casts because they are marked as
@@ -271,6 +292,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
{ ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
+ { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
{ ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
{ ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
@@ -308,6 +330,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// We don't have to scalarize unsupported ops. We can issue two half-sized
// operations and we only need to extract the upper YMM half.
// Two ops + 1 extract + 1 insert = 4.
+ { ISD::MUL, MVT::v16i16, 4 },
{ ISD::MUL, MVT::v8i32, 4 },
{ ISD::SUB, MVT::v8i32, 4 },
{ ISD::ADD, MVT::v8i32, 4 },
@@ -323,7 +346,15 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// Look for AVX1 lowering tricks.
if (ST->hasAVX() && !ST->hasAVX2()) {
- int Idx = CostTableLookup(AVX1CostTable, ISD, LT.second);
+ EVT VT = LT.second;
+
+ // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
+ // sequence of extract + two vector multiply + insert.
+ if (ISD == ISD::SHL && (VT == MVT::v8i32 || VT == MVT::v16i16) &&
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)
+ ISD = ISD::MUL;
+
+ int Idx = CostTableLookup(AVX1CostTable, ISD, VT);
if (Idx != -1)
return LT.first * AVX1CostTable[Idx].Cost;
}
@@ -343,7 +374,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// 2x pmuludq, 2x shuffle.
if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
!ST->hasSSE41())
- return 6;
+ return LT.first * 6;
// Fallback to the default implementation.
return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
diff --git a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index 0cc1f3962a4..f59dd2160a9 100644
--- a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -532,7 +532,11 @@ namespace {
// Returns the cost of the provided instruction using TTI.
// This does not handle loads and stores.
- unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2) {
+ unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2,
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue,
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_AnyValue) {
switch (Opcode) {
default: break;
case Instruction::GetElementPtr:
@@ -562,7 +566,7 @@ namespace {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- return TTI->getArithmeticInstrCost(Opcode, T1);
+ return TTI->getArithmeticInstrCost(Opcode, T1, Op1VK, Op2VK);
case Instruction::Select:
case Instruction::ICmp:
case Instruction::FCmp:
@@ -1013,13 +1017,58 @@ namespace {
unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2);
Type *VT1 = getVecTypeForPair(IT1, JT1),
*VT2 = getVecTypeForPair(IT2, JT2);
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_AnyValue;
+
+ // On some targets (example X86) the cost of a vector shift may vary
+ // depending on whether the second operand is a Uniform or
+ // NonUniform Constant.
+ switch (I->getOpcode()) {
+ default : break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+
+ // If both I and J are scalar shifts by constant, then the
+ // merged vector shift count would be either a constant splat value
+ // or a non-uniform vector of constants.
+ if (ConstantInt *CII = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (ConstantInt *CIJ = dyn_cast<ConstantInt>(J->getOperand(1)))
+ Op2VK = CII == CIJ ? TargetTransformInfo::OK_UniformConstantValue :
+ TargetTransformInfo::OK_NonUniformConstantValue;
+ } else {
+ // Check for a splat of a constant or for a non uniform vector
+ // of constants.
+ Value *IOp = I->getOperand(1);
+ Value *JOp = J->getOperand(1);
+ if (ConstantDataVector *CDVI = dyn_cast<ConstantDataVector>(IOp)) {
+ if (ConstantDataVector *CDVJ = dyn_cast<ConstantDataVector>(JOp)) {
+ Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
+ Constant *SplatValue = CDVI->getSplatValue();
+ if (SplatValue != NULL && SplatValue == CDVJ->getSplatValue())
+ Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+ }
+ }
+
+ if (ConstantVector *CVI = dyn_cast<ConstantVector>(IOp)) {
+ if (ConstantVector *CVJ = dyn_cast<ConstantVector>(JOp)) {
+ Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
+ Constant *SplatValue = CVI->getSplatValue();
+ if (SplatValue != NULL && SplatValue == CVJ->getSplatValue())
+ Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+ }
+ }
+ }
+ }
// Note that this procedure is incorrect for insert and extract element
// instructions (because combining these often results in a shuffle),
// but this cost is ignored (because insert and extract element
// instructions are assigned a zero depth factor and are not really
// fused in general).
- unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2);
+ unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2, Op1VK, Op2VK);
if (VCost > ICost + JCost)
return false;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b52970119a5..ecbab63acf0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5491,9 +5491,20 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
TargetTransformInfo::OK_AnyValue;
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_AnyValue;
+ Value *Op2 = I->getOperand(1);
- if (isa<ConstantInt>(I->getOperand(1)))
+ // Check for a splat of a constant or for a non uniform vector of constants.
+ if (isa<ConstantInt>(Op2))
Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+ else if (ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(Op2)) {
+ Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
+ if (CDV->getSplatValue() != NULL)
+ Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+ } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Op2)) {
+ Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
+ if (CV->getSplatValue() != NULL)
+ Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+ }
return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK);
}
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9eadfb58bea..80826bd6b17 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1044,12 +1044,26 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_UniformConstantValue;
- // Check whether all second operands are constant.
- for (unsigned i = 0; i < VL.size(); ++i)
- if (!isa<ConstantInt>(cast<Instruction>(VL[i])->getOperand(1))) {
+ // If all operands are exactly the same ConstantInt then set the
+ // operand kind to OK_UniformConstantValue.
+ // If instead not all operands are constants, then set the operand kind
+ // to OK_AnyValue. If all operands are constants but not the same,
+ // then set the operand kind to OK_NonUniformConstantValue.
+ ConstantInt *CInt = NULL;
+ for (unsigned i = 0; i < VL.size(); ++i) {
+ const Instruction *I = cast<Instruction>(VL[i]);
+ if (!isa<ConstantInt>(I->getOperand(1))) {
Op2VK = TargetTransformInfo::OK_AnyValue;
break;
}
+ if (i == 0) {
+ CInt = cast<ConstantInt>(I->getOperand(1));
+ continue;
+ }
+ if (Op2VK == TargetTransformInfo::OK_UniformConstantValue &&
+ CInt != cast<ConstantInt>(I->getOperand(1)))
+ Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
+ }
ScalarCost =
VecTy->getNumElements() *
OpenPOWER on IntegriCloud