summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/PowerPC/PPC.td10
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.cpp1
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.h2
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp57
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h1
5 files changed, 59 insertions, 12 deletions
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 5db76f22d8f..fbca91437ec 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -190,6 +190,13 @@ def FeatureP9Vector : SubtargetFeature<"power9-vector", "HasP9Vector", "true",
"Enable POWER9 vector instructions",
[FeatureISA3_0, FeatureP8Vector,
FeatureP9Altivec]>;
+// A separate feature for this even though it is equivalent to P9Vector
+// because this is a feature of the implementation rather than the architecture
+// and may go away with future CPU's.
+def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units",
+ "VectorsUseTwoUnits",
+ "true",
+ "Vectors use two units">;
// Since new processors generally contain a superset of features of those that
// came before them, the idea is to make implementations of new processors
@@ -222,7 +229,8 @@ def ProcessorFeatures {
list<SubtargetFeature> Power8FeatureList =
!listconcat(Power7FeatureList, Power8SpecificFeatures);
list<SubtargetFeature> Power9SpecificFeatures =
- [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0];
+ [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0,
+ FeatureVectorsUseTwoUnits];
list<SubtargetFeature> Power9FeatureList =
!listconcat(Power8FeatureList, Power9SpecificFeatures);
}
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 02c73e16e8e..94b80dd5836 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -107,6 +107,7 @@ void PPCSubtarget::initializeEnvironment() {
IsISA3_0 = false;
UseLongCalls = false;
SecurePlt = false;
+ VectorsUseTwoUnits = false;
HasPOPCNTD = POPCNTD_Unavailable;
}
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index cb2efeac5f6..ed9d5f1ae56 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -135,6 +135,7 @@ protected:
bool IsISA3_0;
bool UseLongCalls;
bool SecurePlt;
+ bool VectorsUseTwoUnits;
POPCNTDKind HasPOPCNTD;
@@ -259,6 +260,7 @@ public:
bool isPPC4xx() const { return IsPPC4xx; }
bool isPPC6xx() const { return IsPPC6xx; }
bool isSecurePlt() const {return SecurePlt; }
+ bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; }
bool isE500() const { return IsE500; }
bool isFeatureMFTB() const { return FeatureMFTB; }
bool isDeprecatedDST() const { return DeprecatedDST; }
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index e0c4322e104..990b2cc29b1 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -323,6 +323,32 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
return 2;
}
+// Adjust the cost of vector instructions on targets which there is overlap
+// between the vector and scalar units, thereby reducing the overall throughput
+// of vector code wrt. scalar code.
+int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1,
+ Type *Ty2) {
+ if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
+ return Cost;
+
+ std::pair<int, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
+ // If type legalization involves splitting the vector, we don't want to
+ // double the cost at every step - only the last step.
+ if (LT1.first != 1 || !LT1.second.isVector())
+ return Cost;
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ if (TLI->isOperationExpand(ISD, LT1.second))
+ return Cost;
+
+ if (Ty2) {
+ std::pair<int, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
+ if (LT2.first != 1 || !LT2.second.isVector())
+ return Cost;
+ }
+
+ return Cost * 2;
+}
+
int PPCTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
@@ -330,8 +356,9 @@ int PPCTTIImpl::getArithmeticInstrCost(
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
// Fallback to the default implementation.
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
- Opd1PropInfo, Opd2PropInfo);
+ int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
+ return vectorCostAdjustment(Cost, Opcode, Ty, nullptr);
}
int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
@@ -344,19 +371,22 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// instruction). We need one such shuffle instruction for each actual
// register (this is not true for arbitrary shuffles, but is true for the
// structured types of shuffles covered by TTI::ShuffleKind).
- return LT.first;
+ return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp,
+ nullptr);
}
int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
- return BaseT::getCastInstrCost(Opcode, Dst, Src);
+ int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src);
+ return vectorCostAdjustment(Cost, Opcode, Dst, Src);
}
int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I) {
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
}
int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -365,18 +395,22 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ int Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
+ Cost = vectorCostAdjustment(Cost, Opcode, Val, nullptr);
+
if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
- // Double-precision scalars are already located in index #0.
- if (Index == 0)
+ // Double-precision scalars are already located in index #0 (or #1 if LE).
+ if (ISD == ISD::EXTRACT_VECTOR_ELT && Index == ST->isLittleEndian() ? 1 : 0)
return 0;
- return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ return Cost;
+
} else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
// Floating point scalars are already located in index #0.
if (Index == 0)
return 0;
- return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ return Cost;
}
// Estimated cost of a load-hit-store delay. This was obtained
@@ -393,9 +427,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
// these need to be estimated as very costly.
if (ISD == ISD::EXTRACT_VECTOR_ELT ||
ISD == ISD::INSERT_VECTOR_ELT)
- return LHSPenalty + BaseT::getVectorInstrCost(Opcode, Val, Index);
+ return LHSPenalty + Cost;
- return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ return Cost;
}
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
@@ -406,6 +440,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
"Invalid Opcode");
int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);
bool IsAltivecType = ST->hasAltivec() &&
(LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index de369d2ee8c..a8d9d092854 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -70,6 +70,7 @@ public:
unsigned getCacheLineSize();
unsigned getPrefetchDistance();
unsigned getMaxInterleaveFactor(unsigned VF);
+ int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2);
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
OpenPOWER on IntegriCloud