diff options
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPC.td | 10 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCSubtarget.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 57 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 1 |
5 files changed, 59 insertions, 12 deletions
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 5db76f22d8f..fbca91437ec 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -190,6 +190,13 @@ def FeatureP9Vector : SubtargetFeature<"power9-vector", "HasP9Vector", "true", "Enable POWER9 vector instructions", [FeatureISA3_0, FeatureP8Vector, FeatureP9Altivec]>; +// A separate feature for this even though it is equivalent to P9Vector +// because this is a feature of the implementation rather than the architecture +// and may go away with future CPU's. +def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units", + "VectorsUseTwoUnits", + "true", + "Vectors use two units">; // Since new processors generally contain a superset of features of those that // came before them, the idea is to make implementations of new processors @@ -222,7 +229,8 @@ def ProcessorFeatures { list<SubtargetFeature> Power8FeatureList = !listconcat(Power7FeatureList, Power8SpecificFeatures); list<SubtargetFeature> Power9SpecificFeatures = - [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0]; + [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0, + FeatureVectorsUseTwoUnits]; list<SubtargetFeature> Power9FeatureList = !listconcat(Power8FeatureList, Power9SpecificFeatures); } diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 02c73e16e8e..94b80dd5836 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -107,6 +107,7 @@ void PPCSubtarget::initializeEnvironment() { IsISA3_0 = false; UseLongCalls = false; SecurePlt = false; + VectorsUseTwoUnits = false; HasPOPCNTD = POPCNTD_Unavailable; } diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index cb2efeac5f6..ed9d5f1ae56 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -135,6 +135,7 @@ protected: bool IsISA3_0; bool UseLongCalls; bool SecurePlt; + bool VectorsUseTwoUnits; POPCNTDKind HasPOPCNTD; @@ -259,6 +260,7 @@ public: bool isPPC4xx() const { return IsPPC4xx; } bool isPPC6xx() const { return IsPPC6xx; } bool isSecurePlt() const {return SecurePlt; } + bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; } bool isE500() const { return IsE500; } bool isFeatureMFTB() const { return FeatureMFTB; } bool isDeprecatedDST() const { return DeprecatedDST; } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index e0c4322e104..990b2cc29b1 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -323,6 +323,32 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { return 2; } +// Adjust the cost of vector instructions on targets which there is overlap +// between the vector and scalar units, thereby reducing the overall throughput +// of vector code wrt. scalar code. +int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, + Type *Ty2) { + if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy()) + return Cost; + + std::pair<int, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1); + // If type legalization involves splitting the vector, we don't want to + // double the cost at every step - only the last step. + if (LT1.first != 1 || !LT1.second.isVector()) + return Cost; + int ISD = TLI->InstructionOpcodeToISD(Opcode); + if (TLI->isOperationExpand(ISD, LT1.second)) + return Cost; + + if (Ty2) { + std::pair<int, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2); + if (LT2.first != 1 || !LT2.second.isVector()) + return Cost; + } + + return Cost * 2; +} + int PPCTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -330,8 +356,9 @@ int PPCTTIImpl::getArithmeticInstrCost( assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); // Fallback to the default implementation. - return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, - Opd1PropInfo, Opd2PropInfo); + int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + Opd1PropInfo, Opd2PropInfo); + return vectorCostAdjustment(Cost, Opcode, Ty, nullptr); } int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, @@ -344,19 +371,22 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, // instruction). We need one such shuffle instruction for each actual // register (this is not true for arbitrary shuffles, but is true for the // structured types of shuffles covered by TTI::ShuffleKind). - return LT.first; + return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp, + nullptr); } int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I) { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); - return BaseT::getCastInstrCost(Opcode, Dst, Src); + int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src); + return vectorCostAdjustment(Cost, Opcode, Dst, Src); } int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I) { - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr); } int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { @@ -365,18 +395,22 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + int Cost = BaseT::getVectorInstrCost(Opcode, Val, Index); + Cost = vectorCostAdjustment(Cost, Opcode, Val, nullptr); + if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) { - // Double-precision scalars are already located in index #0. - if (Index == 0) + // Double-precision scalars are already located in index #0 (or #1 if LE). + if (ISD == ISD::EXTRACT_VECTOR_ELT && Index == ST->isLittleEndian() ? 1 : 0) return 0; - return BaseT::getVectorInstrCost(Opcode, Val, Index); + return Cost; + } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) { // Floating point scalars are already located in index #0. if (Index == 0) return 0; - return BaseT::getVectorInstrCost(Opcode, Val, Index); + return Cost; } // Estimated cost of a load-hit-store delay. This was obtained @@ -393,9 +427,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { // these need to be estimated as very costly. if (ISD == ISD::EXTRACT_VECTOR_ELT || ISD == ISD::INSERT_VECTOR_ELT) - return LHSPenalty + BaseT::getVectorInstrCost(Opcode, Val, Index); + return LHSPenalty + Cost; - return BaseT::getVectorInstrCost(Opcode, Val, Index); + return Cost; } int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, @@ -406,6 +440,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, "Invalid Opcode"); int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr); bool IsAltivecType = ST->hasAltivec() && (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 || diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index de369d2ee8c..a8d9d092854 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -70,6 +70,7 @@ public: unsigned getCacheLineSize(); unsigned getPrefetchDistance(); unsigned getMaxInterleaveFactor(unsigned VF); + int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2); int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, |