diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-03-25 01:00:32 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-03-25 01:00:32 +0000 |
commit | 9651813ee0d9633e12accb7ae673a8a3b944f35c (patch) | |
tree | 2101cca435e81795769c32e76b5c88c2202625c0 /llvm/lib | |
parent | efe16c8eb4fa5450fba4a1465e01620ba6c2c402 (diff) | |
download | bcm5719-llvm-9651813ee0d9633e12accb7ae673a8a3b944f35c.tar.gz bcm5719-llvm-9651813ee0d9633e12accb7ae673a8a3b944f35c.zip |
AMDGPU: Partially implement getArithmeticInstrCost for FP ops
llvm-svn: 264374
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 64 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 31 |
2 files changed, 94 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 39be33e4a2c..0d107af59bd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -29,6 +29,7 @@ using namespace llvm; #define DEBUG_TYPE "AMDGPUtti" + void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) { UP.Threshold = 300; // Twice the default. @@ -84,6 +85,69 @@ unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) { return 64; } +int AMDGPUTTIImpl::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo) { + + EVT OrigTy = TLI->getValueType(DL, Ty); + if (!OrigTy.isSimple()) { + return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); + } + + // Legalize the type. + std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); + int ISD = TLI->InstructionOpcodeToISD(Opcode); + + // Because we don't have any legal vector operations, but the legal types, we + // need to account for split vectors. + unsigned NElts = LT.second.isVector() ? + LT.second.getVectorNumElements() : 1; + + MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy; + + switch (ISD) { + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + if (SLT == MVT::f64) + return LT.first * NElts * get64BitInstrCost(); + + if (SLT == MVT::f32 || SLT == MVT::f16) + return LT.first * NElts * getFullRateInstrCost(); + break; + + case ISD::FDIV: + case ISD::FREM: + // FIXME: frem should be handled separately. The fdiv in it is most of it, + // but the current lowering is also not entirely correct. + if (SLT == MVT::f64) { + int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost(); + + // Add cost of workaround. + if (ST->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) + Cost += 3 * getFullRateInstrCost(); + + return LT.first * Cost * NElts; + } + + // Assuming no fp32 denormals lowering. + if (SLT == MVT::f32 || SLT == MVT::f16) { + assert(!ST->hasFP32Denormals() && "will change when supported"); + int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost(); + return LT.first * NElts * Cost; + } + + break; + default: + break; + } + + return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); +} + unsigned AMDGPUTTIImpl::getCFInstrCost(unsigned Opcode) { // XXX - For some reason this isn't called for switch. switch (Opcode) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 05fe2114512..036f627e466 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -21,9 +21,9 @@ #include "AMDGPUTargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" -#include "llvm/Target/TargetLowering.h" namespace llvm { +class AMDGPUTargetLowering; class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> { typedef BasicTTIImplBase<AMDGPUTTIImpl> BaseT; @@ -36,6 +36,28 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> { const AMDGPUSubtarget *getST() const { return ST; } const AMDGPUTargetLowering *getTLI() const { return TLI; } + + static inline int getFullRateInstrCost() { + return TargetTransformInfo::TCC_Basic; + } + + static inline int getHalfRateInstrCost() { + return 2 * TargetTransformInfo::TCC_Basic; + } + + // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe + // should be 2 or 4. + static inline int getQuarterRateInstrCost() { + return 3 * TargetTransformInfo::TCC_Basic; + } + + // On some parts, normal fp64 operations are half rate, and others + // quarter. This also applies to some integer operations. + inline int get64BitInstrCost() const { + return ST->hasHalfRate64Ops() ? + getHalfRateInstrCost() : getQuarterRateInstrCost(); + } + public: explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const DataLayout &DL) : BaseT(TM, DL), ST(TM->getSubtargetImpl()), @@ -61,6 +83,13 @@ public: unsigned getRegisterBitWidth(bool Vector); unsigned getMaxInterleaveFactor(unsigned VF); + int getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); + unsigned getCFInstrCost(unsigned Opcode); int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); |