diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-03-25 01:00:32 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-03-25 01:00:32 +0000 |
commit | 9651813ee0d9633e12accb7ae673a8a3b944f35c (patch) | |
tree | 2101cca435e81795769c32e76b5c88c2202625c0 /llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | |
parent | efe16c8eb4fa5450fba4a1465e01620ba6c2c402 (diff) | |
download | bcm5719-llvm-9651813ee0d9633e12accb7ae673a8a3b944f35c.tar.gz bcm5719-llvm-9651813ee0d9633e12accb7ae673a8a3b944f35c.zip |
AMDGPU: Partially implement getArithmeticInstrCost for FP ops
llvm-svn: 264374
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 64 |
1 files changed, 64 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 39be33e4a2c..0d107af59bd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -29,6 +29,7 @@ using namespace llvm; #define DEBUG_TYPE "AMDGPUtti" + void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) { UP.Threshold = 300; // Twice the default. @@ -84,6 +85,69 @@ unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) { return 64; } +int AMDGPUTTIImpl::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo) { + + EVT OrigTy = TLI->getValueType(DL, Ty); + if (!OrigTy.isSimple()) { + return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); + } + + // Legalize the type. + std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); + int ISD = TLI->InstructionOpcodeToISD(Opcode); + + // Because we don't have any legal vector operations, but the legal types, we + // need to account for split vectors. + unsigned NElts = LT.second.isVector() ? + LT.second.getVectorNumElements() : 1; + + MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy; + + switch (ISD) { + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + if (SLT == MVT::f64) + return LT.first * NElts * get64BitInstrCost(); + + if (SLT == MVT::f32 || SLT == MVT::f16) + return LT.first * NElts * getFullRateInstrCost(); + break; + + case ISD::FDIV: + case ISD::FREM: + // FIXME: frem should be handled separately. The fdiv in it is most of it, + // but the current lowering is also not entirely correct. + if (SLT == MVT::f64) { + int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost(); + + // Add cost of workaround. + if (ST->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) + Cost += 3 * getFullRateInstrCost(); + + return LT.first * Cost * NElts; + } + + // Assuming no fp32 denormals lowering. + if (SLT == MVT::f32 || SLT == MVT::f16) { + assert(!ST->hasFP32Denormals() && "will change when supported"); + int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost(); + return LT.first * NElts * Cost; + } + + break; + default: + break; + } + + return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); +} + unsigned AMDGPUTTIImpl::getCFInstrCost(unsigned Opcode) { // XXX - For some reason this isn't called for switch. switch (Opcode) { |