diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-08-31 05:47:00 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-08-31 05:47:00 +0000 |
commit | 376f1bd73cdc8219b288400798e1f0d205d2ca77 (patch) | |
tree | 7b1a0ecc736e2b691dcdfcd65cbafb86c1fc216e /llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | |
parent | 285f265c0903f004e302892150862fdadae6e263 (diff) | |
download | bcm5719-llvm-376f1bd73cdc8219b288400798e1f0d205d2ca77.tar.gz bcm5719-llvm-376f1bd73cdc8219b288400798e1f0d205d2ca77.zip |
AMDGPU: Don't assert in TTI with fp32 denorms enabled
Also refine for f16 and rcp cases.
llvm-svn: 312213
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 28 |
1 files changed, 25 insertions, 3 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0930ed1c8a6..d607fc54e8e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -35,6 +35,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/MC/SubtargetFeature.h"
@@ -353,7 +354,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
     // but the current lowering is also not entirely correct.
     if (SLT == MVT::f64) {
       int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost();
-
       // Add cost of workaround.
       if (ST->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
         Cost += 3 * getFullRateInstrCost();
@@ -361,10 +361,32 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
       return LT.first * Cost * NElts;
     }
 
-    // Assuming no fp32 denormals lowering.
+    if (!Args.empty() && match(Args[0], PatternMatch::m_FPOne())) {
+      // TODO: This is more complicated, unsafe flags etc.
+      if ((SLT == MVT::f32 && !ST->hasFP32Denormals()) ||
+          (SLT == MVT::f16 && ST->has16BitInsts())) {
+        return LT.first * getQuarterRateInstrCost() * NElts;
+      }
+    }
+
+    if (SLT == MVT::f16 && ST->has16BitInsts()) {
+      // 2 x v_cvt_f32_f16
+      // f32 rcp
+      // f32 fmul
+      // v_cvt_f16_f32
+      // f16 div_fixup
+      int Cost = 4 * getFullRateInstrCost() + 2 * getQuarterRateInstrCost();
+      return LT.first * Cost * NElts;
+    }
+
     if (SLT == MVT::f32 || SLT == MVT::f16) {
-      assert(!ST->hasFP32Denormals() && "will change when supported");
       int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();
+
+      if (!ST->hasFP32Denormals()) {
+        // FP mode switches.
+        Cost += 2 * getFullRateInstrCost();
+      }
+
       return LT.first * NElts * Cost;
     }
     break;
```