summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-08-31 05:47:00 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-08-31 05:47:00 +0000
commit: 376f1bd73cdc8219b288400798e1f0d205d2ca77 (patch)
tree: 7b1a0ecc736e2b691dcdfcd65cbafb86c1fc216e /llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
parent: 285f265c0903f004e302892150862fdadae6e263 (diff)
download: bcm5719-llvm-376f1bd73cdc8219b288400798e1f0d205d2ca77.tar.gz
download: bcm5719-llvm-376f1bd73cdc8219b288400798e1f0d205d2ca77.zip
AMDGPU: Don't assert in TTI with fp32 denorms enabled
Also refine for f16 and rcp cases. llvm-svn: 312213
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 28
1 files changed, 25 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0930ed1c8a6..d607fc54e8e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/SubtargetFeature.h"
@@ -353,7 +354,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
// but the current lowering is also not entirely correct.
if (SLT == MVT::f64) {
int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost();
-
// Add cost of workaround.
if (ST->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
Cost += 3 * getFullRateInstrCost();
@@ -361,10 +361,32 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
return LT.first * Cost * NElts;
}
- // Assuming no fp32 denormals lowering.
+ if (!Args.empty() && match(Args[0], PatternMatch::m_FPOne())) {
+ // TODO: This is more complicated, unsafe flags etc.
+ if ((SLT == MVT::f32 && !ST->hasFP32Denormals()) ||
+ (SLT == MVT::f16 && ST->has16BitInsts())) {
+ return LT.first * getQuarterRateInstrCost() * NElts;
+ }
+ }
+
+ if (SLT == MVT::f16 && ST->has16BitInsts()) {
+ // 2 x v_cvt_f32_f16
+ // f32 rcp
+ // f32 fmul
+ // v_cvt_f16_f32
+ // f16 div_fixup
+ int Cost = 4 * getFullRateInstrCost() + 2 * getQuarterRateInstrCost();
+ return LT.first * Cost * NElts;
+ }
+
if (SLT == MVT::f32 || SLT == MVT::f16) {
- assert(!ST->hasFP32Denormals() && "will change when supported");
int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();
+
+ if (!ST->hasFP32Denormals()) {
+ // FP mode switches.
+ Cost += 2 * getFullRateInstrCost();
+ }
+
return LT.first * NElts * Cost;
}
break;
OpenPOWER on IntegriCloud