summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 19
1 file changed, 11 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0d44f3be539..a5066a0f669 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -412,7 +412,7 @@ int GCNTTIImpl::getArithmeticInstrCost(
if (!Args.empty() && match(Args[0], PatternMatch::m_FPOne())) {
// TODO: This is more complicated, unsafe flags etc.
- if ((SLT == MVT::f32 && !ST->hasFP32Denormals()) ||
+ if ((SLT == MVT::f32 && !HasFP32Denormals) ||
(SLT == MVT::f16 && ST->has16BitInsts())) {
return LT.first * getQuarterRateInstrCost() * NElts;
}
@@ -431,7 +431,7 @@ int GCNTTIImpl::getArithmeticInstrCost(
if (SLT == MVT::f32 || SLT == MVT::f16) {
int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();
- if (!ST->hasFP32Denormals()) {
+ if (!HasFP32Denormals) {
// FP mode switches.
Cost += 2 * getFullRateInstrCost();
}
@@ -671,10 +671,13 @@ unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
- const FeatureBitset &CallerBits =
- TM.getSubtargetImpl(*Caller)->getFeatureBits();
- const FeatureBitset &CalleeBits =
- TM.getSubtargetImpl(*Callee)->getFeatureBits();
+ const GCNSubtarget *CallerST
+ = static_cast<const GCNSubtarget *>(TM.getSubtargetImpl(*Caller));
+ const GCNSubtarget *CalleeST
+ = static_cast<const GCNSubtarget *>(TM.getSubtargetImpl(*Callee));
+
+ const FeatureBitset &CallerBits = CallerST->getFeatureBits();
+ const FeatureBitset &CalleeBits = CalleeST->getFeatureBits();
FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
@@ -683,8 +686,8 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
// FIXME: dx10_clamp can just take the caller setting, but there seems to be
// no way to support merge for backend defined attributes.
- AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
- AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
+ AMDGPU::SIModeRegisterDefaults CallerMode(*Caller, *CallerST);
+ AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST);
return CallerMode.isInlineCompatible(CalleeMode);
}
OpenPOWER on IntegriCloud