diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 19 |
1 file changed, 11 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0d44f3be539..a5066a0f669 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -412,7 +412,7 @@ int GCNTTIImpl::getArithmeticInstrCost(
 
     if (!Args.empty() && match(Args[0], PatternMatch::m_FPOne())) {
       // TODO: This is more complicated, unsafe flags etc.
-      if ((SLT == MVT::f32 && !ST->hasFP32Denormals()) ||
+      if ((SLT == MVT::f32 && !HasFP32Denormals) ||
           (SLT == MVT::f16 && ST->has16BitInsts())) {
         return LT.first * getQuarterRateInstrCost() * NElts;
       }
@@ -431,7 +431,7 @@ int GCNTTIImpl::getArithmeticInstrCost(
     if (SLT == MVT::f32 || SLT == MVT::f16) {
       int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();
 
-      if (!ST->hasFP32Denormals()) {
+      if (!HasFP32Denormals) {
         // FP mode switches.
         Cost += 2 * getFullRateInstrCost();
       }
@@ -671,10 +671,13 @@ unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
 bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
                                      const Function *Callee) const {
   const TargetMachine &TM = getTLI()->getTargetMachine();
-  const FeatureBitset &CallerBits =
-    TM.getSubtargetImpl(*Caller)->getFeatureBits();
-  const FeatureBitset &CalleeBits =
-    TM.getSubtargetImpl(*Callee)->getFeatureBits();
+  const GCNSubtarget *CallerST
+    = static_cast<const GCNSubtarget *>(TM.getSubtargetImpl(*Caller));
+  const GCNSubtarget *CalleeST
+    = static_cast<const GCNSubtarget *>(TM.getSubtargetImpl(*Callee));
+
+  const FeatureBitset &CallerBits = CallerST->getFeatureBits();
+  const FeatureBitset &CalleeBits = CalleeST->getFeatureBits();
 
   FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
   FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
@@ -683,8 +686,8 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
 
   // FIXME: dx10_clamp can just take the caller setting, but there seems to be
   // no way to support merge for backend defined attributes.
-  AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
-  AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
+  AMDGPU::SIModeRegisterDefaults CallerMode(*Caller, *CallerST);
+  AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST);
   return CallerMode.isInlineCompatible(CalleeMode);
 }
 