summary refs log tree commit diff stats
path: root/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 11
1 files changed, 9 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 8fcabeba5ed..37c8de93be5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -611,7 +611,7 @@ unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
}
bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
- const Function *Callee) const {
+ const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
const FeatureBitset &CallerBits =
TM.getSubtargetImpl(*Caller)->getFeatureBits();
@@ -620,7 +620,14 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
- return ((RealCallerBits & RealCalleeBits) == RealCalleeBits);
+ if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
+ return false;
+
+ // FIXME: dx10_clamp can just take the caller setting, but there seems to be
+ // no way to support merge for backend defined attributes.
+ AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
+ AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
+ return CallerMode.isInlineCompatible(CalleeMode);
}
void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
OpenPOWER on IntegriCloud