summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h')
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h18
1 files changed, 16 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f8c082060ff..05bb39235a4 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -676,7 +676,8 @@ struct SIModeRegisterDefaults {
FP32Denormals(true),
FP64FP16Denormals(true) {}
- SIModeRegisterDefaults(const Function &F);
+ // FIXME: Should not depend on the subtarget
+ SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
const bool IsCompute = AMDGPU::isCompute(CC);
@@ -695,10 +696,23 @@ struct SIModeRegisterDefaults {
FP64FP16Denormals == Other.FP64FP16Denormals;
}
+ /// Returns true if the modes match, or if the flag is enabled in the callee
+ /// but disabled in the caller; the reverse mismatch is not compatible.
+ static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
+ return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
+ }
+
// FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
// be able to override.
bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
- return *this == CalleeMode;
+ if (DX10Clamp != CalleeMode.DX10Clamp)
+ return false;
+ if (IEEE != CalleeMode.IEEE)
+ return false;
+
+ // Allow inlining a denormals-enabled callee into a denormals-flushed caller.
+ return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) &&
+ oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals);
}
};
OpenPOWER on IntegriCloud