summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/Utils
diff options
context:
space:
mode:
author Matt Arsenault <Matthew.Arsenault@amd.com> 2019-10-31 18:50:30 -0700
committer Matt Arsenault <arsenm2@gmail.com> 2019-11-19 19:55:43 +0530
commit db0ed3e429b55d1730d1ecc253b0643de7fca099 (patch)
tree 33f725b9778863f7c737075b4ca9d9e6c43ceb08 /llvm/lib/Target/AMDGPU/Utils
parent ea23b6428b88ed50a2cfc91b783f627fa139bb36 (diff)
download bcm5719-llvm-db0ed3e429b55d1730d1ecc253b0643de7fca099.tar.gz
bcm5719-llvm-db0ed3e429b55d1730d1ecc253b0643de7fca099.zip
AMDGPU: Refactor treatment of denormal mode
Start moving towards treating this as a property of the calling convention, and not the subtarget. The default denormal mode should not be part of the subtarget, and should be moved into a separate function attribute. This patch is still NFC (no functional change). The denormal mode remains a subtarget feature for now, but this patch makes the necessary changes to switch to using an attribute.
Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils')
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 6
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 18
2 files changed, 21 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index c72f93eb739..a4b216f583d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1303,7 +1303,8 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
return true;
}
-SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
+ const GCNSubtarget &ST) {
*this = getDefaultForCallingConv(F.getCallingConv());
StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
@@ -1314,6 +1315,9 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
= F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
if (!DX10ClampAttr.empty())
DX10Clamp = DX10ClampAttr == "true";
+
+ FP32Denormals = ST.hasFP32Denormals(F);
+ FP64FP16Denormals = ST.hasFP64FP16Denormals(F);
}
namespace {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f8c082060ff..05bb39235a4 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -676,7 +676,8 @@ struct SIModeRegisterDefaults {
FP32Denormals(true),
FP64FP16Denormals(true) {}
- SIModeRegisterDefaults(const Function &F);
+ // FIXME: Should not depend on the subtarget
+ SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
const bool IsCompute = AMDGPU::isCompute(CC);
@@ -695,10 +696,23 @@ struct SIModeRegisterDefaults {
FP64FP16Denormals == Other.FP64FP16Denormals;
}
+ /// Returns true if a mode flag does not block inlining: either the caller and
+ /// the callee agree on it, or it is enabled in the callee but disabled in the
+ /// caller.
+ ///
+ /// \p CallerMode is the flag's value in the function being inlined into;
+ /// \p CalleeMode is its value in the function being inlined.
+ static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
+ // The only permitted mismatch is callee-enabled / caller-disabled. Testing
+ // (CallerMode && !CalleeMode) here would accept the opposite direction and
+ // contradict the documented contract.
+ return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
+ }
+
// Returns whether a function with mode CalleeMode can be inlined into a function with this mode.
// FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should be able to override.
bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
- return *this == CalleeMode;
+ if (DX10Clamp != CalleeMode.DX10Clamp)
+ return false;
+ if (IEEE != CalleeMode.IEEE)
+ return false;
+
+ // DX10Clamp and IEEE must match exactly; the denormal flags only need to pass oneWayCompatible. The intent is to allow inlining denormals enabled into denormals flushed functions.
+ return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) &&
+ oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals);
}
};
OpenPOWER on IntegriCloud