diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-10-31 18:50:30 -0700 |
---|---|---|
committer | Matt Arsenault <arsenm2@gmail.com> | 2019-11-19 19:55:43 +0530 |
commit | db0ed3e429b55d1730d1ecc253b0643de7fca099 (patch) | |
tree | 33f725b9778863f7c737075b4ca9d9e6c43ceb08 /llvm/lib/Target/AMDGPU/Utils | |
parent | ea23b6428b88ed50a2cfc91b783f627fa139bb36 (diff) | |
download | bcm5719-llvm-db0ed3e429b55d1730d1ecc253b0643de7fca099.tar.gz bcm5719-llvm-db0ed3e429b55d1730d1ecc253b0643de7fca099.zip |
AMDGPU: Refactor treatment of denormal mode
Start moving towards treating this as a property of the calling
convention, and not the subtarget. The default denormal mode should
not be part of the subtarget; instead, it should be moved into a
separate function attribute.
This patch is still NFC (no functional change). The denormal mode
remains a subtarget feature for now, but this patch makes the changes
necessary to switch to using an attribute.
Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils')
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 18 |
2 files changed, 21 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index c72f93eb739..a4b216f583d 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1303,7 +1303,8 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, return true; } -SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) { +SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F, + const GCNSubtarget &ST) { *this = getDefaultForCallingConv(F.getCallingConv()); StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString(); @@ -1314,6 +1315,9 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) { = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString(); if (!DX10ClampAttr.empty()) DX10Clamp = DX10ClampAttr == "true"; + + FP32Denormals = ST.hasFP32Denormals(F); + FP64FP16Denormals = ST.hasFP64FP16Denormals(F); } namespace { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index f8c082060ff..05bb39235a4 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -676,7 +676,8 @@ struct SIModeRegisterDefaults { FP32Denormals(true), FP64FP16Denormals(true) {} - SIModeRegisterDefaults(const Function &F); + // FIXME: Should not depend on the subtarget + SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST); static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) { const bool IsCompute = AMDGPU::isCompute(CC); @@ -695,10 +696,23 @@ struct SIModeRegisterDefaults { FP64FP16Denormals == Other.FP64FP16Denormals; } + /// Returns true if a flag is compatible if it's enabled in the callee, but + /// disabled in the caller. 
+ static bool oneWayCompatible(bool CallerMode, bool CalleeMode) { + return CallerMode == CalleeMode || (CallerMode && !CalleeMode); + } + // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should // be able to override. bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const { - return *this == CalleeMode; + if (DX10Clamp != CalleeMode.DX10Clamp) + return false; + if (IEEE != CalleeMode.IEEE) + return false; + + // Allow inlining denormals enabled into denormals flushed functions. + return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) && + oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals); } }; |