summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td29
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h7
3 files changed, 27 insertions, 18 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 46cd112ad49..0c5f9e72b52 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -206,12 +206,6 @@ def FeatureDPP : SubtargetFeature<"dpp",
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
-def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
- "FP16Denormals",
- "true",
- "Enable half precision denormal handling"
->;
-
// Some instructions do not support denormals despite this flag. Using
// fp32 denormals also causes instructions to run at the double
// precision rate for the device.
@@ -221,13 +215,30 @@ def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
"Enable single precision denormal handling"
>;
-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
- "FP64Denormals",
+// Denormal handling for fp64 and fp16 is controlled by the same
+// config register when fp16 supported.
+// TODO: Do we need a separate f16 setting when not legal?
+def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
+ "FP64FP16Denormals",
"true",
- "Enable double precision denormal handling",
+ "Enable double and half precision denormal handling",
[FeatureFP64]
>;
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+ "FP64FP16Denormals",
+ "true",
+ "Enable double and half precision denormal handling",
+ [FeatureFP64, FeatureFP64FP16Denormals]
+>;
+
+def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
+ "FP64FP16Denormals",
+ "true",
+ "Enable half precision denormal handling",
+ [FeatureFP64FP16Denormals]
+>;
+
def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
"FPExceptions",
"true",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 4e8529b57ab..4848b3b86bd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -41,9 +41,10 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
// for SI has the unhelpful behavior that it unsets everything else if you
// disable it.
- SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
+ SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
FullFS += "+flat-for-global,+unaligned-buffer-access,";
+
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
@@ -52,9 +53,8 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- FP16Denormals = false;
+ FP64FP16Denormals = false;
FP32Denormals = false;
- FP64Denormals = false;
}
// Set defaults if needed.
@@ -78,9 +78,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FastFMAF32(false),
HalfRate64Ops(false),
- FP16Denormals(false),
FP32Denormals(false),
- FP64Denormals(false),
+ FP64FP16Denormals(false),
FPExceptions(false),
FlatForGlobal(false),
UnalignedScratchAccess(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 26c4c34c988..c8414f97808 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -81,9 +81,8 @@ protected:
bool HalfRate64Ops;
// Dynamially set bits that enable features.
- bool FP16Denormals;
bool FP32Denormals;
- bool FP64Denormals;
+ bool FP64FP16Denormals;
bool FPExceptions;
bool FlatForGlobal;
bool UnalignedScratchAccess;
@@ -282,7 +281,7 @@ public:
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
bool hasFP16Denormals() const {
- return FP16Denormals;
+ return FP64FP16Denormals;
}
bool hasFP32Denormals() const {
@@ -290,7 +289,7 @@ public:
}
bool hasFP64Denormals() const {
- return FP64Denormals;
+ return FP64FP16Denormals;
}
bool hasFPExceptions() const {
OpenPOWER on IntegriCloud