diff options
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 11 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Processors.td | 10 |
4 files changed, 23 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 789187be2df..83ab1e80030 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -59,6 +59,12 @@ def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
         "Assuming f32 fma is at least as fast as mul + add",
         []>;
 
+def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
+        "HalfRate64Ops",
+        "true",
+        "Most fp64 instructions are half rate instead of quarter",
+        []>;
+
 // Some instructions do not support denormals despite this flag. Using
 // fp32 denormals also causes instructions to run at the double
 // precision rate for the device.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index c6af5b93d25..d17a7ac2f3d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -70,11 +70,14 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
       DumpCode(false), R600ALUInst(false), HasVertexCache(false),
       TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
       FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),
-      CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false),
-      EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true),
-      EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
+      HalfRate64Ops(false), CaymanISA(false), FlatAddressSpace(false),
+      FlatForGlobal(false), EnableIRStructurizer(true),
+      EnablePromoteAlloca(false),
+      EnableIfCvt(true), EnableLoadStoreOpt(false),
+      EnableUnsafeDSOffsetFolding(false),
       EnableXNACK(false),
-      WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
+      WavefrontSize(0), CFALUBug(false),
+      LocalMemorySize(0),
       EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
       GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
       IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index f43ce40212a..3a4266947b4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -69,6 +69,7 @@ private:
   bool FP64Denormals;
   bool FP32Denormals;
   bool FastFMAF32;
+  bool HalfRate64Ops;
   bool CaymanISA;
   bool FlatAddressSpace;
   bool FlatForGlobal;
@@ -158,6 +159,10 @@ public:
     return FastFMAF32;
   }
 
+  bool hasHalfRate64Ops() const {
+    return HalfRate64Ops;
+  }
+
   bool hasFlatAddressSpace() const {
     return FlatAddressSpace;
   }
diff --git a/llvm/lib/Target/AMDGPU/Processors.td b/llvm/lib/Target/AMDGPU/Processors.td
index 9eaeb58b9b4..24681ea0da5 100644
--- a/llvm/lib/Target/AMDGPU/Processors.td
+++ b/llvm/lib/Target/AMDGPU/Processors.td
@@ -84,11 +84,11 @@ def : Proc<"cayman", R600_VLIW4_Itin,
 //===----------------------------------------------------------------------===//
 
 def : ProcessorModel<"SI", SIFullSpeedModel,
-  [FeatureSouthernIslands, FeatureFastFMAF32]
+  [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops]
 >;
 
-def : ProcessorModel<"tahiti", SIFullSpeedModel,
-  [FeatureSouthernIslands, FeatureFastFMAF32]
+def : ProcessorModel<"tahiti", SIFullSpeedModel,
+  [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops]
 >;
 
 def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
@@ -116,8 +116,8 @@ def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
 >;
 
 def : ProcessorModel<"hawaii", SIFullSpeedModel,
-  [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32,
-   FeatureISAVersion7_0_1]
+  [FeatureSeaIslands, FeatureFastFMAF32, HalfRate64Ops,
+   FeatureLDSBankCount32, FeatureISAVersion7_0_1]
 >;
 
 def : ProcessorModel<"mullins", SIQuarterSpeedModel,
|