diff options
| author | Jan Vesely <jan.vesely@rutgers.edu> | 2017-12-04 23:07:28 +0000 |
|---|---|---|
| committer | Jan Vesely <jan.vesely@rutgers.edu> | 2017-12-04 23:07:28 +0000 |
| commit | 39aeab4f306d822688957d3118e2bc2a779147e3 (patch) | |
| tree | 58198d629733551981866e56397ee1ec7dbdb9e8 /llvm/lib/Target | |
| parent | 98360946bbfdc80a3d1038ccf6cce442fbbee890 (diff) | |
| download | bcm5719-llvm-39aeab4f306d822688957d3118e2bc2a779147e3.tar.gz bcm5719-llvm-39aeab4f306d822688957d3118e2bc2a779147e3.zip | |
AMDGPU/EG: Add a new FeatureFMA and use it to selectively enable FMA instruction
Only used by pre-GCN targets
v2: fix predicate setting for FMA_Common
Differential Revision: https://reviews.llvm.org/D40692
llvm-svn: 319712
Diffstat (limited to 'llvm/lib/Target')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 6 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 1 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 5 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/R600Instructions.td | 5 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/R600Processors.td | 4 |
6 files changed, 23 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 71d93444bdf..3bf5c8885f5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -19,6 +19,12 @@ def FeatureFP64 : SubtargetFeature<"fp64", "Enable double precision operations" >; +def FeatureFMA : SubtargetFeature<"fmaf", + "FMA", + "true", + "Enable single precision FMA (not as fast as mul+add, but fused)" +>; + def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf", "FastFMAF32", "true", diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index c14679701c0..31f728b0c22 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -49,6 +49,7 @@ def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">; def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">; def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">; def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; +def FMA : Predicate<"Subtarget->hasFMA()">; def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>; def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 99d0e191dd7..456bbdb28e4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -140,6 +140,7 @@ protected: // Subtarget statically properties set by tablegen bool FP64; + bool FMA; bool IsGCN; bool GCN3Encoding; bool CIInsts; @@ -348,6 +349,10 @@ public: return CaymanISA; } + bool hasFMA() const { + return FMA; + } + TrapHandlerAbi getTrapHandlerAbi() const { return isAmdHsaOS() ? 
TrapHandlerAbiHsa : TrapHandlerAbiNone; } diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 0d62c5a32d4..66291d0be4e 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -211,6 +211,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); + if (!Subtarget->hasFMA()) { + setOperationAction(ISD::FMA, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + } + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index f422f441af4..801e4e61fca 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -989,7 +989,10 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP < class FMA_Common <bits<5> inst> : R600_3OP < inst, "FMA", [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU ->; +> +{ + let OtherPredicates = [FMA]; +} class CNDE_Common <bits<5> inst> : R600_3OP < inst, "CNDE", diff --git a/llvm/lib/Target/AMDGPU/R600Processors.td b/llvm/lib/Target/AMDGPU/R600Processors.td index aaca7a1b183..89194dc1bdf 100644 --- a/llvm/lib/Target/AMDGPU/R600Processors.td +++ b/llvm/lib/Target/AMDGPU/R600Processors.td @@ -53,7 +53,7 @@ def : Processor<"cedar", R600_VLIW5_Itin, >; def : Processor<"cypress", R600_VLIW5_Itin, - [FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache] + [FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache, FeatureFMA] >; def : Processor<"juniper", R600_VLIW5_Itin, @@ -82,7 +82,7 @@ def : Processor<"caicos", R600_VLIW5_Itin, >; def : Processor<"cayman", R600_VLIW4_Itin, - [FeatureNorthernIslands, FeatureCaymanISA] + [FeatureNorthernIslands, FeatureCaymanISA, FeatureFMA] >; def 
: Processor<"turks", R600_VLIW5_Itin,

