summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author Jan Vesely <jan.vesely@rutgers.edu> 2017-12-04 23:07:28 +0000
committer Jan Vesely <jan.vesely@rutgers.edu> 2017-12-04 23:07:28 +0000
commit 39aeab4f306d822688957d3118e2bc2a779147e3 (patch)
tree 58198d629733551981866e56397ee1ec7dbdb9e8 /llvm/lib/Target
parent 98360946bbfdc80a3d1038ccf6cce442fbbee890 (diff)
download bcm5719-llvm-39aeab4f306d822688957d3118e2bc2a779147e3.tar.gz
bcm5719-llvm-39aeab4f306d822688957d3118e2bc2a779147e3.zip
AMDGPU/EG: Add a new FeatureFMA and use it to selectively enable FMA instruction
Only used by pre-GCN targets.
v2: fix predicate setting for FMA_Common
Differential Revision: https://reviews.llvm.org/D40692
llvm-svn: 319712
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPU.td | 6
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 1
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5
-rw-r--r-- llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 5
-rw-r--r-- llvm/lib/Target/AMDGPU/R600Instructions.td | 5
-rw-r--r-- llvm/lib/Target/AMDGPU/R600Processors.td | 4
6 files changed, 23 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 71d93444bdf..3bf5c8885f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -19,6 +19,12 @@ def FeatureFP64 : SubtargetFeature<"fp64",
"Enable double precision operations"
>;
+def FeatureFMA : SubtargetFeature<"fmaf",
+ "FMA",
+ "true",
+ "Enable single precision FMA (not as fast as mul+add, but fused)"
+>;
+
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
"FastFMAF32",
"true",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index c14679701c0..31f728b0c22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -49,6 +49,7 @@ def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">;
def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">;
def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
+def FMA : Predicate<"Subtarget->hasFMA()">;
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 99d0e191dd7..456bbdb28e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -140,6 +140,7 @@ protected:
// Subtarget statically properties set by tablegen
bool FP64;
+ bool FMA;
bool IsGCN;
bool GCN3Encoding;
bool CIInsts;
@@ -348,6 +349,10 @@ public:
return CaymanISA;
}
+ bool hasFMA() const {
+ return FMA;
+ }
+
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 0d62c5a32d4..66291d0be4e 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -211,6 +211,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ if (!Subtarget->hasFMA()) {
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ }
+
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td
index f422f441af4..801e4e61fca 100644
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -989,7 +989,10 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
class FMA_Common <bits<5> inst> : R600_3OP <
inst, "FMA",
[(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU
->;
+>
+{
+ let OtherPredicates = [FMA];
+}
class CNDE_Common <bits<5> inst> : R600_3OP <
inst, "CNDE",
diff --git a/llvm/lib/Target/AMDGPU/R600Processors.td b/llvm/lib/Target/AMDGPU/R600Processors.td
index aaca7a1b183..89194dc1bdf 100644
--- a/llvm/lib/Target/AMDGPU/R600Processors.td
+++ b/llvm/lib/Target/AMDGPU/R600Processors.td
@@ -53,7 +53,7 @@ def : Processor<"cedar", R600_VLIW5_Itin,
>;
def : Processor<"cypress", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache]
+ [FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache, FeatureFMA]
>;
def : Processor<"juniper", R600_VLIW5_Itin,
@@ -82,7 +82,7 @@ def : Processor<"caicos", R600_VLIW5_Itin,
>;
def : Processor<"cayman", R600_VLIW4_Itin,
- [FeatureNorthernIslands, FeatureCaymanISA]
+ [FeatureNorthernIslands, FeatureCaymanISA, FeatureFMA]
>;
def : Processor<"turks", R600_VLIW5_Itin,
OpenPOWER on IntegriCloud