summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-12-22 03:55:35 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-12-22 03:55:35 +0000
commit: 770ec8680a0a1404fa7dc524ae2861ffdc5901eb (patch)
tree: ce4dc183f53a90dbaca0af167357d2b75920ec9d /llvm/lib/Target
parent: e27c6dfd317b4a9a4aa6aacabfed0fa5d2dcc26d (diff)
download: bcm5719-llvm-770ec8680a0a1404fa7dc524ae2861ffdc5901eb.tar.gz
download: bcm5719-llvm-770ec8680a0a1404fa7dc524ae2861ffdc5901eb.zip
AMDGPU: Form more FMAs if fusion is allowed
Extend the existing fadd/fsub->fmad combines to produce FMA if allowed. llvm-svn: 290311
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 75
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.h 1
2 files changed, 46 insertions, 30 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1572897630e..52cc0428e28 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3871,24 +3871,31 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
return SDValue();
}
+unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, EVT VT) const {
+ // Choose the fused multiply-add opcode for VT. FMAD is used for f32/f16 only
+ // when that type's denormals are disabled: v_mad_f32 never supports denormals.
+ if ((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
+ (VT == MVT::f16 && !Subtarget->hasFP16Denormals()))
+ return ISD::FMAD;
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ isFMAFasterThanFMulAndFAdd(VT)) {
+ return ISD::FMA; // fusion permitted by the target options checked above
+ }
+
+ return 0; // no fused opcode; callers test FusedOp != 0 before combining
+}
+
SDValue SITargetLowering::performFAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
return SDValue();
+ SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
- if (VT == MVT::f64)
- return SDValue();
-
- assert(!VT.isVector());
-
- // Only do this if we are not trying to support denormals. v_mad_f32 does
- // not support denormals ever.
- if ((VT == MVT::f32 && Subtarget->hasFP32Denormals()) ||
- (VT == MVT::f16 && Subtarget->hasFP16Denormals()))
- return SDValue();
+ assert(!VT.isVector());
- SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -3900,8 +3907,11 @@ SDValue SITargetLowering::performFAddCombine(SDNode *N,
if (LHS.getOpcode() == ISD::FADD) {
SDValue A = LHS.getOperand(0);
if (A == LHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
- return DAG.getNode(ISD::FMAD, SL, VT, Two, A, RHS);
+ unsigned FusedOp = getFusedOpcode(DAG, VT);
+ if (FusedOp != 0) {
+ const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
+ return DAG.getNode(FusedOp, SL, VT, Two, A, RHS);
+ }
}
}
@@ -3909,8 +3919,11 @@ SDValue SITargetLowering::performFAddCombine(SDNode *N,
if (RHS.getOpcode() == ISD::FADD) {
SDValue A = RHS.getOperand(0);
if (A == RHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
- return DAG.getNode(ISD::FMAD, SL, VT, Two, A, LHS);
+ unsigned FusedOp = getFusedOpcode(DAG, VT);
+ if (FusedOp != 0) {
+ const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
+ return DAG.getNode(FusedOp, SL, VT, Two, A, LHS);
+ }
}
}
@@ -3932,29 +3945,31 @@ SDValue SITargetLowering::performFSubCombine(SDNode *N,
//
// Only do this if we are not trying to support denormals. v_mad_f32 does
// not support denormals ever.
- if ((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
- (VT == MVT::f16 && !Subtarget->hasFP16Denormals())) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- if (LHS.getOpcode() == ISD::FADD) {
- // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
-
- SDValue A = LHS.getOperand(0);
- if (A == LHS.getOperand(1)) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (LHS.getOpcode() == ISD::FADD) {
+ // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
+ SDValue A = LHS.getOperand(0);
+ if (A == LHS.getOperand(1)) {
+ unsigned FusedOp = getFusedOpcode(DAG, VT);
+ if (FusedOp != 0){
const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
- return DAG.getNode(ISD::FMAD, SL, VT, Two, A, NegRHS);
+ return DAG.getNode(FusedOp, SL, VT, Two, A, NegRHS);
}
}
+ }
- if (RHS.getOpcode() == ISD::FADD) {
- // (fsub c, (fadd a, a)) -> mad -2.0, a, c
+ if (RHS.getOpcode() == ISD::FADD) {
+ // (fsub c, (fadd a, a)) -> mad -2.0, a, c
- SDValue A = RHS.getOperand(0);
- if (A == RHS.getOperand(1)) {
+ SDValue A = RHS.getOperand(0);
+ if (A == RHS.getOperand(1)) {
+ unsigned FusedOp = getFusedOpcode(DAG, VT);
+ if (FusedOp != 0){
const SDValue NegTwo = DAG.getConstantFP(-2.0, SL, VT);
- return DAG.getNode(ISD::FMAD, SL, VT, NegTwo, A, LHS);
+ return DAG.getNode(FusedOp, SL, VT, NegTwo, A, LHS);
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 5f3c4ce51bd..d7127193f05 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -83,6 +83,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ unsigned getFusedOpcode(const SelectionDAG &DAG, EVT VT) const;
SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
OpenPOWER on IntegriCloud