diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-22 03:55:35 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-22 03:55:35 +0000 |
| commit | 770ec8680a0a1404fa7dc524ae2861ffdc5901eb (patch) | |
| tree | ce4dc183f53a90dbaca0af167357d2b75920ec9d /llvm/lib/Target | |
| parent | e27c6dfd317b4a9a4aa6aacabfed0fa5d2dcc26d (diff) | |
| download | bcm5719-llvm-770ec8680a0a1404fa7dc524ae2861ffdc5901eb.tar.gz bcm5719-llvm-770ec8680a0a1404fa7dc524ae2861ffdc5901eb.zip | |
AMDGPU: Form more FMAs if fusion is allowed
Extend the existing fadd/fsub -> fmad combines to also produce
FMA when FMAD cannot be used and FP operation fusion is allowed.
llvm-svn: 290311
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 75 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 |
2 files changed, 46 insertions, 30 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1572897630e..52cc0428e28 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3871,24 +3871,31 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N, return SDValue(); } +unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, EVT VT) const { + // Only do this if we are not trying to support denormals. v_mad_f32 does not + // support denormals ever. + if ((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) || + (VT == MVT::f16 && !Subtarget->hasFP16Denormals())) + return ISD::FMAD; + + const TargetOptions &Options = DAG.getTarget().Options; + if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + isFMAFasterThanFMulAndFAdd(VT)) { + return ISD::FMA; + } + + return 0; +} + SDValue SITargetLowering::performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) return SDValue(); + SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); - if (VT == MVT::f64) - return SDValue(); - - assert(!VT.isVector()); - - // Only do this if we are not trying to support denormals. v_mad_f32 does - // not support denormals ever. 
- if ((VT == MVT::f32 && Subtarget->hasFP32Denormals()) || - (VT == MVT::f16 && Subtarget->hasFP16Denormals())) - return SDValue(); + assert(!VT.isVector()); - SelectionDAG &DAG = DCI.DAG; SDLoc SL(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -3900,8 +3907,11 @@ SDValue SITargetLowering::performFAddCombine(SDNode *N, if (LHS.getOpcode() == ISD::FADD) { SDValue A = LHS.getOperand(0); if (A == LHS.getOperand(1)) { - const SDValue Two = DAG.getConstantFP(2.0, SL, VT); - return DAG.getNode(ISD::FMAD, SL, VT, Two, A, RHS); + unsigned FusedOp = getFusedOpcode(DAG, VT); + if (FusedOp != 0) { + const SDValue Two = DAG.getConstantFP(2.0, SL, VT); + return DAG.getNode(FusedOp, SL, VT, Two, A, RHS); + } } } @@ -3909,8 +3919,11 @@ SDValue SITargetLowering::performFAddCombine(SDNode *N, if (RHS.getOpcode() == ISD::FADD) { SDValue A = RHS.getOperand(0); if (A == RHS.getOperand(1)) { - const SDValue Two = DAG.getConstantFP(2.0, SL, VT); - return DAG.getNode(ISD::FMAD, SL, VT, Two, A, LHS); + unsigned FusedOp = getFusedOpcode(DAG, VT); + if (FusedOp != 0) { + const SDValue Two = DAG.getConstantFP(2.0, SL, VT); + return DAG.getNode(FusedOp, SL, VT, Two, A, LHS); + } } } @@ -3932,29 +3945,31 @@ SDValue SITargetLowering::performFSubCombine(SDNode *N, // // Only do this if we are not trying to support denormals. v_mad_f32 does // not support denormals ever. 
- if ((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) || - (VT == MVT::f16 && !Subtarget->hasFP16Denormals())) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - if (LHS.getOpcode() == ISD::FADD) { - // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c) - - SDValue A = LHS.getOperand(0); - if (A == LHS.getOperand(1)) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (LHS.getOpcode() == ISD::FADD) { + // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c) + SDValue A = LHS.getOperand(0); + if (A == LHS.getOperand(1)) { + unsigned FusedOp = getFusedOpcode(DAG, VT); + if (FusedOp != 0){ const SDValue Two = DAG.getConstantFP(2.0, SL, VT); SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); - return DAG.getNode(ISD::FMAD, SL, VT, Two, A, NegRHS); + return DAG.getNode(FusedOp, SL, VT, Two, A, NegRHS); } } + } - if (RHS.getOpcode() == ISD::FADD) { - // (fsub c, (fadd a, a)) -> mad -2.0, a, c + if (RHS.getOpcode() == ISD::FADD) { + // (fsub c, (fadd a, a)) -> mad -2.0, a, c - SDValue A = RHS.getOperand(0); - if (A == RHS.getOperand(1)) { + SDValue A = RHS.getOperand(0); + if (A == RHS.getOperand(1)) { + unsigned FusedOp = getFusedOpcode(DAG, VT); + if (FusedOp != 0){ const SDValue NegTwo = DAG.getConstantFP(-2.0, SL, VT); - return DAG.getNode(ISD::FMAD, SL, VT, NegTwo, A, LHS); + return DAG.getNode(FusedOp, SL, VT, NegTwo, A, LHS); } } } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 5f3c4ce51bd..d7127193f05 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -83,6 +83,7 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const; + unsigned getFusedOpcode(const SelectionDAG &DAG, EVT VT) const; SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performSetCCCombine(SDNode 
*N, DAGCombinerInfo &DCI) const; |

