diff options
author | Olivier Sallenave <ohsallen@us.ibm.com> | 2015-01-07 20:54:17 +0000 |
---|---|---|
committer | Olivier Sallenave <ohsallen@us.ibm.com> | 2015-01-07 20:54:17 +0000 |
commit | 045153299658ed71708347f6cb0c14ccdcdf2de2 (patch) | |
tree | 1ba8cb193f521ba0a2ea3f6f19c27e0ab47728ee /llvm/lib/CodeGen | |
parent | 72b8ee708f2bcf14fbbd656efeb3751d37b9df19 (diff) | |
download | bcm5719-llvm-045153299658ed71708347f6cb0c14ccdcdf2de2.tar.gz bcm5719-llvm-045153299658ed71708347f6cb0c14ccdcdf2de2.zip |
More FMA folding opportunities.
llvm-svn: 225380
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 134 |
1 files changed, 133 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 64cc1f5f7cb..c68b1e336c3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6878,7 +6878,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } // enable-unsafe-fp-math - // FADD -> FMA combines: if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && TLI.isFMAFasterThanFMulAndFAdd(VT) && @@ -6896,6 +6895,50 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); + + // Remove FP_EXTEND when there is an opportunity to combine. This is + // legal here since extra precision is allowed. + + // fold (fadd (fpext (fmul x, y)), z) -> (fma x, y, z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N00.getOperand(0), N00.getOperand(1), N1); + } + + // fold (fadd x, (fpext (fmul y, z)), z) -> (fma y, z, x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N10.getOperand(0), N10.getOperand(1), N0); + } + } + + // More folding opportunities when target permits. + if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1)); + + // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) + if (N1->getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0)); } return SDValue(); @@ -6989,6 +7032,95 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { DAG.getNode(ISD::FNEG, dl, VT, N00), N01, DAG.getNode(ISD::FNEG, dl, VT, N1)); } + + // Remove FP_EXTEND when there is an opportunity to combine. This is + // legal here since extra precision is allowed. + + // fold (fsub (fpext (fmul x, y)), z) -> (fma x, y, (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N00.getOperand(0), + N00.getOperand(1), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1)); + } + + // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N10.getOperand(0)), + N10.getOperand(1), + N0); + } + + // fold (fsub (fpext (fneg (fmul, x, y))), z) + // -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + N000.getOperand(0)), + N000.getOperand(1), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + } + } + + // fold (fsub (fneg (fpext (fmul, x, y))), z) + // -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + N000.getOperand(0)), + N000.getOperand(1), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + } + } + + // More folding opportunities when target permits. + if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y (fma u, v, (fneg z))) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1))); + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (N1.getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N20), + N21, N0)); + } + } } return SDValue(); |