diff options
| author | Olivier Sallenave <ohsallen@us.ibm.com> | 2015-01-13 15:06:36 +0000 |
|---|---|---|
| committer | Olivier Sallenave <ohsallen@us.ibm.com> | 2015-01-13 15:06:36 +0000 |
| commit | 325096980baf1a9799c6bb6d45c789367553be89 (patch) | |
| tree | 8c5458103b0a25bce939d90b420a81d4f8ac2628 /llvm/lib | |
| parent | f28f613eda9de3a62d48bb559ffe40a0f472c511 (diff) | |
| download | bcm5719-llvm-325096980baf1a9799c6bb6d45c789367553be89.tar.gz bcm5719-llvm-325096980baf1a9799c6bb6d45c789367553be89.zip | |
Added TLI hook for isFPExtFree. Some of the FMA combine heuristics are now guarded with that hook.
llvm-svn: 225795
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 133 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 2 |
3 files changed, 77 insertions, 63 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 56ba91fbac5..171ee1e3488 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6957,32 +6957,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); - // More folding opportunities when target permits. - if (TLI.enableAggressiveFMAFusion(VT)) { - // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (N0.getOpcode() == ISD::FMA && - N0.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - N1)); - - // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (N1->getOpcode() == ISD::FMA && - N1.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(0), N1.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(2).getOperand(0), - N1.getOperand(2).getOperand(1), - N0)); + // When FP_EXTEND nodes are free on the target, and there is an opportunity + // to combine into FMA, arrange such nodes accordingly. + if (TLI.isFPExtFree(VT)) { - // Remove FP_EXTEND when there is an opportunity to combine. This is - // legal here since extra precision is allowed. - - // fold (fadd (fpext (fmul x, y)), z) -> (fma x, y, z) + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == ISD::FMUL) @@ -6993,7 +6972,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N00.getOperand(1)), N1); } - // fold (fadd x, (fpext (fmul y, z)), z) -> (fma y, z, x) + // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x) // Note: Commutes FADD operands. if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); @@ -7005,6 +6984,30 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N10.getOperand(1)), N0); } } + + // More folding opportunities when target permits. + if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1)); + + // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) + if (N1->getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0)); + } } return SDValue(); @@ -7099,41 +7102,12 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { DAG.getNode(ISD::FNEG, dl, VT, N1)); } - // More folding opportunities when target permits. - if (TLI.enableAggressiveFMAFusion(VT)) { + // When FP_EXTEND nodes are free on the target, and there is an opportunity + // to combine into FMA, arrange such nodes accordingly. + if (TLI.isFPExtFree(VT)) { - // fold (fsub (fma x, y, (fmul u, v)), z) - // -> (fma x, y (fma u, v, (fneg z))) - if (N0.getOpcode() == ISD::FMA && - N0.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N1))); - - // fold (fsub x, (fma y, z, (fmul u, v))) - // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (N1.getOpcode() == ISD::FMA && - N1.getOperand(2).getOpcode() == ISD::FMUL) { - SDValue N20 = N1.getOperand(2).getOperand(0); - SDValue N21 = N1.getOperand(2).getOperand(1); - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N1.getOperand(0)), - N1.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N20), - N21, N0)); - } - - // Remove FP_EXTEND when there is an opportunity to combine. This is - // legal here since extra precision is allowed. - - // fold (fsub (fpext (fmul x, y)), z) -> (fma x, y, (fneg z)) + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == ISD::FMUL) @@ -7145,7 +7119,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1)); } - // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg y), z, x) + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) // Note: Commutes FSUB operands. if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); @@ -7160,7 +7135,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub (fpext (fneg (fmul, x, y))), z) - // -> (fma (fneg x), y, (fneg z)) + // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == ISD::FNEG) { @@ -7178,7 +7153,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub (fneg (fpext (fmul, x, y))), z) - // -> (fma (fneg x), y, (fneg z)) + // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) if (N0.getOpcode() == ISD::FNEG) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == ISD::FP_EXTEND) { @@ -7195,6 +7170,38 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } } + + // More folding opportunities when target permits. + if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y (fma u, v, (fneg z))) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1))); + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (N1.getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N20), + N21, N0)); + } + } } return SDValue(); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 30f08d050da..2157003ef3e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9817,6 +9817,11 @@ bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { return TargetLowering::isZExtFree(Val, VT2); } +bool PPCTargetLowering::isFPExtFree(EVT VT) const { + assert(VT.isFloatingPoint()); + return true; +} + bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index db5a3e42d52..b171b165877 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -528,6 +528,8 @@ namespace llvm { bool isZExtFree(SDValue Val, EVT VT2) const override; + bool isFPExtFree(EVT VT) const override; + /// \brief Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |

