summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorOlivier Sallenave <ohsallen@us.ibm.com>2015-01-13 15:06:36 +0000
committerOlivier Sallenave <ohsallen@us.ibm.com>2015-01-13 15:06:36 +0000
commit325096980baf1a9799c6bb6d45c789367553be89 (patch)
tree8c5458103b0a25bce939d90b420a81d4f8ac2628 /llvm/lib
parentf28f613eda9de3a62d48bb559ffe40a0f472c511 (diff)
downloadbcm5719-llvm-325096980baf1a9799c6bb6d45c789367553be89.tar.gz
bcm5719-llvm-325096980baf1a9799c6bb6d45c789367553be89.zip
Added TLI hook for isFPExtFree. Some of the FMA combine heuristics are now guarded with that hook.
llvm-svn: 225795
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp133
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp5
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h2
3 files changed, 77 insertions, 63 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 56ba91fbac5..171ee1e3488 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6957,32 +6957,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
N1.getOperand(0), N1.getOperand(1), N0);
- // More folding opportunities when target permits.
- if (TLI.enableAggressiveFMAFusion(VT)) {
- // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- if (N0.getOpcode() == ISD::FMA &&
- N0.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- N1));
-
- // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- if (N1->getOpcode() == ISD::FMA &&
- N1.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(0), N1.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(2).getOperand(0),
- N1.getOperand(2).getOperand(1),
- N0));
+ // When FP_EXTEND nodes are free on the target, and there is an opportunity
+ // to combine into FMA, arrange such nodes accordingly.
+ if (TLI.isFPExtFree(VT)) {
- // Remove FP_EXTEND when there is an opportunity to combine. This is
- // legal here since extra precision is allowed.
-
- // fold (fadd (fpext (fmul x, y)), z) -> (fma x, y, z)
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FMUL)
@@ -6993,7 +6972,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N00.getOperand(1)), N1);
}
- // fold (fadd x, (fpext (fmul y, z)), z) -> (fma y, z, x)
+ // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x)
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
@@ -7005,6 +6984,30 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N10.getOperand(1)), N0);
}
}
+
+ // More folding opportunities when target permits.
+ if (TLI.enableAggressiveFMAFusion(VT)) {
+
+ // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
+ if (N0.getOpcode() == ISD::FMA &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ N1));
+
+ // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
+ if (N1->getOpcode() == ISD::FMA &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N1.getOperand(0), N1.getOperand(1),
+ DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N1.getOperand(2).getOperand(0),
+ N1.getOperand(2).getOperand(1),
+ N0));
+ }
}
return SDValue();
@@ -7099,41 +7102,12 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
DAG.getNode(ISD::FNEG, dl, VT, N1));
}
- // More folding opportunities when target permits.
- if (TLI.enableAggressiveFMAFusion(VT)) {
+ // When FP_EXTEND nodes are free on the target, and there is an opportunity
+ // to combine into FMA, arrange such nodes accordingly.
+ if (TLI.isFPExtFree(VT)) {
- // fold (fsub (fma x, y, (fmul u, v)), z)
- // -> (fma x, y (fma u, v, (fneg z)))
- if (N0.getOpcode() == ISD::FMA &&
- N0.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N1)));
-
- // fold (fsub x, (fma y, z, (fmul u, v)))
- // -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (N1.getOpcode() == ISD::FMA &&
- N1.getOperand(2).getOpcode() == ISD::FMUL) {
- SDValue N20 = N1.getOperand(2).getOperand(0);
- SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N1.getOperand(0)),
- N1.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N20),
- N21, N0));
- }
-
- // Remove FP_EXTEND when there is an opportunity to combine. This is
- // legal here since extra precision is allowed.
-
- // fold (fsub (fpext (fmul x, y)), z) -> (fma x, y, (fneg z))
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FMUL)
@@ -7145,7 +7119,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
}
- // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg y), z, x)
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
@@ -7160,7 +7135,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// fold (fsub (fpext (fneg (fmul, x, y))), z)
- // -> (fma (fneg x), y, (fneg z))
+ // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FNEG) {
@@ -7178,7 +7153,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// fold (fsub (fneg (fpext (fmul, x, y))), z)
- // -> (fma (fneg x), y, (fneg z))
+ // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FNEG) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FP_EXTEND) {
@@ -7195,6 +7170,38 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
}
+
+ // More folding opportunities when target permits.
+ if (TLI.enableAggressiveFMAFusion(VT)) {
+
+ // fold (fsub (fma x, y, (fmul u, v)), z)
+ // -> (fma x, y (fma u, v, (fneg z)))
+ if (N0.getOpcode() == ISD::FMA &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N1)));
+
+ // fold (fsub x, (fma y, z, (fmul u, v)))
+ // -> (fma (fneg y), z, (fma (fneg u), v, x))
+ if (N1.getOpcode() == ISD::FMA &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ SDValue N20 = N1.getOperand(2).getOperand(0);
+ SDValue N21 = N1.getOperand(2).getOperand(1);
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N20),
+ N21, N0));
+ }
+ }
}
return SDValue();
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 30f08d050da..2157003ef3e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9817,6 +9817,11 @@ bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return TargetLowering::isZExtFree(Val, VT2);
}
+bool PPCTargetLowering::isFPExtFree(EVT VT) const {
+ assert(VT.isFloatingPoint());
+ return true;
+}
+
bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return isInt<16>(Imm) || isUInt<16>(Imm);
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index db5a3e42d52..b171b165877 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -528,6 +528,8 @@ namespace llvm {
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ bool isFPExtFree(EVT VT) const override;
+
/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
OpenPOWER on IntegriCloud