Partial fix to r225380 (More FMA folding opportunities)

As pointed out by Aditya (and Owen), there are two things wrong with the code added in r225380. First, it adds patterns that elide FP extends when forming FMAs, which might not be profitable on all targets; those patterns belong behind the pre-existing aggressive-FMA-formation flag (TLI.enableAggressiveFMAFusion). This change fixes that by moving them under the flag. Second, the resulting FMA nodes might have operands of different types (the extensions need to be re-added). That will be fixed in the follow-up commit.

llvm-svn: 225485
author: Hal Finkel <hfinkel@anl.gov> 2015-01-09 00:45:54 +0000
committer: Hal Finkel <hfinkel@anl.gov> 2015-01-09 00:45:54 +0000
commit: 33ead6f90164bf46e9bdb3509fe3996da77c9981 (patch)
tree: 1950d0bd3e7ee6020c873db3df8a5d8f6afc0f5c /llvm/lib/CodeGen
parent: 1a03ccd848b5d59ad96601821a073edab9cecde1 (diff)
download: bcm5719-llvm-33ead6f90164bf46e9bdb3509fe3996da77c9981.tar.gz
bcm5719-llvm-33ead6f90164bf46e9bdb3509fe3996da77c9981.zip
1 files changed, 95 insertions, 96 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 48b91268a97..08549d00cf8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6898,49 +6898,48 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
       return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                          N1.getOperand(0), N1.getOperand(1), N0);
 
-    // Remove FP_EXTEND when there is an opportunity to combine. This is
-    // legal here since extra precision is allowed.
-
-    // fold (fadd (fpext (fmul x, y)), z) -> (fma x, y, z)
-    if (N0.getOpcode() == ISD::FP_EXTEND) {
-      SDValue N00 = N0.getOperand(0);
-      if (N00.getOpcode() == ISD::FMUL)
+    // More folding opportunities when target permits.
+    if (TLI.enableAggressiveFMAFusion(VT)) {
+      // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
+      if (N0.getOpcode() == ISD::FMA &&
+          N0.getOperand(2).getOpcode() == ISD::FMUL)
         return DAG.getNode(ISD::FMA, SDLoc(N), VT,
-                           N00.getOperand(0), N00.getOperand(1), N1);
-    }
+                           N0.getOperand(0), N0.getOperand(1),
+                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
+                                       N0.getOperand(2).getOperand(0),
+                                       N0.getOperand(2).getOperand(1),
+                                       N1));
 
-    // fold (fadd x, (fpext (fmul y, z)), z) -> (fma y, z, x)
-    // Note: Commutes FADD operands.
-    if (N1.getOpcode() == ISD::FP_EXTEND) {
-      SDValue N10 = N1.getOperand(0);
-      if (N10.getOpcode() == ISD::FMUL)
+      // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
+      if (N1->getOpcode() == ISD::FMA &&
+          N1.getOperand(2).getOpcode() == ISD::FMUL)
         return DAG.getNode(ISD::FMA, SDLoc(N), VT,
-                           N10.getOperand(0), N10.getOperand(1), N0);
-    }
-  }
-
-  // More folding opportunities when target permits.
-  if (TLI.enableAggressiveFMAFusion(VT)) {
+                           N1.getOperand(0), N1.getOperand(1),
+                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
+                                       N1.getOperand(2).getOperand(0),
+                                       N1.getOperand(2).getOperand(1),
+                                       N0));
+
+      // Remove FP_EXTEND when there is an opportunity to combine. This is
+      // legal here since extra precision is allowed.
+
+      // fold (fadd (fpext (fmul x, y)), z) -> (fma x, y, z)
+      if (N0.getOpcode() == ISD::FP_EXTEND) {
+        SDValue N00 = N0.getOperand(0);
+        if (N00.getOpcode() == ISD::FMUL)
+          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+                             N00.getOperand(0), N00.getOperand(1), N1);
+      }
 
-    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
-    if (N0.getOpcode() == ISD::FMA &&
-        N0.getOperand(2).getOpcode() == ISD::FMUL)
-      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
-                         N0.getOperand(0), N0.getOperand(1),
-                         DAG.getNode(ISD::FMA, SDLoc(N), VT,
-                                     N0.getOperand(2).getOperand(0),
-                                     N0.getOperand(2).getOperand(1),
-                                     N1));
-
-    // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
-    if (N1->getOpcode() == ISD::FMA &&
-        N1.getOperand(2).getOpcode() == ISD::FMUL)
-      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
-                         N1.getOperand(0), N1.getOperand(1),
-                         DAG.getNode(ISD::FMA, SDLoc(N), VT,
-                                     N1.getOperand(2).getOperand(0),
-                                     N1.getOperand(2).getOperand(1),
-                                     N0));
+      // fold (fadd x, (fpext (fmul y, z)), z) -> (fma y, z, x)
+      // Note: Commutes FADD operands.
+      if (N1.getOpcode() == ISD::FP_EXTEND) {
+        SDValue N10 = N1.getOperand(0);
+        if (N10.getOpcode() == ISD::FMUL)
+          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+                             N10.getOperand(0), N10.getOperand(1), N0);
+      }
+    }
   }
 
   return SDValue();
@@ -7035,63 +7034,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
                          DAG.getNode(ISD::FNEG, dl, VT, N1));
     }
 
-    // Remove FP_EXTEND when there is an opportunity to combine. This is
-    // legal here since extra precision is allowed.
-
-    // fold (fsub (fpext (fmul x, y)), z) -> (fma x, y, (fneg z))
-    if (N0.getOpcode() == ISD::FP_EXTEND) {
-      SDValue N00 = N0.getOperand(0);
-      if (N00.getOpcode() == ISD::FMUL)
-        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
-                           N00.getOperand(0),
-                           N00.getOperand(1),
-                           DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
-    }
-
-    // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg y), z, x)
-    // Note: Commutes FSUB operands.
-    if (N1.getOpcode() == ISD::FP_EXTEND) {
-      SDValue N10 = N1.getOperand(0);
-      if (N10.getOpcode() == ISD::FMUL)
-        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
-                           DAG.getNode(ISD::FNEG, SDLoc(N), VT,
-                                       N10.getOperand(0)),
-                           N10.getOperand(1),
-                           N0);
-    }
-
-    // fold (fsub (fpext (fneg (fmul, x, y))), z)
-    //   -> (fma (fneg x), y, (fneg z))
-    if (N0.getOpcode() == ISD::FP_EXTEND) {
-      SDValue N00 = N0.getOperand(0);
-      if (N00.getOpcode() == ISD::FNEG) {
-        SDValue N000 = N00.getOperand(0);
-        if (N000.getOpcode() == ISD::FMUL) {
-          return DAG.getNode(ISD::FMA, dl, VT,
-                             DAG.getNode(ISD::FNEG, dl, VT,
-                                         N000.getOperand(0)),
-                             N000.getOperand(1),
-                             DAG.getNode(ISD::FNEG, dl, VT, N1));
-        }
-      }
-    }
-
-    // fold (fsub (fneg (fpext (fmul, x, y))), z)
-    //   -> (fma (fneg x), y, (fneg z))
-    if (N0.getOpcode() == ISD::FNEG) {
-      SDValue N00 = N0.getOperand(0);
-      if (N00.getOpcode() == ISD::FP_EXTEND) {
-        SDValue N000 = N00.getOperand(0);
-        if (N000.getOpcode() == ISD::FMUL) {
-          return DAG.getNode(ISD::FMA, dl, VT,
-                             DAG.getNode(ISD::FNEG, dl, VT,
-                                         N000.getOperand(0)),
-                             N000.getOperand(1),
-                             DAG.getNode(ISD::FNEG, dl, VT, N1));
-        }
-      }
-    }
-
     // More folding opportunities when target permits.
     if (TLI.enableAggressiveFMAFusion(VT)) {
 
@@ -7122,6 +7064,63 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
                                                    N20),
                                        N21, N0));
       }
+
+      // Remove FP_EXTEND when there is an opportunity to combine. This is
+      // legal here since extra precision is allowed.
+
+      // fold (fsub (fpext (fmul x, y)), z) -> (fma x, y, (fneg z))
+      if (N0.getOpcode() == ISD::FP_EXTEND) {
+        SDValue N00 = N0.getOperand(0);
+        if (N00.getOpcode() == ISD::FMUL)
+          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+                             N00.getOperand(0),
+                             N00.getOperand(1),
+                             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
+      }
+
+      // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg y), z, x)
+      // Note: Commutes FSUB operands.
+      if (N1.getOpcode() == ISD::FP_EXTEND) {
+        SDValue N10 = N1.getOperand(0);
+        if (N10.getOpcode() == ISD::FMUL)
+          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+                             DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+                                         N10.getOperand(0)),
+                             N10.getOperand(1),
+                             N0);
+      }
+
+      // fold (fsub (fpext (fneg (fmul, x, y))), z)
+      //   -> (fma (fneg x), y, (fneg z))
+      if (N0.getOpcode() == ISD::FP_EXTEND) {
+        SDValue N00 = N0.getOperand(0);
+        if (N00.getOpcode() == ISD::FNEG) {
+          SDValue N000 = N00.getOperand(0);
+          if (N000.getOpcode() == ISD::FMUL) {
+            return DAG.getNode(ISD::FMA, dl, VT,
+                               DAG.getNode(ISD::FNEG, dl, VT,
+                                           N000.getOperand(0)),
+                               N000.getOperand(1),
+                               DAG.getNode(ISD::FNEG, dl, VT, N1));
+          }
+        }
+      }
+
+      // fold (fsub (fneg (fpext (fmul, x, y))), z)
+      //   -> (fma (fneg x), y, (fneg z))
+      if (N0.getOpcode() == ISD::FNEG) {
+        SDValue N00 = N0.getOperand(0);
+        if (N00.getOpcode() == ISD::FP_EXTEND) {
+          SDValue N000 = N00.getOperand(0);
+          if (N000.getOpcode() == ISD::FMUL) {
+            return DAG.getNode(ISD::FMA, dl, VT,
+                               DAG.getNode(ISD::FNEG, dl, VT,
+                                           N000.getOperand(0)),
+                               N000.getOperand(1),
+                               DAG.getNode(ISD::FNEG, dl, VT, N1));
+          }
+        }
+      }
     }
   }
author	Hal Finkel <hfinkel@anl.gov>	2015-01-09 00:45:54 +0000
committer	Hal Finkel <hfinkel@anl.gov>	2015-01-09 00:45:54 +0000
commit	33ead6f90164bf46e9bdb3509fe3996da77c9981 (patch)
tree	1950d0bd3e7ee6020c873db3df8a5d8f6afc0f5c /llvm/lib/CodeGen
parent	1a03ccd848b5d59ad96601821a073edab9cecde1 (diff)
download	bcm5719-llvm-33ead6f90164bf46e9bdb3509fe3996da77c9981.tar.gz bcm5719-llvm-33ead6f90164bf46e9bdb3509fe3996da77c9981.zip