avoid infinite looping when folding vector multiplies of constants (PR22698)

We were missing a check for the following fold in DAGCombiner: // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) If 'x' is also a constant, then we shouldn't do anything. Otherwise, we could end up swapping the operands back and forth forever. This should fix: http://llvm.org/bugs/show_bug.cgi?id=22698 Differential Revision: http://reviews.llvm.org/D7917 llvm-svn: 230884
author: Sanjay Patel <spatel@rotateright.com> 2015-03-01 00:09:35 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2015-03-01 00:09:35 +0000
commit: b8c907e2a7bb021d9f6a2c7ff70e42a8bd8af87d (patch)
tree: 485d3e0c30724bcbd64c5b08ac9e34329f9b3aea
parent: d076b2a8791bb0627e312292f5bbaeb5b5b87130 (diff)
download: bcm5719-llvm-b8c907e2a7bb021d9f6a2c7ff70e42a8bd8af87d.tar.gz
bcm5719-llvm-b8c907e2a7bb021d9f6a2c7ff70e42a8bd8af87d.zip
2 files changed, 48 insertions, 5 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8722c624797..9e2b5afde1e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7453,14 +7453,23 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
       // Fold scalars or any vector constants (not just splats).
       // This fold is done in general by InstCombine, but extra fmul insts
       // may have been generated during lowering.
+      SDValue N00 = N0.getOperand(0);
       SDValue N01 = N0.getOperand(1);
       auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+      auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
       auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
-      if ((N1CFP && isConstOrConstSplatFP(N01)) ||
-          (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
-        SDLoc SL(N);
-        SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
-        return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
+      
+      // Check 1: Make sure that the first operand of the inner multiply is NOT
+      // a constant. Otherwise, we may induce infinite looping.
+      if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
+        // Check 2: Make sure that the second operand of the inner multiply and
+        // the second operand of the outer multiply are constants.
+        if ((N1CFP && isConstOrConstSplatFP(N01)) ||
+            (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
+          SDLoc SL(N);
+          SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
+          return DAG.getNode(ISD::FMUL, SL, VT, N00, MulConsts);
+        }
       }
     }
 
diff --git a/llvm/test/CodeGen/X86/fmul-combines.ll b/llvm/test/CodeGen/X86/fmul-combines.ll
index 703651153c1..7d75611e133 100644
--- a/llvm/test/CodeGen/X86/fmul-combines.ll
+++ b/llvm/test/CodeGen/X86/fmul-combines.ll
@@ -103,6 +103,40 @@ define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x)
   ret <4 x float> %a
 }
 
+; PR22698 - http://llvm.org/bugs/show_bug.cgi?id=22698
+; Make sure that we don't infinite loop swapping constants back and forth.
+
+define <4 x float> @PR22698_splats(<4 x float> %a) #0 {
+  %mul1 = fmul fast <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, <float 3.0, float 3.0, float 3.0, float 3.0>
+  %mul2 = fmul fast <4 x float> <float 4.0, float 4.0, float 4.0, float 4.0>, %mul1
+  %mul3 = fmul fast <4 x float> %a, %mul2
+  ret <4 x float> %mul3
+
+; CHECK: float 2.400000e+01
+; CHECK: float 2.400000e+01
+; CHECK: float 2.400000e+01
+; CHECK: float 2.400000e+01
+; CHECK-LABEL: PR22698_splats:
+; CHECK: mulps
+; CHECK: ret
+}
+
+; Same as above, but verify that non-splat vectors are handled correctly too.
+define <4 x float> @PR22698_no_splats(<4 x float> %a) #0 {
+  %mul1 = fmul fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <float 5.0, float 6.0, float 7.0, float 8.0>
+  %mul2 = fmul fast <4 x float> <float 9.0, float 10.0, float 11.0, float 12.0>, %mul1
+  %mul3 = fmul fast <4 x float> %a, %mul2
+  ret <4 x float> %mul3
+
+; CHECK: float 4.500000e+01
+; CHECK: float 1.200000e+02
+; CHECK: float 2.310000e+02
+; CHECK: float 3.840000e+02
+; CHECK-LABEL: PR22698_no_splats:
+; CHECK: mulps
+; CHECK: ret
+}
+
 ; CHECK-LABEL: fmul_c2_c4_f32:
 ; CHECK-NOT: addss
 ; CHECK: mulss
author	Sanjay Patel <spatel@rotateright.com>	2015-03-01 00:09:35 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2015-03-01 00:09:35 +0000
commit	b8c907e2a7bb021d9f6a2c7ff70e42a8bd8af87d (patch)
tree	485d3e0c30724bcbd64c5b08ac9e34329f9b3aea
parent	d076b2a8791bb0627e312292f5bbaeb5b5b87130 (diff)
download	bcm5719-llvm-b8c907e2a7bb021d9f6a2c7ff70e42a8bd8af87d.tar.gz bcm5719-llvm-b8c907e2a7bb021d9f6a2c7ff70e42a8bd8af87d.zip