3 files changed, 16 insertions, 13 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 614525b1f52..ac324fe4c3d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10351,13 +10351,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
     // Selection pass has a hard time dealing with FP constants.
     bool AllowNewConst = (Level < AfterLegalizeDAG);
 
-    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
-    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
-        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
-      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
-                         DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
-                                     Flags),
-                         Flags);
+    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
+    if (N1CFP && N0.getOpcode() == ISD::FADD &&
+        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
+      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
+    }
 
     // If allowed, fold (fadd (fneg x), x) -> 0.0
     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
diff --git a/llvm/test/CodeGen/AArch64/fadd-combines.ll b/llvm/test/CodeGen/AArch64/fadd-combines.ll
index f605efe9862..3c5524f0919 100644
--- a/llvm/test/CodeGen/AArch64/fadd-combines.ll
+++ b/llvm/test/CodeGen/AArch64/fadd-combines.ll
@@ -112,14 +112,19 @@ define float @fadd_const_multiuse_fmf(float %x) {
   ret float %a3
 }
 
+; DAGCombiner transforms this into: (x + 59.0) + (x + 17.0).
+; The machine combiner transforms this into a chain of 3 dependent adds:
+; ((x + 59.0) + 17.0) + x
+
 define float @fadd_const_multiuse_attr(float %x) #0 {
 ; CHECK-LABEL: fadd_const_multiuse_attr:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x9, .LCPI8_1
 ; CHECK-NEXT:    adrp x8, .LCPI8_0
-; CHECK-NEXT:    ldr s1, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT:    fadd s0, s0, s1
-; CHECK-NEXT:    fmov s1, #17.00000000
+; CHECK-NEXT:    ldr s1, [x9, :lo12:.LCPI8_1]
+; CHECK-NEXT:    ldr s2, [x8, :lo12:.LCPI8_0]
 ; CHECK-NEXT:    fadd s1, s0, s1
+; CHECK-NEXT:    fadd s1, s2, s1
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    ret
   %a1 = fadd float %x, 42.0
diff --git a/llvm/test/CodeGen/X86/fadd-combines.ll b/llvm/test/CodeGen/X86/fadd-combines.ll
index 1f9740da49d..f9d899e44da 100644
--- a/llvm/test/CodeGen/X86/fadd-combines.ll
+++ b/llvm/test/CodeGen/X86/fadd-combines.ll
@@ -221,17 +221,16 @@ define <4 x float> @fadd_fadd_x_x_fadd_x_x_4f32(<4 x float> %x) #0 {
   ret <4 x float> %z
 }
 
-; FIXME:
 ; ((x + 42.0) + 17.0) + (x + 42.0) --> (x + 59.0) + (x + 17.0)
-; It's still 3 adds, but the first two are independent.
+; It's still 3 adds, but the first 2 are independent.
 ; More reassocation could get this to 2 adds or 1 FMA (that's done in IR, but not in the DAG).
 
 define float @fadd_const_multiuse_attr(float %x) #0 {
 ; CHECK-LABEL: fadd_const_multiuse_attr:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    addss %xmm0, %xmm1
+; CHECK-NEXT:    addss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    addss %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %a1 = fadd float %x, 42.0