4 files changed, 19 insertions, 37 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index d482b539a9d..28d27b7a459 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1602,10 +1602,10 @@ SDValue peekThroughOneUseBitcasts(SDValue V);
 bool isBitwiseNot(SDValue V);
 
 /// Returns the SDNode if it is a constant splat BuildVector or constant int.
-ConstantSDNode *isConstOrConstSplat(SDValue N);
+ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false);
 
 /// Returns the SDNode if it is a constant splat BuildVector or constant float.
-ConstantFPSDNode *isConstOrConstSplatFP(SDValue N);
+ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
 
 class GlobalAddressSDNode : public SDNode {
   friend class SelectionDAG;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3872f2d0142..ec7f63a3d3f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11011,8 +11011,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
 SDValue DAGCombiner::visitFSUB(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
-  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
-  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
+  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
   const TargetOptions &Options = DAG.getTarget().Options;
@@ -11044,9 +11044,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
       return DAG.getConstantFP(0.0f, DL, VT);
   }
 
-  // (fsub 0, B) -> -B
+  // (fsub -0.0, N1) -> -N1
   if (N0CFP && N0CFP->isZero()) {
-    if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
+    if (N0CFP->isNegative() ||
+        (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
         return GetNegatedExpression(N1, DAG, LegalOperations);
       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a8843314ef3..0f8bd080867 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8211,7 +8211,7 @@ bool llvm::isBitwiseNot(SDValue V) {
   return C && C->isAllOnesValue();
 }
 
-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
     return CN;
 
@@ -8220,9 +8220,7 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
     ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
 
     // BuildVectors can truncate their operands. Ignore that case here.
-    // FIXME: We blindly ignore splats which include undef which is overly
-    // pessimistic.
-    if (CN && UndefElements.none() &&
+    if (CN && (UndefElements.none() || AllowUndefs) &&
         CN->getValueType(0) == N.getValueType().getScalarType())
       return CN;
   }
@@ -8230,15 +8228,14 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
   return nullptr;
 }
 
-ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) {
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
     return CN;
 
   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
     BitVector UndefElements;
     ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
-
-    if (CN && UndefElements.none())
+    if (CN && (UndefElements.none() || AllowUndefs))
       return CN;
   }
 
diff --git a/llvm/test/CodeGen/X86/vec_fneg.ll b/llvm/test/CodeGen/X86/vec_fneg.ll
index e9dc88ea433..e84f7163bbe 100644
--- a/llvm/test/CodeGen/X86/vec_fneg.ll
+++ b/llvm/test/CodeGen/X86/vec_fneg.ll
@@ -102,16 +102,12 @@ define <2 x float> @fneg_bitcast(i64 %i) nounwind {
 define <4 x float> @fneg_undef_elts_v4f32(<4 x float> %x) {
 ; X32-SSE-LABEL: fneg_undef_elts_v4f32:
 ; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movaps {{.*#+}} xmm1 = <-0,u,u,-0>
-; X32-SSE-NEXT:    subps %xmm0, %xmm1
-; X32-SSE-NEXT:    movaps %xmm1, %xmm0
+; X32-SSE-NEXT:    xorps {{\.LCPI.*}}, %xmm0
 ; X32-SSE-NEXT:    retl
 ;
 ; X64-SSE-LABEL: fneg_undef_elts_v4f32:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    movaps {{.*#+}} xmm1 = <-0,u,u,-0>
-; X64-SSE-NEXT:    subps %xmm0, %xmm1
-; X64-SSE-NEXT:    movaps %xmm1, %xmm0
+; X64-SSE-NEXT:    xorps {{.*}}(%rip), %xmm0
 ; X64-SSE-NEXT:    retq
   %r = fsub <4 x float> <float -0.0, float undef, float undef, float -0.0>, %x
   ret <4 x float> %r
@@ -120,25 +116,13 @@ define <4 x float> @fneg_undef_elts_v4f32(<4 x float> %x) {
 ; This isn't fneg, but similarly check that (X - 0.0) is simplified.
 
 define <4 x float> @fsub0_undef_elts_v4f32(<4 x float> %x) {
-; X32-SSE1-LABEL: fsub0_undef_elts_v4f32:
-; X32-SSE1:       # %bb.0:
-; X32-SSE1-NEXT:    retl
-;
-; X32-SSE2-LABEL: fsub0_undef_elts_v4f32:
-; X32-SSE2:       # %bb.0:
-; X32-SSE2-NEXT:    xorps %xmm1, %xmm1
-; X32-SSE2-NEXT:    subps %xmm1, %xmm0
-; X32-SSE2-NEXT:    retl
-;
-; X64-SSE1-LABEL: fsub0_undef_elts_v4f32:
-; X64-SSE1:       # %bb.0:
-; X64-SSE1-NEXT:    retq
+; X32-SSE-LABEL: fsub0_undef_elts_v4f32:
+; X32-SSE:       # %bb.0:
+; X32-SSE-NEXT:    retl
 ;
-; X64-SSE2-LABEL: fsub0_undef_elts_v4f32:
-; X64-SSE2:       # %bb.0:
-; X64-SSE2-NEXT:    xorps %xmm1, %xmm1
-; X64-SSE2-NEXT:    subps %xmm1, %xmm0
-; X64-SSE2-NEXT:    retq
+; X64-SSE-LABEL: fsub0_undef_elts_v4f32:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    retq
   %r = fsub <4 x float> %x, <float 0.0, float undef, float 0.0, float undef>
   ret <4 x float> %r
 }