[SelectionDAG] allow undefs when matching splat constants

And use that to transform fsub with zero constant operands.

The integer part isn't used yet, but it is proposed for use in D44548, so adding both enhancements here makes that patch simpler.

llvm-svn: 343865
author: Sanjay Patel <spatel@rotateright.com> 2018-10-05 17:42:19 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2018-10-05 17:42:19 +0000
commit: f6a160a1023079f02e89917c6d7a74f90f12e23c (patch)
tree: 6336b58521061a26f39d4a8bb42a0b2d7cbd0fc0
parent: 7875142b5c02eec06aeb6be5ffe18ca71a0dfeab (diff)
download: bcm5719-llvm-f6a160a1023079f02e89917c6d7a74f90f12e23c.tar.gz
bcm5719-llvm-f6a160a1023079f02e89917c6d7a74f90f12e23c.zip
4 files changed, 19 insertions, 37 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index d482b539a9d..28d27b7a459 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1602,10 +1602,10 @@ SDValue peekThroughOneUseBitcasts(SDValue V);
 bool isBitwiseNot(SDValue V);
 
 /// Returns the SDNode if it is a constant splat BuildVector or constant int.
-ConstantSDNode *isConstOrConstSplat(SDValue N);
+ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false);
 
 /// Returns the SDNode if it is a constant splat BuildVector or constant float.
-ConstantFPSDNode *isConstOrConstSplatFP(SDValue N);
+ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
 
 class GlobalAddressSDNode : public SDNode {
   friend class SelectionDAG;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3872f2d0142..ec7f63a3d3f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11011,8 +11011,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
 SDValue DAGCombiner::visitFSUB(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
-  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
-  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
+  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
   const TargetOptions &Options = DAG.getTarget().Options;
@@ -11044,9 +11044,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
       return DAG.getConstantFP(0.0f, DL, VT);
   }
 
-  // (fsub 0, B) -> -B
+  // (fsub -0.0, N1) -> -N1
   if (N0CFP && N0CFP->isZero()) {
-    if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
+    if (N0CFP->isNegative() ||
+        (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
         return GetNegatedExpression(N1, DAG, LegalOperations);
       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a8843314ef3..0f8bd080867 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8211,7 +8211,7 @@ bool llvm::isBitwiseNot(SDValue V) {
   return C && C->isAllOnesValue();
 }
 
-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
     return CN;
 
@@ -8220,9 +8220,7 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
     ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
 
     // BuildVectors can truncate their operands. Ignore that case here.
-    // FIXME: We blindly ignore splats which include undef which is overly
-    // pessimistic.
-    if (CN && UndefElements.none() &&
+    if (CN && (UndefElements.none() || AllowUndefs) &&
         CN->getValueType(0) == N.getValueType().getScalarType())
       return CN;
   }
@@ -8230,15 +8228,14 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
   return nullptr;
 }
 
-ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) {
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
     return CN;
 
   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
     BitVector UndefElements;
     ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
-
-    if (CN && UndefElements.none())
+    if (CN && (UndefElements.none() || AllowUndefs))
       return CN;
   }
 
diff --git a/llvm/test/CodeGen/X86/vec_fneg.ll b/llvm/test/CodeGen/X86/vec_fneg.ll
index e9dc88ea433..e84f7163bbe 100644
--- a/llvm/test/CodeGen/X86/vec_fneg.ll
+++ b/llvm/test/CodeGen/X86/vec_fneg.ll
@@ -102,16 +102,12 @@ define <2 x float> @fneg_bitcast(i64 %i) nounwind {
 define <4 x float> @fneg_undef_elts_v4f32(<4 x float> %x) {
 ; X32-SSE-LABEL: fneg_undef_elts_v4f32:
 ; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movaps {{.*#+}} xmm1 = <-0,u,u,-0>
-; X32-SSE-NEXT:    subps %xmm0, %xmm1
-; X32-SSE-NEXT:    movaps %xmm1, %xmm0
+; X32-SSE-NEXT:    xorps {{\.LCPI.*}}, %xmm0
 ; X32-SSE-NEXT:    retl
 ;
 ; X64-SSE-LABEL: fneg_undef_elts_v4f32:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    movaps {{.*#+}} xmm1 = <-0,u,u,-0>
-; X64-SSE-NEXT:    subps %xmm0, %xmm1
-; X64-SSE-NEXT:    movaps %xmm1, %xmm0
+; X64-SSE-NEXT:    xorps {{.*}}(%rip), %xmm0
 ; X64-SSE-NEXT:    retq
   %r = fsub <4 x float> <float -0.0, float undef, float undef, float -0.0>, %x
   ret <4 x float> %r
@@ -120,25 +116,13 @@ define <4 x float> @fneg_undef_elts_v4f32(<4 x float> %x) {
 ; This isn't fneg, but similarly check that (X - 0.0) is simplified.
 
 define <4 x float> @fsub0_undef_elts_v4f32(<4 x float> %x) {
-; X32-SSE1-LABEL: fsub0_undef_elts_v4f32:
-; X32-SSE1:       # %bb.0:
-; X32-SSE1-NEXT:    retl
-;
-; X32-SSE2-LABEL: fsub0_undef_elts_v4f32:
-; X32-SSE2:       # %bb.0:
-; X32-SSE2-NEXT:    xorps %xmm1, %xmm1
-; X32-SSE2-NEXT:    subps %xmm1, %xmm0
-; X32-SSE2-NEXT:    retl
-;
-; X64-SSE1-LABEL: fsub0_undef_elts_v4f32:
-; X64-SSE1:       # %bb.0:
-; X64-SSE1-NEXT:    retq
+; X32-SSE-LABEL: fsub0_undef_elts_v4f32:
+; X32-SSE:       # %bb.0:
+; X32-SSE-NEXT:    retl
 ;
-; X64-SSE2-LABEL: fsub0_undef_elts_v4f32:
-; X64-SSE2:       # %bb.0:
-; X64-SSE2-NEXT:    xorps %xmm1, %xmm1
-; X64-SSE2-NEXT:    subps %xmm1, %xmm0
-; X64-SSE2-NEXT:    retq
+; X64-SSE-LABEL: fsub0_undef_elts_v4f32:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    retq
   %r = fsub <4 x float> %x, <float 0.0, float undef, float 0.0, float undef>
   ret <4 x float> %r
 }
author	Sanjay Patel <spatel@rotateright.com>	2018-10-05 17:42:19 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2018-10-05 17:42:19 +0000
commit	f6a160a1023079f02e89917c6d7a74f90f12e23c (patch)
tree	6336b58521061a26f39d4a8bb42a0b2d7cbd0fc0
parent	7875142b5c02eec06aeb6be5ffe18ca71a0dfeab (diff)
download	bcm5719-llvm-f6a160a1023079f02e89917c6d7a74f90f12e23c.tar.gz bcm5719-llvm-f6a160a1023079f02e89917c6d7a74f90f12e23c.zip