| author | Sanjay Patel <spatel@rotateright.com> | 2018-10-09 14:04:14 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2018-10-09 14:04:14 +0000 |
| commit | f5fac1826a867d5e65310456670cc1d5a495ed02 (patch) | |
| tree | 8b91cb196a284c34941c1b8346d3b386adeecf10 /llvm/lib/Target/X86 | |
| parent | 0b24a86f6322d1fa3e458b8114fa9a88f39ea360 (diff) | |
| download | bcm5719-llvm-f5fac1826a867d5e65310456670cc1d5a495ed02.tar.gz bcm5719-llvm-f5fac1826a867d5e65310456670cc1d5a495ed02.zip | |
[x86] use demanded bits to simplify masked store codegen
As noted in D52747, if we prefer that IR use trunc for bool vectors rather
than and+icmp, we can expose codegen shortcomings, as seen here with masked store.
Replace a hard-coded PCMPGT simplification with the more general demanded-bits
call to improve the generated code.
Differential Revision: https://reviews.llvm.org/D52964
llvm-svn: 344048
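
The key hardware fact behind this change: AVX masked-store instructions (vmaskmovps/vpmaskmovd) consult only the most significant bit of each mask element. The sketch below (plain C++, not LLVM code; all names are illustrative) models that behavior to show why a mask computed as (0 > X) selects exactly the same lanes as using X directly, which is the equivalence the demanded-bits call now exploits for arbitrary mask expressions.

```cpp
#include <cstdint>
#include <cstdio>

// Model of an AVX-style masked store: only the sign bit of each 32-bit
// mask lane decides whether that lane is written (as vmaskmovps does).
void maskedStore(int32_t *dst, const int32_t *val, const int32_t *mask, int n) {
  for (int i = 0; i < n; ++i)
    if (mask[i] < 0) // sign bit set
      dst[i] = val[i];
}

int main() {
  int32_t x[4] = {-5, 7, -1, 0};
  int32_t val[4] = {10, 20, 30, 40};
  int32_t cmp[4], viaCmp[4] = {}, viaX[4] = {};

  // Mask built the old way: pcmpgt 0, X (all-ones lane where X is negative).
  for (int i = 0; i < 4; ++i)
    cmp[i] = (0 > x[i]) ? -1 : 0;

  maskedStore(viaCmp, val, cmp, 4); // mstore val, ptr, (pcmpgt 0, X)
  maskedStore(viaX, val, x, 4);     // mstore val, ptr, X

  // Both stores write lanes 0 and 2 only; the compare is redundant because
  // only the sign bit of each mask lane is demanded.
  for (int i = 0; i < 4; ++i)
    printf("%d %d\n", (int)viaCmp[i], (int)viaX[i]);
  return 0;
}
```

The previous combine recognized only a literal PCMPGT-against-zero mask; routing the mask through SimplifyDemandedBits lets any mask expression whose sign bits already match be simplified the same way, including the patterns produced when IR uses trunc for bool vectors.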
Diffstat (limited to 'llvm/lib/Target/X86')
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 33 |
1 file changed, 16 insertions, 17 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 76d5f8e9d31..2256fe02696 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36521,31 +36521,31 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
 }
 
 static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
+                                  TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget &Subtarget) {
   MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
-
   if (Mst->isCompressingStore())
     return SDValue();
 
+  EVT VT = Mst->getValue().getValueType();
   if (!Mst->isTruncatingStore()) {
     if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
       return ScalarStore;
 
-    // If the mask is checking (0 > X), we're creating a vector with all-zeros
-    // or all-ones elements based on the sign bits of X. AVX1 masked store only
-    // cares about the sign bit of each mask element, so eliminate the compare:
-    // mstore val, ptr, (pcmpgt 0, X) --> mstore val, ptr, X
-    // Note that by waiting to match an x86-specific PCMPGT node, we're
-    // eliminating potentially more complex matching of a setcc node which has
-    // a full range of predicates.
+    // If the mask value has been legalized to a non-boolean vector, try to
+    // simplify ops leading up to it. We only demand the MSB of each lane.
     SDValue Mask = Mst->getMask();
-    if (Mask.getOpcode() == X86ISD::PCMPGT &&
-        ISD::isBuildVectorAllZeros(Mask.getOperand(0).getNode())) {
-      assert(Mask.getValueType() == Mask.getOperand(1).getValueType() &&
-             "Unexpected type for PCMPGT");
-      return DAG.getMaskedStore(
-          Mst->getChain(), SDLoc(N), Mst->getValue(), Mst->getBasePtr(),
-          Mask.getOperand(1), Mst->getMemoryVT(), Mst->getMemOperand());
+    if (Mask.getScalarValueSizeInBits() != 1) {
+      TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+                                            !DCI.isBeforeLegalizeOps());
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
+      KnownBits Known;
+      if (TLI.SimplifyDemandedBits(Mask, DemandedMask, Known, TLO)) {
+        DCI.AddToWorklist(Mask.getNode());
+        DCI.CommitTargetLoweringOpt(TLO);
+        return SDValue(N, 0);
+      }
     }
 
     // TODO: AVX512 targets should also be able to simplify something like the
@@ -36556,7 +36556,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
   }
 
   // Resolve truncating stores.
-  EVT VT = Mst->getValue().getValueType();
   unsigned NumElems = VT.getVectorNumElements();
   EVT StVT = Mst->getMemoryVT();
   SDLoc dl(Mst);
@@ -40382,7 +40381,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::LOAD:       return combineLoad(N, DAG, DCI, Subtarget);
   case ISD::MLOAD:      return combineMaskedLoad(N, DAG, DCI, Subtarget);
   case ISD::STORE:      return combineStore(N, DAG, Subtarget);
-  case ISD::MSTORE:     return combineMaskedStore(N, DAG, Subtarget);
+  case ISD::MSTORE:     return combineMaskedStore(N, DAG, DCI, Subtarget);
   case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget);
   case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget);
   case ISD::FADD:
```
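
For intuition about what the SimplifyDemandedBits call buys over the old special case, here is a toy sketch (plain C++, not the LLVM API; the node kinds and function are invented for illustration) of a demanded-bits style fold: when only the sign bit of each lane is demanded, any node that preserves its operand's sign bit can be looked through, of which the (0 > X) compare is just one instance.

```cpp
#include <cassert>
#include <cstdint>

// Toy expression node: a plain value, a lanewise (0 > X) compare that
// produces all-ones/all-zeros, or an AND with a constant.
struct Node {
  enum Kind { Value, CmpGtZero, AndConst } kind;
  Node *operand = nullptr; // for CmpGtZero / AndConst
  uint32_t constant = 0;   // for AndConst
};

// Return a simpler node that computes the same demanded bits, or the node
// itself if no simplification applies.
Node *simplifyDemandedBits(Node *n, uint32_t demanded) {
  switch (n->kind) {
  case Node::CmpGtZero:
    // (0 > X) is all-ones iff X's sign bit is set, so the result's sign bit
    // equals X's; if only the sign bit is demanded, use X directly.
    if (demanded == 0x80000000u)
      return simplifyDemandedBits(n->operand, demanded);
    break;
  case Node::AndConst:
    // If the constant keeps every demanded bit, the AND is a no-op here.
    if ((n->constant & demanded) == demanded)
      return simplifyDemandedBits(n->operand, demanded);
    break;
  case Node::Value:
    break;
  }
  return n;
}

int main() {
  Node x{Node::Value};
  Node cmp{Node::CmpGtZero, &x};
  // Demanding only the sign bit lets the compare be bypassed entirely,
  // mirroring: mstore val, ptr, (pcmpgt 0, X) --> mstore val, ptr, X.
  assert(simplifyDemandedBits(&cmp, 0x80000000u) == &x);
  return 0;
}
```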

