| author | Roman Lebedev <lebedev.ri@gmail.com> | 2018-07-09 19:06:42 +0000 |
|---|---|---|
| committer | Roman Lebedev <lebedev.ri@gmail.com> | 2018-07-09 19:06:42 +0000 |
| commit | 5ccae1750b26871670015755f2d2f1acd4860cb7 (patch) | |
| tree | 269c2d892dcba4907b3862e286146bf4d96ed295 /llvm/lib | |
| parent | 22a0c8dbc8bc008928b319d44ad296738940fe72 (diff) | |
| download | bcm5719-llvm-5ccae1750b26871670015755f2d2f1acd4860cb7.tar.gz bcm5719-llvm-5ccae1750b26871670015755f2d2f1acd4860cb7.zip | |
[X86][TLI] DAGCombine: Unfold variable bit-clearing mask to two shifts.
Summary:
This adds a reverse transform for the instcombine canonicalizations
that were added in D47980, D47981.
As discussed afterwards, that canonicalization turned out to be worse,
at least for code size, and potentially for performance as well.
https://rise4fun.com/Alive/Zmpl
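
For readers without the earlier reviews at hand, here is a minimal standalone C++ illustration (not part of the patch; the function names are made up) of the two equivalent ways to clear the bits below a variable index: the masked form that instcombine canonicalizes to, and the two-shift form this combine recovers.

```cpp
// Minimal sketch (not part of the patch): the two equivalent forms of
// clearing the Y lowest bits of X that this DAGCombine switches between.
#include <cassert>
#include <cstdint>

// Masked form, matching the instcombine canonicalization: x & (-1 << y).
uint32_t clearLowBitsWithMask(uint32_t X, unsigned Y) {
  return X & (UINT32_MAX << Y);
}

// Two-shift form produced by the unfold: (x >> y) << y.
uint32_t clearLowBitsWithShifts(uint32_t X, unsigned Y) {
  return (X >> Y) << Y;
}

int main() {
  // The mirrored pattern x & (-1 >> y) <-> (x << y) >> y clears the high
  // bits instead; only the low-bit case is exercised here.
  for (unsigned Y = 0; Y < 32; ++Y)
    assert(clearLowBitsWithMask(0xDEADBEEFu, Y) ==
           clearLowBitsWithShifts(0xDEADBEEFu, Y));
  return 0;
}
```

The same identity holds with the shift directions swapped for the high-bit case, which is why the matcher in the diff below accepts either SHL or SRL as the "outer" shift.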
Reviewers: craig.topper, RKSimon, spatel
Reviewed By: spatel
Subscribers: reames, llvm-commits
Differential Revision: https://reviews.llvm.org/D48768
llvm-svn: 336585
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 58 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 14 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 2 |
3 files changed, 74 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 42bc330c641..34be54db23b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -409,6 +409,7 @@ namespace {
     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                               const SDLoc &DL);
     SDValue unfoldMaskedMerge(SDNode *N);
+    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                           const SDLoc &DL, bool foldBooleans);
     SDValue rebuildSetCC(SDValue N);
@@ -4169,6 +4170,60 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
   return false;
 }
 
+// Unfold
+//    x &  (-1 'logical shift' y)
+// To
+//    (x 'opposite logical shift' y) 'logical shift' y
+// if it is better for performance.
+SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
+  assert(N->getOpcode() == ISD::AND);
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Do we actually prefer shifts over mask?
+  if (!TLI.preferShiftsToClearExtremeBits(N0))
+    return SDValue();
+
+  // Try to match  (-1 '[outer] logical shift' y)
+  unsigned OuterShift;
+  unsigned InnerShift; // The opposite direction to the OuterShift.
+  SDValue Y;           // Shift amount.
+  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
+    if (!M.hasOneUse())
+      return false;
+    OuterShift = M->getOpcode();
+    if (OuterShift == ISD::SHL)
+      InnerShift = ISD::SRL;
+    else if (OuterShift == ISD::SRL)
+      InnerShift = ISD::SHL;
+    else
+      return false;
+    if (!isAllOnesConstant(M->getOperand(0)))
+      return false;
+    Y = M->getOperand(1);
+    return true;
+  };
+
+  SDValue X;
+  if (matchMask(N1))
+    X = N0;
+  else if (matchMask(N0))
+    X = N1;
+  else
+    return SDValue();
+
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  //     tmp = x 'opposite logical shift' y
+  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
+  //     ret = tmp 'logical shift' y
+  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
+
+  return T1;
+}
+
 SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -4466,6 +4521,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       return BSwap;
   }
 
+  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
+    return Shifts;
+
   return SDValue();
 }
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 321fb950310..b0efc4d1ed5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4786,6 +4786,20 @@ bool X86TargetLowering::hasAndNot(SDValue Y) const {
   return Subtarget.hasSSE2();
 }
 
+bool X86TargetLowering::preferShiftsToClearExtremeBits(SDValue Y) const {
+  EVT VT = Y.getValueType();
+
+  // For vectors, we don't have a preference, but we probably want a mask.
+  if (VT.isVector())
+    return false;
+
+  // 64-bit shifts on 32-bit targets produce really bad bloated code.
+  if (VT == MVT::i64 && !Subtarget.is64Bit())
+    return false;
+
+  return true;
+}
+
 MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
   MVT VT = MVT::getIntegerVT(NumBits);
   if (isTypeLegal(VT))
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index d7e33442181..4fadf0543c6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -828,6 +828,8 @@ namespace llvm {
 
     bool hasAndNot(SDValue Y) const override;
 
+    bool preferShiftsToClearExtremeBits(SDValue Y) const override;
+
     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
       return VT.isScalarInteger();
     }
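
Other backends can opt into the same unfold by overriding the new TargetLowering hook. The sketch below is hypothetical ("MyTargetLowering" is not a real LLVM class, and the policy shown is an assumption, not taken from any in-tree target); it only illustrates the shape of such an override.

```cpp
// Hypothetical override of the hook added by this patch, for an imaginary
// "MyTargetLowering" backend.
bool MyTargetLowering::preferShiftsToClearExtremeBits(SDValue Y) const {
  EVT VT = Y.getValueType();

  // Keep the mask form for vectors, as the X86 implementation above does.
  if (VT.isVector())
    return false;

  // Only prefer the two-shift form for scalar types the target can shift
  // natively; expanded multi-instruction shifts would likely be worse.
  return isTypeLegal(VT);
}
```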

