[InstCombine] Shift amount reassociation in bittest: trunc-of-lshr (PR42399)

Summary: Finally, the fold i was looking forward to :) The legality check is muddy, i doubt i've groked the full generalization, but it handles all the cases i care about, and can come up with: https://rise4fun.com/Alive/26j I.e. we can perform the fold if **any** of the following is true: * The shift amount is either zero or one less than widest bitwidth * Either of the values being shifted has at most lowest bit set * The value that is being shifted by `shl` (which is not truncated) should have no less leading zeros than the total shift amount; * The value that is being shifted by `lshr` (which **is** truncated) should have no less leading zeros than the widest bit width minus total shift amount minus one I strongly suspect there is some better generalization, but i'm not aware of it as of right now. For now i also avoided using actual `computeKnownBits()`, but restricted it to constants. Reviewers: spatel, nikic, xbolva00 Reviewed By: spatel Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D66383 llvm-svn: 370324
author: Roman Lebedev <lebedev.ri@gmail.com> 2019-08-29 10:26:23 +0000
committer: Roman Lebedev <lebedev.ri@gmail.com> 2019-08-29 10:26:23 +0000
commit: f13b0e3ed89f4247c0dfa3147dc3a18d10c96ee8 (patch)
tree: ae05376466f10cfb5faa01e3f08c58cbc241f1e8 /llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
parent: c65204148c13d6c2cdfa018b2dd2bf8c306cc7a5 (diff)
download: bcm5719-llvm-f13b0e3ed89f4247c0dfa3147dc3a18d10c96ee8.tar.gz
bcm5719-llvm-f13b0e3ed89f4247c0dfa3147dc3a18d10c96ee8.zip
1 files changed, 58 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 940e5554bb3..1117586549e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3379,7 +3379,7 @@ foldICmpWithTruncSignExtendedVal(ICmpInst &I,
 // we should move shifts to the same hand of 'and', i.e. rewrite as
 //   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
 // We are only interested in opposite logical shifts here.
-// One of the shifts can be truncated. For now, it can only be 'shl'.
+// One of the shifts can be truncated.
 // If we can, we want to end up creating 'lshr' shift.
 static Value *
 foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
@@ -3413,14 +3413,6 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
          "We did not look past any shifts while matching XShift though.");
   bool HadTrunc = WidestTy != I.getOperand(0)->getType();
 
-  if (HadTrunc) {
-    // We did indeed have a truncation. For now, let's only proceed if the 'shl'
-    // was truncated, since that does not require any extra legality checks.
-    // FIXME: trunc-of-lshr.
-    if (!match(YShift, m_Shl(m_Value(), m_Value())))
-      return nullptr;
-  }
-
   // If YShift is a 'lshr', swap the shifts around.
   if (match(YShift, m_LShr(m_Value(), m_Value())))
     std::swap(XShift, YShift);
@@ -3462,16 +3454,68 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
                       /*isNUW=*/false, SQ.getWithInstruction(&I)));
   if (!NewShAmt)
     return nullptr;
+  NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy);
+  unsigned WidestBitWidth = WidestTy->getScalarSizeInBits();
+
   // Is the new shift amount smaller than the bit width?
   // FIXME: could also rely on ConstantRange.
-  if (!match(NewShAmt, m_SpecificInt_ICMP(
-                           ICmpInst::Predicate::ICMP_ULT,
-                           APInt(NewShAmt->getType()->getScalarSizeInBits(),
-                                 WidestTy->getScalarSizeInBits()))))
+  if (!match(NewShAmt,
+             m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
+                                APInt(WidestBitWidth, WidestBitWidth))))
     return nullptr;
+
+  // An extra legality check is needed if we had trunc-of-lshr.
+  if (HadTrunc && match(WidestShift, m_LShr(m_Value(), m_Value()))) {
+    auto CanFold = [NewShAmt, WidestBitWidth, NarrowestShift, SQ,
+                    WidestShift]() {
+      // It isn't obvious whether it's worth it to analyze non-constants here.
+      // Also, let's basically give up on non-splat cases, pessimizing vectors.
+      // If *any* of these preconditions matches we can perform the fold.
+      Constant *NewShAmtSplat = NewShAmt->getType()->isVectorTy()
+                                    ? NewShAmt->getSplatValue()
+                                    : NewShAmt;
+      // If it's edge-case shift (by 0 or by WidestBitWidth-1) we can fold.
+      if (NewShAmtSplat &&
+          (NewShAmtSplat->isNullValue() ||
+           NewShAmtSplat->getUniqueInteger() == WidestBitWidth - 1))
+        return true;
+      // We consider *min* leading zeros so a single outlier
+      // blocks the transform as opposed to allowing it.
+      if (auto *C = dyn_cast<Constant>(NarrowestShift->getOperand(0))) {
+        KnownBits Known = computeKnownBits(C, SQ.DL);
+        unsigned MinLeadZero = Known.countMinLeadingZeros();
+        // If the value being shifted has at most lowest bit set we can fold.
+        unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
+        if (MaxActiveBits <= 1)
+          return true;
+        // Precondition:  NewShAmt u<= countLeadingZeros(C)
+        if (NewShAmtSplat && NewShAmtSplat->getUniqueInteger().ule(MinLeadZero))
+          return true;
+      }
+      if (auto *C = dyn_cast<Constant>(WidestShift->getOperand(0))) {
+        KnownBits Known = computeKnownBits(C, SQ.DL);
+        unsigned MinLeadZero = Known.countMinLeadingZeros();
+        // If the value being shifted has at most lowest bit set we can fold.
+        unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
+        if (MaxActiveBits <= 1)
+          return true;
+        // Precondition:  ((WidestBitWidth-1)-NewShAmt) u<= countLeadingZeros(C)
+        if (NewShAmtSplat) {
+          APInt AdjNewShAmt =
+              (WidestBitWidth - 1) - NewShAmtSplat->getUniqueInteger();
+          if (AdjNewShAmt.ule(MinLeadZero))
+            return true;
+        }
+      }
+      return false; // Can't tell if it's ok.
+    };
+    if (!CanFold())
+      return nullptr;
+  }
+
   // All good, we can do this fold.
-  NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy);
   X = Builder.CreateZExt(X, WidestTy);
+  Y = Builder.CreateZExt(Y, WidestTy);
   // The shift is the same that was for X.
   Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
                   ? Builder.CreateLShr(X, NewShAmt)
author	Roman Lebedev <lebedev.ri@gmail.com>	2019-08-29 10:26:23 +0000
committer	Roman Lebedev <lebedev.ri@gmail.com>	2019-08-29 10:26:23 +0000
commit	f13b0e3ed89f4247c0dfa3147dc3a18d10c96ee8 (patch)
tree	ae05376466f10cfb5faa01e3f08c58cbc241f1e8 /llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
parent	c65204148c13d6c2cdfa018b2dd2bf8c306cc7a5 (diff)
download	bcm5719-llvm-f13b0e3ed89f4247c0dfa3147dc3a18d10c96ee8.tar.gz bcm5719-llvm-f13b0e3ed89f4247c0dfa3147dc3a18d10c96ee8.zip