author | Sanjay Patel <spatel@rotateright.com> | 2018-11-13 22:47:24 +0000
committer | Sanjay Patel <spatel@rotateright.com> | 2018-11-13 22:47:24 +0000
commit | f8f12272e80fd37f6ee48b7ffef9d7296dcd2e33 (patch)
tree | 259dcf63ddac790b771d072158e2470d6744dd8c /llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
parent | e19dc6137fadf44aac0385357e3169350a2061c0 (diff)
[InstCombine] canonicalize rotate patterns with cmp/select
The cmp+branch variant of this pattern is shown in:
https://bugs.llvm.org/show_bug.cgi?id=34924
...and as discussed there, we probably can't transform
that without a rotate intrinsic. We do have that now
via funnel shift, but we're not quite ready to
canonicalize IR to that form yet. The variant that uses a
'select', however, can already be transformed safely, and
that is what this patch does.
The sequence with negation followed by masking is what we
use in the backend and partly in clang (though that part
should be updated).
https://rise4fun.com/Alive/TplC
%cmp = icmp eq i32 %shamt, 0
%sub = sub i32 32, %shamt
%shr = lshr i32 %x, %shamt
%shl = shl i32 %x, %sub
%or = or i32 %shr, %shl
%r = select i1 %cmp, i32 %x, i32 %or
=>
%neg = sub i32 0, %shamt
%masked = and i32 %shamt, 31
%maskedneg = and i32 %neg, 31
%shr2 = lshr i32 %x, %masked
%shl2 = shl i32 %x, %maskedneg
%r = or i32 %shr2, %shl2
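As an aside (not part of the original commit message), the proof above corresponds to the following source-level idioms. This is only a sketch of the equivalence, with made-up helper names rotr32_select and rotr32_masked; it is not code from the patch:

```cpp
#include <cassert>
#include <cstdint>

// Select-guarded rotate-right: the pattern this fold matches. The s == 0
// check exists only to avoid the undefined shift by 32 in the other arm.
static uint32_t rotr32_select(uint32_t x, uint32_t s) {
  return s == 0 ? x : (x >> s) | (x << (32 - s));
}

// Bitmasked rotate-right: the form the fold produces. Masking keeps both
// shift amounts in [0, 31], so no guard against s == 0 is needed.
static uint32_t rotr32_masked(uint32_t x, uint32_t s) {
  return (x >> (s & 31)) | (x << (-s & 31));
}

int main() {
  for (uint32_t s = 0; s < 32; ++s)
    assert(rotr32_select(0x12345678u, s) == rotr32_masked(0x12345678u, s));
}
```

The masked form has no compare, select, or shift amount that can reach the bit width, which is why it is preferred as the canonical form until funnel-shift intrinsics take over.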
llvm-svn: 346807
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 63 |
1 file changed, 63 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 88a72bb8eb5..26d0b522f01 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1546,6 +1546,66 @@ static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
   return SelectInst::Create(CmpABC, MinMaxOp, ThirdOp);
 }

+/// Try to reduce a rotate pattern that includes a compare and select into a
+/// sequence of ALU ops only. Example:
+/// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
+///              --> (a >> (-b & 31)) | (a << (b & 31))
+static Instruction *foldSelectRotate(SelectInst &Sel,
+                                     InstCombiner::BuilderTy &Builder) {
+  // The false value of the select must be a rotate of the true value.
+  Value *Or0, *Or1;
+  if (!match(Sel.getFalseValue(), m_OneUse(m_Or(m_Value(Or0), m_Value(Or1)))))
+    return nullptr;
+
+  Value *TVal = Sel.getTrueValue();
+  Value *SA0, *SA1;
+  if (!match(Or0, m_OneUse(m_LogicalShift(m_Specific(TVal), m_Value(SA0)))) ||
+      !match(Or1, m_OneUse(m_LogicalShift(m_Specific(TVal), m_Value(SA1)))))
+    return nullptr;
+
+  auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
+  auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
+  if (ShiftOpcode0 == ShiftOpcode1)
+    return nullptr;
+
+  // We have one of these patterns so far:
+  // select ?, TVal, (or (lshr TVal, SA0), (shl TVal, SA1))
+  // select ?, TVal, (or (shl TVal, SA0), (lshr TVal, SA1))
+  // This must be a power-of-2 rotate for a bitmasking transform to be valid.
+  unsigned Width = Sel.getType()->getScalarSizeInBits();
+  if (!isPowerOf2_32(Width))
+    return nullptr;
+
+  // Check the shift amounts to see if they are an opposite pair.
+  Value *ShAmt;
+  if (match(SA1, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(SA0)))))
+    ShAmt = SA0;
+  else if (match(SA0, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(SA1)))))
+    ShAmt = SA1;
+  else
+    return nullptr;
+
+  // Finally, see if the select is filtering out a shift-by-zero.
+  Value *Cond = Sel.getCondition();
+  ICmpInst::Predicate Pred;
+  if (!match(Cond, m_OneUse(m_ICmp(Pred, m_Specific(ShAmt), m_ZeroInt()))) ||
+      Pred != ICmpInst::ICMP_EQ)
+    return nullptr;
+
+  // This is a rotate that avoids shift-by-bitwidth UB in a suboptimal way.
+  // Convert to safely bitmasked shifts.
+  // TODO: When we can canonicalize to funnel shift intrinsics without risk of
+  //       performance regressions, replace this sequence with that call.
+  Value *NegShAmt = Builder.CreateNeg(ShAmt);
+  Value *MaskedShAmt = Builder.CreateAnd(ShAmt, Width - 1);
+  Value *MaskedNegShAmt = Builder.CreateAnd(NegShAmt, Width - 1);
+  Value *NewSA0 = ShAmt == SA0 ? MaskedShAmt : MaskedNegShAmt;
+  Value *NewSA1 = ShAmt == SA1 ? MaskedShAmt : MaskedNegShAmt;
+  Value *NewSh0 = Builder.CreateBinOp(ShiftOpcode0, TVal, NewSA0);
+  Value *NewSh1 = Builder.CreateBinOp(ShiftOpcode1, TVal, NewSA1);
+  return BinaryOperator::CreateOr(NewSh0, NewSh1);
+}
+
 Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   Value *CondVal = SI.getCondition();
   Value *TrueVal = SI.getTrueValue();
@@ -2010,5 +2070,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   if (Instruction *Select = foldSelectBinOpIdentity(SI, TLI))
     return Select;

+  if (Instruction *Rot = foldSelectRotate(SI, Builder))
+    return Rot;
+
   return nullptr;
 }
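One detail of foldSelectRotate above that is easy to miss: the isPowerOf2_32(Width) check is what makes the bitmasking rewrite sound, because (-s) & (Width - 1) only equals (Width - s) mod Width when Width - 1 is an all-ones mask. A minimal standalone demonstration of this arithmetic (my own sketch, not part of the patch):

```cpp
#include <cstdio>

// For a power-of-2 width, masking by (Width - 1) is reduction mod Width,
// so (-s) & (Width - 1) reproduces the (Width - s) shift amount.
// For a non-power-of-2 width the mask is not all-ones and the identity breaks.
int main() {
  unsigned s = 5;
  std::printf("W=32: (-s) & 31 = %u, 32 - s = %u\n", -s & 31u, 32u - s); // 27 == 27
  std::printf("W=12: (-s) & 11 = %u, 12 - s = %u\n", -s & 11u, 12u - s); // 11 != 7
}
```

The same reasoning is why Width - 1 works as the mask in the Builder.CreateAnd calls: for legal power-of-2 scalar and vector element widths, masking and reduction mod Width coincide.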