2 files changed, 148 insertions, 108 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 40c3b79e88d..664edc7c9e5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3270,6 +3270,63 @@ foldICmpWithTruncSignExtendedVal(ICmpInst &I,
   return T1;
 }
 
+// Given pattern:
+//   icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
+// we should move shifts to the same hand of 'and', i.e. rewrite as
+//   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
+// We are only interested in opposite logical shifts here.
+// If we can, we want to end up creating 'lshr' shift.
+static Value *
+foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
+                                           InstCombiner::BuilderTy &Builder) {
+  if (!I.isEquality() || !match(I.getOperand(1), m_Zero()) ||
+      !I.getOperand(0)->hasOneUse())
+    return nullptr;
+
+  auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value());
+  auto m_AnyLShr = m_LShr(m_Value(), m_Value());
+
+  // Look for an 'and' of two (opposite) logical shifts.
+  // Pick the single-use shift as XShift.
+  Value *XShift, *YShift;
+  if (!match(I.getOperand(0),
+             m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))),
+                     m_CombineAnd(m_AnyLogicalShift, m_Value(YShift)))))
+    return nullptr;
+
+  // If YShift is a single-use 'lshr', swap the shifts around.
+  if (match(YShift, m_OneUse(m_AnyLShr)))
+    std::swap(XShift, YShift);
+
+  // The shifts must be in opposite directions.
+  Instruction::BinaryOps XShiftOpcode =
+      cast<BinaryOperator>(XShift)->getOpcode();
+  if (XShiftOpcode == cast<BinaryOperator>(YShift)->getOpcode())
+    return nullptr; // Do not care about same-direction shifts here.
+
+  Value *X, *XShAmt, *Y, *YShAmt;
+  match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt)));
+  match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt)));
+
+  // Can we fold (XShAmt+YShAmt) ?
+  Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt,
+                                  SQ.getWithInstruction(&I));
+  if (!NewShAmt)
+    return nullptr;
+  // Is the new shift amount smaller than the bit width?
+  // FIXME: could also rely on ConstantRange.
+  unsigned BitWidth = X->getType()->getScalarSizeInBits();
+  if (!match(NewShAmt, m_SpecificInt_ULT(APInt(BitWidth, BitWidth))))
+    return nullptr;
+  // All good, we can do this fold. The shift is the same that was for X.
+  Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
+                  ? Builder.CreateLShr(X, NewShAmt)
+                  : Builder.CreateShl(X, NewShAmt);
+  Value *T1 = Builder.CreateAnd(T0, Y);
+  return Builder.CreateICmp(I.getPredicate(), T1,
+                            Constant::getNullValue(X->getType()));
+}
+
 /// Try to fold icmp (binop), X or icmp X, (binop).
 /// TODO: A large part of this logic is duplicated in InstSimplify's
 /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -3625,6 +3682,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
   if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
     return replaceInstUsesWith(I, V);
 
+  if (Value *V = foldShiftIntoShiftInAnotherHandOfAndInICmp(I, SQ, Builder))
+    return replaceInstUsesWith(I, V);
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll
index 7ce0bc8d937..c0bce48b3be 100644
--- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll
+++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll
@@ -11,11 +11,10 @@
 
 define i1 @t0_const_lshr_shl_ne(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t0_const_lshr_shl_ne(
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = lshr i32 %x, 1
   %t1 = shl i32 %y, 1
@@ -25,11 +24,10 @@ define i1 @t0_const_lshr_shl_ne(i32 %x, i32 %y) {
 }
 define i1 @t1_const_shl_lshr_ne(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t1_const_shl_lshr_ne(
-; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[Y:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = shl i32 %x, 1
   %t1 = lshr i32 %y, 1
@@ -41,11 +39,10 @@ define i1 @t1_const_shl_lshr_ne(i32 %x, i32 %y) {
 ; We are ok with 'eq' predicate too.
 define i1 @t2_const_lshr_shl_eq(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t2_const_lshr_shl_eq(
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp eq i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = lshr i32 %x, 1
   %t1 = shl i32 %y, 1
@@ -58,13 +55,10 @@ define i1 @t2_const_lshr_shl_eq(i32 %x, i32 %y) {
 
 define i1 @t3_const_after_fold_lshr_shl_ne(i32 %x, i32 %y, i32 %len) {
 ; CHECK-LABEL: @t3_const_after_fold_lshr_shl_ne(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -1
-; CHECK-NEXT:    [[T3:%.*]] = shl i32 [[Y:%.*]], [[T2]]
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = sub i32 32, %len
   %t1 = lshr i32 %x, %t0
@@ -76,13 +70,10 @@ define i1 @t3_const_after_fold_lshr_shl_ne(i32 %x, i32 %y, i32 %len) {
 }
 define i1 @t4_const_after_fold_lshr_shl_ne(i32 %x, i32 %y, i32 %len) {
 ; CHECK-LABEL: @t4_const_after_fold_lshr_shl_ne(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -1
-; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[Y:%.*]], [[T2]]
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[Y:%.*]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = sub i32 32, %len
   %t1 = shl i32 %x, %t0
@@ -128,11 +119,10 @@ define i1 @t6_const_shl_lshr_ne(i32 %x, i32 %y, i32 %shamt0, i32 %shamt1) {
 
 define <2 x i1> @t7_const_lshr_shl_ne_vec_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @t7_const_lshr_shl_ne_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <2 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <2 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t0 = lshr <2 x i32> %x, <i32 1, i32 1>
   %t1 = shl <2 x i32> %y, <i32 1, i32 1>
@@ -142,11 +132,10 @@ define <2 x i1> @t7_const_lshr_shl_ne_vec_splat(<2 x i32> %x, <2 x i32> %y) {
 }
 define <2 x i1> @t8_const_lshr_shl_ne_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @t8_const_lshr_shl_ne_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 1, i32 2>
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 3, i32 4>
-; CHECK-NEXT:    [[T2:%.*]] = and <2 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <2 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t0 = lshr <2 x i32> %x, <i32 1, i32 2>
   %t1 = shl <2 x i32> %y, <i32 3, i32 4>
@@ -156,11 +145,10 @@ define <2 x i1> @t8_const_lshr_shl_ne_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) {
 }
 define <3 x i1> @t9_const_lshr_shl_ne_vec_undef0(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t9_const_lshr_shl_ne_vec_undef0(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 1, i32 1>
@@ -170,11 +158,10 @@ define <3 x i1> @t9_const_lshr_shl_ne_vec_undef0(<3 x i32> %x, <3 x i32> %y) {
 }
 define <3 x i1> @t10_const_lshr_shl_ne_vec_undef1(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t10_const_lshr_shl_ne_vec_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 1, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1>
@@ -184,11 +171,10 @@ define <3 x i1> @t10_const_lshr_shl_ne_vec_undef1(<3 x i32> %x, <3 x i32> %y) {
 }
 define <3 x i1> @t11_const_lshr_shl_ne_vec_undef2(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t11_const_lshr_shl_ne_vec_undef2(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], <i32 0, i32 undef, i32 0>
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 1, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 1, i32 1>
@@ -198,11 +184,10 @@ define <3 x i1> @t11_const_lshr_shl_ne_vec_undef2(<3 x i32> %x, <3 x i32> %y) {
 }
 define <3 x i1> @t12_const_lshr_shl_ne_vec_undef3(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t12_const_lshr_shl_ne_vec_undef3(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1>
@@ -212,11 +197,10 @@ define <3 x i1> @t12_const_lshr_shl_ne_vec_undef3(<3 x i32> %x, <3 x i32> %y) {
 }
 define <3 x i1> @t13_const_lshr_shl_ne_vec_undef4(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t13_const_lshr_shl_ne_vec_undef4(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], <i32 0, i32 undef, i32 0>
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 1, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1>
@@ -226,11 +210,10 @@ define <3 x i1> @t13_const_lshr_shl_ne_vec_undef4(<3 x i32> %x, <3 x i32> %y) {
 }
 define <3 x i1> @t14_const_lshr_shl_ne_vec_undef5(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t14_const_lshr_shl_ne_vec_undef5(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], <i32 0, i32 undef, i32 0>
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 1, i32 1>
@@ -240,11 +223,10 @@ define <3 x i1> @t14_const_lshr_shl_ne_vec_undef5(<3 x i32> %x, <3 x i32> %y) {
 }
 define <3 x i1> @t15_const_lshr_shl_ne_vec_undef6(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t15_const_lshr_shl_ne_vec_undef6(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl <3 x i32> [[Y:%.*]], <i32 1, i32 undef, i32 1>
-; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[T2]], <i32 0, i32 undef, i32 0>
-; CHECK-NEXT:    ret <3 x i1> [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP3]]
 ;
   %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1>
   %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1>
@@ -260,11 +242,10 @@ declare i32 @gen32()
 define i1 @t16_commutativity0(i32 %x) {
 ; CHECK-LABEL: @t16_commutativity0(
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %y = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -277,11 +258,10 @@ define i1 @t16_commutativity0(i32 %x) {
 define i1 @t17_commutativity1(i32 %y) {
 ; CHECK-LABEL: @t17_commutativity1(
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X]], 1
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T0]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %x = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -299,10 +279,10 @@ define i1 @t18_const_oneuse0(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t18_const_oneuse0(
 ; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[Y:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = lshr i32 %x, 1
   call void @use32(i32 %t0)
@@ -313,12 +293,12 @@ define i1 @t18_const_oneuse0(i32 %x, i32 %y) {
 }
 define i1 @t19_const_oneuse1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @t19_const_oneuse1(
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T1]])
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = lshr i32 %x, 1
   %t1 = shl i32 %y, 1
@@ -548,10 +528,10 @@ define i1 @t32_commutativity0_oneuse0(i32 %x) {
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @gen32()
 ; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[Y]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %y = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -564,12 +544,12 @@ define i1 @t32_commutativity0_oneuse0(i32 %x) {
 define i1 @t33_commutativity0_oneuse1(i32 %x) {
 ; CHECK-LABEL: @t33_commutativity0_oneuse1(
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T1]])
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %y = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -585,10 +565,10 @@ define i1 @t34_commutativity1_oneuse0(i32 %y) {
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @gen32()
 ; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T0]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[Y:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %x = call i32 @gen32()
   %t0 = lshr i32 %x, 1
@@ -601,12 +581,12 @@ define i1 @t34_commutativity1_oneuse0(i32 %y) {
 define i1 @t35_commutativity1_oneuse1(i32 %y) {
 ; CHECK-LABEL: @t35_commutativity1_oneuse1(
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T0:%.*]] = lshr i32 [[X]], 1
 ; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[Y:%.*]], 1
 ; CHECK-NEXT:    call void @use32(i32 [[T1]])
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T0]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne i32 [[T2]], 0
-; CHECK-NEXT:    ret i1 [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %x = call i32 @gen32()
   %t0 = lshr i32 %x, 1