5 files changed, 46 insertions, 198 deletions
diff --git a/llvm/include/llvm/Analysis/DemandedBits.h b/llvm/include/llvm/Analysis/DemandedBits.h
index 477b25384a1..d4384609762 100644
--- a/llvm/include/llvm/Analysis/DemandedBits.h
+++ b/llvm/include/llvm/Analysis/DemandedBits.h
@@ -44,11 +44,6 @@ public:
     F(F), AC(AC), DT(DT) {}
 
   /// Return the bits demanded from instruction I.
-  ///
-  /// The instruction must have integer of vector of integer type. For vector
-  /// instructions individual vector elements are not distinguished: A bit is
-  /// demanded if it is demanded for any of the vector elements. The size of
-  /// the return value corresponds to the scalar size in bits.
   APInt getDemandedBits(Instruction *I);
 
   /// Return true if, during analysis, I could not be reached.
diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index ea5a509eeb8..6bef77176cb 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -39,7 +39,6 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
-#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/Pass.h"
@@ -51,7 +50,6 @@
 #include <cstdint>
 
 using namespace llvm;
-using namespace llvm::PatternMatch;
 
 #define DEBUG_TYPE "demanded-bits"
 
@@ -145,17 +143,17 @@ void DemandedBits::determineLiveOperandBits(
         }
         break;
       case Intrinsic::fshl:
-      case Intrinsic::fshr: {
-        const APInt *SA;
+      case Intrinsic::fshr:
         if (OperandNo == 2) {
           // Shift amount is modulo the bitwidth. For powers of two we have
           // SA % BW == SA & (BW - 1).
           if (isPowerOf2_32(BitWidth))
             AB = BitWidth - 1;
-        } else if (match(II->getOperand(2), m_APInt(SA))) {
+        } else if (auto *SA = dyn_cast<ConstantInt>(II->getOperand(2))) {
+          // TODO: Support vectors.
           // Normalize to funnel shift left. APInt shifts of BitWidth are well-
           // defined, so no need to special-case zero shifts here.
-          uint64_t ShiftAmt = SA->urem(BitWidth);
+          uint64_t ShiftAmt = SA->getValue().urem(BitWidth);
           if (II->getIntrinsicID() == Intrinsic::fshr)
             ShiftAmt = BitWidth - ShiftAmt;
 
@@ -166,7 +164,6 @@ void DemandedBits::determineLiveOperandBits(
         }
         break;
       }
-      }
     break;
   case Instruction::Add:
   case Instruction::Sub:
@@ -177,9 +174,8 @@ void DemandedBits::determineLiveOperandBits(
     AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
     break;
   case Instruction::Shl:
-    if (OperandNo == 0) {
-      const APInt *ShiftAmtC;
-      if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
+    if (OperandNo == 0)
+      if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
         uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
         AB = AOut.lshr(ShiftAmt);
 
@@ -191,12 +187,10 @@ void DemandedBits::determineLiveOperandBits(
         else if (S->hasNoUnsignedWrap())
           AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
       }
-    }
     break;
   case Instruction::LShr:
-    if (OperandNo == 0) {
-      const APInt *ShiftAmtC;
-      if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
+    if (OperandNo == 0)
+      if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
         uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
         AB = AOut.shl(ShiftAmt);
 
@@ -205,12 +199,10 @@ void DemandedBits::determineLiveOperandBits(
         if (cast<LShrOperator>(UserI)->isExact())
           AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
       }
-    }
     break;
   case Instruction::AShr:
-    if (OperandNo == 0) {
-      const APInt *ShiftAmtC;
-      if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
+    if (OperandNo == 0)
+      if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
         uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
         AB = AOut.shl(ShiftAmt);
         // Because the high input bit is replicated into the
@@ -225,7 +217,6 @@ void DemandedBits::determineLiveOperandBits(
         if (cast<AShrOperator>(UserI)->isExact())
           AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
       }
-    }
     break;
   case Instruction::And:
     AB = AOut;
@@ -283,15 +274,6 @@ void DemandedBits::determineLiveOperandBits(
     if (OperandNo != 0)
       AB = AOut;
     break;
-  case Instruction::ExtractElement:
-    if (OperandNo == 0)
-      AB = AOut;
-    break;
-  case Instruction::InsertElement:
-  case Instruction::ShuffleVector:
-    if (OperandNo == 0 || OperandNo == 1)
-      AB = AOut;
-    break;
   }
 }
 
@@ -327,9 +309,8 @@ void DemandedBits::performAnalysis() {
     // bits and add the instruction to the work list. For other instructions
     // add their operands to the work list (for integer values operands, mark
     // all bits as live).
-    Type *T = I.getType();
-    if (T->isIntOrIntVectorTy()) {
-      if (AliveBits.try_emplace(&I, T->getScalarSizeInBits(), 0).second)
+    if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+      if (AliveBits.try_emplace(&I, IT->getBitWidth(), 0).second)
         Worklist.push_back(&I);
 
       continue;
@@ -338,9 +319,8 @@ void DemandedBits::performAnalysis() {
     // Non-integer-typed instructions...
     for (Use &OI : I.operands()) {
       if (Instruction *J = dyn_cast<Instruction>(OI)) {
-        Type *T = J->getType();
-        if (T->isIntOrIntVectorTy())
-          AliveBits[J] = APInt::getAllOnesValue(T->getScalarSizeInBits());
+        if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
+          AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
         Worklist.push_back(J);
       }
     }
@@ -356,13 +336,13 @@ void DemandedBits::performAnalysis() {
 
     LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
     APInt AOut;
-    if (UserI->getType()->isIntOrIntVectorTy()) {
+    if (UserI->getType()->isIntegerTy()) {
       AOut = AliveBits[UserI];
       LLVM_DEBUG(dbgs() << " Alive Out: " << AOut);
     }
     LLVM_DEBUG(dbgs() << "\n");
 
-    if (!UserI->getType()->isIntOrIntVectorTy())
+    if (!UserI->getType()->isIntegerTy())
       Visited.insert(UserI);
 
     KnownBits Known, Known2;
@@ -371,11 +351,10 @@ void DemandedBits::performAnalysis() {
     // operand is added to the work-list.
     for (Use &OI : UserI->operands()) {
       if (Instruction *I = dyn_cast<Instruction>(OI)) {
-        Type *T = I->getType();
-        if (T->isIntOrIntVectorTy()) {
-          unsigned BitWidth = T->getScalarSizeInBits();
+        if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
+          unsigned BitWidth = IT->getBitWidth();
           APInt AB = APInt::getAllOnesValue(BitWidth);
-          if (UserI->getType()->isIntOrIntVectorTy() && !AOut &&
+          if (UserI->getType()->isIntegerTy() && !AOut &&
               !isAlwaysLive(UserI)) {
             AB = APInt(BitWidth, 0);
           } else {
@@ -408,14 +387,13 @@ void DemandedBits::performAnalysis() {
 }
 
 APInt DemandedBits::getDemandedBits(Instruction *I) {
-  assert(I->getType()->isIntOrIntVectorTy() &&
-         "Not an integer or vector of integer instruction");
-
   performAnalysis();
+
+  const DataLayout &DL = I->getModule()->getDataLayout();
   auto Found = AliveBits.find(I);
   if (Found != AliveBits.end())
     return Found->second;
-  return APInt::getAllOnesValue(I->getType()->getScalarSizeInBits());
+  return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType()));
 }
 
 bool DemandedBits::isInstructionDead(Instruction *I) {
diff --git a/llvm/lib/Transforms/Scalar/BDCE.cpp b/llvm/lib/Transforms/Scalar/BDCE.cpp
index f63182e57c1..3a8ef073cb4 100644
--- a/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -38,8 +38,7 @@ STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
 /// instruction may need to be cleared of assumptions that can no longer be
 /// guaranteed correct.
 static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
-  assert(I->getType()->isIntOrIntVectorTy() &&
-         "Trivializing a non-integer value?");
+  assert(I->getType()->isIntegerTy() && "Trivializing a non-integer value?");
 
   // Initialize the worklist with eligible direct users.
   SmallVector<Instruction *, 16> WorkList;
@@ -47,13 +46,13 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
     // If all bits of a user are demanded, then we know that nothing below that
     // in the def-use chain needs to be changed.
     auto *J = dyn_cast<Instruction>(JU);
-    if (J && J->getType()->isIntOrIntVectorTy() &&
+    if (J && J->getType()->isSized() &&
         !DB.getDemandedBits(J).isAllOnesValue())
       WorkList.push_back(J);
 
-    // Note that we need to check for non-int types above before asking for
+    // Note that we need to check for unsized types above before asking for
     // demanded bits. Normally, the only way to reach an instruction with an
-    // non-int type is via an instruction that has side effects (or otherwise
+    // unsized type is via an instruction that has side effects (or otherwise
     // will demand its input bits). However, if we have a readnone function
     // that returns an unsized type (e.g., void), we must avoid asking for the
     // demanded bits of the function call's return value. A void-returning
@@ -79,7 +78,7 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
       // If all bits of a user are demanded, then we know that nothing below
       // that in the def-use chain needs to be changed.
       auto *K = dyn_cast<Instruction>(KU);
-      if (K && !Visited.count(K) && K->getType()->isIntOrIntVectorTy() &&
+      if (K && !Visited.count(K) && K->getType()->isSized() &&
           !DB.getDemandedBits(K).isAllOnesValue())
         WorkList.push_back(K);
     }
@@ -96,7 +95,7 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
     if (I.mayHaveSideEffects() && I.use_empty())
       continue;
 
-    if (I.getType()->isIntOrIntVectorTy() &&
+    if (I.getType()->isIntegerTy() &&
         !DB.getDemandedBits(&I).getBoolValue()) {
       // For live instructions that have all dead bits, first make them dead by
       // replacing all uses with something else. Then, if they don't need to
diff --git a/llvm/test/Analysis/DemandedBits/vectors.ll b/llvm/test/Analysis/DemandedBits/vectors.ll
deleted file mode 100644
index 36cde05fb7c..00000000000
--- a/llvm/test/Analysis/DemandedBits/vectors.ll
+++ /dev/null
@@ -1,136 +0,0 @@
-; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s
-; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s
-
-; CHECK-DAG: DemandedBits: 0xff00 for   %x = or <2 x i32> %a, zeroinitializer
-; CHECK-DAG: DemandedBits: 0xff00 for   %y = or <2 x i32> %b, zeroinitializer
-; CHECK-DAG: DemandedBits: 0xff00 for   %z = or <2 x i32> %x, %y
-; CHECK-DAG: DemandedBits: 0xff for   %u = lshr <2 x i32> %z, <i32 8, i32 8>
-; CHECK-DAG: DemandedBits: 0xff for   %r = trunc <2 x i32> %u to <2 x i8>
-define <2 x i8> @test_basic(<2 x i32> %a, <2 x i32> %b) {
-  %x = or <2 x i32> %a, zeroinitializer
-  %y = or <2 x i32> %b, zeroinitializer
-  %z = or <2 x i32> %x, %y
-  %u = lshr <2 x i32> %z, <i32 8, i32 8>
-  %r = trunc <2 x i32> %u to <2 x i8>
-  ret <2 x i8> %r
-}
-
-; Vector-specific instructions
-
-; CHECK-DAG: DemandedBits: 0xff for   %x = or <2 x i32> %a, zeroinitializer
-; CHECK-DAG: DemandedBits: 0xf0 for   %z = extractelement <2 x i32> %x, i32 1
-; CHECK-DAG: DemandedBits: 0xf for   %y = extractelement <2 x i32> %x, i32 0
-; CHECK-DAG: DemandedBits: 0xffffffff for   %u = and i32 %y, 15
-; CHECK-DAG: DemandedBits: 0xffffffff for   %v = and i32 %z, 240
-; CHECK-DAG: DemandedBits: 0xffffffff for   %r = or i32 %u, %v
-define i32 @test_extractelement(<2 x i32> %a) {
-  %x = or <2 x i32> %a, zeroinitializer
-  %y = extractelement <2 x i32> %x, i32 0
-  %z = extractelement <2 x i32> %x, i32 1
-  %u = and i32 %y, 15
-  %v = and i32 %z, 240
-  %r = or i32 %u, %v
-  ret i32 %r
-}
-
-; CHECK-DAG: DemandedBits: 0xff for   %x = or i32 %a, 0
-; CHECK-DAG: DemandedBits: 0xff for   %y = or i32 %b, 0
-; CHECK-DAG: DemandedBits: 0xff for   %z = insertelement <2 x i32> undef, i32 %x, i32 0
-; CHECK-DAG: DemandedBits: 0xff for   %u = insertelement <2 x i32> %z, i32 %y, i32 1
-; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <2 x i32> %u, <i32 255, i32 127>
-define <2 x i32> @test_insertelement(i32 %a, i32 %b) {
-  %x = or i32 %a, 0
-  %y = or i32 %b, 0
-  %z = insertelement <2 x i32> undef, i32 %x, i32 0
-  %u = insertelement <2 x i32> %z, i32 %y, i32 1
-  %r = and <2 x i32> %u, <i32 255, i32 127>
-  ret <2 x i32> %r
-}
-
-; CHECK-DAG: DemandedBits: 0xff for   %x = or <2 x i32> %a, zeroinitializer
-; CHECK-DAG: DemandedBits: 0xff for   %y = or <2 x i32> %b, zeroinitializer
-; CHECK-DAG: DemandedBits: 0xff for   %z = shufflevector <2 x i32> %x, <2 x i32> %y, <3 x i32> <i32 0, i32 3, i32 1>
-; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <3 x i32> %z, <i32 255, i32 127, i32 0>
-define <3 x i32> @test_shufflevector(<2 x i32> %a, <2 x i32> %b) {
-  %x = or <2 x i32> %a, zeroinitializer
-  %y = or <2 x i32> %b, zeroinitializer
-  %z = shufflevector <2 x i32> %x, <2 x i32> %y, <3 x i32> <i32 0, i32 3, i32 1>
-  %r = and <3 x i32> %z, <i32 255, i32 127, i32 0>
-  ret <3 x i32> %r
-}
-
-; Shifts with splat shift amounts
-
-; CHECK-DAG: DemandedBits: 0xf for   %x = or <2 x i32> %a, zeroinitializer
-; CHECK-DAG: DemandedBits: 0xf0 for   %y = shl <2 x i32> %x, <i32 4, i32 4>
-; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <2 x i32> %y, <i32 240, i32 240>
-define <2 x i32> @test_shl(<2 x i32> %a) {
-  %x = or <2 x i32> %a, zeroinitializer
-  %y = shl <2 x i32> %x, <i32 4, i32 4>
-  %r = and <2 x i32> %y, <i32 240, i32 240>
-  ret <2 x i32> %r
-}
-
-; CHECK-DAG: DemandedBits: 0xf00 for   %x = or <2 x i32> %a, zeroinitializer
-; CHECK-DAG: DemandedBits: 0xf0 for   %y = ashr <2 x i32> %x, <i32 4, i32 4>
-; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <2 x i32> %y, <i32 240, i32 240>
-define <2 x i32> @test_ashr(<2 x i32> %a) {
-  %x = or <2 x i32> %a, zeroinitializer
-  %y = ashr <2 x i32> %x, <i32 4, i32 4>
-  %r = and <2 x i32> %y, <i32 240, i32 240>
-  ret <2 x i32> %r
-}
-
-; CHECK-DAG: DemandedBits: 0xf00 for   %x = or <2 x i32> %a, zeroinitializer
-; CHECK-DAG: DemandedBits: 0xf0 for   %y = lshr <2 x i32> %x, <i32 4, i32 4>
-; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <2 x i32> %y, <i32 240, i32 240>
-define <2 x i32> @test_lshr(<2 x i32> %a) {
-  %x = or <2 x i32> %a, zeroinitializer
-  %y = lshr <2 x i32> %x, <i32 4, i32 4>
-  %r = and <2 x i32> %y, <i32 240, i32 240>
-  ret <2 x i32> %r
-}
-
-declare <2 x i32> @llvm.fshl.i32(<2 x i32>, <2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.fshr.i32(<2 x i32>, <2 x i32>, <2 x i32>)
-
-; CHECK-DAG: DemandedBits: 0xf for   %x = or <2 x i32> %a, zeroinitializer
-; CHECK-DAG: DemandedBits: 0xf0000000 for   %y = or <2 x i32> %b, zeroinitializer
-; CHECK-DAG: DemandedBits: 0xff for   %z = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 4, i32 4>)
-; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <2 x i32> %z, <i32 255, i32 255>
-define <2 x i32> @test_fshl(<2 x i32> %a, <2 x i32> %b) {
-  %x = or <2 x i32> %a, zeroinitializer
-  %y = or <2 x i32> %b, zeroinitializer
-  %z = call <2 x i32> @llvm.fshl.i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 4, i32 4>)
-  %r = and <2 x i32> %z, <i32 255, i32 255>
-  ret <2 x i32> %r
-}
-
-; CHECK-DAG: DemandedBits: 0xf for   %x = or <2 x i32> %a, zeroinitializer
-; CHECK-DAG: DemandedBits: 0xf0000000 for   %y = or <2 x i32> %b, zeroinitializer
-; CHECK-DAG: DemandedBits: 0xff for   %z = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 28, i32 28>)
-; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <2 x i32> %z, <i32 255, i32 255>
-define <2 x i32> @test_fshr(<2 x i32> %a, <2 x i32> %b) {
-  %x = or <2 x i32> %a, zeroinitializer
-  %y = or <2 x i32> %b, zeroinitializer
-  %z = call <2 x i32> @llvm.fshr.i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 28, i32 28>)
-  %r = and <2 x i32> %z, <i32 255, i32 255>
-  ret <2 x i32> %r
-}
-
-; FP / Int conversion. These have different input / output types.
-
-; CHECK-DAG: DemandedBits: 0xffffffff for   %x = or <2 x i32> %a, zeroinitializer
-define <2 x float> @test_uitofp(<2 x i32> %a) {
-  %x = or <2 x i32> %a, zeroinitializer
-  %r = uitofp <2 x i32> %x to <2 x float>
-  ret <2 x float> %r
-}
-
-; CHECK-DAG: DemandedBits: 0xffffffff for   %y = fptoui <2 x float> %x to <2 x i32>
-define <2 x i32> @test_fptoui(<2 x float> %a) {
-  %x = fadd <2 x float> %a, <float 1.0, float 1.0>
-  %y = fptoui <2 x float> %x to <2 x i32>
-  %r = and <2 x i32> %y, <i32 255, i32 255>
-  ret <2 x i32> %y
-}
diff --git a/llvm/test/Transforms/BDCE/vectors.ll b/llvm/test/Transforms/BDCE/vectors.ll
index fde22fd36b7..d5ac1504439 100644
--- a/llvm/test/Transforms/BDCE/vectors.ll
+++ b/llvm/test/Transforms/BDCE/vectors.ll
@@ -7,9 +7,12 @@
 
 define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @test_basic(
+; CHECK-NEXT:    [[A2:%.*]] = add <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> [[A2]], <i32 4, i32 4>
 ; CHECK-NEXT:    [[B2:%.*]] = add <2 x i32> [[B:%.*]], <i32 1, i32 1>
 ; CHECK-NEXT:    [[B3:%.*]] = and <2 x i32> [[B2]], <i32 8, i32 8>
-; CHECK-NEXT:    [[D:%.*]] = ashr <2 x i32> [[B3]], <i32 3, i32 3>
+; CHECK-NEXT:    [[C:%.*]] = or <2 x i32> [[A3]], [[B3]]
+; CHECK-NEXT:    [[D:%.*]] = ashr <2 x i32> [[C]], <i32 3, i32 3>
 ; CHECK-NEXT:    ret <2 x i32> [[D]]
 ;
 ; CHECK-IO-LABEL: @test_basic(
@@ -33,9 +36,12 @@ define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) {
 ; Going vector -> scalar
 define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @test_extractelement(
+; CHECK-NEXT:    [[A2:%.*]] = add <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> [[A2]], <i32 4, i32 4>
 ; CHECK-NEXT:    [[B2:%.*]] = add <2 x i32> [[B:%.*]], <i32 1, i32 1>
 ; CHECK-NEXT:    [[B3:%.*]] = and <2 x i32> [[B2]], <i32 8, i32 8>
-; CHECK-NEXT:    [[D:%.*]] = extractelement <2 x i32> [[B3]], i32 0
+; CHECK-NEXT:    [[C:%.*]] = or <2 x i32> [[A3]], [[B3]]
+; CHECK-NEXT:    [[D:%.*]] = extractelement <2 x i32> [[C]], i32 0
 ; CHECK-NEXT:    [[E:%.*]] = ashr i32 [[D]], 3
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
@@ -62,10 +68,14 @@ define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) {
 ; Going scalar -> vector
 define <2 x i32> @test_insertelement(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test_insertelement(
-; CHECK-NEXT:    [[Y:%.*]] = insertelement <2 x i32> undef, i32 [[B:%.*]], i32 0
-; CHECK-NEXT:    [[Y2:%.*]] = insertelement <2 x i32> [[Y]], i32 [[A:%.*]], i32 1
+; CHECK-NEXT:    [[X:%.*]] = insertelement <2 x i32> undef, i32 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[X2:%.*]] = insertelement <2 x i32> [[X]], i32 [[B:%.*]], i32 1
+; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> [[X2]], <i32 4, i32 4>
+; CHECK-NEXT:    [[Y:%.*]] = insertelement <2 x i32> undef, i32 [[B]], i32 0
+; CHECK-NEXT:    [[Y2:%.*]] = insertelement <2 x i32> [[Y]], i32 [[A]], i32 1
 ; CHECK-NEXT:    [[Y3:%.*]] = and <2 x i32> [[Y2]], <i32 8, i32 8>
-; CHECK-NEXT:    [[U:%.*]] = ashr <2 x i32> [[Y3]], <i32 3, i32 3>
+; CHECK-NEXT:    [[Z:%.*]] = or <2 x i32> [[X3]], [[Y3]]
+; CHECK-NEXT:    [[U:%.*]] = ashr <2 x i32> [[Z]], <i32 3, i32 3>
 ; CHECK-NEXT:    ret <2 x i32> [[U]]
 ;
 ; CHECK-IO-LABEL: @test_insertelement(
@@ -122,8 +132,10 @@ define <2 x i32> @test_conversion(<2 x i32> %a) {
 ; Assumption invalidation (adapted from invalidate-assumptions.ll)
 define <2 x i1> @test_assumption_invalidation(<2 x i1> %b, <2 x i8> %x) {
 ; CHECK-LABEL: @test_assumption_invalidation(
+; CHECK-NEXT:    [[SETBIT:%.*]] = or <2 x i8> [[X:%.*]], <i8 64, i8 64>
 ; CHECK-NEXT:    [[LITTLE_NUMBER:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i8> zeroinitializer, [[LITTLE_NUMBER]]
+; CHECK-NEXT:    [[BIG_NUMBER:%.*]] = shl <2 x i8> [[SETBIT]], <i8 1, i8 1>
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw <2 x i8> [[BIG_NUMBER]], [[LITTLE_NUMBER]]
 ; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <2 x i8> [[SUB]] to <2 x i1>
 ; CHECK-NEXT:    ret <2 x i1> [[TRUNC]]
 ;