Reapply "[DemandedBits][BDCE] Support vectors of integers"

DemandedBits and BDCE currently only support scalar integers. This patch extends them to also handle vector integer operations. In this case bits are not tracked for individual vector elements, instead a bit is demanded if it is demanded for any of the elements. This matches the behavior of computeKnownBits in ValueTracking and SimplifyDemandedBits in InstCombine. Unlike the previous iteration of this patch, getDemandedBits() can now again be called on arbirary (sized) instructions, even if they don't have integer or vector of integer type. (For vector types the size of the returned mask will now be the scalar size in bits though.) The added LoopVectorize test case shows a case which triggered an assertion failure with the previous attempt, because getDemandedBits() was called on a pointer-typed instruction. Differential Revision: https://reviews.llvm.org/D55297 llvm-svn: 348602
author: Nikita Popov <nikita.ppv@gmail.com> 2018-12-07 15:38:13 +0000
committer: Nikita Popov <nikita.ppv@gmail.com> 2018-12-07 15:38:13 +0000
commit: 110cf05203caccd52373707044f4c37a785e85e3 (patch)
tree: 2b724112f5af36e49f9562be1459fae165cf0957 /llvm/lib
parent: b297379ef07829ac7f06c0e2058a889366c46a82 (diff)
download: bcm5719-llvm-110cf05203caccd52373707044f4c37a785e85e3.tar.gz
bcm5719-llvm-110cf05203caccd52373707044f4c37a785e85e3.zip
2 files changed, 51 insertions, 27 deletions
diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index 6bef77176cb..0382787fbef 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -39,6 +39,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/Pass.h"
@@ -50,6 +51,7 @@
 #include <cstdint>
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 #define DEBUG_TYPE "demanded-bits"
 
@@ -143,17 +145,17 @@ void DemandedBits::determineLiveOperandBits(
         }
         break;
       case Intrinsic::fshl:
-      case Intrinsic::fshr:
+      case Intrinsic::fshr: {
+        const APInt *SA;
         if (OperandNo == 2) {
           // Shift amount is modulo the bitwidth. For powers of two we have
           // SA % BW == SA & (BW - 1).
           if (isPowerOf2_32(BitWidth))
             AB = BitWidth - 1;
-        } else if (auto *SA = dyn_cast<ConstantInt>(II->getOperand(2))) {
-          // TODO: Support vectors.
+        } else if (match(II->getOperand(2), m_APInt(SA))) {
           // Normalize to funnel shift left. APInt shifts of BitWidth are well-
           // defined, so no need to special-case zero shifts here.
-          uint64_t ShiftAmt = SA->getValue().urem(BitWidth);
+          uint64_t ShiftAmt = SA->urem(BitWidth);
           if (II->getIntrinsicID() == Intrinsic::fshr)
             ShiftAmt = BitWidth - ShiftAmt;
 
@@ -164,6 +166,7 @@ void DemandedBits::determineLiveOperandBits(
         }
         break;
       }
+      }
     break;
   case Instruction::Add:
   case Instruction::Sub:
@@ -174,8 +177,9 @@ void DemandedBits::determineLiveOperandBits(
     AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
     break;
   case Instruction::Shl:
-    if (OperandNo == 0)
-      if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+    if (OperandNo == 0) {
+      const APInt *ShiftAmtC;
+      if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
         uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
         AB = AOut.lshr(ShiftAmt);
 
@@ -187,10 +191,12 @@ void DemandedBits::determineLiveOperandBits(
         else if (S->hasNoUnsignedWrap())
           AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
       }
+    }
     break;
   case Instruction::LShr:
-    if (OperandNo == 0)
-      if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+    if (OperandNo == 0) {
+      const APInt *ShiftAmtC;
+      if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
         uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
         AB = AOut.shl(ShiftAmt);
 
@@ -199,10 +205,12 @@ void DemandedBits::determineLiveOperandBits(
         if (cast<LShrOperator>(UserI)->isExact())
           AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
       }
+    }
     break;
   case Instruction::AShr:
-    if (OperandNo == 0)
-      if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+    if (OperandNo == 0) {
+      const APInt *ShiftAmtC;
+      if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
         uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
         AB = AOut.shl(ShiftAmt);
         // Because the high input bit is replicated into the
@@ -217,6 +225,7 @@ void DemandedBits::determineLiveOperandBits(
         if (cast<AShrOperator>(UserI)->isExact())
           AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
       }
+    }
     break;
   case Instruction::And:
     AB = AOut;
@@ -274,6 +283,15 @@ void DemandedBits::determineLiveOperandBits(
     if (OperandNo != 0)
       AB = AOut;
     break;
+  case Instruction::ExtractElement:
+    if (OperandNo == 0)
+      AB = AOut;
+    break;
+  case Instruction::InsertElement:
+  case Instruction::ShuffleVector:
+    if (OperandNo == 0 || OperandNo == 1)
+      AB = AOut;
+    break;
   }
 }
 
@@ -309,8 +327,9 @@ void DemandedBits::performAnalysis() {
     // bits and add the instruction to the work list. For other instructions
     // add their operands to the work list (for integer values operands, mark
     // all bits as live).
-    if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
-      if (AliveBits.try_emplace(&I, IT->getBitWidth(), 0).second)
+    Type *T = I.getType();
+    if (T->isIntOrIntVectorTy()) {
+      if (AliveBits.try_emplace(&I, T->getScalarSizeInBits(), 0).second)
         Worklist.push_back(&I);
 
       continue;
@@ -319,8 +338,9 @@ void DemandedBits::performAnalysis() {
     // Non-integer-typed instructions...
     for (Use &OI : I.operands()) {
       if (Instruction *J = dyn_cast<Instruction>(OI)) {
-        if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
-          AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
+        Type *T = J->getType();
+        if (T->isIntOrIntVectorTy())
+          AliveBits[J] = APInt::getAllOnesValue(T->getScalarSizeInBits());
         Worklist.push_back(J);
       }
     }
@@ -336,13 +356,13 @@ void DemandedBits::performAnalysis() {
 
     LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
     APInt AOut;
-    if (UserI->getType()->isIntegerTy()) {
+    if (UserI->getType()->isIntOrIntVectorTy()) {
       AOut = AliveBits[UserI];
       LLVM_DEBUG(dbgs() << " Alive Out: " << AOut);
     }
     LLVM_DEBUG(dbgs() << "\n");
 
-    if (!UserI->getType()->isIntegerTy())
+    if (!UserI->getType()->isIntOrIntVectorTy())
       Visited.insert(UserI);
 
     KnownBits Known, Known2;
@@ -351,10 +371,11 @@ void DemandedBits::performAnalysis() {
     // operand is added to the work-list.
     for (Use &OI : UserI->operands()) {
       if (Instruction *I = dyn_cast<Instruction>(OI)) {
-        if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
-          unsigned BitWidth = IT->getBitWidth();
+        Type *T = I->getType();
+        if (T->isIntOrIntVectorTy()) {
+          unsigned BitWidth = T->getScalarSizeInBits();
           APInt AB = APInt::getAllOnesValue(BitWidth);
-          if (UserI->getType()->isIntegerTy() && !AOut &&
+          if (UserI->getType()->isIntOrIntVectorTy() && !AOut &&
               !isAlwaysLive(UserI)) {
             AB = APInt(BitWidth, 0);
           } else {
@@ -389,11 +410,13 @@ void DemandedBits::performAnalysis() {
 APInt DemandedBits::getDemandedBits(Instruction *I) {
   performAnalysis();
 
-  const DataLayout &DL = I->getModule()->getDataLayout();
   auto Found = AliveBits.find(I);
   if (Found != AliveBits.end())
     return Found->second;
-  return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType()));
+
+  const DataLayout &DL = I->getModule()->getDataLayout();
+  return APInt::getAllOnesValue(
+      DL.getTypeSizeInBits(I->getType()->getScalarType()));
 }
 
 bool DemandedBits::isInstructionDead(Instruction *I) {
diff --git a/llvm/lib/Transforms/Scalar/BDCE.cpp b/llvm/lib/Transforms/Scalar/BDCE.cpp
index 3a8ef073cb4..f63182e57c1 100644
--- a/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -38,7 +38,8 @@ STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
 /// instruction may need to be cleared of assumptions that can no longer be
 /// guaranteed correct.
 static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
-  assert(I->getType()->isIntegerTy() && "Trivializing a non-integer value?");
+  assert(I->getType()->isIntOrIntVectorTy() &&
+         "Trivializing a non-integer value?");
 
   // Initialize the worklist with eligible direct users.
   SmallVector<Instruction *, 16> WorkList;
@@ -46,13 +47,13 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
     // If all bits of a user are demanded, then we know that nothing below that
     // in the def-use chain needs to be changed.
     auto *J = dyn_cast<Instruction>(JU);
-    if (J && J->getType()->isSized() &&
+    if (J && J->getType()->isIntOrIntVectorTy() &&
         !DB.getDemandedBits(J).isAllOnesValue())
       WorkList.push_back(J);
 
-    // Note that we need to check for unsized types above before asking for
+    // Note that we need to check for non-int types above before asking for
     // demanded bits. Normally, the only way to reach an instruction with an
-    // unsized type is via an instruction that has side effects (or otherwise
+    // non-int type is via an instruction that has side effects (or otherwise
     // will demand its input bits). However, if we have a readnone function
     // that returns an unsized type (e.g., void), we must avoid asking for the
     // demanded bits of the function call's return value. A void-returning
@@ -78,7 +79,7 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
       // If all bits of a user are demanded, then we know that nothing below
       // that in the def-use chain needs to be changed.
       auto *K = dyn_cast<Instruction>(KU);
-      if (K && !Visited.count(K) && K->getType()->isSized() &&
+      if (K && !Visited.count(K) && K->getType()->isIntOrIntVectorTy() &&
           !DB.getDemandedBits(K).isAllOnesValue())
         WorkList.push_back(K);
     }
@@ -95,7 +96,7 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
     if (I.mayHaveSideEffects() && I.use_empty())
       continue;
 
-    if (I.getType()->isIntegerTy() &&
+    if (I.getType()->isIntOrIntVectorTy() &&
         !DB.getDemandedBits(&I).getBoolValue()) {
       // For live instructions that have all dead bits, first make them dead by
       // replacing all uses with something else. Then, if they don't need to
author	Nikita Popov <nikita.ppv@gmail.com>	2018-12-07 15:38:13 +0000
committer	Nikita Popov <nikita.ppv@gmail.com>	2018-12-07 15:38:13 +0000
commit	110cf05203caccd52373707044f4c37a785e85e3 (patch)
tree	2b724112f5af36e49f9562be1459fae165cf0957 /llvm/lib
parent	b297379ef07829ac7f06c0e2058a889366c46a82 (diff)
download	bcm5719-llvm-110cf05203caccd52373707044f4c37a785e85e3.tar.gz bcm5719-llvm-110cf05203caccd52373707044f4c37a785e85e3.zip