diff options
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 96 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VecUtils.cpp | 317 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VecUtils.h | 48 | 
3 files changed, 259 insertions, 202 deletions
| diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 91c04633244..c3cb03764b2 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -41,14 +41,14 @@  using namespace llvm;  static cl::opt<int> -SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, -                 cl::desc("Only vectorize trees if the gain is above this " -                          "number. (gain = -cost of vectorization)")); +    SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, +                     cl::desc("Only vectorize trees if the gain is above this " +                              "number. (gain = -cost of vectorization)"));  namespace {  /// The SLPVectorizer Pass.  struct SLPVectorizer : public FunctionPass { -  typedef MapVector<Value*, BoUpSLP::StoreList> StoreListMap; +  typedef MapVector<Value *, BoUpSLP::StoreList> StoreListMap;    /// Pass identification, replacement for typeid    static char ID; @@ -78,7 +78,7 @@ struct SLPVectorizer : public FunctionPass {      if (!DL)        return false; -    DEBUG(dbgs()<<"SLP: Analyzing blocks in " << F.getName() << ".\n"); +    DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");      for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {        BasicBlock *BB = it; @@ -94,7 +94,7 @@ struct SLPVectorizer : public FunctionPass {        // Vectorize trees that end at stores.        if (unsigned count = collectStores(BB, R)) {          (void)count; -        DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n"); +        DEBUG(dbgs() << "SLP: Found " << count << " stores to vectorize.\n");          BBChanged |= vectorizeStoreChains(R);        } @@ -108,7 +108,7 @@ struct SLPVectorizer : public FunctionPass {      }      if (Changed) { -      DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n"); +      DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");        DEBUG(verifyFunction(F));      }      return Changed; @@ -131,7 +131,7 @@ private:    unsigned collectStores(BasicBlock *BB, BoUpSLP &R);    /// \brief Try to vectorize a chain that starts at two arithmetic instrs. -  bool tryToVectorizePair(Value *A, Value *B,  BoUpSLP &R); +  bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);    /// \brief Try to vectorize a list of operands. If \p NeedExtracts is true    /// then we calculate the cost of extracting the scalars from the vector. @@ -139,7 +139,7 @@ private:    bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool NeedExtracts);    /// \brief Try to vectorize a chain that may start at the operands of \V; -  bool tryToVectorize(BinaryOperator *V,  BoUpSLP &R); +  bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);    /// \brief Vectorize the stores that were collected in StoreRefs.    bool vectorizeStoreChains(BoUpSLP &R); @@ -188,8 +188,9 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {    return count;  } -bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B,  BoUpSLP &R) { -  if (!A || !B) return false; +bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { +  if (!A || !B) +    return false;    Value *VL[] = { A, B };    return tryToVectorizeList(VL, R, true);  } @@ -199,11 +200,12 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,    if (VL.size() < 2)      return false; -  DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n"); +  DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n");    // Check that all of the parts are scalar instructions of the same type.    Instruction *I0 = dyn_cast<Instruction>(VL[0]); -  if (!I0) return 0; +  if (!I0) +    return 0;    unsigned Opcode0 = I0->getOpcode(); @@ -217,17 +219,20 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,    }    int Cost = R.getTreeCost(VL); -  int ExtrCost =  NeedExtracts ? R.getScalarizationCost(VL) : 0; -  DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << -        " Cost of extract:" << ExtrCost << ".\n"); -  if ((Cost+ExtrCost) >= -SLPCostThreshold) return false; -  DEBUG(dbgs()<<"SLP: Vectorizing pair.\n"); +  int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0; +  DEBUG(dbgs() << "SLP: Cost of pair:" << Cost +               << " Cost of extract:" << ExtrCost << ".\n"); +  if ((Cost + ExtrCost) >= -SLPCostThreshold) +    return false; +  DEBUG(dbgs() << "SLP: Vectorizing pair.\n");    R.vectorizeArith(VL);    return true;  } -bool SLPVectorizer::tryToVectorize(BinaryOperator *V,  BoUpSLP &R) { -  if (!V) return false; +bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { +  if (!V) +    return false; +      // Try to vectorize V.    if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))      return true; @@ -267,22 +272,27 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V,  BoUpSLP &R) {  bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {    bool Changed = false;    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { -    if (isa<DbgInfoIntrinsic>(it)) continue; +    if (isa<DbgInfoIntrinsic>(it)) +      continue;      // Try to vectorize reductions that use PHINodes.      if (PHINode *P = dyn_cast<PHINode>(it)) {        // Check that the PHI is a reduction PHI. -      if (P->getNumIncomingValues() != 2) return Changed; -      Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) : -                    (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : -                     0)); +      if (P->getNumIncomingValues() != 2) +        return Changed; +      Value *Rdx = +          (P->getIncomingBlock(0) == BB +               ? (P->getIncomingValue(0)) +               : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : 0));        // Check if this is a Binary Operator.        BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);        if (!BI)          continue;        Value *Inst = BI->getOperand(0); -      if (Inst == P) Inst = BI->getOperand(1); +      if (Inst == P) +        Inst = BI->getOperand(1); +              Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);        continue;      } @@ -295,7 +305,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {        }        for (int i = 0; i < 2; ++i)          if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) -          Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R); +          Changed |= +              tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R);        continue;      }    } @@ -303,7 +314,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {    // Scan the PHINodes in our successors in search for pairing hints.    for (succ_iterator it = succ_begin(BB), e = succ_end(BB); it != e; ++it) {      BasicBlock *Succ = *it; -    SmallVector<Value*, 4> Incoming; +    SmallVector<Value *, 4> Incoming;      // Collect the incoming values from the PHIs.      for (BasicBlock::iterator instr = Succ->begin(), ie = Succ->end(); @@ -322,7 +333,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {      if (Incoming.size() > 1)        Changed |= tryToVectorizeList(Incoming, R, true);    } -   +    return Changed;  } @@ -334,8 +345,8 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {      if (it->second.size() < 2)        continue; -    DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " << -          it->second.size() << ".\n"); +    DEBUG(dbgs() << "SLP: Analyzing a store chain of length " +                 << it->second.size() << ".\n");      Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);    } @@ -343,7 +354,7 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {  }  bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) { -  SmallVector<Value*, 4> Seq; +  SmallVector<Value *, 4> Seq;    bool Changed = false;    for (int i = 0, e = Gathers.size(); i < e; ++i) {      InsertElementInst *IEI = dyn_cast_or_null<InsertElementInst>(Gathers[i]); @@ -359,13 +370,13 @@ bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) {        Instruction *I = cast<Instruction>(Seq[0]);        BasicBlock *BB = I->getParent(); -      DEBUG(dbgs()<<"SLP: Inspecting a gather list of size " << Seq.size() << -            " in " << BB->getName() << ".\n"); +      DEBUG(dbgs() << "SLP: Inspecting a gather list of size " << Seq.size() +                   << " in " << BB->getName() << ".\n");        // Check if the gathered values have multiple uses. If they only have one        // user then we know that the insert/extract pair will go away.        bool HasMultipleUsers = false; -      for (int i=0; e = Seq.size(), i < e; ++i) { +      for (int i = 0; e = Seq.size(), i < e; ++i) {          if (!Seq[i]->hasOneUse()) {            HasMultipleUsers = true;            break; @@ -375,8 +386,8 @@ bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) {        BoUpSLP BO(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));        if (tryToVectorizeList(Seq, BO, HasMultipleUsers)) { -        DEBUG(dbgs()<<"SLP: Vectorized a gather list of len " << Seq.size() << -              " in " << BB->getName() << ".\n"); +        DEBUG(dbgs() << "SLP: Vectorized a gather list of len " << Seq.size() +                     << " in " << BB->getName() << ".\n");          Changed = true;        } @@ -418,8 +429,10 @@ void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,      // hoist this instruction.      Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));      Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1)); -    if (CurrVec && L->contains(CurrVec)) continue; -    if (NewElem && L->contains(NewElem)) continue; +    if (CurrVec && L->contains(CurrVec)) +      continue; +    if (NewElem && L->contains(NewElem)) +      continue;      // We can hoist this instruction. Move it to the pre-header.      Insert->moveBefore(Location); @@ -438,8 +451,5 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)  INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)  namespace llvm { -  Pass *createSLPVectorizerPass() { -    return new SLPVectorizer(); -  } +Pass *createSLPVectorizerPass() { return new SLPVectorizer(); }  } - diff --git a/llvm/lib/Transforms/Vectorize/VecUtils.cpp b/llvm/lib/Transforms/Vectorize/VecUtils.cpp index 88c457d1176..16189860420 100644 --- a/llvm/lib/Transforms/Vectorize/VecUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VecUtils.cpp @@ -45,8 +45,8 @@ static const unsigned RecursionMaxDepth = 6;  namespace llvm {  BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl, -                 TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) : -  Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) { +                 TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) +    : Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {    numberInstructions();  } @@ -55,7 +55,7 @@ void BoUpSLP::numberInstructions() {    InstrIdx.clear();    InstrVec.clear();    // Number the instructions in the block. -  for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) { +  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {      InstrIdx[it] = Loc++;      InstrVec.push_back(it);      assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation"); @@ -63,14 +63,18 @@ void BoUpSLP::numberInstructions() {  }  Value *BoUpSLP::getPointerOperand(Value *I) { -  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand(); -  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); +  if (LoadInst *LI = dyn_cast<LoadInst>(I)) +    return LI->getPointerOperand(); +  if (StoreInst *SI = dyn_cast<StoreInst>(I)) +    return SI->getPointerOperand();    return 0;  }  unsigned BoUpSLP::getAddressSpaceOperand(Value *I) { -  if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace(); -  if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace(); +  if (LoadInst *L = dyn_cast<LoadInst>(I)) +    return L->getPointerAddressSpace(); +  if (StoreInst *S = dyn_cast<StoreInst>(I)) +    return S->getPointerAddressSpace();    return -1;  } @@ -81,10 +85,12 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {    unsigned ASB = getAddressSpaceOperand(B);    // Check that the address spaces match and that the pointers are valid. -  if (!PtrA || !PtrB || (ASA != ASB)) return false; +  if (!PtrA || !PtrB || (ASA != ASB)) +    return false;    // Check that A and B are of the same type. -  if (PtrA->getType() != PtrB->getType()) return false; +  if (PtrA->getType() != PtrB->getType()) +    return false;    // Calculate the distance.    const SCEV *PtrSCEVA = SE->getSCEV(PtrA); @@ -93,7 +99,8 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {    const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);    // Non constant distance. -  if (!ConstOffSCEV) return false; +  if (!ConstOffSCEV) +    return false;    int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();    Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); @@ -105,25 +112,29 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {  bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {    unsigned ChainLen = Chain.size(); -    DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<ChainLen<< "\n"); +  DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen +               << "\n");    Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();    unsigned Sz = DL->getTypeSizeInBits(StoreTy);    unsigned VF = MinVecRegSize / Sz; -  if (!isPowerOf2_32(Sz) || VF < 2) return false; +  if (!isPowerOf2_32(Sz) || VF < 2) +    return false;    bool Changed = false;    // Look for profitable vectorizable trees at all offsets, starting at zero.    for (unsigned i = 0, e = ChainLen; i < e; ++i) { -    if (i + VF > e) break; -    DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n"); +    if (i + VF > e) +      break; +    DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i +                 << "\n");      ArrayRef<Value *> Operands = Chain.slice(i, VF);      int Cost = getTreeCost(Operands);      DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");      if (Cost < CostThreshold) {        DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); -      Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands,VF))); +      Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands, VF)));        vectorizeTree(Operands, VF);        i += VF - 1;        Changed = true; @@ -135,8 +146,8 @@ bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {    int Cost = getTreeCost(Chain);    if (Cost < CostThreshold) { -    DEBUG(dbgs() << "SLP: Found store chain cost = "<< Cost <<" for size = " << -          ChainLen << "\n"); +    DEBUG(dbgs() << "SLP: Found store chain cost = " << Cost +                 << " for size = " << ChainLen << "\n");      Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Chain, ChainLen)));      vectorizeTree(Chain, ChainLen);      return true; @@ -146,8 +157,8 @@ bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {  }  bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) { -  SetVector<Value*> Heads, Tails; -  SmallDenseMap<Value*, Value*> ConsecutiveChain; +  SetVector<Value *> Heads, Tails; +  SmallDenseMap<Value *, Value *> ConsecutiveChain;    // We may run into multiple chains that merge into a single chain. We mark the    // stores that we vectorized so that we don't visit the same store twice. @@ -158,7 +169,9 @@ bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {    // all of the pairs of loads that follow each other.    for (unsigned i = 0, e = Stores.size(); i < e; ++i)      for (unsigned j = 0; j < e; ++j) { -      if (i == j) continue; +      if (i == j) +        continue; +              if (isConsecutiveAccess(Stores[i], Stores[j])) {          Tails.insert(Stores[j]);          Heads.insert(Stores[i]); @@ -167,9 +180,10 @@ bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {      }    // For stores that start but don't end a link in the chain: -  for (SetVector<Value*>::iterator it = Heads.begin(), e = Heads.end(); +  for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end();         it != e; ++it) { -    if (Tails.count(*it)) continue; +    if (Tails.count(*it)) +      continue;      // We found a store instr that starts a chain. Now follow the chain and try      // to vectorize it. @@ -177,7 +191,8 @@ bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {      Value *I = *it;      // Collect the chain into a list.      while (Tails.count(I) || Heads.count(I)) { -      if (VectorizedStores.count(I)) break; +      if (VectorizedStores.count(I)) +        break;        Operands.push_back(I);        // Move to the next value in the chain.        I = ConsecutiveChain[I]; @@ -212,8 +227,10 @@ int BoUpSLP::getScalarizationCost(Type *Ty) {  }  AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) { -  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI); -  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI); +  if (StoreInst *SI = dyn_cast<StoreInst>(I)) +    return AA->getLocation(SI); +  if (LoadInst *LI = dyn_cast<LoadInst>(I)) +    return AA->getLocation(LI);    return AliasAnalysis::Location();  } @@ -224,11 +241,14 @@ Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {    /// the source may alias.    for (++I; I != E; ++I) {      // Ignore store instructions that are marked as 'ignore'. -    if (MemBarrierIgnoreList.count(I)) continue; +    if (MemBarrierIgnoreList.count(I)) +      continue;      if (Src->mayWriteToMemory()) /* Write */ { -      if (!I->mayReadOrWriteMemory()) continue; +      if (!I->mayReadOrWriteMemory()) +        continue;      } else /* Read */ { -      if (!I->mayWriteToMemory()) continue; +      if (!I->mayWriteToMemory()) +        continue;      }      AliasAnalysis::Location A = getLocation(&*I);      AliasAnalysis::Location B = getLocation(Src); @@ -244,7 +264,7 @@ Value *BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {    Instruction *Loc = getInsertionPoint(LastIdx);    Builder.SetInsertPoint(Loc); -  assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx  && +  assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx &&           "Vectorizing with in-tree users");    Value *Vec = vectorizeTree(Operands, Operands.size()); @@ -283,15 +303,16 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {    // Check that instructions with multiple users can be vectorized. Mark unsafe    // instructions. -  for (SetVector<Value*>::iterator it = MultiUserVals.begin(), -       e = MultiUserVals.end(); it != e; ++it) { +  for (SetVector<Value *>::iterator it = MultiUserVals.begin(), +                                    e = MultiUserVals.end(); +       it != e; ++it) {      // Check that all of the users of this instr are within the tree      // and that they are all from the same lane.      int Lane = -1;      for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();           I != E; ++I) {        if (LaneMap.find(*I) == LaneMap.end()) { -        DEBUG(dbgs()<<"SLP: Instr " << **it << " has multiple users.\n"); +        DEBUG(dbgs() << "SLP: Instr " << **it << " has multiple users.\n");          // We don't have an ordering problem if the user is not in this basic          // block. @@ -305,23 +326,23 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {          int Idx = InstrIdx[Inst];          if (Idx < LastRootIndex) {            MustScalarize.insert(*it); -          DEBUG(dbgs()<<"SLP: Adding to MustScalarize " -                "because of an unsafe out of tree usage.\n"); +          DEBUG(dbgs() << "SLP: Adding to MustScalarize " +                          "because of an unsafe out of tree usage.\n");            break;          } - -        DEBUG(dbgs()<<"SLP: Adding to MustExtract " -              "because of a safe out of tree usage.\n"); +        DEBUG(dbgs() << "SLP: Adding to MustExtract " +                        "because of a safe out of tree usage.\n");          MustExtract.insert(*it);          continue;        } -      if (Lane == -1) Lane = LaneMap[*I]; +      if (Lane == -1) +        Lane = LaneMap[*I];        if (Lane != LaneMap[*I]) {          MustScalarize.insert(*it); -        DEBUG(dbgs()<<"SLP: Adding " << **it << -              " to MustScalarize because multiple lane use it: " -              << Lane << " and " << LaneMap[*I] << ".\n"); +        DEBUG(dbgs() << "SLP: Adding " << **it +                     << " to MustScalarize because multiple lane use it: " +                     << Lane << " and " << LaneMap[*I] << ".\n");          break;        }      } @@ -360,12 +381,16 @@ static bool CanReuseExtract(ArrayRef<Value *> VL, unsigned VF,  }  void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) { -  if (Depth == RecursionMaxDepth) return; +  if (Depth == RecursionMaxDepth) +    return;    // Don't handle vectors. -  if (VL[0]->getType()->isVectorTy()) return; +  if (VL[0]->getType()->isVectorTy()) +    return; +      if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) -    if (SI->getValueOperand()->getType()->isVectorTy()) return; +    if (SI->getValueOperand()->getType()->isVectorTy()) +      return;    // Check if all of the operands are constants.    bool AllConst = true; @@ -375,27 +400,32 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {      AllSameScalar &= (VL[0] == VL[i]);      Instruction *I = dyn_cast<Instruction>(VL[i]);      // If one of the instructions is out of this BB, we need to scalarize all. -    if (I && I->getParent() != BB) return; +    if (I && I->getParent() != BB) +      return;    }    // If all of the operands are identical or constant we have a simple solution. -  if (AllConst || AllSameScalar) return; +  if (AllConst || AllSameScalar) +    return;    // Scalarize unknown structures.    Instruction *VL0 = dyn_cast<Instruction>(VL[0]); -  if (!VL0) return; +  if (!VL0) +    return;    unsigned Opcode = VL0->getOpcode();    for (unsigned i = 0, e = VL.size(); i < e; ++i) {      Instruction *I = dyn_cast<Instruction>(VL[i]);      // If not all of the instructions are identical then we have to scalarize. -    if (!I || Opcode != I->getOpcode()) return; +    if (!I || Opcode != I->getOpcode()) +      return;    }    for (int i = 0, e = VL.size(); i < e; ++i) {      // Check that the instruction is only used within      // one lane. -    if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return; +    if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) +      return;      // Make this instruction as 'seen' and remember the lane.      LaneMap[VL[i]] = i;    } @@ -407,70 +437,71 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {      // within our tree. At depth zero we have no local users, only external      // users that we don't care about.      if (Depth && I && I->getNumUses() > 1) { -      DEBUG(dbgs()<<"SLP: Adding to MultiUserVals " -            "because it has multiple users:" << *I << " \n"); +      DEBUG(dbgs() << "SLP: Adding to MultiUserVals " +                      "because it has multiple users:" << *I << " \n");        MultiUserVals.insert(I);      }    }    switch (Opcode) { -    case Instruction::ExtractElement: { -      VectorType *VecTy = VectorType::get(VL[0]->getType(), VL.size()); -      // No need to follow ExtractElements that are going to be optimized away. -      if (CanReuseExtract(VL, VL.size(), VecTy)) return; -      // Fall through. -    } -    case Instruction::ZExt: -    case Instruction::SExt: -    case Instruction::FPToUI: -    case Instruction::FPToSI: -    case Instruction::FPExt: -    case Instruction::PtrToInt: -    case Instruction::IntToPtr: -    case Instruction::SIToFP: -    case Instruction::UIToFP: -    case Instruction::Trunc: -    case Instruction::FPTrunc: -    case Instruction::BitCast: -    case Instruction::Select: -    case Instruction::ICmp: -    case Instruction::FCmp: -    case Instruction::Add: -    case Instruction::FAdd: -    case Instruction::Sub: -    case Instruction::FSub: -    case Instruction::Mul: -    case Instruction::FMul: -    case Instruction::UDiv: -    case Instruction::SDiv: -    case Instruction::FDiv: -    case Instruction::URem: -    case Instruction::SRem: -    case Instruction::FRem: -    case Instruction::Shl: -    case Instruction::LShr: -    case Instruction::AShr: -    case Instruction::And: -    case Instruction::Or: -    case Instruction::Xor: { -      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { -        ValueList Operands; -        // Prepare the operand vector. -        for (unsigned j = 0; j < VL.size(); ++j) -          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); - -        getTreeUses_rec(Operands, Depth+1); -      } +  case Instruction::ExtractElement: { +    VectorType *VecTy = VectorType::get(VL[0]->getType(), VL.size()); +    // No need to follow ExtractElements that are going to be optimized away. +    if (CanReuseExtract(VL, VL.size(), VecTy))        return; -    } -    case Instruction::Store: { +    // Fall through. +  } +  case Instruction::ZExt: +  case Instruction::SExt: +  case Instruction::FPToUI: +  case Instruction::FPToSI: +  case Instruction::FPExt: +  case Instruction::PtrToInt: +  case Instruction::IntToPtr: +  case Instruction::SIToFP: +  case Instruction::UIToFP: +  case Instruction::Trunc: +  case Instruction::FPTrunc: +  case Instruction::BitCast: +  case Instruction::Select: +  case Instruction::ICmp: +  case Instruction::FCmp: +  case Instruction::Add: +  case Instruction::FAdd: +  case Instruction::Sub: +  case Instruction::FSub: +  case Instruction::Mul: +  case Instruction::FMul: +  case Instruction::UDiv: +  case Instruction::SDiv: +  case Instruction::FDiv: +  case Instruction::URem: +  case Instruction::SRem: +  case Instruction::FRem: +  case Instruction::Shl: +  case Instruction::LShr: +  case Instruction::AShr: +  case Instruction::And: +  case Instruction::Or: +  case Instruction::Xor: { +    for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {        ValueList Operands; +      // Prepare the operand vector.        for (unsigned j = 0; j < VL.size(); ++j) -        Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); -      getTreeUses_rec(Operands, Depth+1); -      return; +        Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); + +      getTreeUses_rec(Operands, Depth + 1);      } -    default: +    return; +  } +  case Instruction::Store: { +    ValueList Operands; +    for (unsigned j = 0; j < VL.size(); ++j) +      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); +    getTreeUses_rec(Operands, Depth + 1); +    return; +  } +  default:      return;    }  } @@ -482,10 +513,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {      ScalarTy = SI->getValueOperand()->getType();    /// Don't mess with vectors. -  if (ScalarTy->isVectorTy()) return max_cost; +  if (ScalarTy->isVectorTy()) +    return max_cost; +      VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); -  if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy); +  if (Depth == RecursionMaxDepth) +    return getScalarizationCost(VecTy);    // Check if all of the operands are constants.    bool AllConst = true; @@ -503,7 +537,8 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {    }    // Is this a simple vector constant. -  if (AllConst) return 0; +  if (AllConst) +    return 0;    // If all of the operands are identical we can broadcast them.    Instruction *VL0 = dyn_cast<Instruction>(VL[0]); @@ -523,14 +558,16 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {    if (MustScalarizeFlag)      return getScalarizationCost(VecTy); -  if (!VL0) return getScalarizationCost(VecTy); +  if (!VL0) +    return getScalarizationCost(VecTy);    assert(VL0->getParent() == BB && "Wrong BB");    unsigned Opcode = VL0->getOpcode();    for (unsigned i = 0, e = VL.size(); i < e; ++i) {      Instruction *I = dyn_cast<Instruction>(VL[i]);      // If not all of the instructions are identical then we have to scalarize. -    if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy); +    if (!I || Opcode != I->getOpcode()) +      return getScalarizationCost(VecTy);    }    // Check if it is safe to sink the loads or the stores. @@ -538,12 +575,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {      int MaxIdx = getLastIndex(VL, VL.size());      Instruction *Last = InstrVec[MaxIdx]; -    for (unsigned i = 0, e = VL.size(); i < e; ++i ) { -      if (VL[i] == Last) continue; +    for (unsigned i = 0, e = VL.size(); i < e; ++i) { +      if (VL[i] == Last) +        continue;        Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);        if (Barrier) { -        DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << -              *Last << "\n because of " << *Barrier << "\n"); +        DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << *Last +                     << "\n because of " << *Barrier << "\n");          return max_cost;        }      } @@ -554,7 +592,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {    for (unsigned i = 0, e = VL.size(); i < e; ++i)      if (MustExtract.count(VL[i]))        ExternalUserExtractCost += -        TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i); +          TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);    switch (Opcode) {    case Instruction::ExtractElement: { @@ -585,8 +623,9 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {          return getScalarizationCost(VecTy);      } -    Cost += getTreeCost_rec(Operands, Depth+1); -    if (Cost >= max_cost) return max_cost; +    Cost += getTreeCost_rec(Operands, Depth + 1); +    if (Cost >= max_cost) +      return max_cost;      // Calculate the cost of this instruction.      int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(), @@ -635,8 +674,9 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {        for (unsigned j = 0; j < VL.size(); ++j)          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); -      Cost += getTreeCost_rec(Operands, Depth+1); -      if (Cost >= max_cost) return max_cost; +      Cost += getTreeCost_rec(Operands, Depth + 1); +      if (Cost >= max_cost) +        return max_cost;      }      // Calculate the cost of this instruction. @@ -645,12 +685,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {      if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp ||          Opcode == Instruction::Select) {        VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size()); -      ScalarCost = VecTy->getNumElements() * -        TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty()); +      ScalarCost = +          VecTy->getNumElements() * +          TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());        VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);      } else {        ScalarCost = VecTy->getNumElements() * -      TTI->getArithmeticInstrCost(Opcode, ScalarTy); +                   TTI->getArithmeticInstrCost(Opcode, ScalarTy);        VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);      }      Cost += (VecCost - ScalarCost); @@ -658,21 +699,21 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {    }    case Instruction::Load: {      // If we are scalarize the loads, add the cost of forming the vector. -    for (unsigned i = 0, e = VL.size()-1; i < e; ++i) -      if (!isConsecutiveAccess(VL[i], VL[i+1])) +    for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) +      if (!isConsecutiveAccess(VL[i], VL[i + 1]))          return getScalarizationCost(VecTy);      // Cost of wide load - cost of scalar loads.      int ScalarLdCost = VecTy->getNumElements() * -      TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); +                       TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);      int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);      return VecLdCost - ScalarLdCost + ExternalUserExtractCost;    }    case Instruction::Store: {      // We know that we can merge the stores. Calculate the cost.      int ScalarStCost = VecTy->getNumElements() * -      TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); -    int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0); +                       TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); +    int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);      int StoreCost = VecStCost - ScalarStCost;      ValueList Operands; @@ -692,7 +733,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {  int BoUpSLP::getLastIndex(ArrayRef<Value *> VL, unsigned VF) {    int MaxIdx = InstrIdx[BB->getFirstNonPHI()]; -  for (unsigned i = 0; i < VF; ++i ) +  for (unsigned i = 0; i < VF; ++i)      MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);    return MaxIdx;  } @@ -716,7 +757,7 @@ int BoUpSLP::getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF) {  int BoUpSLP::getLastIndex(Instruction *I, Instruction *J) {    assert(I->getParent() == BB && "Invalid parent for instruction I");    assert(J->getParent() == BB && "Invalid parent for instruction J"); -  return std::max(InstrIdx[I],InstrIdx[J]); +  return std::max(InstrIdx[I], InstrIdx[J]);  }  Instruction *BoUpSLP::getInsertionPoint(unsigned Index) { @@ -725,7 +766,7 @@ Instruction *BoUpSLP::getInsertionPoint(unsigned Index) {  Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {    Value *Vec = UndefValue::get(Ty); -  for (unsigned i=0; i < Ty->getNumElements(); ++i) { +  for (unsigned i = 0; i < Ty->getNumElements(); ++i) {      // Generate the 'InsertElement' instruction.      Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));      // Remember that this instruction is used as part of a 'gather' sequence. @@ -748,8 +789,9 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {    Value *V = vectorizeTree_rec(VL, VF);    int LastInstrIdx = getLastIndex(VL, VL.size()); -  for (SetVector<Value*>::iterator it = MustExtract.begin(), -       e = MustExtract.end(); it != e; ++it) { +  for (SetVector<Value *>::iterator it = MustExtract.begin(), +                                    e = MustExtract.end(); +       it != e; ++it) {      Instruction *I = cast<Instruction>(*it);      // This is a scalarized value, so we can use the original value. @@ -770,7 +812,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {           ++U) {        Instruction *UI = cast<Instruction>(*U);        if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx) -        UI->replaceUsesOfWith(I ,Extract); +        UI->replaceUsesOfWith(I, Extract);        Replaced = true;      }      assert(Replaced && "Must replace at least one outside user"); @@ -814,7 +856,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {      return Scalarize(VL, VecTy);    if (VectorizedValues.count(VL0)) { -    Value * Vec = VectorizedValues[VL0]; +    Value *Vec = VectorizedValues[VL0];      for (int i = 0; i < VF; ++i)        VectorizedValues[VL[i]] = Vec;      return Vec; @@ -886,7 +928,6 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {        VectorizedValues[VL[i]] = V;      return V; -    }    case Instruction::Select: {      ValueList TrueVec, FalseVec, CondVec; @@ -933,7 +974,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {      Value *LHS = vectorizeTree_rec(LHSVL, VF);      Value *RHS = vectorizeTree_rec(RHSVL, VF);      BinaryOperator *BinOp = cast<BinaryOperator>(VL0); -    Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS,RHS); +    Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);      for (int i = 0; i < VF; ++i)        VectorizedValues[VL[i]] = V; @@ -946,7 +987,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {      // Check if all of the loads are consecutive.      for (unsigned i = 1, e = VF; i < e; ++i) -      if (!isConsecutiveAccess(VL[i-1], VL[i])) +      if (!isConsecutiveAccess(VL[i - 1], VL[i]))          return Scalarize(VL, VecTy);      // Loads are inserted at the head of the tree because we don't want to sink @@ -972,8 +1013,8 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {        ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());      Value *VecValue = vectorizeTree_rec(ValueOp, VF); -    Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), -                                          VecTy->getPointerTo()); +    Value *VecPtr = +        Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo());      Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);      for (int i = 0; i < VF; ++i) diff --git a/llvm/lib/Transforms/Vectorize/VecUtils.h b/llvm/lib/Transforms/Vectorize/VecUtils.h index 28a61e3c0dd..c9fe6d23ab6 100644 --- a/llvm/lib/Transforms/Vectorize/VecUtils.h +++ b/llvm/lib/Transforms/Vectorize/VecUtils.h @@ -25,23 +25,29 @@  namespace llvm { -class BasicBlock; class Instruction; class Type; -class VectorType; class StoreInst; class Value; -class ScalarEvolution; class DataLayout; -class TargetTransformInfo; class AliasAnalysis; +class BasicBlock; +class Instruction; +class Type; +class VectorType; +class StoreInst; +class Value; +class ScalarEvolution; +class DataLayout; +class TargetTransformInfo; +class AliasAnalysis;  class Loop;  /// Bottom Up SLP vectorization utility class. -struct BoUpSLP  { -  typedef SmallVector<Value*, 8> ValueList; -  typedef SmallVector<Instruction*, 16> InstrList; -  typedef SmallPtrSet<Value*, 16> ValueSet; -  typedef SmallVector<StoreInst*, 8> StoreList; -  static const int max_cost = 1<<20; +struct BoUpSLP { +  typedef SmallVector<Value *, 8> ValueList; +  typedef SmallVector<Instruction *, 16> InstrList; +  typedef SmallPtrSet<Value *, 16> ValueSet; +  typedef SmallVector<StoreInst *, 8> StoreList; +  static const int max_cost = 1 << 20;    // \brief C'tor.    BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl, -         TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp); +          TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp);    /// \brief Take the pointer operand from the Load/Store instruction.    /// \returns NULL if this is not a valid Load/Store instruction. @@ -73,13 +79,13 @@ struct BoUpSLP  {    bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold);    /// \brief Vectorize a group of scalars into a vector tree. -  /// \returns the vectorized value.  +  /// \returns the vectorized value.    Value *vectorizeArith(ArrayRef<Value *> Operands);    /// \returns the list of new instructions that were added in order to collect    /// scalars into vectors. This list can be used to further optimize the gather    /// sequences. -  InstrList &getGatherSeqInstructions() {return GatherInstructions; } +  InstrList &getGatherSeqInstructions() { return GatherInstructions; }  private:    /// \brief This method contains the recursive part of getTreeCost. @@ -130,9 +136,9 @@ private:  private:    /// Maps instructions to numbers and back. -  SmallDenseMap<Value*, int> InstrIdx; +  SmallDenseMap<Value *, int> InstrIdx;    /// Maps integers to Instructions. -  std::vector<Instruction*> InstrVec; +  std::vector<Instruction *> InstrVec;    // -- containers that are used during getTreeCost -- // @@ -144,14 +150,14 @@ private:    /// Contains values that have users outside of the vectorized graph.    /// We need to generate extract instructions for these values.    /// NOTICE: The vectorization methods also use this set. -  SetVector<Value*> MustExtract; +  SetVector<Value *> MustExtract;    /// Contains a list of values that are used outside the current tree. This    /// set must be reset between runs. -  SetVector<Value*> MultiUserVals; +  SetVector<Value *> MultiUserVals;    /// Maps values in the tree to the vector lanes that uses them. This map must    /// be reset between runs of getCost. -  std::map<Value*, int> LaneMap; +  std::map<Value *, int> LaneMap;    /// A list of instructions to ignore while sinking    /// memory instructions. This map must be reset between runs of getCost.    ValueSet MemBarrierIgnoreList; @@ -159,8 +165,8 @@ private:    // -- Containers that are used during vectorizeTree -- //    /// Maps between the first scalar to the vector. This map must be reset -  ///between runs. -  DenseMap<Value*, Value*> VectorizedValues; +  /// between runs. +  DenseMap<Value *, Value *> VectorizedValues;    // -- Containers that are used after vectorization by the caller -- // @@ -169,7 +175,7 @@ private:    /// Iterating over this list is faster than calling LICM.    /// Notice: We insert NULL ptrs to separate between the different gather    /// sequences. -   InstrList GatherInstructions; +  InstrList GatherInstructions;    /// Instruction builder to construct the vectorized tree.    IRBuilder<> Builder; | 

