diff options
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 80 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VecUtils.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VecUtils.h | 7 |
3 files changed, 80 insertions, 13 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2b07fa2c015..91c04633244 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -99,7 +99,10 @@ struct SLPVectorizer : public FunctionPass { } // Try to hoist some of the scalarization code to the preheader. - if (BBChanged) hoistGatherSequence(LI, BB, R); + if (BBChanged) { + hoistGatherSequence(LI, BB, R); + Changed |= vectorizeUsingGatherHints(R.getGatherSeqInstructions()); + } Changed |= BBChanged; } @@ -130,8 +133,10 @@ private: /// \brief Try to vectorize a chain that starts at two arithmetic instrs. bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); - /// \brief Try to vectorize a list of operands. - bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R); + /// \brief Try to vectorize a list of operands. If \p NeedExtracts is true + /// then we calculate the cost of extracting the scalars from the vector. + /// \returns true if a value was vectorized. + bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool NeedExtracts); /// \brief Try to vectorize a chain that may start at the operands of \V; bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); @@ -143,6 +148,13 @@ private: /// all of the sources are loop invariant. void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R); + /// \brief Try to vectorize additional sequences in different basic blocks + /// based on values that we gathered in previous blocks. The list \p Gathers + /// holds the gather InsertElement instructions that were generated during + /// vectorization. + /// \returns True if some code was vectorized. + bool vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers); + /// \brief Scan the basic block and look for patterns that are likely to start /// a vectorization chain. bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R); @@ -179,10 +191,11 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { if (!A || !B) return false; Value *VL[] = { A, B }; - return tryToVectorizeList(VL, R); + return tryToVectorizeList(VL, R, true); } -bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) { +bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, + bool NeedExtracts) { if (VL.size() < 2) return false; @@ -204,7 +217,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) { } int Cost = R.getTreeCost(VL); - int ExtrCost = R.getScalarizationCost(VL); + int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0; DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << " Cost of extract:" << ExtrCost << ".\n"); if ((Cost+ExtrCost) >= -SLPCostThreshold) return false; @@ -307,7 +320,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } if (Incoming.size() > 1) - Changed |= tryToVectorizeList(Incoming, R); + Changed |= tryToVectorizeList(Incoming, R, true); } return Changed; @@ -329,6 +342,51 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { return Changed; } +bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) { + SmallVector<Value*, 4> Seq; + bool Changed = false; + for (int i = 0, e = Gathers.size(); i < e; ++i) { + InsertElementInst *IEI = dyn_cast_or_null<InsertElementInst>(Gathers[i]); + + if (IEI) { + if (Instruction *I = dyn_cast<Instruction>(IEI->getOperand(1))) + Seq.push_back(I); + } else { + + if (!Seq.size()) + continue; + + Instruction *I = cast<Instruction>(Seq[0]); + BasicBlock *BB = I->getParent(); + + DEBUG(dbgs()<<"SLP: Inspecting a gather list of size " << Seq.size() << + " in " << BB->getName() << ".\n"); + + // Check if the gathered values have multiple uses. If they only have one + // user then we know that the insert/extract pair will go away. + bool HasMultipleUsers = false; + for (int i=0; e = Seq.size(), i < e; ++i) { + if (!Seq[i]->hasOneUse()) { + HasMultipleUsers = true; + break; + } + } + + BoUpSLP BO(BB, SE, DL, TTI, AA, LI->getLoopFor(BB)); + + if (tryToVectorizeList(Seq, BO, HasMultipleUsers)) { + DEBUG(dbgs()<<"SLP: Vectorized a gather list of len " << Seq.size() << + " in " << BB->getName() << ".\n"); + Changed = true; + } + + Seq.clear(); + } + } + + return Changed; +} + void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R) { // Check if this block is inside a loop. @@ -344,12 +402,14 @@ void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, // Mark the insertion point for the block. Instruction *Location = PreHeader->getTerminator(); - BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions(); - for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end(); + BoUpSLP::InstrList &Gathers = R.getGatherSeqInstructions(); + for (BoUpSLP::InstrList::iterator it = Gathers.begin(), e = Gathers.end(); it != e; ++it) { - InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it); + InsertElementInst *Insert = dyn_cast_or_null<InsertElementInst>(*it); // The InsertElement sequence can be simplified into a constant. + // Also Ignore NULL pointers because they are only here to separate + // sequences. if (!Insert) continue; diff --git a/llvm/lib/Transforms/Vectorize/VecUtils.cpp b/llvm/lib/Transforms/Vectorize/VecUtils.cpp index e79f08a56dd..88c457d1176 100644 --- a/llvm/lib/Transforms/Vectorize/VecUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VecUtils.cpp @@ -731,9 +731,13 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) { // Remember that this instruction is used as part of a 'gather' sequence. // The caller of the bottom-up slp vectorizer can try to hoist the sequence // if the users are outside of the basic block. - GatherInstructions.push_back(Vec); + if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Vec)) + GatherInstructions.push_back(IEI); } + // Mark the end of the gather sequence. + GatherInstructions.push_back(0); + for (unsigned i = 0; i < Ty->getNumElements(); ++i) VectorizedValues[VL[i]] = Vec; diff --git a/llvm/lib/Transforms/Vectorize/VecUtils.h b/llvm/lib/Transforms/Vectorize/VecUtils.h index f76ae8413d7..28a61e3c0dd 100644 --- a/llvm/lib/Transforms/Vectorize/VecUtils.h +++ b/llvm/lib/Transforms/Vectorize/VecUtils.h @@ -34,6 +34,7 @@ class Loop; /// Bottom Up SLP vectorization utility class. struct BoUpSLP { typedef SmallVector<Value*, 8> ValueList; + typedef SmallVector<Instruction*, 16> InstrList; typedef SmallPtrSet<Value*, 16> ValueSet; typedef SmallVector<StoreInst*, 8> StoreList; static const int max_cost = 1<<20; @@ -78,7 +79,7 @@ struct BoUpSLP { /// \returns the list of new instructions that were added in order to collect /// scalars into vectors. This list can be used to further optimize the gather /// sequences. - ValueList &getGatherSeqInstructions() {return GatherInstructions; } + InstrList &getGatherSeqInstructions() {return GatherInstructions; } private: /// \brief This method contains the recursive part of getTreeCost. @@ -166,7 +167,9 @@ private: /// A list of instructions that are used when gathering scalars into vectors. /// In many cases these instructions can be hoisted outside of the BB. /// Iterating over this list is faster than calling LICM. - ValueList GatherInstructions; + /// Notice: We insert NULL ptrs to separate between the different gather + /// sequences. + InstrList GatherInstructions; /// Instruction builder to construct the vectorized tree. IRBuilder<> Builder; |