diff options
author | NAKAMURA Takumi <geek4civic@gmail.com> | 2017-11-21 09:41:01 +0000 |
---|---|---|
committer | NAKAMURA Takumi <geek4civic@gmail.com> | 2017-11-21 09:41:01 +0000 |
commit | 519ea284af84f77fea8ffdc9d9041b91ba8b49ee (patch) | |
tree | ac2088c8b1d73db6af9751ec5fe00b9f5bfad6dd /llvm/lib/Transforms | |
parent | d7c85137aa392e8879dd7d37f3bb951def683e08 (diff) | |
download | bcm5719-llvm-519ea284af84f77fea8ffdc9d9041b91ba8b49ee.tar.gz bcm5719-llvm-519ea284af84f77fea8ffdc9d9041b91ba8b49ee.zip |
SLPVectorizer.cpp: Avoid std::stable_sort(properlyDominates()).
properlyDominates() shouldn't be used as sort key. It causes different output between stdlibc++ and libc++.
Instead, I introduced RPOT. In most cases, it works for CSE.
llvm-svn: 318743
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 32 |
1 files changed, 9 insertions, 23 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b01a9e8b42b..d30c1063c0d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -597,7 +597,7 @@ public: unsigned getTreeSize() const { return VectorizableTree.size(); } /// \brief Perform LICM and CSE on the newly generated gather sequences. - void optimizeGatherSequence(); + void optimizeGatherSequence(Function &F); /// \returns true if it is beneficial to reverse the vector order. bool shouldReorder() const { @@ -3310,7 +3310,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { return VectorizableTree[0].VectorizedValue; } -void BoUpSLP::optimizeGatherSequence() { +void BoUpSLP::optimizeGatherSequence(Function &F) { DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size() << " gather sequences instructions.\n"); // LICM InsertElementInst sequences. @@ -3344,30 +3344,16 @@ void BoUpSLP::optimizeGatherSequence() { Insert->moveBefore(PreHeader->getTerminator()); } - // Make a list of all reachable blocks in our CSE queue. - SmallVector<const DomTreeNode *, 8> CSEWorkList; - CSEWorkList.reserve(CSEBlocks.size()); - for (BasicBlock *BB : CSEBlocks) - if (DomTreeNode *N = DT->getNode(BB)) { - assert(DT->isReachableFromEntry(N)); - CSEWorkList.push_back(N); - } - - // Sort blocks by domination. This ensures we visit a block after all blocks - // dominating it are visited. - std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), - [this](const DomTreeNode *A, const DomTreeNode *B) { - return DT->properlyDominates(A, B); - }); - // Perform O(N^2) search over the gather sequences and merge identical // instructions. TODO: We can further optimize this scan if we split the // instructions into different buckets based on the insert lane. SmallVector<Instruction *, 16> Visited; - for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) { - assert((I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) && - "Worklist not sorted properly!"); - BasicBlock *BB = (*I)->getBlock(); + ReversePostOrderTraversal<Function *> RPOT(&F); + for (auto BB : RPOT) { + // Traverse CSEBlocks by RPOT order. + if (!CSEBlocks.count(BB)) + continue; + // For all instructions in blocks containing gather sequences: for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { Instruction *In = &*it++; @@ -4235,7 +4221,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, } if (Changed) { - R.optimizeGatherSequence(); + R.optimizeGatherSequence(F); DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n"); DEBUG(verifyFunction(F)); } |