diff options
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 23 |
1 files changed, 22 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b21a9e0250a..af74acb4150 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2457,7 +2457,28 @@ int BoUpSLP::getTreeCost() { unsigned BundleWidth = VectorizableTree[0].Scalars.size(); - for (TreeEntry &TE : VectorizableTree) { + for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) { + TreeEntry &TE = VectorizableTree[I]; + + // We create duplicate tree entries for gather sequences that have multiple + // uses. However, we should not compute the cost of duplicate sequences. + // For example, if we have a build vector (i.e., insertelement sequence) + // that is used by more than one vector instruction, we only need to + // compute the cost of the insertelement instructions once. The redundent + // instructions will be eliminated by CSE. + // + // We should consider not creating duplicate tree entries for gather + // sequences, and instead add additional edges to the tree representing + // their uses. Since such an approach results in fewer total entries, + // existing heuristics based on tree size may yeild different results. + // + if (TE.NeedToGather && + std::any_of(std::next(VectorizableTree.begin(), I + 1), + VectorizableTree.end(), [TE](TreeEntry &Entry) { + return Entry.NeedToGather && Entry.isSame(TE.Scalars); + })) + continue; + int C = getEntryCost(&TE); DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with " << *TE.Scalars[0] << ".\n"); |