summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
authorAyal Zaks <ayal.zaks@intel.com>2019-11-27 00:08:29 +0200
committerAyal Zaks <ayal.zaks@intel.com>2019-12-03 19:50:24 +0200
commit6ed9cef25f915d4533f261c401cee29d8d8012d5 (patch)
treeb51f4fd4b3161bd19ad9cc333f794e69fff92167 /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
parent96c8024e2eb05278206b1eb59208bad0f3c68f2e (diff)
downloadbcm5719-llvm-6ed9cef25f915d4533f261c401cee29d8d8012d5.tar.gz
bcm5719-llvm-6ed9cef25f915d4533f261c401cee29d8d8012d5.zip
[LV] Scalar with predication must not be uniform
Fix PR40816: avoid considering scalar-with-predication instructions as also uniform-after-vectorization. Instructions identified as "scalar with predication" will be "vectorized" using a replicating region. If such instructions are also optimized as "uniform after vectorization", namely when only the first of VF lanes is used, such a replicating region becomes erroneous - only the first instance of the region can and should be formed. Fix such cases by not considering such instructions as "uniform after vectorization". Differential Revision: https://reviews.llvm.org/D70298
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp39
1 files changed, 22 insertions, 17 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2766bc24f84..f614c3a29e5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4668,14 +4668,26 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
SetVector<Instruction *> Worklist;
BasicBlock *Latch = TheLoop->getLoopLatch();
+ // Add I to Worklist as uniform after vectorization, unless I is scalar with
+ // predication: such instructions are emitted via a replicating region, which
+ // becomes erroneous if only a single instance out of VF should be formed.
+ // TODO: optimize such rare cases if found important, see PR40816.
+ auto addToWorklistIfAllowed = [&](Instruction *I) -> void {
+ if (isScalarWithPredication(I, VF)) {
+ LLVM_DEBUG(dbgs() << "LV: Found not uniform being ScalarWithPredication: "
+ << *I << "\n");
+ return;
+ }
+ LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *I << "\n");
+ Worklist.insert(I);
+ };
+
// Start with the conditional branch. If the branch condition is an
// instruction contained in the loop that is only used by the branch, it is
// uniform.
auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0));
- if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) {
- Worklist.insert(Cmp);
- LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n");
- }
+ if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse())
+ addToWorklistIfAllowed(Cmp);
// Holds consecutive and consecutive-like pointers. Consecutive-like pointers
// are pointers that are treated like consecutive pointers during
@@ -4734,10 +4746,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
// Add to the Worklist all consecutive and consecutive-like pointers that
// aren't also identified as possibly non-uniform.
for (auto *V : ConsecutiveLikePtrs)
- if (PossibleNonUniformPtrs.find(V) == PossibleNonUniformPtrs.end()) {
- LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *V << "\n");
- Worklist.insert(V);
- }
+ if (PossibleNonUniformPtrs.find(V) == PossibleNonUniformPtrs.end())
+ addToWorklistIfAllowed(V);
// Expand Worklist in topological order: whenever a new instruction
// is added, its users should already be inside Worklist. It ensures
@@ -4763,10 +4773,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
return Worklist.count(J) ||
(OI == getLoadStorePointerOperand(J) &&
isUniformDecision(J, VF));
- })) {
- Worklist.insert(OI);
- LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n");
- }
+ }))
+ addToWorklistIfAllowed(OI);
}
}
@@ -4808,11 +4816,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
continue;
// The induction variable and its update instruction will remain uniform.
- Worklist.insert(Ind);
- Worklist.insert(IndUpdate);
- LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *Ind << "\n");
- LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *IndUpdate
- << "\n");
+ addToWorklistIfAllowed(Ind);
+ addToWorklistIfAllowed(IndUpdate);
}
Uniforms[VF].insert(Worklist.begin(), Worklist.end());
OpenPOWER on IntegriCloud