From 6ed9cef25f915d4533f261c401cee29d8d8012d5 Mon Sep 17 00:00:00 2001 From: Ayal Zaks Date: Wed, 27 Nov 2019 00:08:29 +0200 Subject: [LV] Scalar with predication must not be uniform Fix PR40816: avoid considering scalar-with-predication instructions as also uniform-after-vectorization. Instructions identified as "scalar with predication" will be "vectorized" using a replicating region. If such instructions are also optimized as "uniform after vectorization", namely when only the first of VF lanes is used, such a replicating region becomes erroneous - only the first instance of the region can and should be formed. Fix such cases by not considering such instructions as "uniform after vectorization". Differential Revision: https://reviews.llvm.org/D70298 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 39 ++++++++++++++----------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp') diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2766bc24f84..f614c3a29e5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4668,14 +4668,26 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { SetVector<Instruction *> Worklist; BasicBlock *Latch = TheLoop->getLoopLatch(); + // Instructions that are scalar with predication must not be considered + // uniform after vectorization, because that would create an erroneous + // replicating region where only a single instance out of VF should be formed. + // TODO: optimize such seldom cases if found important, see PR40816. 
+ auto addToWorklistIfAllowed = [&](Instruction *I) -> void { + if (isScalarWithPredication(I, VF)) { + LLVM_DEBUG(dbgs() << "LV: Found not uniform being ScalarWithPredication: " + << *I << "\n"); + return; + } + LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *I << "\n"); + Worklist.insert(I); + }; + // Start with the conditional branch. If the branch condition is an // instruction contained in the loop that is only used by the branch, it is // uniform. auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0)); - if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) { - Worklist.insert(Cmp); - LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n"); - } + if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) + addToWorklistIfAllowed(Cmp); // Holds consecutive and consecutive-like pointers. Consecutive-like pointers // are pointers that are treated like consecutive pointers during @@ -4734,10 +4746,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // Add to the Worklist all consecutive and consecutive-like pointers that // aren't also identified as possibly non-uniform. for (auto *V : ConsecutiveLikePtrs) - if (PossibleNonUniformPtrs.find(V) == PossibleNonUniformPtrs.end()) { - LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *V << "\n"); - Worklist.insert(V); - } + if (PossibleNonUniformPtrs.find(V) == PossibleNonUniformPtrs.end()) + addToWorklistIfAllowed(V); // Expand Worklist in topological order: whenever a new instruction // is added , its users should be already inside Worklist. 
It ensures @@ -4763,10 +4773,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { return Worklist.count(J) || (OI == getLoadStorePointerOperand(J) && isUniformDecision(J, VF)); - })) { - Worklist.insert(OI); - LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n"); - } + })) + addToWorklistIfAllowed(OI); } } @@ -4808,11 +4816,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { continue; // The induction variable and its update instruction will remain uniform. - Worklist.insert(Ind); - Worklist.insert(IndUpdate); - LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *Ind << "\n"); - LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *IndUpdate - << "\n"); + addToWorklistIfAllowed(Ind); + addToWorklistIfAllowed(IndUpdate); } Uniforms[VF].insert(Worklist.begin(), Worklist.end()); -- cgit v1.2.3