summary refs log tree commit diff stats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp 19
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 41
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.cpp 8
3 files changed, 57 insertions, 11 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 7ea842baa5e..21e75d55a8c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1176,18 +1176,17 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
return false;
}
- // TODO: handle reductions when tail is folded by masking.
- if (!Reductions.empty()) {
- reportVectorizationFailure(
- "Loop has reductions, cannot fold tail by masking",
- "Cannot fold tail by masking in the presence of reductions.",
- "ReductionFoldingTailByMasking", ORE, TheLoop);
- return false;
- }
+ SmallPtrSet<const Value *, 8> ReductionLiveOuts;
- // TODO: handle outside users when tail is folded by masking.
+ for (auto &Reduction : *getReductionVars())
+ ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
+
+ // TODO: handle non-reduction outside users when tail is folded by masking.
for (auto *AE : AllowedExit) {
- // Check that all users of allowed exit values are inside the loop.
+ // Check that all users of allowed exit values are inside the loop or
+ // are the live-out of a reduction.
+ if (ReductionLiveOuts.count(AE))
+ continue;
for (User *U : AE->users()) {
Instruction *UI = cast<Instruction>(U);
if (TheLoop->contains(UI))
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 870ac700571..478174f8251 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3678,6 +3678,26 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
setDebugLocFromInst(Builder, LoopExitInst);
+ // If tail is folded by masking, the vector value to leave the loop should be
+ // a Select choosing between the vectorized LoopExitInst and vectorized Phi,
+ // instead of the former.
+ if (Cost->foldTailByMasking()) {
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *VecLoopExitInst =
+ VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
+ Value *Sel = nullptr;
+ for (User *U : VecLoopExitInst->users()) {
+ if (isa<SelectInst>(U)) {
+ assert(!Sel && "Reduction exit feeding two selects");
+ Sel = U;
+ } else
+ assert(isa<PHINode>(U) && "Reduction exit must feed Phi's or select");
+ }
+ assert(Sel && "Reduction exit feeds no select");
+ VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, Sel);
+ }
+ }
+
// If the vector reduction can be performed in a smaller type, we truncate
// then extend the loop exit value to enable InstCombine to evaluate the
// entire expression in the smaller type.
@@ -6939,8 +6959,15 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
// If the tail is to be folded by masking, the primary induction variable
// needs to be represented in VPlan for it to model early-exit masking.
- if (CM.foldTailByMasking())
+ // Also, both the Phi and the live-out instruction of each reduction are
+ // required in order to introduce a select between them in VPlan.
+ if (CM.foldTailByMasking()) {
NeedDef.insert(Legal->getPrimaryInduction());
+ for (auto &Reduction : *Legal->getReductionVars()) {
+ NeedDef.insert(Reduction.first);
+ NeedDef.insert(Reduction.second.getLoopExitInstr());
+ }
+ }
// Collect instructions from the original loop that will become trivially dead
// in the vectorized loop. We don't need to vectorize these instructions. For
@@ -7067,6 +7094,18 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBlockUtils::disconnectBlocks(PreEntry, Entry);
delete PreEntry;
+ // Finally, if tail is folded by masking, introduce selects between the phi
+ // and the live-out instruction of each reduction, at the end of the latch.
+ if (CM.foldTailByMasking()) {
+ Builder.setInsertPoint(VPBB);
+ auto *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan);
+ for (auto &Reduction : *Legal->getReductionVars()) {
+ VPValue *Phi = Plan->getVPValue(Reduction.first);
+ VPValue *Red = Plan->getVPValue(Reduction.second.getLoopExitInstr());
+ Builder.createNaryOp(Instruction::Select, {Cond, Red, Phi});
+ }
+ }
+
std::string PlanName;
raw_string_ostream RSO(PlanName);
unsigned VF = Range.Start;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 517d759d7bf..14adb478cd8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -309,6 +309,14 @@ void VPInstruction::generateInstruction(VPTransformState &State,
State.set(this, V, Part);
break;
}
+ case Instruction::Select: {
+ Value *Cond = State.get(getOperand(0), Part);
+ Value *Op1 = State.get(getOperand(1), Part);
+ Value *Op2 = State.get(getOperand(2), Part);
+ Value *V = Builder.CreateSelect(Cond, Op1, Op2);
+ State.set(this, V, Part);
+ break;
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
}
OpenPOWER on IntegriCloud