diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlan.cpp | 87 |
1 files changed, 84 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 0780e70809d..511b31a28c3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -44,6 +44,7 @@ #include <vector> using namespace llvm; +extern cl::opt<bool> EnableVPlanNativePath; #define DEBUG_TYPE "vplan" @@ -124,6 +125,20 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock(); auto &PredVPSuccessors = PredVPBB->getSuccessors(); BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; + + // In outer loop vectorization scenario, the predecessor BBlock may not yet + // be visited(backedge). Mark the VPBasicBlock for fixup at the end of + // vectorization. We do not encounter this case in inner loop vectorization + // as we start out by building a loop skeleton with the vector loop header + // and latch blocks. As a result, we never enter this function for the + // header block in the non VPlan-native path. + if (!PredBB) { + assert(EnableVPlanNativePath && + "Unexpected null predecessor in non VPlan-native path"); + CFG.VPBBsToFix.push_back(PredVPBB); + continue; + } + assert(PredBB && "Predecessor basic-block not found building successor."); auto *PredBBTerminator = PredBB->getTerminator(); LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); @@ -185,6 +200,35 @@ void VPBasicBlock::execute(VPTransformState *State) { for (VPRecipeBase &Recipe : Recipes) Recipe.execute(*State); + VPValue *CBV; + if (EnableVPlanNativePath && (CBV = getCondBit())) { + Value *IRCBV = CBV->getUnderlyingValue(); + assert(IRCBV && "Unexpected null underlying value for condition bit"); + + // Delete the condition bit at this point - it should be no longer needed. + delete CBV; + setCondBit(nullptr); + + // Condition bit value in a VPBasicBlock is used as the branch selector. In + // the VPlan-native path case, since all branches are uniform we generate a + // branch instruction using the condition value from vector lane 0 and dummy + // successors. The successors are fixed later when the successor blocks are + // visited. + Value *NewCond = State->Callback.getOrCreateVectorValues(IRCBV, 0); + NewCond = State->Builder.CreateExtractElement(NewCond, + State->Builder.getInt32(0)); + + // Replace the temporary unreachable terminator with the new conditional + // branch. + auto *CurrentTerminator = NewBB->getTerminator(); + assert(isa<UnreachableInst>(CurrentTerminator) && + "Expected to replace unreachable terminator with conditional " + "branch."); + auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond); + CondBr->setSuccessor(0, nullptr); + ReplaceInstWithInst(CurrentTerminator, CondBr); + } + LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB); } @@ -194,6 +238,20 @@ void VPRegionBlock::execute(VPTransformState *State) { if (!isReplicator()) { // Visit the VPBlocks connected to "this", starting from it. for (VPBlockBase *Block : RPOT) { + if (EnableVPlanNativePath) { + // The inner loop vectorization path does not represent loop preheader + // and exit blocks as part of the VPlan. In the VPlan-native path, skip + // vectorizing loop preheader block. In future, we may replace this + // check with the check for loop preheader. + if (Block->getNumPredecessors() == 0) + continue; + + // Skip vectorizing loop exit block. In future, we may replace this + // check with the check for loop exit. + if (Block->getNumSuccessors() == 0) + continue; + } + LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); Block->execute(State); } @@ -319,11 +377,32 @@ void VPlan::execute(VPTransformState *State) { for (VPBlockBase *Block : depth_first(Entry)) Block->execute(State); + // Setup branch terminator successors for VPBBs in VPBBsToFix based on + // VPBB's successors. + for (auto VPBB : State->CFG.VPBBsToFix) { + assert(EnableVPlanNativePath && + "Unexpected VPBBsToFix in non VPlan-native path"); + BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB]; + assert(BB && "Unexpected null basic block for VPBB"); + + unsigned Idx = 0; + auto *BBTerminator = BB->getTerminator(); + + for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) { + VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock(); + BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]); + ++Idx; + } + } + // 3. Merge the temporary latch created with the last basic-block filled. BasicBlock *LastBB = State->CFG.PrevBB; // Connect LastBB to VectorLatchBB to facilitate their merge. - assert(isa<UnreachableInst>(LastBB->getTerminator()) && - "Expected VPlan CFG to terminate with unreachable"); + assert((EnableVPlanNativePath || + isa<UnreachableInst>(LastBB->getTerminator())) && + "Expected InnerLoop VPlan CFG to terminate with unreachable"); + assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) && + "Expected VPlan CFG to terminate with branch in NativePath"); LastBB->getTerminator()->eraseFromParent(); BranchInst::Create(VectorLatchBB, LastBB); @@ -333,7 +412,9 @@ void VPlan::execute(VPTransformState *State) { assert(Merged && "Could not merge last basic block with latch."); VectorLatchBB = LastBB; - updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); + // We do not attempt to preserve DT for outer loop vectorization currently. + if (!EnableVPlanNativePath) + updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); } void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, |