diff options
author | Hideki Saito <hideki.saito@intel.com> | 2018-09-14 00:36:00 +0000 |
---|---|---|
committer | Hideki Saito <hideki.saito@intel.com> | 2018-09-14 00:36:00 +0000 |
commit | ea7f3035a01786e7d69bb1ce1e038cefb6657a2e (patch) | |
tree | b299e3a031c52d2f45c9665eb0e7a58991dc23cc /llvm/lib/Transforms/Vectorize/VPlan.cpp | |
parent | ce9e2965ecd0c6d9623113486c15dfc45ef4303e (diff) | |
download | bcm5719-llvm-ea7f3035a01786e7d69bb1ce1e038cefb6657a2e.tar.gz bcm5719-llvm-ea7f3035a01786e7d69bb1ce1e038cefb6657a2e.zip |
[VPlan] Implement initial vector code generation support for simple outer loops.
Summary:
[VPlan] Implement vector code generation support for simple outer loops.
Context: Patch Series #1 for outer loop vectorization support in LV using VPlan. (RFC: http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html).
This patch introduces vector code generation support for simple outer loops that are currently supported in the VPlanNativePath. Changes here essentially do the following:
- force vector code generation using explicit vectorize_width
- add conservative early returns in cost model and other places for VPlanNativePath
- add code for setting up outer loop inductions
- support for widening non-induction PHIs that can result from inner loops and uniform conditional branches
- support for generating uniform inner branches
We plan to add a handful of C outer loop executable tests once the initial code generation support is committed. This patch is expected to be NFC for the inner loop vectorizer path. Since we are moving in the direction of supporting outer loop vectorization in LV, it may also be time to rename classes such as InnerLoopVectorizer.
Reviewers: fhahn, rengolin, hsaito, dcaballe, mkuper, hfinkel, Ayal
Reviewed By: fhahn, hsaito
Subscribers: dmgreen, bollu, tschuett, rkruppe, rogfer01, llvm-commits
Differential Revision: https://reviews.llvm.org/D50820
llvm-svn: 342197
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlan.cpp | 87 |
1 files changed, 84 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 0780e70809d..511b31a28c3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -44,6 +44,7 @@ #include <vector> using namespace llvm; +extern cl::opt<bool> EnableVPlanNativePath; #define DEBUG_TYPE "vplan" @@ -124,6 +125,20 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock(); auto &PredVPSuccessors = PredVPBB->getSuccessors(); BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; + + // In outer loop vectorization scenario, the predecessor BBlock may not yet + // be visited(backedge). Mark the VPBasicBlock for fixup at the end of + // vectorization. We do not encounter this case in inner loop vectorization + // as we start out by building a loop skeleton with the vector loop header + // and latch blocks. As a result, we never enter this function for the + // header block in the non VPlan-native path. + if (!PredBB) { + assert(EnableVPlanNativePath && + "Unexpected null predecessor in non VPlan-native path"); + CFG.VPBBsToFix.push_back(PredVPBB); + continue; + } + assert(PredBB && "Predecessor basic-block not found building successor."); auto *PredBBTerminator = PredBB->getTerminator(); LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); @@ -185,6 +200,35 @@ void VPBasicBlock::execute(VPTransformState *State) { for (VPRecipeBase &Recipe : Recipes) Recipe.execute(*State); + VPValue *CBV; + if (EnableVPlanNativePath && (CBV = getCondBit())) { + Value *IRCBV = CBV->getUnderlyingValue(); + assert(IRCBV && "Unexpected null underlying value for condition bit"); + + // Delete the condition bit at this point - it should be no longer needed. + delete CBV; + setCondBit(nullptr); + + // Condition bit value in a VPBasicBlock is used as the branch selector. 
In + // the VPlan-native path case, since all branches are uniform we generate a + // branch instruction using the condition value from vector lane 0 and dummy + // successors. The successors are fixed later when the successor blocks are + // visited. + Value *NewCond = State->Callback.getOrCreateVectorValues(IRCBV, 0); + NewCond = State->Builder.CreateExtractElement(NewCond, + State->Builder.getInt32(0)); + + // Replace the temporary unreachable terminator with the new conditional + // branch. + auto *CurrentTerminator = NewBB->getTerminator(); + assert(isa<UnreachableInst>(CurrentTerminator) && + "Expected to replace unreachable terminator with conditional " + "branch."); + auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond); + CondBr->setSuccessor(0, nullptr); + ReplaceInstWithInst(CurrentTerminator, CondBr); + } + LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB); } @@ -194,6 +238,20 @@ void VPRegionBlock::execute(VPTransformState *State) { if (!isReplicator()) { // Visit the VPBlocks connected to "this", starting from it. for (VPBlockBase *Block : RPOT) { + if (EnableVPlanNativePath) { + // The inner loop vectorization path does not represent loop preheader + // and exit blocks as part of the VPlan. In the VPlan-native path, skip + // vectorizing loop preheader block. In future, we may replace this + // check with the check for loop preheader. + if (Block->getNumPredecessors() == 0) + continue; + + // Skip vectorizing loop exit block. In future, we may replace this + // check with the check for loop exit. + if (Block->getNumSuccessors() == 0) + continue; + } + LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); Block->execute(State); } @@ -319,11 +377,32 @@ void VPlan::execute(VPTransformState *State) { for (VPBlockBase *Block : depth_first(Entry)) Block->execute(State); + // Setup branch terminator successors for VPBBs in VPBBsToFix based on + // VPBB's successors. 
+ for (auto VPBB : State->CFG.VPBBsToFix) { + assert(EnableVPlanNativePath && + "Unexpected VPBBsToFix in non VPlan-native path"); + BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB]; + assert(BB && "Unexpected null basic block for VPBB"); + + unsigned Idx = 0; + auto *BBTerminator = BB->getTerminator(); + + for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) { + VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock(); + BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]); + ++Idx; + } + } + // 3. Merge the temporary latch created with the last basic-block filled. BasicBlock *LastBB = State->CFG.PrevBB; // Connect LastBB to VectorLatchBB to facilitate their merge. - assert(isa<UnreachableInst>(LastBB->getTerminator()) && - "Expected VPlan CFG to terminate with unreachable"); + assert((EnableVPlanNativePath || + isa<UnreachableInst>(LastBB->getTerminator())) && + "Expected InnerLoop VPlan CFG to terminate with unreachable"); + assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) && + "Expected VPlan CFG to terminate with branch in NativePath"); LastBB->getTerminator()->eraseFromParent(); BranchInst::Create(VectorLatchBB, LastBB); @@ -333,7 +412,9 @@ void VPlan::execute(VPTransformState *State) { assert(Merged && "Could not merge last basic block with latch."); VectorLatchBB = LastBB; - updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); + // We do not attempt to preserve DT for outer loop vectorization currently. + if (!EnableVPlanNativePath) + updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); } void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, |