diff options
author | Ayal Zaks <ayal.zaks@intel.com> | 2017-07-19 05:16:39 +0000 |
---|---|---|
committer | Ayal Zaks <ayal.zaks@intel.com> | 2017-07-19 05:16:39 +0000 |
commit | 8c452d76ed7d3782d7a99eb22c61ef4191afa920 (patch) | |
tree | 5b23c142c6633c0cdcb2071945ef622710fc9e1e /llvm/lib/Transforms | |
parent | b77279083c75f18129948a372e3b411c23b11158 (diff) | |
download | bcm5719-llvm-8c452d76ed7d3782d7a99eb22c61ef4191afa920.tar.gz bcm5719-llvm-8c452d76ed7d3782d7a99eb22c61ef4191afa920.zip |
[LV] Test once if vector trip count is zero, instead of twice
Generate a single test to decide if there are enough iterations to jump to the
vectorized loop, or else go to the scalar remainder loop. This test compares the
Scalar Trip Count: if STC < VF * UF go to the scalar loop. If
requiresScalarEpilogue() holds, at-least one iteration must remain scalar; the
rest can be used to form vector iterations. So in this case the test checks
instead if (STC - 1) < VF * UF by comparing STC <= VF * UF, and going to the
scalar loop if so. Otherwise the vector loop is entered for at-least one vector
iteration.
This test covers the case where incrementing the backedge-taken count will
overflow leading to an incorrect trip count of zero. In this (rare) case we will
also avoid the vector loop and jump to the scalar loop.
This patch simplifies the existing tests and effectively removes the basic-block
originally named "min.iters.checked", leaving the single test in block
"vector.ph".
Original observation and initial patch by Evgeny Stupachenko.
Differential Revision: https://reviews.llvm.org/D34150
llvm-svn: 308421
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 64 |
1 files changed, 20 insertions, 44 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index eb82ee283d4..012b10c8a9b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -574,11 +574,9 @@ protected: /// Returns (and creates if needed) the trip count of the widened loop. Value *getOrCreateVectorTripCount(Loop *NewLoop); - /// Emit a bypass check to see if the trip count would overflow, or we - /// wouldn't have enough iterations to execute one vector loop. + /// Emit a bypass check to see if the vector trip count is zero, including if + /// it overflows. void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass); - /// Emit a bypass check to see if the vector trip count is nonzero. - void emitVectorLoopEnteredCheck(Loop *L, BasicBlock *Bypass); /// Emit a bypass check to see if all of the SCEV assumptions we've /// had to make are correct. void emitSCEVChecks(Loop *L, BasicBlock *Bypass); @@ -3289,37 +3287,16 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, BasicBlock *BB = L->getLoopPreheader(); IRBuilder<> Builder(BB->getTerminator()); - // Generate code to check that the loop's trip count that we computed by - // adding one to the backedge-taken count will not overflow. - Value *CheckMinIters = Builder.CreateICmpULT( - Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check"); + // Generate code to check if the loop's trip count is less than VF * UF, or + // equal to it in case a scalar epilogue is required; this implies that the + // vector trip count is zero. This check also covers the case where adding one + // to the backedge-taken count overflowed leading to an incorrect trip count + // of zero. In this case we will also jump to the scalar loop. + auto P = Legal->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE + : ICmpInst::ICMP_ULT; + Value *CheckMinIters = Builder.CreateICmp( + P, Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check"); - BasicBlock *NewBB = - BB->splitBasicBlock(BB->getTerminator(), "min.iters.checked"); - // Update dominator tree immediately if the generated block is a - // LoopBypassBlock because SCEV expansions to generate loop bypass - // checks may query it before the current function is finished. - DT->addNewBlock(NewBB, BB); - if (L->getParentLoop()) - L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); - ReplaceInstWithInst(BB->getTerminator(), - BranchInst::Create(Bypass, NewBB, CheckMinIters)); - LoopBypassBlocks.push_back(BB); -} - -void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L, - BasicBlock *Bypass) { - Value *TC = getOrCreateVectorTripCount(L); - BasicBlock *BB = L->getLoopPreheader(); - IRBuilder<> Builder(BB->getTerminator()); - - // Now, compare the new count to zero. If it is zero skip the vector loop and - // jump to the scalar loop. - Value *Cmp = Builder.CreateICmpEQ(TC, Constant::getNullValue(TC->getType()), - "cmp.zero"); - - // Generate code to check that the loop's trip count that we computed by - // adding one to the backedge-taken count will not overflow. BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass @@ -3328,7 +3305,7 @@ void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L, if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); ReplaceInstWithInst(BB->getTerminator(), - BranchInst::Create(Bypass, NewBB, Cmp)); + BranchInst::Create(Bypass, NewBB, CheckMinIters)); LoopBypassBlocks.push_back(BB); } @@ -3477,14 +3454,13 @@ void InnerLoopVectorizer::createVectorizedLoopSkeleton() { Value *StartIdx = ConstantInt::get(IdxTy, 0); - // We need to test whether the backedge-taken count is uint##_max. Adding one - // to it will cause overflow and an incorrect loop trip count in the vector - // body. In case of overflow we want to directly jump to the scalar remainder - // loop. - emitMinimumIterationCountCheck(Lp, ScalarPH); // Now, compare the new count to zero. If it is zero skip the vector loop and - // jump to the scalar loop. - emitVectorLoopEnteredCheck(Lp, ScalarPH); + // jump to the scalar loop. This check also covers the case where the + // backedge-taken count is uint##_max: adding one to it will overflow leading + // to an incorrect trip count of zero. In this (rare) case we will also jump + // to the scalar loop. + emitMinimumIterationCountCheck(Lp, ScalarPH); + // Generate the code to check any assumptions that we've made for SCEV // expressions. emitSCEVChecks(Lp, ScalarPH); @@ -3527,7 +3503,7 @@ void InnerLoopVectorizer::createVectorizedLoopSkeleton() { // We know what the end value is. EndValue = CountRoundDown; } else { - IRBuilder<> B(LoopBypassBlocks.back()->getTerminator()); + IRBuilder<> B(Lp->getLoopPreheader()->getTerminator()); Type *StepType = II.getStep()->getType(); Instruction::CastOps CastOp = CastInst::getCastOpcode(CountRoundDown, true, StepType, true); @@ -4168,7 +4144,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { // To do so, we need to generate the 'identity' vector and override // one of the elements with the incoming scalar reduction. We need // to do it in the vector-loop preheader. - Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator()); + Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); // This is the vector-clone of the value that leaves the loop. Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType(); |