diff options
author | Artem Belevich <tra@google.com> | 2016-12-01 22:52:15 +0000 |
---|---|---|
committer | Artem Belevich <tra@google.com> | 2016-12-01 22:52:15 +0000 |
commit | 704395a25a409e21d517572080602d21c3b6fed8 (patch) | |
tree | 2a6a713c5d1f8b9c7ea63f145f8d4db1c253d608 /llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | |
parent | 709a4cc2381d9c530331ba8ff9244628ae252727 (diff) | |
download | bcm5719-llvm-704395a25a409e21d517572080602d21c3b6fed8.tar.gz bcm5719-llvm-704395a25a409e21d517572080602d21c3b6fed8.zip |
Revert "[SLP] Fix for PR6246: vectorization for scalar ops on vector elements."
This reverts r288412 which causes severe compile-time regression.
llvm-svn: 288431
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 138 |
1 files changed, 66 insertions, 72 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index fa67aa25cdc..d1b569d4cd3 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3870,9 +3870,10 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, unsigned Opcode0 = I0->getOpcode(); + // FIXME: Register size should be a parameter to this function, so we can + // try different vectorization factors. unsigned Sz = R.getVectorElementSize(I0); - unsigned MinVF = R.getMinVecRegSize() / Sz; - unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF); + unsigned VF = R.getMinVecRegSize() / Sz; for (Value *V : VL) { Type *Ty = V->getType(); @@ -3888,83 +3889,76 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, // Keep track of values that were deleted by vectorizing in the loop below. SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end()); - unsigned NextInst = 0, MaxInst = VL.size(); - for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; - VF /= 2) { - for (unsigned I = NextInst; I < MaxInst; ++I) { - unsigned OpsWidth = 0; + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + unsigned OpsWidth = 0; - if (I + VF > MaxInst) - OpsWidth = MaxInst - I; - else - OpsWidth = VF; + if (i + VF > e) + OpsWidth = e - i; + else + OpsWidth = VF; - if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) - break; + if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) + break; - // Check that a previous iteration of this loop did not delete the Value. - if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth)) - continue; + // Check that a previous iteration of this loop did not delete the Value. + if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth)) + continue; - DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " - << "\n"); - ArrayRef<Value *> Ops = VL.slice(I, OpsWidth); - - ArrayRef<Value *> BuildVectorSlice; - if (!BuildVector.empty()) - BuildVectorSlice = BuildVector.slice(I, OpsWidth); - - R.buildTree(Ops, BuildVectorSlice); - // TODO: check if we can allow reordering for more cases. - if (AllowReorder && R.shouldReorder()) { - // Conceptually, there is nothing actually preventing us from trying to - // reorder a larger list. In fact, we do exactly this when vectorizing - // reductions. However, at this point, we only expect to get here from - // tryToVectorizePair(). - assert(Ops.size() == 2); - assert(BuildVectorSlice.empty()); - Value *ReorderedOps[] = {Ops[1], Ops[0]}; - R.buildTree(ReorderedOps, None); - } - if (R.isTreeTinyAndNotFullyVectorizable()) - continue; + DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " + << "\n"); + ArrayRef<Value *> Ops = VL.slice(i, OpsWidth); + + ArrayRef<Value *> BuildVectorSlice; + if (!BuildVector.empty()) + BuildVectorSlice = BuildVector.slice(i, OpsWidth); + + R.buildTree(Ops, BuildVectorSlice); + // TODO: check if we can allow reordering for more cases. + if (AllowReorder && R.shouldReorder()) { + // Conceptually, there is nothing actually preventing us from trying to + // reorder a larger list. In fact, we do exactly this when vectorizing + // reductions. However, at this point, we only expect to get here from + // tryToVectorizePair(). + assert(Ops.size() == 2); + assert(BuildVectorSlice.empty()); + Value *ReorderedOps[] = { Ops[1], Ops[0] }; + R.buildTree(ReorderedOps, None); + } + if (R.isTreeTinyAndNotFullyVectorizable()) + continue; - R.computeMinimumValueSizes(); - int Cost = R.getTreeCost(); - - if (Cost < -SLPCostThreshold) { - DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); - Value *VectorizedRoot = R.vectorizeTree(); - - // Reconstruct the build vector by extracting the vectorized root. This - // way we handle the case where some elements of the vector are - // undefined. - // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2)) - if (!BuildVectorSlice.empty()) { - // The insert point is the last build vector instruction. The - // vectorized root will precede it. This guarantees that we get an - // instruction. The vectorized tree could have been constant folded. - Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back()); - unsigned VecIdx = 0; - for (auto &V : BuildVectorSlice) { - IRBuilder<NoFolder> Builder(InsertAfter->getParent(), - ++BasicBlock::iterator(InsertAfter)); - Instruction *I = cast<Instruction>(V); - assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I)); - Instruction *Extract = - cast<Instruction>(Builder.CreateExtractElement( - VectorizedRoot, Builder.getInt32(VecIdx++))); - I->setOperand(1, Extract); - I->removeFromParent(); - I->insertAfter(Extract); - InsertAfter = I; - } + R.computeMinimumValueSizes(); + int Cost = R.getTreeCost(); + + if (Cost < -SLPCostThreshold) { + DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); + Value *VectorizedRoot = R.vectorizeTree(); + + // Reconstruct the build vector by extracting the vectorized root. This + // way we handle the case where some elements of the vector are undefined. + // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2)) + if (!BuildVectorSlice.empty()) { + // The insert point is the last build vector instruction. The vectorized + // root will precede it. This guarantees that we get an instruction. The + // vectorized tree could have been constant folded. + Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back()); + unsigned VecIdx = 0; + for (auto &V : BuildVectorSlice) { + IRBuilder<NoFolder> Builder(InsertAfter->getParent(), + ++BasicBlock::iterator(InsertAfter)); + Instruction *I = cast<Instruction>(V); + assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I)); + Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement( + VectorizedRoot, Builder.getInt32(VecIdx++))); + I->setOperand(1, Extract); + I->removeFromParent(); + I->insertAfter(Extract); + InsertAfter = I; } - // Move to the next bundle. - I += VF - 1; - NextInst = I + 1; - Changed = true; } + // Move to the next bundle. + i += VF - 1; + Changed = true; } } |