path: root/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
author     Artem Belevich <tra@google.com>    2016-12-01 22:52:15 +0000
committer  Artem Belevich <tra@google.com>    2016-12-01 22:52:15 +0000
commit     704395a25a409e21d517572080602d21c3b6fed8 (patch)
tree       2a6a713c5d1f8b9c7ea63f145f8d4db1c253d608 /llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
parent     709a4cc2381d9c530331ba8ff9244628ae252727 (diff)
Revert "[SLP] Fix for PR6246: vectorization for scalar ops on vector elements."
This reverts r288412, which causes a severe compile-time regression.
llvm-svn: 288431
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 138
1 file changed, 66 insertions, 72 deletions
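
For orientation before the diff: the code restored below scans the candidate list with a single vectorization factor (VF) derived from the minimum vector register size, while the reverted r288412 wrapped that scan in an outer loop that retried leftover operations with progressively smaller factors. The following self-contained C++ sketch contrasts the two loop shapes; the names (tryBundle, vectorizeListSingleVF, vectorizeListMultiVF) and the profitability stub are illustrative assumptions, not LLVM's actual API.

// Minimal standalone sketch (assumed names, not the LLVM sources) contrasting
// the two loop shapes in the diff below. tryBundle() stands in for BoUpSLP's
// buildTree()/getTreeCost()/vectorizeTree() sequence; its heuristic is made up.
#include <cstdio>

static bool isPow2(unsigned X) { return X != 0 && (X & (X - 1)) == 0; }

// Hypothetical stand-in: pretend every power-of-two bundle of width >= 2 is
// profitable to vectorize.
static bool tryBundle(unsigned Begin, unsigned Width) {
  std::printf("SLP sketch: analyzing %u operations starting at %u\n", Width, Begin);
  return isPow2(Width) && Width >= 2;
}

// Restored behavior: a single VF = MinVecRegSize / ElementSize, slid across
// the list one bundle at a time.
static bool vectorizeListSingleVF(unsigned NumOps, unsigned VF) {
  bool Changed = false;
  for (unsigned I = 0; I < NumOps; ++I) {
    unsigned OpsWidth = (I + VF > NumOps) ? NumOps - I : VF;
    if (!isPow2(OpsWidth) || OpsWidth < 2)
      break;                        // A non-power-of-two tail is given up on.
    if (tryBundle(I, OpsWidth)) {
      I += VF - 1;                  // Move to the next bundle.
      Changed = true;
    }
  }
  return Changed;
}

// Reverted behavior (r288412): retry whatever remains with smaller factors,
// halving VF from PowerOf2Floor(NumOps) down to MinVF. Better coverage of
// odd-sized lists, but many more analysis/cost queries per list.
static bool vectorizeListMultiVF(unsigned NumOps, unsigned MinVF,
                                 unsigned MaxVF) {
  bool Changed = false;
  unsigned NextInst = 0;
  for (unsigned VF = MaxVF; NextInst + 1 < NumOps && VF >= MinVF; VF /= 2) {
    for (unsigned I = NextInst; I < NumOps; ++I) {
      unsigned OpsWidth = (I + VF > NumOps) ? NumOps - I : VF;
      if (!isPow2(OpsWidth) || OpsWidth < 2)
        break;                      // The outer loop retries with VF / 2.
      if (tryBundle(I, OpsWidth)) {
        I += VF - 1;
        NextInst = I + 1;           // The next, smaller VF resumes here.
        Changed = true;
      }
    }
  }
  return Changed;
}

int main() {
  vectorizeListSingleVF(/*NumOps=*/7, /*VF=*/4);
  vectorizeListMultiVF(/*NumOps=*/7, /*MinVF=*/2, /*MaxVF=*/4);
}

Run on a list of 7 operations with VF = 4, the single-VF loop vectorizes only the first bundle and gives up on the tail, while the multi-VF variant also picks up a width-2 bundle from the remainder at the price of extra analysis passes, which is consistent with the compile-time regression the commit message cites.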
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fa67aa25cdc..d1b569d4cd3 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3870,9 +3870,10 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
unsigned Opcode0 = I0->getOpcode();
+ // FIXME: Register size should be a parameter to this function, so we can
+ // try different vectorization factors.
unsigned Sz = R.getVectorElementSize(I0);
- unsigned MinVF = R.getMinVecRegSize() / Sz;
- unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
+ unsigned VF = R.getMinVecRegSize() / Sz;
for (Value *V : VL) {
Type *Ty = V->getType();
@@ -3888,83 +3889,76 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
// Keep track of values that were deleted by vectorizing in the loop below.
SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
- unsigned NextInst = 0, MaxInst = VL.size();
- for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF;
- VF /= 2) {
- for (unsigned I = NextInst; I < MaxInst; ++I) {
- unsigned OpsWidth = 0;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ unsigned OpsWidth = 0;
- if (I + VF > MaxInst)
- OpsWidth = MaxInst - I;
- else
- OpsWidth = VF;
+ if (i + VF > e)
+ OpsWidth = e - i;
+ else
+ OpsWidth = VF;
- if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
- break;
+ if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
+ break;
- // Check that a previous iteration of this loop did not delete the Value.
- if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
- continue;
+ // Check that a previous iteration of this loop did not delete the Value.
+ if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))
+ continue;
- DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
- << "\n");
- ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
-
- ArrayRef<Value *> BuildVectorSlice;
- if (!BuildVector.empty())
- BuildVectorSlice = BuildVector.slice(I, OpsWidth);
-
- R.buildTree(Ops, BuildVectorSlice);
- // TODO: check if we can allow reordering for more cases.
- if (AllowReorder && R.shouldReorder()) {
- // Conceptually, there is nothing actually preventing us from trying to
- // reorder a larger list. In fact, we do exactly this when vectorizing
- // reductions. However, at this point, we only expect to get here from
- // tryToVectorizePair().
- assert(Ops.size() == 2);
- assert(BuildVectorSlice.empty());
- Value *ReorderedOps[] = {Ops[1], Ops[0]};
- R.buildTree(ReorderedOps, None);
- }
- if (R.isTreeTinyAndNotFullyVectorizable())
- continue;
+ DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
+ << "\n");
+ ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
+
+ ArrayRef<Value *> BuildVectorSlice;
+ if (!BuildVector.empty())
+ BuildVectorSlice = BuildVector.slice(i, OpsWidth);
+
+ R.buildTree(Ops, BuildVectorSlice);
+ // TODO: check if we can allow reordering for more cases.
+ if (AllowReorder && R.shouldReorder()) {
+ // Conceptually, there is nothing actually preventing us from trying to
+ // reorder a larger list. In fact, we do exactly this when vectorizing
+ // reductions. However, at this point, we only expect to get here from
+ // tryToVectorizePair().
+ assert(Ops.size() == 2);
+ assert(BuildVectorSlice.empty());
+ Value *ReorderedOps[] = { Ops[1], Ops[0] };
+ R.buildTree(ReorderedOps, None);
+ }
+ if (R.isTreeTinyAndNotFullyVectorizable())
+ continue;
- R.computeMinimumValueSizes();
- int Cost = R.getTreeCost();
-
- if (Cost < -SLPCostThreshold) {
- DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
- Value *VectorizedRoot = R.vectorizeTree();
-
- // Reconstruct the build vector by extracting the vectorized root. This
- // way we handle the case where some elements of the vector are
- // undefined.
- // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
- if (!BuildVectorSlice.empty()) {
- // The insert point is the last build vector instruction. The
- // vectorized root will precede it. This guarantees that we get an
- // instruction. The vectorized tree could have been constant folded.
- Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
- unsigned VecIdx = 0;
- for (auto &V : BuildVectorSlice) {
- IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
- ++BasicBlock::iterator(InsertAfter));
- Instruction *I = cast<Instruction>(V);
- assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
- Instruction *Extract =
- cast<Instruction>(Builder.CreateExtractElement(
- VectorizedRoot, Builder.getInt32(VecIdx++)));
- I->setOperand(1, Extract);
- I->removeFromParent();
- I->insertAfter(Extract);
- InsertAfter = I;
- }
+ R.computeMinimumValueSizes();
+ int Cost = R.getTreeCost();
+
+ if (Cost < -SLPCostThreshold) {
+ DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
+ Value *VectorizedRoot = R.vectorizeTree();
+
+ // Reconstruct the build vector by extracting the vectorized root. This
+ // way we handle the case where some elements of the vector are undefined.
+ // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
+ if (!BuildVectorSlice.empty()) {
+ // The insert point is the last build vector instruction. The vectorized
+ // root will precede it. This guarantees that we get an instruction. The
+ // vectorized tree could have been constant folded.
+ Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
+ unsigned VecIdx = 0;
+ for (auto &V : BuildVectorSlice) {
+ IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
+ ++BasicBlock::iterator(InsertAfter));
+ Instruction *I = cast<Instruction>(V);
+ assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
+ Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
+ VectorizedRoot, Builder.getInt32(VecIdx++)));
+ I->setOperand(1, Extract);
+ I->removeFromParent();
+ I->insertAfter(Extract);
+ InsertAfter = I;
}
- // Move to the next bundle.
- I += VF - 1;
- NextInst = I + 1;
- Changed = true;
}
+ // Move to the next bundle.
+ i += VF - 1;
+ Changed = true;
}
}