author     Renato Golin <renato.golin@linaro.org>  2016-12-02 16:56:26 +0000
committer  Renato Golin <renato.golin@linaro.org>  2016-12-02 16:56:26 +0000
commit     5b8e7ecdb37814c4657c4dd25bc7b5c59db55c84 (patch)
tree       d3ff1b85d9ac92c4c4219f15f4b376e62ca0fddb /llvm/lib/Transforms/Vectorize
parent     8c1c8e5d7c3c86f556bf02c369ebc7cfdf0c3d2f (diff)
Revert "[SLP] Fix for PR6246: vectorization for scalar ops on vector elements."
This reverts commit r288497, as it broke the AArch64 build of Compiler-RT's builtins (twice: once in r288412 and once in r288497). We should investigate this offline.

llvm-svn: 288508
Diffstat (limited to 'llvm/lib/Transforms/Vectorize')
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp  |  140
1 file changed, 66 insertions(+), 74 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c3409897862..d1b569d4cd3 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3870,11 +3870,10 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
unsigned Opcode0 = I0->getOpcode();
+ // FIXME: Register size should be a parameter to this function, so we can
+ // try different vectorization factors.
unsigned Sz = R.getVectorElementSize(I0);
- unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
- unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
- if (MaxVF < 2)
- return false;
+ unsigned VF = R.getMinVecRegSize() / Sz;
for (Value *V : VL) {
Type *Ty = V->getType();
@@ -3890,83 +3889,76 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
// Keep track of values that were deleted by vectorizing in the loop below.
SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
- unsigned NextInst = 0, MaxInst = VL.size();
- for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF;
- VF /= 2) {
- for (unsigned I = NextInst; I < MaxInst; ++I) {
- unsigned OpsWidth = 0;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ unsigned OpsWidth = 0;
- if (I + VF > MaxInst)
- OpsWidth = MaxInst - I;
- else
- OpsWidth = VF;
+ if (i + VF > e)
+ OpsWidth = e - i;
+ else
+ OpsWidth = VF;
- if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
- break;
+ if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
+ break;
- // Check that a previous iteration of this loop did not delete the Value.
- if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
- continue;
+ // Check that a previous iteration of this loop did not delete the Value.
+ if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))
+ continue;
- DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
- << "\n");
- ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
-
- ArrayRef<Value *> BuildVectorSlice;
- if (!BuildVector.empty())
- BuildVectorSlice = BuildVector.slice(I, OpsWidth);
-
- R.buildTree(Ops, BuildVectorSlice);
- // TODO: check if we can allow reordering for more cases.
- if (AllowReorder && R.shouldReorder()) {
- // Conceptually, there is nothing actually preventing us from trying to
- // reorder a larger list. In fact, we do exactly this when vectorizing
- // reductions. However, at this point, we only expect to get here from
- // tryToVectorizePair().
- assert(Ops.size() == 2);
- assert(BuildVectorSlice.empty());
- Value *ReorderedOps[] = {Ops[1], Ops[0]};
- R.buildTree(ReorderedOps, None);
- }
- if (R.isTreeTinyAndNotFullyVectorizable())
- continue;
+ DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
+ << "\n");
+ ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
+
+ ArrayRef<Value *> BuildVectorSlice;
+ if (!BuildVector.empty())
+ BuildVectorSlice = BuildVector.slice(i, OpsWidth);
+
+ R.buildTree(Ops, BuildVectorSlice);
+ // TODO: check if we can allow reordering for more cases.
+ if (AllowReorder && R.shouldReorder()) {
+ // Conceptually, there is nothing actually preventing us from trying to
+ // reorder a larger list. In fact, we do exactly this when vectorizing
+ // reductions. However, at this point, we only expect to get here from
+ // tryToVectorizePair().
+ assert(Ops.size() == 2);
+ assert(BuildVectorSlice.empty());
+ Value *ReorderedOps[] = { Ops[1], Ops[0] };
+ R.buildTree(ReorderedOps, None);
+ }
+ if (R.isTreeTinyAndNotFullyVectorizable())
+ continue;
- R.computeMinimumValueSizes();
- int Cost = R.getTreeCost();
-
- if (Cost < -SLPCostThreshold) {
- DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
- Value *VectorizedRoot = R.vectorizeTree();
-
- // Reconstruct the build vector by extracting the vectorized root. This
- // way we handle the case where some elements of the vector are
- // undefined.
- // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
- if (!BuildVectorSlice.empty()) {
- // The insert point is the last build vector instruction. The
- // vectorized root will precede it. This guarantees that we get an
- // instruction. The vectorized tree could have been constant folded.
- Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
- unsigned VecIdx = 0;
- for (auto &V : BuildVectorSlice) {
- IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
- ++BasicBlock::iterator(InsertAfter));
- Instruction *I = cast<Instruction>(V);
- assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
- Instruction *Extract =
- cast<Instruction>(Builder.CreateExtractElement(
- VectorizedRoot, Builder.getInt32(VecIdx++)));
- I->setOperand(1, Extract);
- I->removeFromParent();
- I->insertAfter(Extract);
- InsertAfter = I;
- }
+ R.computeMinimumValueSizes();
+ int Cost = R.getTreeCost();
+
+ if (Cost < -SLPCostThreshold) {
+ DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
+ Value *VectorizedRoot = R.vectorizeTree();
+
+ // Reconstruct the build vector by extracting the vectorized root. This
+ // way we handle the case where some elements of the vector are undefined.
+ // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
+ if (!BuildVectorSlice.empty()) {
+ // The insert point is the last build vector instruction. The vectorized
+ // root will precede it. This guarantees that we get an instruction. The
+ // vectorized tree could have been constant folded.
+ Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
+ unsigned VecIdx = 0;
+ for (auto &V : BuildVectorSlice) {
+ IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
+ ++BasicBlock::iterator(InsertAfter));
+ Instruction *I = cast<Instruction>(V);
+ assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
+ Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
+ VectorizedRoot, Builder.getInt32(VecIdx++)));
+ I->setOperand(1, Extract);
+ I->removeFromParent();
+ I->insertAfter(Extract);
+ InsertAfter = I;
}
- // Move to the next bundle.
- I += VF - 1;
- NextInst = I + 1;
- Changed = true;
}
+ // Move to the next bundle.
+ i += VF - 1;
+ Changed = true;
}
}
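
For context, the two loop structures this revert switches between can be illustrated with a small standalone sketch. This is not the LLVM code itself: the helper names sliceWithFixedVF and sliceWithShrinkingVF are hypothetical, the cost model is ignored (every candidate bundle is assumed to vectorize), and only the bundle-slicing pattern of SLPVectorizerPass::tryToVectorizeList is reproduced. The restored code derives a single vectorization factor from the minimum vector register size; the reverted r288497 code started from the largest power-of-two factor that fits the list and halved it down to that minimum.

    // Minimal standalone sketch (not the LLVM sources) of the two bundling
    // strategies. All names and the unconditional "bundle succeeded" step are
    // simplifications for illustration only.
    #include <algorithm>
    #include <cstdio>

    // Restored (pre-r288497) behaviour: one fixed vectorization factor derived
    // from the minimum vector register size, walking the list in fixed bundles.
    static void sliceWithFixedVF(unsigned ListSize, unsigned MinVecRegBits,
                                 unsigned ElemBits) {
      unsigned VF = MinVecRegBits / ElemBits;
      for (unsigned i = 0; i < ListSize; ++i) {
        unsigned OpsWidth = (i + VF > ListSize) ? ListSize - i : VF;
        if ((OpsWidth & (OpsWidth - 1)) != 0 || OpsWidth < 2) // power of 2, >= 2
          break;
        std::printf("fixed VF:     try bundle [%u, %u)\n", i, i + OpsWidth);
        i += VF - 1; // pretend the bundle vectorized; move to the next bundle
      }
    }

    // Reverted (r288497) behaviour: start from the largest power-of-two factor
    // that fits the list and halve it down to the register-derived minimum.
    static void sliceWithShrinkingVF(unsigned ListSize, unsigned MinVecRegBits,
                                     unsigned ElemBits) {
      unsigned MinVF = std::max(2u, MinVecRegBits / ElemBits);
      unsigned MaxVF = 1;
      while (MaxVF * 2 <= ListSize) // PowerOf2Floor(ListSize)
        MaxVF *= 2;
      MaxVF = std::max(MaxVF, MinVF);
      unsigned NextInst = 0;
      for (unsigned VF = MaxVF; NextInst + 1 < ListSize && VF >= MinVF; VF /= 2) {
        for (unsigned I = NextInst; I < ListSize; ++I) {
          unsigned OpsWidth = (I + VF > ListSize) ? ListSize - I : VF;
          if ((OpsWidth & (OpsWidth - 1)) != 0 || OpsWidth < 2)
            break;
          std::printf("shrinking VF: try bundle [%u, %u)\n", I, I + OpsWidth);
          I += VF - 1; // again, pretend the bundle vectorized
          NextInst = I + 1;
        }
      }
    }

    int main() {
      // 8 scalar ops, 128-bit minimum vector register, 64-bit elements.
      sliceWithFixedVF(8, 128, 64);
      sliceWithShrinkingVF(8, 128, 64);
    }

With these parameters the fixed-VF version tries four bundles of width 2, while the shrinking-VF version tries a single bundle of width 8 first, which is the extra search space the reverted patch added.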