Revert "[SLP] Fix for PR6246: vectorization for scalar ops on vector elements."

This reverts r288412 which causes severe compile-time regression. llvm-svn: 288431
author: Artem Belevich <tra@google.com> 2016-12-01 22:52:15 +0000
committer: Artem Belevich <tra@google.com> 2016-12-01 22:52:15 +0000
commit: 704395a25a409e21d517572080602d21c3b6fed8 (patch)
tree: 2a6a713c5d1f8b9c7ea63f145f8d4db1c253d608 /llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
parent: 709a4cc2381d9c530331ba8ff9244628ae252727 (diff)
download: bcm5719-llvm-704395a25a409e21d517572080602d21c3b6fed8.tar.gz
bcm5719-llvm-704395a25a409e21d517572080602d21c3b6fed8.zip
1 files changed, 66 insertions, 72 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fa67aa25cdc..d1b569d4cd3 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3870,9 +3870,10 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
 
   unsigned Opcode0 = I0->getOpcode();
 
+  // FIXME: Register size should be a parameter to this function, so we can
+  // try different vectorization factors.
   unsigned Sz = R.getVectorElementSize(I0);
-  unsigned MinVF = R.getMinVecRegSize() / Sz;
-  unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
+  unsigned VF = R.getMinVecRegSize() / Sz;
 
   for (Value *V : VL) {
     Type *Ty = V->getType();
@@ -3888,83 +3889,76 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
   // Keep track of values that were deleted by vectorizing in the loop below.
   SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
 
-  unsigned NextInst = 0, MaxInst = VL.size();
-  for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF;
-       VF /= 2) {
-    for (unsigned I = NextInst; I < MaxInst; ++I) {
-      unsigned OpsWidth = 0;
+  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+    unsigned OpsWidth = 0;
 
-      if (I + VF > MaxInst)
-        OpsWidth = MaxInst - I;
-      else
-        OpsWidth = VF;
+    if (i + VF > e)
+      OpsWidth = e - i;
+    else
+      OpsWidth = VF;
 
-      if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
-        break;
+    if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
+      break;
 
-      // Check that a previous iteration of this loop did not delete the Value.
-      if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
-        continue;
+    // Check that a previous iteration of this loop did not delete the Value.
+    if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))
+      continue;
 
-      DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
-                   << "\n");
-      ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
-
-      ArrayRef<Value *> BuildVectorSlice;
-      if (!BuildVector.empty())
-        BuildVectorSlice = BuildVector.slice(I, OpsWidth);
-
-      R.buildTree(Ops, BuildVectorSlice);
-      // TODO: check if we can allow reordering for more cases.
-      if (AllowReorder && R.shouldReorder()) {
-        // Conceptually, there is nothing actually preventing us from trying to
-        // reorder a larger list. In fact, we do exactly this when vectorizing
-        // reductions. However, at this point, we only expect to get here from
-        // tryToVectorizePair().
-        assert(Ops.size() == 2);
-        assert(BuildVectorSlice.empty());
-        Value *ReorderedOps[] = {Ops[1], Ops[0]};
-        R.buildTree(ReorderedOps, None);
-      }
-      if (R.isTreeTinyAndNotFullyVectorizable())
-        continue;
+    DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
+                 << "\n");
+    ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
+
+    ArrayRef<Value *> BuildVectorSlice;
+    if (!BuildVector.empty())
+      BuildVectorSlice = BuildVector.slice(i, OpsWidth);
+
+    R.buildTree(Ops, BuildVectorSlice);
+    // TODO: check if we can allow reordering for more cases.
+    if (AllowReorder && R.shouldReorder()) {
+      // Conceptually, there is nothing actually preventing us from trying to
+      // reorder a larger list. In fact, we do exactly this when vectorizing
+      // reductions. However, at this point, we only expect to get here from
+      // tryToVectorizePair().
+      assert(Ops.size() == 2);
+      assert(BuildVectorSlice.empty());
+      Value *ReorderedOps[] = { Ops[1], Ops[0] };
+      R.buildTree(ReorderedOps, None);
+    }
+    if (R.isTreeTinyAndNotFullyVectorizable())
+      continue;
 
-      R.computeMinimumValueSizes();
-      int Cost = R.getTreeCost();
-
-      if (Cost < -SLPCostThreshold) {
-        DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
-        Value *VectorizedRoot = R.vectorizeTree();
-
-        // Reconstruct the build vector by extracting the vectorized root. This
-        // way we handle the case where some elements of the vector are
-        // undefined.
-        //  (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
-        if (!BuildVectorSlice.empty()) {
-          // The insert point is the last build vector instruction. The
-          // vectorized root will precede it. This guarantees that we get an
-          // instruction. The vectorized tree could have been constant folded.
-          Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
-          unsigned VecIdx = 0;
-          for (auto &V : BuildVectorSlice) {
-            IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
-                                        ++BasicBlock::iterator(InsertAfter));
-            Instruction *I = cast<Instruction>(V);
-            assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
-            Instruction *Extract =
-                cast<Instruction>(Builder.CreateExtractElement(
-                    VectorizedRoot, Builder.getInt32(VecIdx++)));
-            I->setOperand(1, Extract);
-            I->removeFromParent();
-            I->insertAfter(Extract);
-            InsertAfter = I;
-          }
+    R.computeMinimumValueSizes();
+    int Cost = R.getTreeCost();
+
+    if (Cost < -SLPCostThreshold) {
+      DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
+      Value *VectorizedRoot = R.vectorizeTree();
+
+      // Reconstruct the build vector by extracting the vectorized root. This
+      // way we handle the case where some elements of the vector are undefined.
+      //  (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
+      if (!BuildVectorSlice.empty()) {
+        // The insert point is the last build vector instruction. The vectorized
+        // root will precede it. This guarantees that we get an instruction. The
+        // vectorized tree could have been constant folded.
+        Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
+        unsigned VecIdx = 0;
+        for (auto &V : BuildVectorSlice) {
+          IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
+                                      ++BasicBlock::iterator(InsertAfter));
+          Instruction *I = cast<Instruction>(V);
+          assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
+          Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
+              VectorizedRoot, Builder.getInt32(VecIdx++)));
+          I->setOperand(1, Extract);
+          I->removeFromParent();
+          I->insertAfter(Extract);
+          InsertAfter = I;
         }
-        // Move to the next bundle.
-        I += VF - 1;
-        NextInst = I + 1;
-        Changed = true;
       }
+      // Move to the next bundle.
+      i += VF - 1;
+      Changed = true;
     }
   }
author	Artem Belevich <tra@google.com>	2016-12-01 22:52:15 +0000
committer	Artem Belevich <tra@google.com>	2016-12-01 22:52:15 +0000
commit	704395a25a409e21d517572080602d21c3b6fed8 (patch)
tree	2a6a713c5d1f8b9c7ea63f145f8d4db1c253d608 /llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
parent	709a4cc2381d9c530331ba8ff9244628ae252727 (diff)
download	bcm5719-llvm-704395a25a409e21d517572080602d21c3b6fed8.tar.gz bcm5719-llvm-704395a25a409e21d517572080602d21c3b6fed8.zip