summary refs log tree commit diff stats
path: root/llvm/lib/Transforms/Vectorize
diff options
context:
space:
mode:
author: Benjamin Kramer <benny.kra@googlemail.com> 2018-04-03 14:40:33 +0000
committer: Benjamin Kramer <benny.kra@googlemail.com> 2018-04-03 14:40:33 +0000
commit: 2fc3b18922e2454a2ad2b68de348f8b846456652 (patch)
tree: baf74cf3edef4e6d95eef58808fe69f5a0288a1d /llvm/lib/Transforms/Vectorize
parent: 9dc781634680b4ceec095810653cdd68967d15f0 (diff)
download: bcm5719-llvm-2fc3b18922e2454a2ad2b68de348f8b846456652.tar.gz
download: bcm5719-llvm-2fc3b18922e2454a2ad2b68de348f8b846456652.zip
Revert "[SLP] Fix PR36481: vectorize reassociated instructions."
This reverts commit r328980 and r329046. Makes the vectorizer crash. llvm-svn: 329071
Diffstat (limited to 'llvm/lib/Transforms/Vectorize')
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 325
1 file changed, 95 insertions(+), 230 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f0ff12a668b..b9b507d34d1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -452,21 +452,16 @@ static bool allSameType(ArrayRef<Value *> VL) {
}
/// \returns True if Extract{Value,Element} instruction extracts element Idx.
-static Optional<unsigned> getExtractIndex(Instruction *E) {
- unsigned Opcode = E->getOpcode();
- assert((Opcode == Instruction::ExtractElement ||
- Opcode == Instruction::ExtractValue) &&
- "Expected extractelement or extractvalue instruction.");
+static bool matchExtractIndex(Instruction *E, unsigned Idx, unsigned Opcode) {
+ assert(Opcode == Instruction::ExtractElement ||
+ Opcode == Instruction::ExtractValue);
if (Opcode == Instruction::ExtractElement) {
- auto *CI = dyn_cast<ConstantInt>(E->getOperand(1));
- if (!CI)
- return None;
- return CI->getZExtValue();
+ ConstantInt *CI = dyn_cast<ConstantInt>(E->getOperand(1));
+ return CI && CI->getZExtValue() == Idx;
+ } else {
+ ExtractValueInst *EI = cast<ExtractValueInst>(E);
+ return EI->getNumIndices() == 1 && *EI->idx_begin() == Idx;
}
- ExtractValueInst *EI = cast<ExtractValueInst>(E);
- if (EI->getNumIndices() != 1)
- return None;
- return *EI->idx_begin();
}
/// \returns True if in-tree use also needs extract. This refers to
@@ -591,7 +586,6 @@ public:
MustGather.clear();
ExternalUses.clear();
NumOpsWantToKeepOrder.clear();
- NumOpsWantToKeepOriginalOrder = 0;
for (auto &Iter : BlocksSchedules) {
BlockScheduling *BS = Iter.second.get();
BS->clear();
@@ -604,18 +598,14 @@ public:
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
- /// \returns The best order of instructions for vectorization.
- Optional<ArrayRef<unsigned>> bestOrder() const {
- auto I = std::max_element(
- NumOpsWantToKeepOrder.begin(), NumOpsWantToKeepOrder.end(),
- [](const decltype(NumOpsWantToKeepOrder)::value_type &D1,
- const decltype(NumOpsWantToKeepOrder)::value_type &D2) {
- return D1.second < D2.second;
- });
- if (I == NumOpsWantToKeepOrder.end() || I->getSecond() <= NumOpsWantToKeepOriginalOrder)
- return None;
-
- return makeArrayRef(I->getFirst());
+ /// \returns true if it is beneficial to reverse the vector order.
+ bool shouldReorder() const {
+ return std::accumulate(
+ NumOpsWantToKeepOrder.begin(), NumOpsWantToKeepOrder.end(), 0,
+ [](int Val1,
+ const decltype(NumOpsWantToKeepOrder)::value_type &Val2) {
+ return Val1 + (Val2.second < 0 ? 1 : -1);
+ }) > 0;
}
/// \return The vector element size in bits to use when vectorizing the
@@ -662,13 +652,9 @@ private:
/// This is the recursive part of buildTree.
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, int);
- /// \returns true if the ExtractElement/ExtractValue instructions in \p VL can
- /// be vectorized to use the original vector (or aggregate "bitcast" to a
- /// vector) and sets \p CurrentOrder to the identity permutation; otherwise
- /// returns false, setting \p CurrentOrder to either an empty vector or a
- /// non-identity permutation that allows to reuse extract instructions.
- bool canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
- SmallVectorImpl<unsigned> &CurrentOrder) const;
+ /// \returns True if the ExtractElement/ExtractValue instructions in VL can
+ /// be vectorized to use the original vector (or aggregate "bitcast" to a vector).
+ bool canReuseExtract(ArrayRef<Value *> VL, Value *OpValue) const;
/// Vectorize a single entry in the tree.
Value *vectorizeTree(TreeEntry *E);
@@ -732,9 +718,6 @@ private:
/// Does this sequence require some shuffling?
SmallVector<unsigned, 4> ReuseShuffleIndices;
- /// Does this entry require reordering?
- ArrayRef<unsigned> ReorderIndices;
-
/// Points back to the VectorizableTree.
///
/// Only used for Graphviz right now. Unfortunately GraphTrait::NodeRef has
@@ -750,8 +733,7 @@ private:
/// Create a new VectorizableTree entry.
void newTreeEntry(ArrayRef<Value *> VL, bool Vectorized, int &UserTreeIdx,
- ArrayRef<unsigned> ReuseShuffleIndices = None,
- ArrayRef<unsigned> ReorderIndices = None) {
+ ArrayRef<unsigned> ReuseShuffleIndices = None) {
VectorizableTree.emplace_back(VectorizableTree);
int idx = VectorizableTree.size() - 1;
TreeEntry *Last = &VectorizableTree[idx];
@@ -759,7 +741,6 @@ private:
Last->NeedToGather = !Vectorized;
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
ReuseShuffleIndices.end());
- Last->ReorderIndices = ReorderIndices;
if (Vectorized) {
for (int i = 0, e = VL.size(); i != e; ++i) {
assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
@@ -1221,38 +1202,10 @@ private:
/// List of users to ignore during scheduling and that don't need extracting.
ArrayRef<Value *> UserIgnoreList;
- using OrdersType = SmallVector<unsigned, 4>;
- /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of
- /// sorted SmallVectors of unsigned.
- struct OrdersTypeDenseMapInfo {
- static OrdersType getEmptyKey() {
- OrdersType V;
- V.push_back(~1U);
- return V;
- }
-
- static OrdersType getTombstoneKey() {
- OrdersType V;
- V.push_back(~2U);
- return V;
- }
-
- static unsigned getHashValue(const OrdersType &V) {
- return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
- }
-
- static bool isEqual(const OrdersType &LHS, const OrdersType &RHS) {
- return LHS == RHS;
- }
- };
-
- /// Contains orders of operations along with the number of bundles that have
- /// operations in this order. It stores only those orders that require
- /// reordering, if reordering is not required it is counted using \a
- /// NumOpsWantToKeepOriginalOrder.
- DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo> NumOpsWantToKeepOrder;
- /// Number of bundles that do not require reordering.
- unsigned NumOpsWantToKeepOriginalOrder = 0;
+ /// Number of operation bundles that contain consecutive operations - number
+ /// of operation bundles that contain consecutive operations in reversed
+ /// order.
+ DenseMap<unsigned, int> NumOpsWantToKeepOrder;
// Analysis and block reference.
Function *F;
@@ -1604,35 +1557,17 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
- OrdersType CurrentOrder;
- bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
+ bool Reuse = canReuseExtract(VL, VL0);
if (Reuse) {
DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
- ++NumOpsWantToKeepOriginalOrder;
- newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
- ReuseShuffleIndicies);
- return;
- }
- if (!CurrentOrder.empty()) {
- DEBUG({
- dbgs() << "SLP: Reusing or shuffling of reordered extract sequence "
- "with order";
- for (unsigned Idx : CurrentOrder)
- dbgs() << " " << Idx;
- dbgs() << "\n";
- });
- // Insert new order with initial value 0, if it does not exist,
- // otherwise return the iterator to the existing one.
- auto StoredCurrentOrderAndNum =
- NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
- ++StoredCurrentOrderAndNum->getSecond();
- newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies,
- StoredCurrentOrderAndNum->getFirst());
- return;
+ ++NumOpsWantToKeepOrder[S.Opcode];
+ } else {
+ SmallVector<Value *, 4> ReverseVL(VL.rbegin(), VL.rend());
+ if (canReuseExtract(ReverseVL, VL0))
+ --NumOpsWantToKeepOrder[S.Opcode];
+ BS.cancelScheduling(VL, VL0);
}
- DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
- newTreeEntry(VL, /*Vectorized=*/false, UserTreeIdx, ReuseShuffleIndicies);
- BS.cancelScheduling(VL, VL0);
+ newTreeEntry(VL, Reuse, UserTreeIdx, ReuseShuffleIndicies);
return;
}
case Instruction::Load: {
@@ -1654,55 +1589,51 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Make sure all loads in the bundle are simple - we can't vectorize
// atomic or volatile loads.
- SmallVector<Value *, 4> PointerOps(VL.size());
- auto POIter = PointerOps.begin();
- for (Value *V : VL) {
- auto *L = cast<LoadInst>(V);
+ for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
+ LoadInst *L = cast<LoadInst>(VL[i]);
if (!L->isSimple()) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
- *POIter = L->getPointerOperand();
- ++POIter;
- }
-
- OrdersType CurrentOrder;
- // Check the order of pointer operands.
- if (llvm::sortPtrAccesses(PointerOps, *DL, *SE, CurrentOrder)) {
- Value *Ptr0;
- Value *PtrN;
- if (CurrentOrder.empty()) {
- Ptr0 = PointerOps.front();
- PtrN = PointerOps.back();
+ }
+
+ // Check if the loads are consecutive, reversed, or neither.
+ // TODO: What we really want is to sort the loads, but for now, check
+ // the two likely directions.
+ bool Consecutive = true;
+ bool ReverseConsecutive = true;
+ for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
+ if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
+ Consecutive = false;
+ break;
} else {
- Ptr0 = PointerOps[CurrentOrder.front()];
- PtrN = PointerOps[CurrentOrder.back()];
+ ReverseConsecutive = false;
}
- const SCEV *Scev0 = SE->getSCEV(Ptr0);
- const SCEV *ScevN = SE->getSCEV(PtrN);
- const auto *Diff =
- dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));
- uint64_t Size = DL->getTypeAllocSize(ScalarTy);
- // Check that the sorted loads are consecutive.
- if (Diff && Diff->getAPInt().getZExtValue() == (VL.size() - 1) * Size) {
- if (CurrentOrder.empty()) {
- // Original loads are consecutive and does not require reordering.
- ++NumOpsWantToKeepOriginalOrder;
- newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
- ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: added a vector of loads.\n");
- } else {
- // Need to reorder.
- auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
- ++I->getSecond();
- newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
- ReuseShuffleIndicies, I->getFirst());
- DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
+ }
+
+ if (Consecutive) {
+ ++NumOpsWantToKeepOrder[S.Opcode];
+ newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ DEBUG(dbgs() << "SLP: added a vector of loads.\n");
+ return;
+ }
+
+ // If none of the load pairs were consecutive when checked in order,
+ // check the reverse order.
+ if (ReverseConsecutive)
+ for (unsigned i = VL.size() - 1; i > 0; --i)
+ if (!isConsecutiveAccess(VL[i], VL[i - 1], *DL, *SE)) {
+ ReverseConsecutive = false;
+ break;
}
- return;
- }
+
+ if (ReverseConsecutive) {
+ --NumOpsWantToKeepOrder[S.Opcode];
+ newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ DEBUG(dbgs() << "SLP: added a vector of reversed loads.\n");
+ return;
}
DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
@@ -2013,8 +1944,7 @@ unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
return N;
}
-bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
- SmallVectorImpl<unsigned> &CurrentOrder) const {
+bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue) const {
Instruction *E0 = cast<Instruction>(OpValue);
assert(E0->getOpcode() == Instruction::ExtractElement ||
E0->getOpcode() == Instruction::ExtractValue);
@@ -2023,8 +1953,6 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
// correct offset.
Value *Vec = E0->getOperand(0);
- CurrentOrder.clear();
-
// We have to extract from a vector/aggregate with the same number of elements.
unsigned NElts;
if (E0->getOpcode() == Instruction::ExtractValue) {
@@ -2044,40 +1972,15 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
return false;
// Check that all of the indices extract from the correct offset.
- bool ShouldKeepOrder = true;
- unsigned E = VL.size();
- // Assign to all items the initial value E + 1 so we can check if the extract
- // instruction index was used already.
- // Also, later we can check that all the indices are used and we have a
- // consecutive access in the extract instructions, by checking that no
- // element of CurrentOrder still has value E + 1.
- CurrentOrder.assign(E, E + 1);
- unsigned I = 0;
- for (; I < E; ++I) {
- auto *Inst = cast<Instruction>(VL[I]);
+ for (unsigned I = 0, E = VL.size(); I < E; ++I) {
+ Instruction *Inst = cast<Instruction>(VL[I]);
+ if (!matchExtractIndex(Inst, I, Inst->getOpcode()))
+ return false;
if (Inst->getOperand(0) != Vec)
- break;
- Optional<unsigned> Idx = getExtractIndex(Inst);
- if (!Idx)
- break;
- const unsigned ExtIdx = *Idx;
- if (ExtIdx != I) {
- if (ExtIdx >= E || CurrentOrder[ExtIdx] != E + 1)
- break;
- ShouldKeepOrder = false;
- CurrentOrder[ExtIdx] = I;
- } else {
- if (CurrentOrder[I] != E + 1)
- break;
- CurrentOrder[I] = I;
- }
- }
- if (I < E) {
- CurrentOrder.clear();
- return false;
+ return false;
}
- return ShouldKeepOrder;
+ return true;
}
bool BoUpSLP::areAllUsersVectorized(Instruction *I) const {
@@ -2179,13 +2082,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, Idx);
}
}
- if (!E->NeedToGather) {
+ if (canReuseExtract(VL, S.OpValue)) {
int DeadCost = ReuseShuffleCost;
- if (!E->ReorderIndices.empty()) {
- // TODO: Merge this shuffle with the ReuseShuffleCost.
- DeadCost += TTI->getShuffleCost(
- TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
- }
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
Instruction *E = cast<Instruction>(VL[i]);
// If all users are going to be vectorized, instruction can be
@@ -2348,8 +2246,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load,
VecTy, alignment, 0, VL0);
- if (!E->ReorderIndices.empty()) {
- // TODO: Merge this shuffle with the ReuseShuffleCost.
+ if (!isConsecutiveAccess(VL[0], VL[1], *DL, *SE)) {
VecLdCost += TTI->getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
}
@@ -2519,13 +2416,13 @@ int BoUpSLP::getSpillCost() {
LiveValues.insert(cast<Instruction>(&*J));
}
- DEBUG({
+ DEBUG(
dbgs() << "SLP: #LV: " << LiveValues.size();
for (auto *X : LiveValues)
dbgs() << " " << X->getName();
dbgs() << ", Looking at ";
Inst->dump();
- });
+ );
// Now find the sequence of instructions between PrevInst and Inst.
BasicBlock::reverse_iterator InstIt = ++Inst->getIterator().getReverse(),
@@ -3047,15 +2944,6 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
return V;
}
-static void inversePermutation(ArrayRef<unsigned> Indices,
- SmallVectorImpl<unsigned> &Mask) {
- Mask.clear();
- const unsigned E = Indices.size();
- Mask.resize(E);
- for (unsigned I = 0; I < E; ++I)
- Mask[Indices[I]] = I;
-}
-
Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
IRBuilder<>::InsertPointGuard Guard(Builder);
@@ -3132,19 +3020,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
case Instruction::ExtractElement: {
- if (!E->NeedToGather) {
+ if (canReuseExtract(E->Scalars, VL0)) {
Value *V = VL0->getOperand(0);
- if (!E->ReorderIndices.empty()) {
- OrdersType Mask;
- inversePermutation(E->ReorderIndices, Mask);
- Builder.SetInsertPoint(VL0);
- V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), Mask,
- "reorder_shuffle");
- }
if (NeedToShuffleReuses) {
- // TODO: Merge this shuffle with the ReorderShuffleMask.
- if (!E->ReorderIndices.empty())
- Builder.SetInsertPoint(VL0);
+ Builder.SetInsertPoint(VL0);
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}
@@ -3165,21 +3044,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::ExtractValue: {
- if (!E->NeedToGather) {
+ if (canReuseExtract(E->Scalars, VL0)) {
LoadInst *LI = cast<LoadInst>(VL0->getOperand(0));
Builder.SetInsertPoint(LI);
PointerType *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
LoadInst *V = Builder.CreateAlignedLoad(Ptr, LI->getAlignment());
Value *NewV = propagateMetadata(V, E->Scalars);
- if (!E->ReorderIndices.empty()) {
- OrdersType Mask;
- inversePermutation(E->ReorderIndices, Mask);
- NewV = Builder.CreateShuffleVector(NewV, UndefValue::get(VecTy), Mask,
- "reorder_shuffle");
- }
if (NeedToShuffleReuses) {
- // TODO: Merge this shuffle with the ReorderShuffleMask.
NewV = Builder.CreateShuffleVector(
NewV, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle");
}
@@ -3353,9 +3225,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Load: {
// Loads are inserted at the head of the tree because we don't want to
// sink them all the way down past store instructions.
- bool IsReorder = !E->ReorderIndices.empty();
- if (IsReorder)
- VL0 = cast<Instruction>(E->Scalars[E->ReorderIndices.front()]);
+ bool IsReversed =
+ !isConsecutiveAccess(E->Scalars[0], E->Scalars[1], *DL, *SE);
+ if (IsReversed)
+ VL0 = cast<Instruction>(E->Scalars.back());
setInsertPointAfterBundle(E->Scalars, VL0);
LoadInst *LI = cast<LoadInst>(VL0);
@@ -3379,14 +3252,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
LI->setAlignment(Alignment);
Value *V = propagateMetadata(LI, E->Scalars);
- if (IsReorder) {
- OrdersType Mask;
- inversePermutation(E->ReorderIndices, Mask);
- V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
- Mask, "reorder_shuffle");
+ if (IsReversed) {
+ SmallVector<uint32_t, 4> Mask(E->Scalars.size());
+ std::iota(Mask.rbegin(), Mask.rend(), 0);
+ V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()), Mask);
}
if (NeedToShuffleReuses) {
- // TODO: Merge this shuffle with the ReorderShuffleMask.
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}
@@ -4954,10 +4825,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
R.buildTree(Ops);
- Optional<ArrayRef<unsigned>> Order = R.bestOrder();
// TODO: check if we can allow reordering for more cases.
- if (AllowReorder && Order) {
- // TODO: reorder tree nodes without tree rebuilding.
+ if (AllowReorder && R.shouldReorder()) {
// Conceptually, there is nothing actually preventing us from trying to
// reorder a larger list. In fact, we do exactly this when vectorizing
// reductions. However, at this point, we only expect to get here when
@@ -5703,13 +5572,9 @@ public:
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
V.buildTree(VL, ExternallyUsedValues, IgnoreList);
- Optional<ArrayRef<unsigned>> Order = V.bestOrder();
- if (Order) {
- // TODO: reorder tree nodes without tree rebuilding.
- SmallVector<Value *, 4> ReorderedOps(VL.size());
- llvm::transform(*Order, ReorderedOps.begin(),
- [VL](const unsigned Idx) { return VL[Idx]; });
- V.buildTree(ReorderedOps, ExternallyUsedValues, IgnoreList);
+ if (V.shouldReorder()) {
+ SmallVector<Value *, 8> Reversed(VL.rbegin(), VL.rend());
+ V.buildTree(Reversed, ExternallyUsedValues, IgnoreList);
}
if (V.isTreeTinyAndNotFullyVectorizable())
break;
@@ -6214,7 +6079,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Try to vectorize them.
unsigned NumElts = (SameTypeIt - IncIt);
- DEBUG(dbgs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");
+ DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");
// The order in which the phi nodes appear in the program does not matter.
// So allow tryToVectorizeList to reorder them if it is beneficial. This
// is done when there are exactly two elements since tryToVectorizeList
OpenPOWER on IntegriCloud