author    Alexey Bataev <a.bataev@hotmail.com>    2018-04-03 17:14:47 +0000
committer Alexey Bataev <a.bataev@hotmail.com>    2018-04-03 17:14:47 +0000
commit    428e9d9d878441c010daf6b62399d1df69bc9433 (patch)
tree      94167e908a09a4c2f901b0fe07f2c556a7857f00 /llvm/lib/Transforms
parent    be1e2621905b3d61032065caeb2d6ae7e1e3fb54 (diff)
[SLP] Fix PR36481: vectorize reassociated instructions.
Summary:
If the load/extractelement/extractvalue instructions are not originally
consecutive, the SLP vectorizer is unable to vectorize them. This patch allows
reordering of such instructions. It does not support reordering of repeated
instructions; that must be handled in a separate patch.

Reviewers: RKSimon, spatel, hfinkel, mkuper, Ayal, ashahid

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D43776

llvm-svn: 329085
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 320
1 file changed, 228 insertions, 92 deletions
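For a rough sense of what the reordering support buys, consider a hypothetical input like the one below (not one of this commit's test cases). The stores to c[] seed the vectorizer, but the loads of a[] and b[] appear in a jumbled, non-consecutive order, which the old consecutive/reverse-consecutive check rejected; with this patch the bundle can become one wide load plus a reorder shuffle.

// Hypothetical C++ source, for illustration only.
// The loads are requested in lane order {3, 1, 2, 0} relative to memory,
// which is neither consecutive nor reverse-consecutive.
void add4(const int *a, const int *b, int *c) {
  c[0] = a[3] + b[3];
  c[1] = a[1] + b[1];
  c[2] = a[2] + b[2];
  c[3] = a[0] + b[0];
}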
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f6c5827ab93..87b18b703b4 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -452,16 +452,21 @@ static bool allSameType(ArrayRef<Value *> VL) {
}
/// \returns True if Extract{Value,Element} instruction extracts element Idx.
-static bool matchExtractIndex(Instruction *E, unsigned Idx, unsigned Opcode) {
- assert(Opcode == Instruction::ExtractElement ||
- Opcode == Instruction::ExtractValue);
+static Optional<unsigned> getExtractIndex(Instruction *E) {
+ unsigned Opcode = E->getOpcode();
+ assert((Opcode == Instruction::ExtractElement ||
+ Opcode == Instruction::ExtractValue) &&
+ "Expected extractelement or extractvalue instruction.");
if (Opcode == Instruction::ExtractElement) {
- ConstantInt *CI = dyn_cast<ConstantInt>(E->getOperand(1));
- return CI && CI->getZExtValue() == Idx;
- } else {
- ExtractValueInst *EI = cast<ExtractValueInst>(E);
- return EI->getNumIndices() == 1 && *EI->idx_begin() == Idx;
+ auto *CI = dyn_cast<ConstantInt>(E->getOperand(1));
+ if (!CI)
+ return None;
+ return CI->getZExtValue();
}
+ ExtractValueInst *EI = cast<ExtractValueInst>(E);
+ if (EI->getNumIndices() != 1)
+ return None;
+ return *EI->idx_begin();
}
/// \returns True if in-tree use also needs extract. This refers to
@@ -586,6 +591,7 @@ public:
MustGather.clear();
ExternalUses.clear();
NumOpsWantToKeepOrder.clear();
+ NumOpsWantToKeepOriginalOrder = 0;
for (auto &Iter : BlocksSchedules) {
BlockScheduling *BS = Iter.second.get();
BS->clear();
@@ -598,14 +604,18 @@ public:
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
- /// \returns true if it is beneficial to reverse the vector order.
- bool shouldReorder() const {
- return std::accumulate(
- NumOpsWantToKeepOrder.begin(), NumOpsWantToKeepOrder.end(), 0,
- [](int Val1,
- const decltype(NumOpsWantToKeepOrder)::value_type &Val2) {
- return Val1 + (Val2.second < 0 ? 1 : -1);
- }) > 0;
+ /// \returns The best order of instructions for vectorization.
+ Optional<ArrayRef<unsigned>> bestOrder() const {
+ auto I = std::max_element(
+ NumOpsWantToKeepOrder.begin(), NumOpsWantToKeepOrder.end(),
+ [](const decltype(NumOpsWantToKeepOrder)::value_type &D1,
+ const decltype(NumOpsWantToKeepOrder)::value_type &D2) {
+ return D1.second < D2.second;
+ });
+ if (I == NumOpsWantToKeepOrder.end() || I->getSecond() <= NumOpsWantToKeepOriginalOrder)
+ return None;
+
+ return makeArrayRef(I->getFirst());
}
/// \return The vector element size in bits to use when vectorizing the
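A minimal standalone sketch of the selection logic in bestOrder(), using a plain std::map instead of the DenseMap the pass uses; the orders and counts are made-up values. An order only wins if strictly more bundles voted for it than kept the original order, mirroring the comparison against NumOpsWantToKeepOriginalOrder above.

#include <cassert>
#include <map>
#include <vector>

using OrdersType = std::vector<unsigned>;

// Returns the most popular non-identity order, or an empty vector if keeping
// the original order wins (mirrors bestOrder() returning None).
OrdersType pickBestOrder(const std::map<OrdersType, unsigned> &Orders,
                         unsigned NumOriginalOrder) {
  const OrdersType *Best = nullptr;
  unsigned BestCount = NumOriginalOrder;
  for (const auto &KV : Orders)
    if (KV.second > BestCount) {
      Best = &KV.first;
      BestCount = KV.second;
    }
  return Best ? *Best : OrdersType();
}

int main() {
  std::map<OrdersType, unsigned> Orders;
  Orders[{1, 0, 3, 2}] = 3;      // three bundles want this permutation
  Orders[{3, 2, 1, 0}] = 1;      // one bundle wants the reversed order
  unsigned NumOriginalOrder = 2; // two bundles are fine as-is
  assert(pickBestOrder(Orders, NumOriginalOrder) == OrdersType({1, 0, 3, 2}));
  return 0;
}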
@@ -652,9 +662,13 @@ private:
/// This is the recursive part of buildTree.
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, int);
- /// \returns True if the ExtractElement/ExtractValue instructions in VL can
- /// be vectorized to use the original vector (or aggregate "bitcast" to a vector).
- bool canReuseExtract(ArrayRef<Value *> VL, Value *OpValue) const;
+ /// \returns true if the ExtractElement/ExtractValue instructions in \p VL can
+ /// be vectorized to use the original vector (or aggregate "bitcast" to a
+ /// vector) and sets \p CurrentOrder to the identity permutation; otherwise
+ /// returns false, setting \p CurrentOrder to either an empty vector or a
+ /// non-identity permutation that allows the extract instructions to be reused.
+ bool canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
+ SmallVectorImpl<unsigned> &CurrentOrder) const;
/// Vectorize a single entry in the tree.
Value *vectorizeTree(TreeEntry *E);
@@ -718,6 +732,9 @@ private:
/// Does this sequence require some shuffling?
SmallVector<unsigned, 4> ReuseShuffleIndices;
+ /// Does this entry require reordering?
+ ArrayRef<unsigned> ReorderIndices;
+
/// Points back to the VectorizableTree.
///
/// Only used for Graphviz right now. Unfortunately GraphTrait::NodeRef has
@@ -733,7 +750,8 @@ private:
/// Create a new VectorizableTree entry.
void newTreeEntry(ArrayRef<Value *> VL, bool Vectorized, int &UserTreeIdx,
- ArrayRef<unsigned> ReuseShuffleIndices = None) {
+ ArrayRef<unsigned> ReuseShuffleIndices = None,
+ ArrayRef<unsigned> ReorderIndices = None) {
VectorizableTree.emplace_back(VectorizableTree);
int idx = VectorizableTree.size() - 1;
TreeEntry *Last = &VectorizableTree[idx];
@@ -741,6 +759,7 @@ private:
Last->NeedToGather = !Vectorized;
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
ReuseShuffleIndices.end());
+ Last->ReorderIndices = ReorderIndices;
if (Vectorized) {
for (int i = 0, e = VL.size(); i != e; ++i) {
assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
@@ -1202,10 +1221,38 @@ private:
/// List of users to ignore during scheduling and that don't need extracting.
ArrayRef<Value *> UserIgnoreList;
- /// Number of operation bundles that contain consecutive operations - number
- /// of operation bundles that contain consecutive operations in reversed
- /// order.
- DenseMap<unsigned, int> NumOpsWantToKeepOrder;
+ using OrdersType = SmallVector<unsigned, 4>;
+ /// A DenseMapInfo implementation that allows DenseMaps and DenseSets to be
+ /// keyed by SmallVectors of unsigned.
+ struct OrdersTypeDenseMapInfo {
+ static OrdersType getEmptyKey() {
+ OrdersType V;
+ V.push_back(~1U);
+ return V;
+ }
+
+ static OrdersType getTombstoneKey() {
+ OrdersType V;
+ V.push_back(~2U);
+ return V;
+ }
+
+ static unsigned getHashValue(const OrdersType &V) {
+ return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
+ }
+
+ static bool isEqual(const OrdersType &LHS, const OrdersType &RHS) {
+ return LHS == RHS;
+ }
+ };
+
+ /// Contains orders of operations along with the number of bundles that have
+ /// operations in this order. It stores only those orders that require
+ /// reordering; bundles that keep the original order are counted in \a
+ /// NumOpsWantToKeepOriginalOrder instead.
+ DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo> NumOpsWantToKeepOrder;
+ /// Number of bundles that do not require reordering.
+ unsigned NumOpsWantToKeepOriginalOrder = 0;
// Analysis and block reference.
Function *F;
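The custom DenseMapInfo exists because DenseMap needs a hash plus two reserved key values (empty and tombstone) that can never collide with a real key; ~1U and ~2U are safe sentinels because genuine lane indices are always small. A minimal sketch of the same pattern, assuming the LLVM ADT headers are available; recordOrder() is a hypothetical helper showing the try_emplace-then-increment idiom used later in buildTree_rec.

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"

using OrdersType = llvm::SmallVector<unsigned, 4>;

struct OrdersTypeDenseMapInfo {
  static OrdersType getEmptyKey() { return OrdersType{~1U}; }     // reserved
  static OrdersType getTombstoneKey() { return OrdersType{~2U}; } // reserved
  static unsigned getHashValue(const OrdersType &V) {
    return static_cast<unsigned>(llvm::hash_combine_range(V.begin(), V.end()));
  }
  static bool isEqual(const OrdersType &L, const OrdersType &R) {
    return L == R;
  }
};

// Count how many bundles ask for each non-identity order.
llvm::DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo> OrderCounts;

void recordOrder(const OrdersType &Order) {
  // try_emplace inserts a zero count on first sight, then we bump it.
  ++OrderCounts.try_emplace(Order).first->getSecond();
}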
@@ -1557,17 +1604,35 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
- bool Reuse = canReuseExtract(VL, VL0);
+ OrdersType CurrentOrder;
+ bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
if (Reuse) {
DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
- ++NumOpsWantToKeepOrder[S.Opcode];
- } else {
- SmallVector<Value *, 4> ReverseVL(VL.rbegin(), VL.rend());
- if (canReuseExtract(ReverseVL, VL0))
- --NumOpsWantToKeepOrder[S.Opcode];
- BS.cancelScheduling(VL, VL0);
+ ++NumOpsWantToKeepOriginalOrder;
+ newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
+ ReuseShuffleIndicies);
+ return;
+ }
+ if (!CurrentOrder.empty()) {
+ DEBUG({
+ dbgs() << "SLP: Reusing or shuffling of reordered extract sequence "
+ "with order";
+ for (unsigned Idx : CurrentOrder)
+ dbgs() << " " << Idx;
+ dbgs() << "\n";
+ });
+ // Insert the new order with an initial count of 0 if it does not exist yet;
+ // otherwise get an iterator to the existing entry.
+ auto StoredCurrentOrderAndNum =
+ NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
+ ++StoredCurrentOrderAndNum->getSecond();
+ newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies,
+ StoredCurrentOrderAndNum->getFirst());
+ return;
}
- newTreeEntry(VL, Reuse, UserTreeIdx, ReuseShuffleIndicies);
+ DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
+ newTreeEntry(VL, /*Vectorized=*/false, UserTreeIdx, ReuseShuffleIndicies);
+ BS.cancelScheduling(VL, VL0);
return;
}
case Instruction::Load: {
@@ -1589,51 +1654,55 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Make sure all loads in the bundle are simple - we can't vectorize
// atomic or volatile loads.
- for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
- LoadInst *L = cast<LoadInst>(VL[i]);
+ SmallVector<Value *, 4> PointerOps(VL.size());
+ auto POIter = PointerOps.begin();
+ for (Value *V : VL) {
+ auto *L = cast<LoadInst>(V);
if (!L->isSimple()) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
- }
-
- // Check if the loads are consecutive, reversed, or neither.
- // TODO: What we really want is to sort the loads, but for now, check
- // the two likely directions.
- bool Consecutive = true;
- bool ReverseConsecutive = true;
- for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
- if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
- Consecutive = false;
- break;
+ *POIter = L->getPointerOperand();
+ ++POIter;
+ }
+
+ OrdersType CurrentOrder;
+ // Check the order of pointer operands.
+ if (llvm::sortPtrAccesses(PointerOps, *DL, *SE, CurrentOrder)) {
+ Value *Ptr0;
+ Value *PtrN;
+ if (CurrentOrder.empty()) {
+ Ptr0 = PointerOps.front();
+ PtrN = PointerOps.back();
} else {
- ReverseConsecutive = false;
+ Ptr0 = PointerOps[CurrentOrder.front()];
+ PtrN = PointerOps[CurrentOrder.back()];
}
- }
-
- if (Consecutive) {
- ++NumOpsWantToKeepOrder[S.Opcode];
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: added a vector of loads.\n");
- return;
- }
-
- // If none of the load pairs were consecutive when checked in order,
- // check the reverse order.
- if (ReverseConsecutive)
- for (unsigned i = VL.size() - 1; i > 0; --i)
- if (!isConsecutiveAccess(VL[i], VL[i - 1], *DL, *SE)) {
- ReverseConsecutive = false;
- break;
+ const SCEV *Scev0 = SE->getSCEV(Ptr0);
+ const SCEV *ScevN = SE->getSCEV(PtrN);
+ const auto *Diff =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));
+ uint64_t Size = DL->getTypeAllocSize(ScalarTy);
+ // Check that the sorted loads are consecutive.
+ if (Diff && Diff->getAPInt().getZExtValue() == (VL.size() - 1) * Size) {
+ if (CurrentOrder.empty()) {
+ // Original loads are consecutive and do not require reordering.
+ ++NumOpsWantToKeepOriginalOrder;
+ newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
+ ReuseShuffleIndicies);
+ DEBUG(dbgs() << "SLP: added a vector of loads.\n");
+ } else {
+ // Need to reorder.
+ auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
+ ++I->getSecond();
+ newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
+ ReuseShuffleIndicies, I->getFirst());
+ DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
}
-
- if (ReverseConsecutive) {
- --NumOpsWantToKeepOrder[S.Opcode];
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: added a vector of reversed loads.\n");
- return;
+ return;
+ }
}
DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
@@ -1944,7 +2013,8 @@ unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
return N;
}
-bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue) const {
+bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
+ SmallVectorImpl<unsigned> &CurrentOrder) const {
Instruction *E0 = cast<Instruction>(OpValue);
assert(E0->getOpcode() == Instruction::ExtractElement ||
E0->getOpcode() == Instruction::ExtractValue);
@@ -1953,6 +2023,8 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue) const {
// correct offset.
Value *Vec = E0->getOperand(0);
+ CurrentOrder.clear();
+
// We have to extract from a vector/aggregate with the same number of elements.
unsigned NElts;
if (E0->getOpcode() == Instruction::ExtractValue) {
@@ -1972,15 +2044,40 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue) const {
return false;
// Check that all of the indices extract from the correct offset.
- for (unsigned I = 0, E = VL.size(); I < E; ++I) {
- Instruction *Inst = cast<Instruction>(VL[I]);
- if (!matchExtractIndex(Inst, I, Inst->getOpcode()))
- return false;
+ bool ShouldKeepOrder = true;
+ unsigned E = VL.size();
+ // Assign all elements the initial value E + 1 so we can check whether a given
+ // extract index has already been seen.
+ // Later, the absence of any remaining E + 1 entries in CurrentOrder also
+ // proves that every index in [0, E) is used exactly once, i.e. the extracts
+ // form a permutation of the source vector's elements.
+ CurrentOrder.assign(E, E + 1);
+ unsigned I = 0;
+ for (; I < E; ++I) {
+ auto *Inst = cast<Instruction>(VL[I]);
if (Inst->getOperand(0) != Vec)
- return false;
+ break;
+ Optional<unsigned> Idx = getExtractIndex(Inst);
+ if (!Idx)
+ break;
+ const unsigned ExtIdx = *Idx;
+ if (ExtIdx != I) {
+ if (ExtIdx >= E || CurrentOrder[ExtIdx] != E + 1)
+ break;
+ ShouldKeepOrder = false;
+ CurrentOrder[ExtIdx] = I;
+ } else {
+ if (CurrentOrder[I] != E + 1)
+ break;
+ CurrentOrder[I] = I;
+ }
+ }
+ if (I < E) {
+ CurrentOrder.clear();
+ return false;
}
- return true;
+ return ShouldKeepOrder;
}
bool BoUpSLP::areAllUsersVectorized(Instruction *I) const {
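A standalone trace (not LLVM code) of the CurrentOrder bookkeeping above, for a hypothetical bundle whose extract indices are {2, 0, 3, 1}. CurrentOrder[k] ends up recording which lane of VL extracts element k, and ShouldKeepOrder becomes false because the permutation is not the identity.

#include <cstdio>
#include <vector>

int main() {
  const unsigned ExtIdxs[] = {2, 0, 3, 1}; // hypothetical extract indices
  const unsigned E = 4;
  std::vector<unsigned> CurrentOrder(E, E + 1); // E + 1 means "slot unused"
  bool ShouldKeepOrder = true;
  for (unsigned I = 0; I < E; ++I) {
    const unsigned ExtIdx = ExtIdxs[I];
    if (ExtIdx != I) {
      if (ExtIdx >= E || CurrentOrder[ExtIdx] != E + 1)
        return 1; // out-of-range or duplicate index: give up and gather
      ShouldKeepOrder = false;
      CurrentOrder[ExtIdx] = I;
    } else {
      if (CurrentOrder[I] != E + 1)
        return 1;
      CurrentOrder[I] = I;
    }
  }
  for (unsigned V : CurrentOrder)
    std::printf("%u ", V);              // prints "1 3 0 2"
  std::printf("(ShouldKeepOrder=%d)\n", ShouldKeepOrder);
  return 0;
}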
@@ -2082,8 +2179,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, Idx);
}
}
- if (canReuseExtract(VL, S.OpValue)) {
+ if (!E->NeedToGather) {
int DeadCost = ReuseShuffleCost;
+ if (!E->ReorderIndices.empty()) {
+ // TODO: Merge this shuffle with the ReuseShuffleCost.
+ DeadCost += TTI->getShuffleCost(
+ TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+ }
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
Instruction *E = cast<Instruction>(VL[i]);
// If all users are going to be vectorized, instruction can be
@@ -2246,7 +2348,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load,
VecTy, alignment, 0, VL0);
- if (!isConsecutiveAccess(VL[0], VL[1], *DL, *SE)) {
+ if (!E->ReorderIndices.empty()) {
+ // TODO: Merge this shuffle with the ReuseShuffleCost.
VecLdCost += TTI->getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
}
@@ -2944,6 +3047,15 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
return V;
}
+static void inversePermutation(ArrayRef<unsigned> Indices,
+ SmallVectorImpl<unsigned> &Mask) {
+ Mask.clear();
+ const unsigned E = Indices.size();
+ Mask.resize(E);
+ for (unsigned I = 0; I < E; ++I)
+ Mask[Indices[I]] = I;
+}
+
Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
IRBuilder<>::InsertPointGuard Guard(Builder);
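A small standalone check of what inversePermutation computes, using std::vector instead of ArrayRef/SmallVectorImpl. If ReorderIndices is {2, 0, 3, 1} (VL lane 2 holds the element that comes first in source order, lane 0 the second, and so on), the inverse mask is {1, 3, 0, 2}; used as a shufflevector mask on the wide value produced in source order, it moves every element back to the lane its scalar occupies in the bundle, which is exactly how the "reorder_shuffle" shuffles below use it.

#include <cassert>
#include <vector>

// Standalone analogue of the inversePermutation helper added above.
static void inversePermutation(const std::vector<unsigned> &Indices,
                               std::vector<unsigned> &Mask) {
  Mask.assign(Indices.size(), 0);
  for (unsigned I = 0, E = Indices.size(); I < E; ++I)
    Mask[Indices[I]] = I;
}

int main() {
  std::vector<unsigned> Mask;
  inversePermutation({2, 0, 3, 1}, Mask);
  assert((Mask == std::vector<unsigned>{1, 3, 0, 2}));
  return 0;
}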
@@ -3020,10 +3132,19 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
case Instruction::ExtractElement: {
- if (canReuseExtract(E->Scalars, VL0)) {
+ if (!E->NeedToGather) {
Value *V = VL0->getOperand(0);
- if (NeedToShuffleReuses) {
+ if (!E->ReorderIndices.empty()) {
+ OrdersType Mask;
+ inversePermutation(E->ReorderIndices, Mask);
Builder.SetInsertPoint(VL0);
+ V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), Mask,
+ "reorder_shuffle");
+ }
+ if (NeedToShuffleReuses) {
+ // TODO: Merge this shuffle with the ReorderShuffleMask.
+ if (!E->ReorderIndices.empty())
+ Builder.SetInsertPoint(VL0);
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}
@@ -3044,14 +3165,21 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::ExtractValue: {
- if (canReuseExtract(E->Scalars, VL0)) {
+ if (!E->NeedToGather) {
LoadInst *LI = cast<LoadInst>(VL0->getOperand(0));
Builder.SetInsertPoint(LI);
PointerType *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
LoadInst *V = Builder.CreateAlignedLoad(Ptr, LI->getAlignment());
Value *NewV = propagateMetadata(V, E->Scalars);
+ if (!E->ReorderIndices.empty()) {
+ OrdersType Mask;
+ inversePermutation(E->ReorderIndices, Mask);
+ NewV = Builder.CreateShuffleVector(NewV, UndefValue::get(VecTy), Mask,
+ "reorder_shuffle");
+ }
if (NeedToShuffleReuses) {
+ // TODO: Merge this shuffle with the ReorderShuffleMask.
NewV = Builder.CreateShuffleVector(
NewV, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle");
}
@@ -3225,10 +3353,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Load: {
// Loads are inserted at the head of the tree because we don't want to
// sink them all the way down past store instructions.
- bool IsReversed =
- !isConsecutiveAccess(E->Scalars[0], E->Scalars[1], *DL, *SE);
- if (IsReversed)
- VL0 = cast<Instruction>(E->Scalars.back());
+ bool IsReorder = !E->ReorderIndices.empty();
+ if (IsReorder)
+ VL0 = cast<Instruction>(E->Scalars[E->ReorderIndices.front()]);
setInsertPointAfterBundle(E->Scalars, VL0);
LoadInst *LI = cast<LoadInst>(VL0);
@@ -3252,12 +3379,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
LI->setAlignment(Alignment);
Value *V = propagateMetadata(LI, E->Scalars);
- if (IsReversed) {
- SmallVector<uint32_t, 4> Mask(E->Scalars.size());
- std::iota(Mask.rbegin(), Mask.rend(), 0);
- V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()), Mask);
+ if (IsReorder) {
+ OrdersType Mask;
+ inversePermutation(E->ReorderIndices, Mask);
+ V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ Mask, "reorder_shuffle");
}
if (NeedToShuffleReuses) {
+ // TODO: Merge this shuffle with the ReorderShuffleMask.
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}
@@ -4825,8 +4954,10 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
R.buildTree(Ops);
+ Optional<ArrayRef<unsigned>> Order = R.bestOrder();
// TODO: check if we can allow reordering for more cases.
- if (AllowReorder && R.shouldReorder()) {
+ if (AllowReorder && Order) {
+ // TODO: reorder tree nodes without tree rebuilding.
// Conceptually, there is nothing actually preventing us from trying to
// reorder a larger list. In fact, we do exactly this when vectorizing
// reductions. However, at this point, we only expect to get here when
@@ -5572,9 +5703,14 @@ public:
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
V.buildTree(VL, ExternallyUsedValues, IgnoreList);
- if (V.shouldReorder()) {
- SmallVector<Value *, 8> Reversed(VL.rbegin(), VL.rend());
- V.buildTree(Reversed, ExternallyUsedValues, IgnoreList);
+ Optional<ArrayRef<unsigned>> Order = V.bestOrder();
+ // TODO: Handle orders of size less than number of elements in the vector.
+ if (Order && Order->size() == VL.size()) {
+ // TODO: reorder tree nodes without tree rebuilding.
+ SmallVector<Value *, 4> ReorderedOps(VL.size());
+ llvm::transform(*Order, ReorderedOps.begin(),
+ [VL](const unsigned Idx) { return VL[Idx]; });
+ V.buildTree(ReorderedOps, ExternallyUsedValues, IgnoreList);
}
if (V.isTreeTinyAndNotFullyVectorizable())
break;
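A rough standalone model of how the reduction path above rebuilds the bundle from bestOrder(), with made-up operand names and std::transform instead of llvm::transform: ReorderedOps[i] = VL[Order[i]], so an order of {2, 0, 3, 1} re-feeds {"a", "b", "c", "d"} to buildTree as {"c", "a", "d", "b"}.

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> VL = {"a", "b", "c", "d"};
  std::vector<unsigned> Order = {2, 0, 3, 1};
  std::vector<std::string> ReorderedOps(VL.size());
  std::transform(Order.begin(), Order.end(), ReorderedOps.begin(),
                 [&](unsigned Idx) { return VL[Idx]; });
  assert((ReorderedOps == std::vector<std::string>{"c", "a", "d", "b"}));
  return 0;
}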