summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize')
-rw-r--r--llvm/lib/Transforms/Vectorize/CMakeLists.txt1
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp13
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h125
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanSLP.cpp469
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanValue.h16
5 files changed, 623 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index 27a4d241b32..06eaadf58c3 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_library(LLVMVectorize
VPlan.cpp
VPlanHCFGBuilder.cpp
VPlanHCFGTransforms.cpp
+ VPlanSLP.cpp
VPlanVerifier.cpp
ADDITIONAL_HEADER_DIRS
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 541378dee2a..dad733b4651 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -338,6 +338,12 @@ void VPInstruction::print(raw_ostream &O) const {
case VPInstruction::ICmpULE:
O << "icmp ule";
break;
+ case VPInstruction::SLPLoad:
+ O << "combined load";
+ break;
+ case VPInstruction::SLPStore:
+ O << "combined store";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -681,6 +687,13 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
+void VPValue::replaceAllUsesWith(VPValue *New) {
+ for (VPUser *User : users())
+ for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
+ if (User->getOperand(I) == this)
+ User->setOperand(I, New);
+}
+
void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
Old2NewTy &Old2New,
InterleavedAccessInfo &IAI) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 66333220971..dd50db346b0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -60,6 +60,7 @@ class Value;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
+class VPlanSlp;
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
@@ -609,10 +610,16 @@ public:
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPUser, public VPRecipeBase {
friend class VPlanHCFGTransforms;
+ friend class VPlanSlp;
public:
/// VPlan opcodes, extending LLVM IR with idiomatics instructions.
- enum { Not = Instruction::OtherOpsEnd + 1, ICmpULE };
+ enum {
+ Not = Instruction::OtherOpsEnd + 1,
+ ICmpULE,
+ SLPLoad,
+ SLPStore,
+ };
private:
typedef unsigned char OpcodeTy;
@@ -622,6 +629,13 @@ private:
/// modeled instruction.
void generateInstruction(VPTransformState &State, unsigned Part);
+protected:
+ Instruction *getUnderlyingInstr() {
+ return cast_or_null<Instruction>(getUnderlyingValue());
+ }
+
+ void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
+
public:
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
: VPUser(VPValue::VPInstructionSC, Operands),
@@ -635,6 +649,11 @@ public:
return V->getVPValueID() == VPValue::VPInstructionSC;
}
+ VPInstruction *clone() const {
+ SmallVector<VPValue *, 2> Operands(operands());
+ return new VPInstruction(Opcode, Operands);
+ }
+
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *R) {
return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
@@ -652,6 +671,14 @@ public:
/// Print the VPInstruction.
void print(raw_ostream &O) const;
+
+ /// Return true if this instruction may modify memory.
+ bool mayWriteToMemory() const {
+ // TODO: we can use attributes of the called function to rule out memory
+ // modifications.
+ return Opcode == Instruction::Store || Opcode == Instruction::Call ||
+ Opcode == Instruction::Invoke || Opcode == SLPStore;
+ }
};
/// VPWidenRecipe is a recipe for producing a copy of vector type for each
@@ -1508,6 +1535,102 @@ public:
}
};
+/// Class that maps (parts of) an existing VPlan to trees of combined
+/// VPInstructions.
+class VPlanSlp {
+private:
+ enum class OpMode { Failed, Load, Opcode };
+
+ /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
+ /// DenseMap keys.
+ struct BundleDenseMapInfo {
+ static SmallVector<VPValue *, 4> getEmptyKey() {
+ return {reinterpret_cast<VPValue *>(-1)};
+ }
+
+ static SmallVector<VPValue *, 4> getTombstoneKey() {
+ return {reinterpret_cast<VPValue *>(-2)};
+ }
+
+ static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
+ return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
+ }
+
+ static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
+ const SmallVector<VPValue *, 4> &RHS) {
+ return LHS == RHS;
+ }
+ };
+
+ /// Mapping of values in the original VPlan to a combined VPInstruction.
+ DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
+ BundleToCombined;
+
+ VPInterleavedAccessInfo &IAI;
+
+ /// Basic block to operate on. For now, only instructions in a single BB are
+ /// considered.
+ const VPBasicBlock &BB;
+
+ /// Indicates whether we managed to combine all visited instructions or not.
+ bool CompletelySLP = true;
+
+ /// Width of the widest combined bundle in bits.
+ unsigned WidestBundleBits = 0;
+
+ using MultiNodeOpTy =
+ typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
+
+ // Input operand bundles for the current multi node. Each multi node operand
+ // bundle contains values not matching the multi node's opcode. They will
+ // be reordered in reorderMultiNodeOps, once we completed building a
+ // multi node.
+ SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
+
+ /// Indicates whether we are building a multi node currently.
+ bool MultiNodeActive = false;
+
+ /// Check if we can vectorize Operands together.
+ bool areVectorizable(ArrayRef<VPValue *> Operands) const;
+
+ /// Add combined instruction \p New for the bundle \p Operands.
+ void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
+
+ /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
+ VPInstruction *markFailed();
+
+ /// Reorder operands in the multi node to maximize sequential memory access
+ /// and commutative operations.
+ SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
+
+ /// Choose the best candidate to use for the lane after \p Last. The set of
+ /// candidates to choose from are values with an opcode matching \p Last's
+ /// or loads consecutive to \p Last.
+ std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
+ SmallVectorImpl<VPValue *> &Candidates,
+ VPInterleavedAccessInfo &IAI);
+
+ /// Print bundle \p Values to dbgs().
+ void dumpBundle(ArrayRef<VPValue *> Values);
+
+public:
+ VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
+
+ ~VPlanSlp() {
+ for (auto &KV : BundleToCombined)
+ delete KV.second;
+ }
+
+ /// Tries to build an SLP tree rooted at \p Operands and returns a
+ /// VPInstruction combining \p Operands, if they can be combined.
+ VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
+
+ /// Return the width of the widest combined bundle in bits.
+ unsigned getWidestBundleBits() const { return WidestBundleBits; }
+
+ /// Return true if all visited instruction can be combined.
+ bool isCompletelySLP() const { return CompletelySLP; }
+};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
new file mode 100644
index 00000000000..679fb51e48d
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -0,0 +1,469 @@
+//===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// This file implements SLP analysis based on VPlan. The analysis is based on
+/// the ideas described in
+///
+/// Look-ahead SLP: auto-vectorization in the presence of commutative
+/// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
+/// Luís F. W. Góes
+///
+//===----------------------------------------------------------------------===//
+
+#include "VPlan.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <cassert>
+#include <iterator>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "vplan-slp"
+
+// Number of levels to look ahead when re-ordering multi node operands.
+static unsigned LookaheadMaxDepth = 5;
+
+VPInstruction *VPlanSlp::markFailed() {
+ // FIXME: Currently this is used to signal we hit instructions we cannot
+ // trivially SLP'ize.
+ CompletelySLP = false;
+ return nullptr;
+}
+
+void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
+ if (all_of(Operands, [](VPValue *V) {
+ return cast<VPInstruction>(V)->getUnderlyingInstr();
+ })) {
+ unsigned BundleSize = 0;
+ for (VPValue *V : Operands) {
+ Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
+ assert(!T->isVectorTy() && "Only scalar types supported for now");
+ BundleSize += T->getScalarSizeInBits();
+ }
+ WidestBundleBits = std::max(WidestBundleBits, BundleSize);
+ }
+
+ auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
+ assert(Res.second &&
+ "Already created a combined instruction for the operand bundle");
+ (void)Res;
+}
+
+bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
+ // Currently we only support VPInstructions.
+ if (!all_of(Operands, [](VPValue *Op) {
+ return Op && isa<VPInstruction>(Op) &&
+ cast<VPInstruction>(Op)->getUnderlyingInstr();
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
+ return false;
+ }
+
+ // Check if opcodes and type width agree for all instructions in the bundle.
+ // FIXME: Differing widths/opcodes can be handled by inserting additional
+ // instructions.
+ // FIXME: Deal with non-primitive types.
+ const Instruction *OriginalInstr =
+ cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
+ unsigned Opcode = OriginalInstr->getOpcode();
+ unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
+ if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
+ const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
+ return I->getOpcode() == Opcode &&
+ I->getType()->getPrimitiveSizeInBits() == Width;
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
+ return false;
+ }
+
+ // For now, all operands must be defined in the same BB.
+ if (any_of(Operands, [this](VPValue *Op) {
+ return cast<VPInstruction>(Op)->getParent() != &this->BB;
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
+ return false;
+ }
+
+ if (any_of(Operands,
+ [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
+ return false;
+ }
+
+ // For loads, check that there are no instructions writing to memory in
+ // between them.
+ // TODO: we only have to forbid instructions writing to memory that could
+ // interfere with any of the loads in the bundle
+ if (Opcode == Instruction::Load) {
+ unsigned LoadsSeen = 0;
+ VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
+ for (auto &I : *Parent) {
+ auto *VPI = cast<VPInstruction>(&I);
+ if (VPI->getOpcode() == Instruction::Load &&
+ std::find(Operands.begin(), Operands.end(), VPI) != Operands.end())
+ LoadsSeen++;
+
+ if (LoadsSeen == Operands.size())
+ break;
+ if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
+ LLVM_DEBUG(
+ dbgs() << "VPSLP: instruction modifying memory between loads\n");
+ return false;
+ }
+ }
+
+ if (!all_of(Operands, [](VPValue *Op) {
+ return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
+ ->isSimple();
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
+ return false;
+ }
+ }
+
+ if (Opcode == Instruction::Store)
+ if (!all_of(Operands, [](VPValue *Op) {
+ return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
+ ->isSimple();
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
+ return false;
+ }
+
+ return true;
+}
+
+static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
+ unsigned OperandIndex) {
+ SmallVector<VPValue *, 4> Operands;
+ for (VPValue *V : Values) {
+ auto *U = cast<VPUser>(V);
+ Operands.push_back(U->getOperand(OperandIndex));
+ }
+ return Operands;
+}
+
+static bool areCommutative(ArrayRef<VPValue *> Values) {
+ return Instruction::isCommutative(
+ cast<VPInstruction>(Values[0])->getOpcode());
+}
+
+static SmallVector<SmallVector<VPValue *, 4>, 4>
+getOperands(ArrayRef<VPValue *> Values) {
+ SmallVector<SmallVector<VPValue *, 4>, 4> Result;
+ auto *VPI = cast<VPInstruction>(Values[0]);
+
+ switch (VPI->getOpcode()) {
+ case Instruction::Load:
+ llvm_unreachable("Loads terminate a tree, no need to get operands");
+ case Instruction::Store:
+ Result.push_back(getOperands(Values, 0));
+ break;
+ default:
+ for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
+ Result.push_back(getOperands(Values, I));
+ break;
+ }
+
+ return Result;
+}
+
+/// Returns the opcode of Values or ~0 if they do not all agree.
+static Optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) {
+ unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
+ if (any_of(Values, [Opcode](VPValue *V) {
+ return cast<VPInstruction>(V)->getOpcode() != Opcode;
+ }))
+ return None;
+ return {Opcode};
+}
+
+/// Returns true if A and B access sequential memory if they are loads or
+/// stores or if they have identical opcodes otherwise.
+static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,
+ VPInterleavedAccessInfo &IAI) {
+ if (A->getOpcode() != B->getOpcode())
+ return false;
+
+ if (A->getOpcode() != Instruction::Load &&
+ A->getOpcode() != Instruction::Store)
+ return true;
+ auto *GA = IAI.getInterleaveGroup(A);
+ auto *GB = IAI.getInterleaveGroup(B);
+
+ return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
+}
+
+/// Implements getLAScore from Listing 7 in the paper.
+/// Traverses and compares operands of V1 and V2 to MaxLevel.
+static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
+ VPInterleavedAccessInfo &IAI) {
+ if (!isa<VPInstruction>(V1) || !isa<VPInstruction>(V2))
+ return 0;
+
+ if (MaxLevel == 0)
+ return (unsigned)areConsecutiveOrMatch(cast<VPInstruction>(V1),
+ cast<VPInstruction>(V2), IAI);
+
+ unsigned Score = 0;
+ for (unsigned I = 0, EV1 = cast<VPUser>(V1)->getNumOperands(); I < EV1; ++I)
+ for (unsigned J = 0, EV2 = cast<VPUser>(V2)->getNumOperands(); J < EV2; ++J)
+ Score += getLAScore(cast<VPUser>(V1)->getOperand(I),
+ cast<VPUser>(V2)->getOperand(J), MaxLevel - 1, IAI);
+ return Score;
+}
+
+std::pair<VPlanSlp::OpMode, VPValue *>
+VPlanSlp::getBest(OpMode Mode, VPValue *Last,
+ SmallVectorImpl<VPValue *> &Candidates,
+ VPInterleavedAccessInfo &IAI) {
+ LLVM_DEBUG(dbgs() << " getBest\n");
+ VPValue *Best = Candidates[0];
+ SmallVector<VPValue *, 4> BestCandidates;
+
+ LLVM_DEBUG(dbgs() << " Candidates for "
+ << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
+ for (auto *Candidate : Candidates) {
+ auto *LastI = cast<VPInstruction>(Last);
+ auto *CandidateI = cast<VPInstruction>(Candidate);
+ if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
+ LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
+ << " ");
+ BestCandidates.push_back(Candidate);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+
+ if (BestCandidates.empty())
+ return {OpMode::Failed, nullptr};
+
+ if (BestCandidates.size() == 1)
+ return {Mode, BestCandidates[0]};
+
+ if (Mode == OpMode::Opcode) {
+ unsigned BestScore = 0;
+ for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
+ unsigned PrevScore = ~0u;
+ bool AllSame = true;
+
+ // FIXME: Avoid visiting the same operands multiple times.
+ for (auto *Candidate : BestCandidates) {
+ unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
+ if (PrevScore == ~0u)
+ PrevScore = Score;
+ if (PrevScore != Score)
+ AllSame = false;
+ PrevScore = Score;
+
+ if (Score > BestScore) {
+ BestScore = Score;
+ Best = Candidate;
+ }
+ }
+ if (!AllSame)
+ break;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Found best "
+ << *cast<VPInstruction>(Best)->getUnderlyingInstr()
+ << "\n");
+ std::remove(Candidates.begin(), Candidates.end(), Best);
+
+ return {Mode, Best};
+}
+
+SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
+ SmallVector<MultiNodeOpTy, 4> FinalOrder;
+ SmallVector<OpMode, 4> Mode;
+ FinalOrder.reserve(MultiNodeOps.size());
+ Mode.reserve(MultiNodeOps.size());
+
+ LLVM_DEBUG(dbgs() << "Reordering multinode\n");
+
+ for (auto &Operands : MultiNodeOps) {
+ FinalOrder.push_back({Operands.first, {Operands.second[0]}});
+ if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
+ Instruction::Load)
+ Mode.push_back(OpMode::Load);
+ else
+ Mode.push_back(OpMode::Opcode);
+ }
+
+ for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
+ LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");
+ SmallVector<VPValue *, 4> Candidates;
+ Candidates.reserve(MultiNodeOps.size());
+ LLVM_DEBUG(dbgs() << " Candidates ");
+ for (auto Ops : MultiNodeOps) {
+ LLVM_DEBUG(
+ dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
+ << " ");
+ Candidates.push_back(Ops.second[Lane]);
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+
+ for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
+ LLVM_DEBUG(dbgs() << " Checking " << Op << "\n");
+ if (Mode[Op] == OpMode::Failed)
+ continue;
+
+ VPValue *Last = FinalOrder[Op].second[Lane - 1];
+ std::pair<OpMode, VPValue *> Res =
+ getBest(Mode[Op], Last, Candidates, IAI);
+ if (Res.second)
+ FinalOrder[Op].second.push_back(Res.second);
+ else
+ // TODO: handle this case
+ FinalOrder[Op].second.push_back(markFailed());
+ }
+ }
+
+ return FinalOrder;
+}
+
+void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
+ LLVM_DEBUG(dbgs() << " Ops: ");
+ for (auto Op : Values)
+ if (auto *Instr = cast_or_null<VPInstruction>(Op)->getUnderlyingInstr())
+ LLVM_DEBUG(dbgs() << *Instr << " | ");
+ else
+ LLVM_DEBUG(dbgs() << " nullptr | ");
+ LLVM_DEBUG(dbgs() << "\n");
+}
+
+VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
+ assert(!Values.empty() && "Need some operands!");
+
+ // If we already visited this instruction bundle, re-use the existing node
+ auto I = BundleToCombined.find(to_vector<4>(Values));
+ if (I != BundleToCombined.end()) {
+#ifdef NDEBUG
+ // Check that the resulting graph is a tree. If we re-use a node, this means
+ // its values have multiple users. We only allow this, if all users of each
+ // value are the same instruction.
+ for (auto *V : Values) {
+ auto UI = V->user_begin();
+ auto *FirstUser = *UI++;
+ while (UI != V->use_end()) {
+ assert(*UI == FirstUser && "Currently we only support SLP trees.");
+ UI++;
+ }
+ }
+#endif
+ return I->second;
+ }
+
+ // Dump inputs
+ LLVM_DEBUG({
+ dbgs() << "buildGraph: ";
+ dumpBundle(Values);
+ });
+
+ if (!areVectorizable(Values))
+ return markFailed();
+
+ assert(getOpcode(Values) && "Opcodes for all values must match");
+ unsigned ValuesOpcode = getOpcode(Values).getValue();
+
+ SmallVector<VPValue *, 4> CombinedOperands;
+ if (areCommutative(Values)) {
+ bool MultiNodeRoot = !MultiNodeActive;
+ MultiNodeActive = true;
+ for (auto &Operands : getOperands(Values)) {
+ LLVM_DEBUG({
+ dbgs() << " Visiting Commutative";
+ dumpBundle(Operands);
+ });
+
+ auto OperandsOpcode = getOpcode(Operands);
+ if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
+ LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");
+ CombinedOperands.push_back(buildGraph(Operands));
+ } else {
+ LLVM_DEBUG(dbgs() << " Adding multinode Ops\n");
+ // Create dummy VPInstruction, which will we replace later by the
+ // re-ordered operand.
+ VPInstruction *Op = new VPInstruction(0, {});
+ CombinedOperands.push_back(Op);
+ MultiNodeOps.emplace_back(Op, Operands);
+ }
+ }
+
+ if (MultiNodeRoot) {
+ LLVM_DEBUG(dbgs() << "Reorder \n");
+ MultiNodeActive = false;
+
+ auto FinalOrder = reorderMultiNodeOps();
+
+ MultiNodeOps.clear();
+ for (auto &Ops : FinalOrder) {
+ VPInstruction *NewOp = buildGraph(Ops.second);
+ Ops.first->replaceAllUsesWith(NewOp);
+ for (unsigned i = 0; i < CombinedOperands.size(); i++)
+ if (CombinedOperands[i] == Ops.first)
+ CombinedOperands[i] = NewOp;
+ delete Ops.first;
+ Ops.first = NewOp;
+ }
+ LLVM_DEBUG(dbgs() << "Found final order\n");
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << " NonCommuntative\n");
+ if (ValuesOpcode == Instruction::Load)
+ for (VPValue *V : Values)
+ CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
+ else
+ for (auto &Operands : getOperands(Values))
+ CombinedOperands.push_back(buildGraph(Operands));
+ }
+
+ unsigned Opcode;
+ switch (ValuesOpcode) {
+ case Instruction::Load:
+ Opcode = VPInstruction::SLPLoad;
+ break;
+ case Instruction::Store:
+ Opcode = VPInstruction::SLPStore;
+ break;
+ default:
+ Opcode = ValuesOpcode;
+ break;
+ }
+
+ if (!CompletelySLP)
+ return markFailed();
+
+ assert(CombinedOperands.size() > 0 && "Need more some operands");
+ auto *VPI = new VPInstruction(Opcode, CombinedOperands);
+ VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr());
+
+ LLVM_DEBUG(dbgs() << "Create VPInstruction "; VPI->print(dbgs());
+ cast<VPInstruction>(Values[0])->print(dbgs()); dbgs() << "\n");
+ addCombined(Values, VPI);
+ return VPI;
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 972f0fa7da6..b473579b699 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -106,6 +106,20 @@ public:
const_user_range users() const {
return const_user_range(user_begin(), user_end());
}
+
+ /// Returns true if the value has more than one unique user.
+ bool hasMoreThanOneUniqueUser() {
+ if (getNumUsers() == 0)
+ return false;
+
+ // Check if all users match the first user.
+ auto Current = std::next(user_begin());
+ while (Current != user_end() && *user_begin() == *Current)
+ Current++;
+ return Current != user_end();
+ }
+
+ void replaceAllUsesWith(VPValue *New);
};
typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
@@ -151,6 +165,8 @@ public:
return Operands[N];
}
+ void setOperand(unsigned I, VPValue *New) { Operands[I] = New; }
+
typedef SmallVectorImpl<VPValue *>::iterator operand_iterator;
typedef SmallVectorImpl<VPValue *>::const_iterator const_operand_iterator;
typedef iterator_range<operand_iterator> operand_range;
OpenPOWER on IntegriCloud