-rw-r--r--  llvm/lib/Transforms/Vectorize/CMakeLists.txt          |   1
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp               |  13
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h                 | 125
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanSLP.cpp            | 469
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanValue.h            |  16
-rw-r--r--  llvm/unittests/Transforms/Vectorize/CMakeLists.txt    |   1
-rw-r--r--  llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp  | 899
7 files changed, 1523 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index 27a4d241b32..06eaadf58c3 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_library(LLVMVectorize
VPlan.cpp
VPlanHCFGBuilder.cpp
VPlanHCFGTransforms.cpp
+ VPlanSLP.cpp
VPlanVerifier.cpp
ADDITIONAL_HEADER_DIRS
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 541378dee2a..dad733b4651 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -338,6 +338,12 @@ void VPInstruction::print(raw_ostream &O) const {
case VPInstruction::ICmpULE:
O << "icmp ule";
break;
+ case VPInstruction::SLPLoad:
+ O << "combined load";
+ break;
+ case VPInstruction::SLPStore:
+ O << "combined store";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -681,6 +687,13 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
+void VPValue::replaceAllUsesWith(VPValue *New) {
+ for (VPUser *User : users())
+ for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
+ if (User->getOperand(I) == this)
+ User->setOperand(I, New);
+}
+
void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
Old2NewTy &Old2New,
InterleavedAccessInfo &IAI) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 66333220971..dd50db346b0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -60,6 +60,7 @@ class Value;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
+class VPlanSlp;
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
@@ -609,10 +610,16 @@ public:
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPUser, public VPRecipeBase {
friend class VPlanHCFGTransforms;
+ friend class VPlanSlp;
public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
- enum { Not = Instruction::OtherOpsEnd + 1, ICmpULE };
+ enum {
+ Not = Instruction::OtherOpsEnd + 1,
+ ICmpULE,
+ SLPLoad,
+ SLPStore,
+ };
private:
typedef unsigned char OpcodeTy;
@@ -622,6 +629,13 @@ private:
/// modeled instruction.
void generateInstruction(VPTransformState &State, unsigned Part);
+protected:
+ Instruction *getUnderlyingInstr() {
+ return cast_or_null<Instruction>(getUnderlyingValue());
+ }
+
+ void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
+
public:
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
: VPUser(VPValue::VPInstructionSC, Operands),
@@ -635,6 +649,11 @@ public:
return V->getVPValueID() == VPValue::VPInstructionSC;
}
+ VPInstruction *clone() const {
+ SmallVector<VPValue *, 2> Operands(operands());
+ return new VPInstruction(Opcode, Operands);
+ }
+
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *R) {
return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
@@ -652,6 +671,14 @@ public:
/// Print the VPInstruction.
void print(raw_ostream &O) const;
+
+ /// Return true if this instruction may modify memory.
+ bool mayWriteToMemory() const {
+ // TODO: we can use attributes of the called function to rule out memory
+ // modifications.
+ return Opcode == Instruction::Store || Opcode == Instruction::Call ||
+ Opcode == Instruction::Invoke || Opcode == SLPStore;
+ }
};
/// VPWidenRecipe is a recipe for producing a copy of vector type for each
@@ -1508,6 +1535,102 @@ public:
}
};
+/// Class that maps (parts of) an existing VPlan to trees of combined
+/// VPInstructions.
+class VPlanSlp {
+private:
+ enum class OpMode { Failed, Load, Opcode };
+
+ /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
+ /// DenseMap keys.
+ struct BundleDenseMapInfo {
+ static SmallVector<VPValue *, 4> getEmptyKey() {
+ return {reinterpret_cast<VPValue *>(-1)};
+ }
+
+ static SmallVector<VPValue *, 4> getTombstoneKey() {
+ return {reinterpret_cast<VPValue *>(-2)};
+ }
+
+ static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
+ return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
+ }
+
+ static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
+ const SmallVector<VPValue *, 4> &RHS) {
+ return LHS == RHS;
+ }
+ };
+
+ /// Mapping of values in the original VPlan to a combined VPInstruction.
+ DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
+ BundleToCombined;
+
+ VPInterleavedAccessInfo &IAI;
+
+ /// Basic block to operate on. For now, only instructions in a single BB are
+ /// considered.
+ const VPBasicBlock &BB;
+
+ /// Indicates whether we managed to combine all visited instructions or not.
+ bool CompletelySLP = true;
+
+ /// Width of the widest combined bundle in bits.
+ unsigned WidestBundleBits = 0;
+
+ using MultiNodeOpTy =
+ typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
+
+  /// Input operand bundles for the current multi node. Each multi node
+  /// operand bundle contains values not matching the multi node's opcode.
+  /// They will be reordered in reorderMultiNodeOps once we have completed
+  /// building a multi node.
+ SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
+
+ /// Indicates whether we are building a multi node currently.
+ bool MultiNodeActive = false;
+
+ /// Check if we can vectorize Operands together.
+ bool areVectorizable(ArrayRef<VPValue *> Operands) const;
+
+ /// Add combined instruction \p New for the bundle \p Operands.
+ void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
+
+ /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
+ VPInstruction *markFailed();
+
+ /// Reorder operands in the multi node to maximize sequential memory access
+ /// and commutative operations.
+ SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
+
+ /// Choose the best candidate to use for the lane after \p Last. The set of
+ /// candidates to choose from are values with an opcode matching \p Last's
+ /// or loads consecutive to \p Last.
+ std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
+ SmallVectorImpl<VPValue *> &Candidates,
+ VPInterleavedAccessInfo &IAI);
+
+ /// Print bundle \p Values to dbgs().
+ void dumpBundle(ArrayRef<VPValue *> Values);
+
+public:
+ VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
+
+ ~VPlanSlp() {
+ for (auto &KV : BundleToCombined)
+ delete KV.second;
+ }
+
+ /// Tries to build an SLP tree rooted at \p Operands and returns a
+ /// VPInstruction combining \p Operands, if they can be combined.
+ VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
+
+ /// Return the width of the widest combined bundle in bits.
+ unsigned getWidestBundleBits() const { return WidestBundleBits; }
+
+  /// Return true if all visited instructions can be combined.
+ bool isCompletelySLP() const { return CompletelySLP; }
+};
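+
+// Illustrative use of VPlanSlp, mirroring the unit tests in VPlanSlpTest.cpp.
+// Store1 and Store2 stand for hypothetical VPInstruction stores rooting an
+// SLP tree in VPBasicBlock *Body, and VPIAI is a VPInterleavedAccessInfo:
+//
+//   VPlanSlp Slp(VPIAI, *Body);
+//   SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
+//   VPInstruction *Combined = Slp.buildGraph(StoreRoot);
+//   // On success, Combined is an SLPStore and Slp.isCompletelySLP() holds;
+//   // on failure, buildGraph returns nullptr.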
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
new file mode 100644
index 00000000000..679fb51e48d
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -0,0 +1,469 @@
+//===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// This file implements SLP analysis based on VPlan. The analysis is based on
+/// the ideas described in
+///
+/// Look-ahead SLP: auto-vectorization in the presence of commutative
+/// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
+/// Luís F. W. Góes
+///
+//===----------------------------------------------------------------------===//
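+//
+// An illustrative sketch (mirroring testSlpSimple_2 in VPlanSlpTest.cpp):
+// given two adjacent scalar stores
+//
+//   store i32 %add0, i32* %C0   ; %add0 = add nsw i32 %vA0, %vB0
+//   store i32 %add1, i32* %C1   ; %add1 = add nsw i32 %vA1, %vB1
+//
+// where %vA0/%vA1 and %vB0/%vB1 load consecutive struct fields, buildGraph
+// on the store bundle yields a combined SLPStore fed by a combined add,
+// which is in turn fed by two combined SLPLoads.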
+
+#include "VPlan.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <cassert>
+#include <iterator>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "vplan-slp"
+
+// Number of levels to look ahead when re-ordering multi node operands.
+static unsigned LookaheadMaxDepth = 5;
+
+VPInstruction *VPlanSlp::markFailed() {
+ // FIXME: Currently this is used to signal we hit instructions we cannot
+ // trivially SLP'ize.
+ CompletelySLP = false;
+ return nullptr;
+}
+
+void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
+ if (all_of(Operands, [](VPValue *V) {
+ return cast<VPInstruction>(V)->getUnderlyingInstr();
+ })) {
+ unsigned BundleSize = 0;
+ for (VPValue *V : Operands) {
+ Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
+ assert(!T->isVectorTy() && "Only scalar types supported for now");
+ BundleSize += T->getScalarSizeInBits();
+ }
+ WidestBundleBits = std::max(WidestBundleBits, BundleSize);
+ }
+
+ auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
+ assert(Res.second &&
+ "Already created a combined instruction for the operand bundle");
+ (void)Res;
+}
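+
+// For example, a bundle of two i32 values yields BundleSize == 64, which is
+// what the getWidestBundleBits() checks in VPlanSlpTest.cpp expect.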
+
+bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
+ // Currently we only support VPInstructions.
+ if (!all_of(Operands, [](VPValue *Op) {
+ return Op && isa<VPInstruction>(Op) &&
+ cast<VPInstruction>(Op)->getUnderlyingInstr();
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
+ return false;
+ }
+
+ // Check if opcodes and type width agree for all instructions in the bundle.
+ // FIXME: Differing widths/opcodes can be handled by inserting additional
+ // instructions.
+ // FIXME: Deal with non-primitive types.
+ const Instruction *OriginalInstr =
+ cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
+ unsigned Opcode = OriginalInstr->getOpcode();
+ unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
+ if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
+ const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
+ return I->getOpcode() == Opcode &&
+ I->getType()->getPrimitiveSizeInBits() == Width;
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
+ return false;
+ }
+
+ // For now, all operands must be defined in the same BB.
+ if (any_of(Operands, [this](VPValue *Op) {
+ return cast<VPInstruction>(Op)->getParent() != &this->BB;
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
+ return false;
+ }
+
+ if (any_of(Operands,
+ [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
+ return false;
+ }
+
+ // For loads, check that there are no instructions writing to memory in
+ // between them.
+ // TODO: we only have to forbid instructions writing to memory that could
+ // interfere with any of the loads in the bundle
+ if (Opcode == Instruction::Load) {
+ unsigned LoadsSeen = 0;
+ VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
+ for (auto &I : *Parent) {
+ auto *VPI = cast<VPInstruction>(&I);
+ if (VPI->getOpcode() == Instruction::Load &&
+ std::find(Operands.begin(), Operands.end(), VPI) != Operands.end())
+ LoadsSeen++;
+
+ if (LoadsSeen == Operands.size())
+ break;
+ if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
+ LLVM_DEBUG(
+ dbgs() << "VPSLP: instruction modifying memory between loads\n");
+ return false;
+ }
+ }
+
+ if (!all_of(Operands, [](VPValue *Op) {
+ return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
+ ->isSimple();
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
+ return false;
+ }
+ }
+
+ if (Opcode == Instruction::Store)
+ if (!all_of(Operands, [](VPValue *Op) {
+ return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
+ ->isSimple();
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
+ return false;
+ }
+
+ return true;
+}
+
+static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
+ unsigned OperandIndex) {
+ SmallVector<VPValue *, 4> Operands;
+ for (VPValue *V : Values) {
+ auto *U = cast<VPUser>(V);
+ Operands.push_back(U->getOperand(OperandIndex));
+ }
+ return Operands;
+}
+
+static bool areCommutative(ArrayRef<VPValue *> Values) {
+ return Instruction::isCommutative(
+ cast<VPInstruction>(Values[0])->getOpcode());
+}
+
+static SmallVector<SmallVector<VPValue *, 4>, 4>
+getOperands(ArrayRef<VPValue *> Values) {
+ SmallVector<SmallVector<VPValue *, 4>, 4> Result;
+ auto *VPI = cast<VPInstruction>(Values[0]);
+
+ switch (VPI->getOpcode()) {
+ case Instruction::Load:
+ llvm_unreachable("Loads terminate a tree, no need to get operands");
+ case Instruction::Store:
+ Result.push_back(getOperands(Values, 0));
+ break;
+ default:
+ for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
+ Result.push_back(getOperands(Values, I));
+ break;
+ }
+
+ return Result;
+}
+
+/// Returns the common opcode of \p Values, or None if they do not all agree.
+static Optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) {
+ unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
+ if (any_of(Values, [Opcode](VPValue *V) {
+ return cast<VPInstruction>(V)->getOpcode() != Opcode;
+ }))
+ return None;
+ return {Opcode};
+}
+
+/// Returns true if \p A and \p B have matching opcodes and, in case they are
+/// loads or stores, access consecutive memory.
+static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,
+ VPInterleavedAccessInfo &IAI) {
+ if (A->getOpcode() != B->getOpcode())
+ return false;
+
+ if (A->getOpcode() != Instruction::Load &&
+ A->getOpcode() != Instruction::Store)
+ return true;
+ auto *GA = IAI.getInterleaveGroup(A);
+ auto *GB = IAI.getInterleaveGroup(B);
+
+ return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
+}
+
+/// Implements getLAScore from Listing 7 in the paper.
+/// Traverses and compares operands of V1 and V2 recursively, up to depth
+/// \p MaxLevel.
+static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
+ VPInterleavedAccessInfo &IAI) {
+ if (!isa<VPInstruction>(V1) || !isa<VPInstruction>(V2))
+ return 0;
+
+ if (MaxLevel == 0)
+ return (unsigned)areConsecutiveOrMatch(cast<VPInstruction>(V1),
+ cast<VPInstruction>(V2), IAI);
+
+ unsigned Score = 0;
+ for (unsigned I = 0, EV1 = cast<VPUser>(V1)->getNumOperands(); I < EV1; ++I)
+ for (unsigned J = 0, EV2 = cast<VPUser>(V2)->getNumOperands(); J < EV2; ++J)
+ Score += getLAScore(cast<VPUser>(V1)->getOperand(I),
+ cast<VPUser>(V2)->getOperand(J), MaxLevel - 1, IAI);
+ return Score;
+}
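+
+// Illustration: with MaxLevel == 1, getLAScore(V1, V2, 1, IAI) counts the
+// operand pairs (V1->getOperand(I), V2->getOperand(J)) that are consecutive
+// loads/stores or share an opcode; getBest probes increasing depths of this
+// score to pick the most promising candidate for the next lane.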
+
+std::pair<VPlanSlp::OpMode, VPValue *>
+VPlanSlp::getBest(OpMode Mode, VPValue *Last,
+ SmallVectorImpl<VPValue *> &Candidates,
+ VPInterleavedAccessInfo &IAI) {
+ LLVM_DEBUG(dbgs() << " getBest\n");
+ VPValue *Best = Candidates[0];
+ SmallVector<VPValue *, 4> BestCandidates;
+
+ LLVM_DEBUG(dbgs() << " Candidates for "
+ << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
+ for (auto *Candidate : Candidates) {
+ auto *LastI = cast<VPInstruction>(Last);
+ auto *CandidateI = cast<VPInstruction>(Candidate);
+ if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
+ LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
+ << " ");
+ BestCandidates.push_back(Candidate);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+
+ if (BestCandidates.empty())
+ return {OpMode::Failed, nullptr};
+
+ if (BestCandidates.size() == 1)
+ return {Mode, BestCandidates[0]};
+
+ if (Mode == OpMode::Opcode) {
+ unsigned BestScore = 0;
+ for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
+ unsigned PrevScore = ~0u;
+ bool AllSame = true;
+
+ // FIXME: Avoid visiting the same operands multiple times.
+ for (auto *Candidate : BestCandidates) {
+ unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
+ if (PrevScore == ~0u)
+ PrevScore = Score;
+ if (PrevScore != Score)
+ AllSame = false;
+ PrevScore = Score;
+
+ if (Score > BestScore) {
+ BestScore = Score;
+ Best = Candidate;
+ }
+ }
+ if (!AllSame)
+ break;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Found best "
+ << *cast<VPInstruction>(Best)->getUnderlyingInstr()
+ << "\n");
+  Candidates.erase(std::remove(Candidates.begin(), Candidates.end(), Best),
+                   Candidates.end());
+
+ return {Mode, Best};
+}
+
+SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
+ SmallVector<MultiNodeOpTy, 4> FinalOrder;
+ SmallVector<OpMode, 4> Mode;
+ FinalOrder.reserve(MultiNodeOps.size());
+ Mode.reserve(MultiNodeOps.size());
+
+ LLVM_DEBUG(dbgs() << "Reordering multinode\n");
+
+ for (auto &Operands : MultiNodeOps) {
+ FinalOrder.push_back({Operands.first, {Operands.second[0]}});
+ if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
+ Instruction::Load)
+ Mode.push_back(OpMode::Load);
+ else
+ Mode.push_back(OpMode::Opcode);
+ }
+
+ for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
+ LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");
+ SmallVector<VPValue *, 4> Candidates;
+ Candidates.reserve(MultiNodeOps.size());
+ LLVM_DEBUG(dbgs() << " Candidates ");
+ for (auto Ops : MultiNodeOps) {
+ LLVM_DEBUG(
+ dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
+ << " ");
+ Candidates.push_back(Ops.second[Lane]);
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+
+ for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
+ LLVM_DEBUG(dbgs() << " Checking " << Op << "\n");
+ if (Mode[Op] == OpMode::Failed)
+ continue;
+
+ VPValue *Last = FinalOrder[Op].second[Lane - 1];
+ std::pair<OpMode, VPValue *> Res =
+ getBest(Mode[Op], Last, Candidates, IAI);
+ if (Res.second)
+ FinalOrder[Op].second.push_back(Res.second);
+ else
+ // TODO: handle this case
+ FinalOrder[Op].second.push_back(markFailed());
+ }
+ }
+
+ return FinalOrder;
+}
+
+void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
+ LLVM_DEBUG(dbgs() << " Ops: ");
+ for (auto Op : Values)
+ if (auto *Instr = cast_or_null<VPInstruction>(Op)->getUnderlyingInstr())
+ LLVM_DEBUG(dbgs() << *Instr << " | ");
+ else
+ LLVM_DEBUG(dbgs() << " nullptr | ");
+ LLVM_DEBUG(dbgs() << "\n");
+}
+
+VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
+ assert(!Values.empty() && "Need some operands!");
+
+ // If we already visited this instruction bundle, re-use the existing node
+ auto I = BundleToCombined.find(to_vector<4>(Values));
+ if (I != BundleToCombined.end()) {
+#ifndef NDEBUG
+    // Check that the resulting graph is a tree. If we re-use a node, this
+    // means its values have multiple users. We only allow this if all users
+    // of each value are the same instruction.
+ for (auto *V : Values) {
+ auto UI = V->user_begin();
+ auto *FirstUser = *UI++;
+      while (UI != V->user_end()) {
+ assert(*UI == FirstUser && "Currently we only support SLP trees.");
+ UI++;
+ }
+ }
+#endif
+ return I->second;
+ }
+
+ // Dump inputs
+ LLVM_DEBUG({
+ dbgs() << "buildGraph: ";
+ dumpBundle(Values);
+ });
+
+ if (!areVectorizable(Values))
+ return markFailed();
+
+ assert(getOpcode(Values) && "Opcodes for all values must match");
+ unsigned ValuesOpcode = getOpcode(Values).getValue();
+
+ SmallVector<VPValue *, 4> CombinedOperands;
+ if (areCommutative(Values)) {
+ bool MultiNodeRoot = !MultiNodeActive;
+ MultiNodeActive = true;
+ for (auto &Operands : getOperands(Values)) {
+ LLVM_DEBUG({
+ dbgs() << " Visiting Commutative";
+ dumpBundle(Operands);
+ });
+
+ auto OperandsOpcode = getOpcode(Operands);
+ if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
+ LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");
+ CombinedOperands.push_back(buildGraph(Operands));
+ } else {
+ LLVM_DEBUG(dbgs() << " Adding multinode Ops\n");
+        // Create a dummy VPInstruction, which we will replace later with the
+        // re-ordered operand.
+ VPInstruction *Op = new VPInstruction(0, {});
+ CombinedOperands.push_back(Op);
+ MultiNodeOps.emplace_back(Op, Operands);
+ }
+ }
+
+ if (MultiNodeRoot) {
+ LLVM_DEBUG(dbgs() << "Reorder \n");
+ MultiNodeActive = false;
+
+ auto FinalOrder = reorderMultiNodeOps();
+
+ MultiNodeOps.clear();
+ for (auto &Ops : FinalOrder) {
+ VPInstruction *NewOp = buildGraph(Ops.second);
+ Ops.first->replaceAllUsesWith(NewOp);
+ for (unsigned i = 0; i < CombinedOperands.size(); i++)
+ if (CombinedOperands[i] == Ops.first)
+ CombinedOperands[i] = NewOp;
+ delete Ops.first;
+ Ops.first = NewOp;
+ }
+ LLVM_DEBUG(dbgs() << "Found final order\n");
+ }
+ } else {
+    LLVM_DEBUG(dbgs() << "  NonCommutative\n");
+ if (ValuesOpcode == Instruction::Load)
+ for (VPValue *V : Values)
+ CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
+ else
+ for (auto &Operands : getOperands(Values))
+ CombinedOperands.push_back(buildGraph(Operands));
+ }
+
+ unsigned Opcode;
+ switch (ValuesOpcode) {
+ case Instruction::Load:
+ Opcode = VPInstruction::SLPLoad;
+ break;
+ case Instruction::Store:
+ Opcode = VPInstruction::SLPStore;
+ break;
+ default:
+ Opcode = ValuesOpcode;
+ break;
+ }
+
+ if (!CompletelySLP)
+ return markFailed();
+
+  assert(!CombinedOperands.empty() && "Need some operands");
+ auto *VPI = new VPInstruction(Opcode, CombinedOperands);
+ VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr());
+
+ LLVM_DEBUG(dbgs() << "Create VPInstruction "; VPI->print(dbgs());
+ cast<VPInstruction>(Values[0])->print(dbgs()); dbgs() << "\n");
+ addCombined(Values, VPI);
+ return VPI;
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 972f0fa7da6..b473579b699 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -106,6 +106,20 @@ public:
const_user_range users() const {
return const_user_range(user_begin(), user_end());
}
+
+ /// Returns true if the value has more than one unique user.
+ bool hasMoreThanOneUniqueUser() {
+ if (getNumUsers() == 0)
+ return false;
+
+ // Check if all users match the first user.
+ auto Current = std::next(user_begin());
+ while (Current != user_end() && *user_begin() == *Current)
+ Current++;
+ return Current != user_end();
+ }
+
+ void replaceAllUsesWith(VPValue *New);
};
typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
@@ -151,6 +165,8 @@ public:
return Operands[N];
}
+ void setOperand(unsigned I, VPValue *New) { Operands[I] = New; }
+
typedef SmallVectorImpl<VPValue *>::iterator operand_iterator;
typedef SmallVectorImpl<VPValue *>::const_iterator const_operand_iterator;
typedef iterator_range<operand_iterator> operand_range;
diff --git a/llvm/unittests/Transforms/Vectorize/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
index 5a9142d17b1..6e886bbe25a 100644
--- a/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
@@ -10,4 +10,5 @@ add_llvm_unittest(VectorizeTests
VPlanLoopInfoTest.cpp
VPlanTest.cpp
VPlanHCFGTest.cpp
+ VPlanSlpTest.cpp
)
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
new file mode 100644
index 00000000000..0381925655e
--- /dev/null
+++ b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
@@ -0,0 +1,899 @@
+//===- llvm/unittest/Transforms/Vectorize/VPlanSlpTest.cpp ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../lib/Transforms/Vectorize/VPlan.h"
+#include "../lib/Transforms/Vectorize/VPlanHCFGBuilder.h"
+#include "../lib/Transforms/Vectorize/VPlanHCFGTransforms.h"
+#include "VPlanTestBase.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace {
+
+class VPlanSlpTest : public VPlanTestBase {
+protected:
+ TargetLibraryInfoImpl TLII;
+ TargetLibraryInfo TLI;
+ DataLayout DL;
+
+ std::unique_ptr<AssumptionCache> AC;
+ std::unique_ptr<ScalarEvolution> SE;
+ std::unique_ptr<AAResults> AARes;
+ std::unique_ptr<BasicAAResult> BasicAA;
+ std::unique_ptr<LoopAccessInfo> LAI;
+ std::unique_ptr<PredicatedScalarEvolution> PSE;
+ std::unique_ptr<InterleavedAccessInfo> IAI;
+
+ VPlanSlpTest()
+ : TLII(), TLI(TLII),
+ DL("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-"
+ "f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:"
+ "16:32:64-S128") {}
+
+ VPInterleavedAccessInfo getInterleavedAccessInfo(Function &F, Loop *L,
+ VPlan &Plan) {
+ AC.reset(new AssumptionCache(F));
+ SE.reset(new ScalarEvolution(F, TLI, *AC, *DT, *LI));
+ BasicAA.reset(new BasicAAResult(DL, F, TLI, *AC, &*DT, &*LI));
+ AARes.reset(new AAResults(TLI));
+ AARes->addAAResult(*BasicAA);
+ PSE.reset(new PredicatedScalarEvolution(*SE, *L));
+ LAI.reset(new LoopAccessInfo(L, &*SE, &TLI, &*AARes, &*DT, &*LI));
+ IAI.reset(new InterleavedAccessInfo(*PSE, L, &*DT, &*LI, &*LAI));
+ IAI->analyzeInterleaving(false);
+ return {Plan, *IAI};
+ }
+};
+
+TEST_F(VPlanSlpTest, testSlpSimple_2) {
+ const char *ModuleString =
+ "%struct.Test = type { i32, i32 }\n"
+ "%struct.Test3 = type { i32, i32, i32 }\n"
+ "%struct.Test4xi8 = type { i8, i8, i8 }\n"
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body: ; preds = %for.body, "
+ "%entry\n"
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vA0 = load i32, i32* %A0, align 4\n"
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vB0 = load i32, i32* %B0, align 4\n"
+ " %add0 = add nsw i32 %vA0, %vB0\n"
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vA1 = load i32, i32* %A1, align 4\n"
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vB1 = load i32, i32* %B1, align 4\n"
+ " %add1 = add nsw i32 %vA1, %vB1\n"
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 0\n"
+ " store i32 %add0, i32* %C0, align 4\n"
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 1\n"
+ " store i32 %add1, i32* %C1, align 4\n"
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
+ "for.cond.cleanup: ; preds = %for.body\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("add_x2");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildHCFG(LoopHeader);
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
+
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
+
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
+
+ VPlanSlp Slp(VPIAI, *Body);
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
+ VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);
+ EXPECT_EQ(64u, Slp.getWidestBundleBits());
+ EXPECT_EQ(VPInstruction::SLPStore, CombinedStore->getOpcode());
+
+ auto *CombinedAdd = cast<VPInstruction>(CombinedStore->getOperand(0));
+ EXPECT_EQ(Instruction::Add, CombinedAdd->getOpcode());
+
+ auto *CombinedLoadA = cast<VPInstruction>(CombinedAdd->getOperand(0));
+ auto *CombinedLoadB = cast<VPInstruction>(CombinedAdd->getOperand(1));
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());
+}
+
+TEST_F(VPlanSlpTest, testSlpSimple_3) {
+ const char *ModuleString =
+ "%struct.Test = type { i32, i32 }\n"
+ "%struct.Test3 = type { i32, i32, i32 }\n"
+ "%struct.Test4xi8 = type { i8, i8, i8 }\n"
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body: ; preds = %for.body, "
+ "%entry\n"
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
+ " %A0 = getelementptr %struct.Test, %struct.Test* %A, i64 "
+ " %indvars.iv, i32 0\n"
+ " %vA0 = load i32, i32* %A0, align 4\n"
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ " %indvars.iv, i32 0\n"
+ " %vB0 = load i32, i32* %B0, align 4\n"
+ " %add0 = add nsw i32 %vA0, %vB0\n"
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ " %indvars.iv, i32 1\n"
+ " %vA1 = load i32, i32* %A1, align 4\n"
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ " %indvars.iv, i32 1\n"
+ " %vB1 = load i32, i32* %B1, align 4\n"
+ " %add1 = add nsw i32 %vA1, %vB1\n"
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ " %indvars.iv, i32 0\n"
+ " store i32 %add0, i32* %C0, align 4\n"
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ " %indvars.iv, i32 1\n"
+ " store i32 %add1, i32* %C1, align 4\n"
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
+ "for.cond.cleanup: ; preds = %for.body\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("add_x2");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildHCFG(LoopHeader);
+
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
+
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
+
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
+
+ VPlanSlp Slp(VPIAI, *Body);
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
+ VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);
+ EXPECT_EQ(64u, Slp.getWidestBundleBits());
+ EXPECT_EQ(VPInstruction::SLPStore, CombinedStore->getOpcode());
+
+ auto *CombinedAdd = cast<VPInstruction>(CombinedStore->getOperand(0));
+ EXPECT_EQ(Instruction::Add, CombinedAdd->getOpcode());
+
+ auto *CombinedLoadA = cast<VPInstruction>(CombinedAdd->getOperand(0));
+ auto *CombinedLoadB = cast<VPInstruction>(CombinedAdd->getOperand(1));
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());
+
+ VPInstruction *GetA = cast<VPInstruction>(&*std::next(Body->begin(), 1));
+ VPInstruction *GetB = cast<VPInstruction>(&*std::next(Body->begin(), 3));
+ EXPECT_EQ(GetA, CombinedLoadA->getOperand(0));
+ EXPECT_EQ(GetB, CombinedLoadB->getOperand(0));
+}
+
+TEST_F(VPlanSlpTest, testSlpReuse_1) {
+ const char *ModuleString =
+ "%struct.Test = type { i32, i32 }\n"
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body: ; preds = %for.body, "
+ "%entry\n"
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vA0 = load i32, i32* %A0, align 4\n"
+ " %add0 = add nsw i32 %vA0, %vA0\n"
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vA1 = load i32, i32* %A1, align 4\n"
+ " %add1 = add nsw i32 %vA1, %vA1\n"
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 0\n"
+ " store i32 %add0, i32* %C0, align 4\n"
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 1\n"
+ " store i32 %add1, i32* %C1, align 4\n"
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
+ "for.cond.cleanup: ; preds = %for.body\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("add_x2");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildHCFG(LoopHeader);
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
+
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
+
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 8));
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 10));
+
+ VPlanSlp Slp(VPIAI, *Body);
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
+ VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);
+ EXPECT_EQ(64u, Slp.getWidestBundleBits());
+ EXPECT_EQ(VPInstruction::SLPStore, CombinedStore->getOpcode());
+
+ auto *CombinedAdd = cast<VPInstruction>(CombinedStore->getOperand(0));
+ EXPECT_EQ(Instruction::Add, CombinedAdd->getOpcode());
+
+ auto *CombinedLoadA = cast<VPInstruction>(CombinedAdd->getOperand(0));
+ EXPECT_EQ(CombinedLoadA, CombinedAdd->getOperand(1));
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
+}
+
+TEST_F(VPlanSlpTest, testSlpReuse_2) {
+ const char *ModuleString =
+ "%struct.Test = type { i32, i32 }\n"
+ "define i32 @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body: ; preds = %for.body, "
+ "%entry\n"
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vA0 = load i32, i32* %A0, align 4\n"
+ " %add0 = add nsw i32 %vA0, %vA0\n"
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 0\n"
+ " store i32 %add0, i32* %C0, align 4\n"
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vA1 = load i32, i32* %A1, align 4\n"
+ " %add1 = add nsw i32 %vA1, %vA1\n"
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 1\n"
+ " store i32 %add1, i32* %C1, align 4\n"
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
+ "for.cond.cleanup: ; preds = %for.body\n"
+ " ret i32 %vA1\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("add_x2");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildHCFG(LoopHeader);
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
+
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
+
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 5));
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 10));
+
+ VPlanSlp Slp(VPIAI, *Body);
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
+ Slp.buildGraph(StoreRoot);
+ EXPECT_FALSE(Slp.isCompletelySLP());
+}
+
+static void checkReorderExample(VPInstruction *Store1, VPInstruction *Store2,
+ VPBasicBlock *Body,
+ VPInterleavedAccessInfo &&IAI) {
+ VPlanSlp Slp(IAI, *Body);
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
+ VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);
+
+ EXPECT_TRUE(Slp.isCompletelySLP());
+ EXPECT_EQ(CombinedStore->getOpcode(), VPInstruction::SLPStore);
+
+ VPInstruction *CombinedAdd =
+ cast<VPInstruction>(CombinedStore->getOperand(0));
+ EXPECT_EQ(CombinedAdd->getOpcode(), Instruction::Add);
+
+ VPInstruction *CombinedMulAB =
+ cast<VPInstruction>(CombinedAdd->getOperand(0));
+ VPInstruction *CombinedMulCD =
+ cast<VPInstruction>(CombinedAdd->getOperand(1));
+ EXPECT_EQ(CombinedMulAB->getOpcode(), Instruction::Mul);
+
+ VPInstruction *CombinedLoadA =
+ cast<VPInstruction>(CombinedMulAB->getOperand(0));
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
+ VPInstruction *LoadvA0 = cast<VPInstruction>(&*std::next(Body->begin(), 2));
+ VPInstruction *LoadvA1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
+ EXPECT_EQ(LoadvA0->getOperand(0), CombinedLoadA->getOperand(0));
+ EXPECT_EQ(LoadvA1->getOperand(0), CombinedLoadA->getOperand(1));
+
+ VPInstruction *CombinedLoadB =
+ cast<VPInstruction>(CombinedMulAB->getOperand(1));
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());
+ VPInstruction *LoadvB0 = cast<VPInstruction>(&*std::next(Body->begin(), 4));
+ VPInstruction *LoadvB1 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
+ EXPECT_EQ(LoadvB0->getOperand(0), CombinedLoadB->getOperand(0));
+ EXPECT_EQ(LoadvB1->getOperand(0), CombinedLoadB->getOperand(1));
+
+ EXPECT_EQ(CombinedMulCD->getOpcode(), Instruction::Mul);
+
+ VPInstruction *CombinedLoadC =
+ cast<VPInstruction>(CombinedMulCD->getOperand(0));
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadC->getOpcode());
+ VPInstruction *LoadvC0 = cast<VPInstruction>(&*std::next(Body->begin(), 7));
+ VPInstruction *LoadvC1 = cast<VPInstruction>(&*std::next(Body->begin(), 17));
+ EXPECT_EQ(LoadvC0->getOperand(0), CombinedLoadC->getOperand(0));
+ EXPECT_EQ(LoadvC1->getOperand(0), CombinedLoadC->getOperand(1));
+
+ VPInstruction *CombinedLoadD =
+ cast<VPInstruction>(CombinedMulCD->getOperand(1));
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadD->getOpcode());
+ VPInstruction *LoadvD0 = cast<VPInstruction>(&*std::next(Body->begin(), 9));
+ VPInstruction *LoadvD1 = cast<VPInstruction>(&*std::next(Body->begin(), 19));
+ EXPECT_EQ(LoadvD0->getOperand(0), CombinedLoadD->getOperand(0));
+ EXPECT_EQ(LoadvD1->getOperand(0), CombinedLoadD->getOperand(1));
+}
+
+TEST_F(VPlanSlpTest, testSlpReorder_1) {
+ LLVMContext Ctx;
+ const char *ModuleString =
+ "%struct.Test = type { i32, i32 }\n"
+ "define void @add_x3(%struct.Test* %A, %struct.Test* %B, %struct.Test* "
+ "%C, %struct.Test* %D, %struct.Test* %E) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body: ; preds = %for.body, "
+ "%entry\n"
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vA0 = load i32, i32* %A0, align 4\n"
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vB0 = load i32, i32* %B0, align 4\n"
+ " %mul11 = mul nsw i32 %vA0, %vB0\n"
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vC0 = load i32, i32* %C0, align 4\n"
+ " %D0 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vD0 = load i32, i32* %D0, align 4\n"
+ " %mul12 = mul nsw i32 %vC0, %vD0\n"
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vA1 = load i32, i32* %A1, align 4\n"
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vB1 = load i32, i32* %B1, align 4\n"
+ " %mul21 = mul nsw i32 %vA1, %vB1\n"
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vC1 = load i32, i32* %C1, align 4\n"
+ " %D1 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vD1 = load i32, i32* %D1, align 4\n"
+ " %mul22 = mul nsw i32 %vC1, %vD1\n"
+ " %add1 = add nsw i32 %mul11, %mul12\n"
+ " %add2 = add nsw i32 %mul22, %mul21\n"
+ " %E0 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
+ "%indvars.iv, i32 0\n"
+ " store i32 %add1, i32* %E0, align 4\n"
+ " %E1 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
+ "%indvars.iv, i32 1\n"
+ " store i32 %add2, i32* %E1, align 4\n"
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
+ "for.cond.cleanup: ; preds = %for.body\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("add_x3");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildHCFG(LoopHeader);
+
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
+
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
+
+ checkReorderExample(
+ Store1, Store2, Body,
+ getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan));
+}
+
+TEST_F(VPlanSlpTest, testSlpReorder_2) {
+ LLVMContext Ctx;
+ const char *ModuleString =
+ "%struct.Test = type { i32, i32 }\n"
+ "define void @add_x3(%struct.Test* %A, %struct.Test* %B, %struct.Test* "
+ "%C, %struct.Test* %D, %struct.Test* %E) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body: ; preds = %for.body, "
+ "%entry\n"
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vA0 = load i32, i32* %A0, align 4\n"
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vB0 = load i32, i32* %B0, align 4\n"
+ " %mul11 = mul nsw i32 %vA0, %vB0\n"
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vC0 = load i32, i32* %C0, align 4\n"
+ " %D0 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vD0 = load i32, i32* %D0, align 4\n"
+ " %mul12 = mul nsw i32 %vC0, %vD0\n"
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vA1 = load i32, i32* %A1, align 4\n"
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vB1 = load i32, i32* %B1, align 4\n"
+ " %mul21 = mul nsw i32 %vB1, %vA1\n"
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vC1 = load i32, i32* %C1, align 4\n"
+ " %D1 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vD1 = load i32, i32* %D1, align 4\n"
+ " %mul22 = mul nsw i32 %vD1, %vC1\n"
+ " %add1 = add nsw i32 %mul11, %mul12\n"
+ " %add2 = add nsw i32 %mul22, %mul21\n"
+ " %E0 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
+ "%indvars.iv, i32 0\n"
+ " store i32 %add1, i32* %E0, align 4\n"
+ " %E1 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
+ "%indvars.iv, i32 1\n"
+ " store i32 %add2, i32* %E1, align 4\n"
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
+ "for.cond.cleanup: ; preds = %for.body\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("add_x3");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildHCFG(LoopHeader);
+
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
+
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
+
+ checkReorderExample(
+ Store1, Store2, Body,
+ getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan));
+}
+
+TEST_F(VPlanSlpTest, testSlpReorder_3) {
+ LLVMContext Ctx;
+ const char *ModuleString =
+ "%struct.Test = type { i32, i32 }\n"
+ "define void @add_x3(%struct.Test* %A, %struct.Test* %B, %struct.Test* "
+ "%C, %struct.Test* %D, %struct.Test* %E) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body: ; preds = %for.body, "
+ "%entry\n"
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vA1 = load i32, i32* %A1, align 4\n"
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vB0 = load i32, i32* %B0, align 4\n"
+ " %mul11 = mul nsw i32 %vA1, %vB0\n"
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vC0 = load i32, i32* %C0, align 4\n"
+ " %D0 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vD0 = load i32, i32* %D0, align 4\n"
+ " %mul12 = mul nsw i32 %vC0, %vD0\n"
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vA0 = load i32, i32* %A0, align 4\n"
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vB1 = load i32, i32* %B1, align 4\n"
+ " %mul21 = mul nsw i32 %vB1, %vA0\n"
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vC1 = load i32, i32* %C1, align 4\n"
+ " %D1 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vD1 = load i32, i32* %D1, align 4\n"
+ " %mul22 = mul nsw i32 %vD1, %vC1\n"
+ " %add1 = add nsw i32 %mul11, %mul12\n"
+ " %add2 = add nsw i32 %mul22, %mul21\n"
+ " %E0 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
+ "%indvars.iv, i32 0\n"
+ " store i32 %add1, i32* %E0, align 4\n"
+ " %E1 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
+ "%indvars.iv, i32 1\n"
+ " store i32 %add2, i32* %E1, align 4\n"
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
+ "for.cond.cleanup: ; preds = %for.body\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("add_x3");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildHCFG(LoopHeader);
+
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
+
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
+
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
+ VPlanSlp Slp(VPIAI, *Body);
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
+ EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));
+
+  // FIXME: Need to select a better first value for lane 0.
+ EXPECT_FALSE(Slp.isCompletelySLP());
+}
+
+TEST_F(VPlanSlpTest, testSlpReorder_4) {
+ LLVMContext Ctx;
+ const char *ModuleString =
+ "%struct.Test = type { i32, i32 }\n"
+ "define void @add_x3(%struct.Test* %A, %struct.Test* %B, %struct.Test* "
+ "%C, %struct.Test* %D, %struct.Test* %E) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body: ; preds = %for.body, "
+ "%entry\n"
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vA0 = load i32, i32* %A0, align 4\n"
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vB0 = load i32, i32* %B0, align 4\n"
+ " %mul11 = mul nsw i32 %vA0, %vB0\n"
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vC0 = load i32, i32* %C0, align 4\n"
+ " %D0 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vD0 = load i32, i32* %D0, align 4\n"
+ " %mul12 = mul nsw i32 %vC0, %vD0\n"
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vA1 = load i32, i32* %A1, align 4\n"
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vB1 = load i32, i32* %B1, align 4\n"
+ " %mul21 = mul nsw i32 %vA1, %vB1\n"
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vC1 = load i32, i32* %C1, align 4\n"
+ " %D1 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vD1 = load i32, i32* %D1, align 4\n"
+ " %mul22 = mul nsw i32 %vC1, %vD1\n"
+ " %add1 = add nsw i32 %mul11, %mul12\n"
+ " %add2 = add nsw i32 %mul22, %mul21\n"
+ " %E0 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
+ "%indvars.iv, i32 0\n"
+ " store i32 %add1, i32* %E0, align 4\n"
+ " %E1 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
+ "%indvars.iv, i32 1\n"
+ " store i32 %add2, i32* %E1, align 4\n"
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
+ "for.cond.cleanup: ; preds = %for.body\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("add_x3");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildHCFG(LoopHeader);
+
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
+
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
+
+ checkReorderExample(
+ Store1, Store2, Body,
+ getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan));
+}
+
+// Make sure we do not combine instructions with operands in different BBs.
+TEST_F(VPlanSlpTest, testInstrsInDifferentBBs) {
+ const char *ModuleString =
+ "%struct.Test = type { i32, i32 }\n"
+ "%struct.Test3 = type { i32, i32, i32 }\n"
+ "%struct.Test4xi8 = type { i8, i8, i8 }\n"
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body: ; preds = %for.body, "
+ "%entry\n"
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vA0 = load i32, i32* %A0, align 4\n"
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vB0 = load i32, i32* %B0, align 4\n"
+ " %add0 = add nsw i32 %vA0, %vB0\n"
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vA1 = load i32, i32* %A1, align 4\n"
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 1\n"
+ " br label %bb2\n"
+ "bb2:\n"
+ " %vB1 = load i32, i32* %B1, align 4\n"
+ " %add1 = add nsw i32 %vA1, %vB1\n"
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 0\n"
+ " store i32 %add0, i32* %C0, align 4\n"
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 1\n"
+ " store i32 %add1, i32* %C1, align 4\n"
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
+ "for.cond.cleanup: ; preds = %for.body\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("add_x2");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildHCFG(LoopHeader);
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
+
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
+ VPBasicBlock *BB2 = Body->getSingleSuccessor()->getEntryBasicBlock();
+
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(BB2->begin(), 3));
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(BB2->begin(), 5));
+
+ VPlanSlp Slp(VPIAI, *BB2);
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
+ EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));
+ EXPECT_EQ(0u, Slp.getWidestBundleBits());
+}
+
+// Make sure we do not combine instructions with operands in different BBs.
+TEST_F(VPlanSlpTest, testInstrsInDifferentBBs2) {
+ const char *ModuleString =
+ "%struct.Test = type { i32, i32 }\n"
+ "%struct.Test3 = type { i32, i32, i32 }\n"
+ "%struct.Test4xi8 = type { i8, i8, i8 }\n"
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body: ; preds = %for.body, "
+ "%entry\n"
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vA0 = load i32, i32* %A0, align 4\n"
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vB0 = load i32, i32* %B0, align 4\n"
+ " %add0 = add nsw i32 %vA0, %vB0\n"
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vA1 = load i32, i32* %A1, align 4\n"
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vB1 = load i32, i32* %B1, align 4\n"
+ " %add1 = add nsw i32 %vA1, %vB1\n"
+ " br label %bb2\n"
+ "bb2:\n"
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 0\n"
+ " store i32 %add0, i32* %C0, align 4\n"
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 1\n"
+ " store i32 %add1, i32* %C1, align 4\n"
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
+ "for.cond.cleanup: ; preds = %for.body\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("add_x2");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildHCFG(LoopHeader);
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
+
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
+ VPBasicBlock *BB2 = Body->getSingleSuccessor()->getEntryBasicBlock();
+
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(BB2->begin(), 1));
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(BB2->begin(), 3));
+
+ VPlanSlp Slp(VPIAI, *BB2);
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
+ EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));
+ EXPECT_EQ(0u, Slp.getWidestBundleBits());
+}
+
+TEST_F(VPlanSlpTest, testSlpAtomicLoad) {
+ const char *ModuleString =
+ "%struct.Test = type { i32, i32 }\n"
+ "%struct.Test3 = type { i32, i32, i32 }\n"
+ "%struct.Test4xi8 = type { i8, i8, i8 }\n"
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body: ; preds = %for.body, "
+ "%entry\n"
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vA0 = load atomic i32, i32* %A0 monotonic, align 4\n"
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vB0 = load i32, i32* %B0, align 4\n"
+ " %add0 = add nsw i32 %vA0, %vB0\n"
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vA1 = load i32, i32* %A1, align 4\n"
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vB1 = load i32, i32* %B1, align 4\n"
+ " %add1 = add nsw i32 %vA1, %vB1\n"
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 0\n"
+ " store i32 %add0, i32* %C0, align 4\n"
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 1\n"
+ " store i32 %add1, i32* %C1, align 4\n"
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
+ "for.cond.cleanup: ; preds = %for.body\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("add_x2");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildHCFG(LoopHeader);
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
+
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
+
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
+
+ VPlanSlp Slp(VPIAI, *Body);
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
+ EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));
+ EXPECT_FALSE(Slp.isCompletelySLP());
+}
+
+TEST_F(VPlanSlpTest, testSlpAtomicStore) {
+ const char *ModuleString =
+ "%struct.Test = type { i32, i32 }\n"
+ "%struct.Test3 = type { i32, i32, i32 }\n"
+ "%struct.Test4xi8 = type { i8, i8, i8 }\n"
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body: ; preds = %for.body, "
+ "%entry\n"
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vA0 = load i32, i32* %A0, align 4\n"
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 0\n"
+ " %vB0 = load i32, i32* %B0, align 4\n"
+ " %add0 = add nsw i32 %vA0, %vB0\n"
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vA1 = load i32, i32* %A1, align 4\n"
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
+ "%indvars.iv, i32 1\n"
+ " %vB1 = load i32, i32* %B1, align 4\n"
+ " %add1 = add nsw i32 %vA1, %vB1\n"
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 0\n"
+ " store atomic i32 %add0, i32* %C0 monotonic, align 4\n"
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
+ "%indvars.iv, i32 1\n"
+ " store i32 %add1, i32* %C1, align 4\n"
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
+ "for.cond.cleanup: ; preds = %for.body\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("add_x2");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildHCFG(LoopHeader);
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
+
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
+
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
+
+ VPlanSlp Slp(VPIAI, *Body);
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
+ Slp.buildGraph(StoreRoot);
+ EXPECT_FALSE(Slp.isCompletelySLP());
+}
+
+} // namespace
+} // namespace llvm