summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp343
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp74
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h143
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanBuilder.h61
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanValue.h146
5 files changed, 641 insertions, 126 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0f1a7e0fb14..0bf80e52d94 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -48,6 +48,7 @@
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "VPlan.h"
+#include "VPlanBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -450,15 +451,6 @@ public:
/// new unrolled loop, where UF is the unroll factor.
using VectorParts = SmallVector<Value *, 2>;
- /// A helper function that computes the predicate of the block BB, assuming
- /// that the header block of the loop is set to True. It returns the *entry*
- /// mask for the block BB.
- VectorParts createBlockInMask(BasicBlock *BB);
-
- /// A helper function that computes the predicate of the edge between SRC
- /// and DST.
- VectorParts createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
-
/// Vectorize a single PHINode in a block. This method handles the induction
/// variable canonicalization. It supports both VF = 1 for unrolled loops and
/// arbitrary length vectors.
@@ -511,8 +503,10 @@ public:
/// Try to vectorize the interleaved access group that \p Instr belongs to.
void vectorizeInterleaveGroup(Instruction *Instr);
- /// Vectorize Load and Store instructions,
- virtual void vectorizeMemoryInstruction(Instruction *Instr);
+ /// Vectorize Load and Store instructions, optionally masking the vector
+ /// operations if \p BlockInMask is non-null.
+ void vectorizeMemoryInstruction(Instruction *Instr,
+ VectorParts *BlockInMask = nullptr);
/// \brief Set the debug location in the builder using the debug location in
/// the instruction.
@@ -530,12 +524,6 @@ protected:
/// vectorization factor.
using ScalarParts = SmallVector<SmallVector<Value *, 4>, 2>;
- // When we if-convert we need to create edge masks. We have to cache values
- // so that we don't end up with exponential recursion/IR.
- using EdgeMaskCacheTy =
- DenseMap<std::pair<BasicBlock *, BasicBlock *>, VectorParts>;
- using BlockMaskCacheTy = DenseMap<BasicBlock *, VectorParts>;
-
/// Set up the values of the IVs correctly when exiting the vector loop.
void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
Value *CountRoundDown, Value *EndValue,
@@ -739,9 +727,6 @@ protected:
/// Store instructions that were predicated.
SmallVector<Instruction *, 4> PredicatedInstructions;
- EdgeMaskCacheTy EdgeMaskCache;
- BlockMaskCacheTy BlockMaskCache;
-
/// Trip count of the original loop.
Value *TripCount = nullptr;
@@ -2231,7 +2216,34 @@ class LoopVectorizationPlanner {
/// The profitablity analysis.
LoopVectorizationCostModel &CM;
- SmallVector<std::unique_ptr<VPlan>, 4> VPlans;
+ using VPlanPtr = std::unique_ptr<VPlan>;
+
+ SmallVector<VPlanPtr, 4> VPlans;
+
+ /// This class is used to enable the VPlan to invoke a method of ILV. This is
+ /// needed until the method is refactored out of ILV and becomes reusable.
+ struct VPCallbackILV : public VPCallback {
+ InnerLoopVectorizer &ILV;
+
+ VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {}
+
+ Value *getOrCreateVectorValues(Value *V, unsigned Part) override {
+ return ILV.getOrCreateVectorValue(V, Part);
+ }
+ };
+
+ /// A builder used to construct the current plan.
+ VPBuilder Builder;
+
+ /// When we if-convert we need to create edge masks. We have to cache values
+ /// so that we don't end up with exponential recursion/IR. Note that
+ /// if-conversion currently takes place during VPlan-construction, so these
+ /// caches are only used at that stage.
+ using EdgeMaskCacheTy =
+ DenseMap<std::pair<BasicBlock *, BasicBlock *>, VPValue *>;
+ using BlockMaskCacheTy = DenseMap<BasicBlock *, VPValue *>;
+ EdgeMaskCacheTy EdgeMaskCache;
+ BlockMaskCacheTy BlockMaskCache;
unsigned BestVF = 0;
unsigned BestUF = 0;
@@ -2288,6 +2300,15 @@ protected:
void buildVPlans(unsigned MinVF, unsigned MaxVF);
private:
+ /// A helper function that computes the predicate of the block BB, assuming
+ /// that the header block of the loop is set to True. It returns the *entry*
+ /// mask for the block BB.
+ VPValue *createBlockInMask(BasicBlock *BB, VPlanPtr &Plan);
+
+ /// A helper function that computes the predicate of the edge between SRC
+ /// and DST.
+ VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan);
+
/// Check if \I belongs to an Interleave Group within the given VF \p Range,
/// \return true in the first returned value if so and false otherwise.
/// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG
@@ -2299,9 +2320,11 @@ private:
VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range);
// Check if \I is a memory instruction to be widened for \p Range.Start and
- // potentially masked.
+ // potentially masked. Such instructions are handled by a recipe that takes an
+ // additional VPInstruction for the mask.
VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
- VFRange &Range);
+ VFRange &Range,
+ VPlanPtr &Plan);
/// Check if an induction recipe should be constructed for \I within the given
/// VF \p Range. If so build and return it. If not, return null. \p Range.End
@@ -2313,7 +2336,7 @@ private:
/// Handle non-loop phi nodes. Currently all such phi nodes are turned into
/// a sequence of select instructions as the vectorizer currently performs
/// full if-conversion.
- VPBlendRecipe *tryToBlend(Instruction *I);
+ VPBlendRecipe *tryToBlend(Instruction *I, VPlanPtr &Plan);
/// Check if \p I can be widened within the given VF \p Range. If \p I can be
/// widened for \p Range.Start, check if the last recipe of \p VPBB can be
@@ -2331,17 +2354,19 @@ private:
/// \p Range.Start to \p Range.End.
VPBasicBlock *handleReplication(
Instruction *I, VFRange &Range, VPBasicBlock *VPBB,
- DenseMap<Instruction *, VPReplicateRecipe *> &PredInst2Recipe);
+ DenseMap<Instruction *, VPReplicateRecipe *> &PredInst2Recipe,
+ VPlanPtr &Plan);
/// Create a replicating region for instruction \p I that requires
/// predication. \p PredRecipe is a VPReplicateRecipe holding \p I.
- VPRegionBlock *createReplicateRegion(Instruction *I,
- VPRecipeBase *PredRecipe);
+ VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe,
+ VPlanPtr &Plan);
/// Build a VPlan according to the information gathered by Legal. \return a
/// VPlan for vectorization factors \p Range.Start and up to \p Range.End
/// exclusive, possibly decreasing \p Range.End.
- std::unique_ptr<VPlan> buildVPlan(VFRange &Range);
+ VPlanPtr buildVPlan(VFRange &Range,
+ const SmallPtrSetImpl<Value *> &NeedDef);
};
} // end namespace llvm
@@ -3091,7 +3116,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
}
}
-void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
+void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
+ VectorParts *BlockInMask) {
// Attempt to issue a wide load.
LoadInst *LI = dyn_cast<LoadInst>(Instr);
StoreInst *SI = dyn_cast<StoreInst>(Instr);
@@ -3132,7 +3158,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
if (ConsecutiveStride)
Ptr = getOrCreateScalarValue(Ptr, {0, 0});
- VectorParts Mask = createBlockInMask(Instr->getParent());
+ VectorParts Mask;
+ bool isMaskRequired = BlockInMask;
+ if (isMaskRequired)
+ Mask = *BlockInMask;
+
// Handle Stores:
if (SI) {
assert(!Legal->isUniform(SI->getPointerOperand()) &&
@@ -3143,7 +3173,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
Instruction *NewSI = nullptr;
Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part);
if (CreateGatherScatter) {
- Value *MaskPart = Legal->isMaskRequired(SI) ? Mask[Part] : nullptr;
+ Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
MaskPart);
@@ -3165,14 +3195,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
PartPtr =
Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
- if (Mask[Part]) // The reverse of a null all-one mask is a null mask.
+ if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
Mask[Part] = reverseVector(Mask[Part]);
}
Value *VecPtr =
Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
- if (Legal->isMaskRequired(SI) && Mask[Part])
+ if (isMaskRequired)
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
Mask[Part]);
else
@@ -3189,7 +3219,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
for (unsigned Part = 0; Part < UF; ++Part) {
Value *NewLI;
if (CreateGatherScatter) {
- Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr;
+ Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
NewLI = Builder.CreateMaskedGather(VectorGep, Alignment, MaskPart,
nullptr, "wide.masked.gather");
@@ -3204,13 +3234,13 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// wide load needs to start at the last vector element.
PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
- if (Mask[Part]) // The reverse of a null all-one mask is a null mask.
+ if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
Mask[Part] = reverseVector(Mask[Part]);
}
Value *VecPtr =
Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
- if (Legal->isMaskRequired(LI) && Mask[Part])
+ if (isMaskRequired)
NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment, Mask[Part],
UndefValue::get(DataTy),
"wide.masked.load");
@@ -4512,6 +4542,9 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
unsigned VF) {
+ assert(PN->getParent() == OrigLoop->getHeader() &&
+ "Non-header phis should have been handled elsewhere");
+
PHINode *P = cast<PHINode>(PN);
// In order to support recurrences we need to be able to vectorize Phi nodes.
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
@@ -7509,7 +7542,7 @@ void LoopVectorizationPlanner::setBestPlan(unsigned VF, unsigned UF) {
BestVF = VF;
BestUF = UF;
- erase_if(VPlans, [VF](const std::unique_ptr<VPlan> &Plan) {
+ erase_if(VPlans, [VF](const VPlanPtr &Plan) {
return !Plan->hasVF(VF);
});
assert(VPlans.size() == 1 && "Best VF has not a single VPlan.");
@@ -7520,8 +7553,11 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
// Perform the actual loop transformation.
// 1. Create a new empty loop. Unlink the old loop and connect the new one.
- VPTransformState State{
- BestVF, BestUF, LI, DT, ILV.Builder, ILV.VectorLoopValueMap, &ILV};
+ VPCallbackILV CallbackILV(ILV);
+
+ VPTransformState State{BestVF, BestUF, LI,
+ DT, ILV.Builder, ILV.VectorLoopValueMap,
+ &ILV, CallbackILV};
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
//===------------------------------------------------===//
@@ -7734,8 +7770,18 @@ class VPBlendRecipe : public VPRecipeBase {
private:
PHINode *Phi;
+ /// The blend operation is a User of a mask, if not null.
+ std::unique_ptr<VPUser> User;
+
public:
- VPBlendRecipe(PHINode *Phi) : VPRecipeBase(VPBlendSC), Phi(Phi) {}
+ VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Masks)
+ : VPRecipeBase(VPBlendSC), Phi(Phi) {
+ assert((Phi->getNumIncomingValues() == 1 ||
+ Phi->getNumIncomingValues() == Masks.size()) &&
+ "Expected the same number of incoming values and masks");
+ if (!Masks.empty())
+ User.reset(new VPUser(Masks));
+ }
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *V) {
@@ -7754,27 +7800,28 @@ public:
unsigned NumIncoming = Phi->getNumIncomingValues();
+ assert((User || NumIncoming == 1) &&
+ "Multiple predecessors with predecessors having a full mask");
// Generate a sequence of selects of the form:
// SELECT(Mask3, In3,
// SELECT(Mask2, In2,
// ( ...)))
InnerLoopVectorizer::VectorParts Entry(State.UF);
- for (unsigned In = 0; In < NumIncoming; In++) {
- InnerLoopVectorizer::VectorParts Cond =
- State.ILV->createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent());
-
+ for (unsigned In = 0; In < NumIncoming; ++In) {
for (unsigned Part = 0; Part < State.UF; ++Part) {
+ // We might have single edge PHIs (blocks) - use an identity
+ // 'select' for the first PHI operand.
Value *In0 =
- State.ILV->getOrCreateVectorValue(Phi->getIncomingValue(In), Part);
- assert((Cond[Part] || NumIncoming == 1) &&
- "Multiple predecessors with one predecessor having a full mask");
+ State.ILV->getOrCreateVectorValue(Phi->getIncomingValue(In), Part);
if (In == 0)
Entry[Part] = In0; // Initialize with the first incoming value.
- else
+ else {
// Select between the current value and the previous incoming edge
// based on the incoming mask.
- Entry[Part] = State.Builder.CreateSelect(Cond[Part], In0, Entry[Part],
- "predphi");
+ Value *Cond = State.get(User->getOperand(In), Part);
+ Entry[Part] =
+ State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
+ }
}
}
for (unsigned Part = 0; Part < State.UF; ++Part)
@@ -7786,21 +7833,20 @@ public:
O << " +\n" << Indent << "\"BLEND ";
Phi->printAsOperand(O, false);
O << " =";
- if (Phi->getNumIncomingValues() == 1) {
+ if (!User) {
// Not a User of any mask: not really blending, this is a
// single-predecessor phi.
O << " ";
Phi->getIncomingValue(0)->printAsOperand(O, false);
} else {
- for (unsigned I = 0, E = Phi->getNumIncomingValues(); I < E; ++I) {
+ for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
O << " ";
Phi->getIncomingValue(I)->printAsOperand(O, false);
O << "/";
- Phi->getIncomingBlock(I)->printAsOperand(O, false);
+ User->getOperand(I)->printAsOperand(O);
}
}
O << "\\l\"";
-
}
};
@@ -7890,13 +7936,13 @@ public:
/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {
private:
- /// The input IR basic block used to obtain the mask providing the condition
- /// bits for the branch.
- BasicBlock *MaskedBasicBlock;
+ std::unique_ptr<VPUser> User;
public:
- VPBranchOnMaskRecipe(BasicBlock *BB)
- : VPRecipeBase(VPBranchOnMaskSC), MaskedBasicBlock(BB) {}
+ VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) {
+ if (BlockInMask) // nullptr means all-one mask.
+ User.reset(new VPUser({BlockInMask}));
+ }
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *V) {
@@ -7909,9 +7955,12 @@ public:
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent) const override {
- O << " +\n"
- << Indent << "\"BRANCH-ON-MASK-OF " << MaskedBasicBlock->getName()
- << "\\l\"";
+ O << " +\n" << Indent << "\"BRANCH-ON-MASK ";
+ if (User)
+ O << *User->getOperand(0);
+ else
+ O << " All-One";
+ O << "\\l\"";
}
};
@@ -7948,13 +7997,19 @@ public:
};
/// A Recipe for widening load/store operations.
+/// TODO: We currently execute only per-part unless a specific instance is
+/// provided.
class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
private:
Instruction &Instr;
+ std::unique_ptr<VPUser> User;
public:
- VPWidenMemoryInstructionRecipe(Instruction &Instr)
- : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr) {}
+ VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Mask)
+ : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr) {
+ if (Mask) // Create a VPInstruction to register as a user of the mask.
+ User.reset(new VPUser({Mask}));
+ }
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *V) {
@@ -7963,12 +8018,24 @@ public:
/// Generate the wide load/store.
void execute(VPTransformState &State) override {
- State.ILV->vectorizeMemoryInstruction(&Instr);
+ if (!User)
+ return State.ILV->vectorizeMemoryInstruction(&Instr);
+
+ // Last (and currently only) operand is a mask.
+ InnerLoopVectorizer::VectorParts MaskValues(State.UF);
+ VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ MaskValues[Part] = State.get(Mask, Part);
+ State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
}
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent) const override {
O << " +\n" << Indent << "\"WIDEN " << VPlanIngredient(&Instr);
+ if (User) {
+ O << ", ";
+ User->getOperand(0)->printAsOperand(O);
+ }
O << "\\l\"";
}
};
@@ -7994,15 +8061,30 @@ bool LoopVectorizationPlanner::getDecisionAndClampRange(
/// vectorization decision can potentially shorten this sub-range during
/// buildVPlan().
void LoopVectorizationPlanner::buildVPlans(unsigned MinVF, unsigned MaxVF) {
+
+ // Collect conditions feeding internal conditional branches; they need to be
+ // represented in VPlan for it to model masking.
+ SmallPtrSet<Value *, 1> NeedDef;
+
+ auto *Latch = OrigLoop->getLoopLatch();
+ for (BasicBlock *BB : OrigLoop->blocks()) {
+ if (BB == Latch)
+ continue;
+ BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+ if (Branch && Branch->isConditional())
+ NeedDef.insert(Branch->getCondition());
+ }
+
for (unsigned VF = MinVF; VF < MaxVF + 1;) {
VFRange SubRange = {VF, MaxVF + 1};
- VPlans.push_back(buildVPlan(SubRange));
+ VPlans.push_back(buildVPlan(SubRange, NeedDef));
VF = SubRange.End;
}
}
-InnerLoopVectorizer::VectorParts
-InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
+VPValue *LoopVectorizationPlanner::createEdgeMask(BasicBlock *Src,
+ BasicBlock *Dst,
+ VPlanPtr &Plan) {
assert(is_contained(predecessors(Dst), Src) && "Invalid edge");
// Look for cached value.
@@ -8011,7 +8093,7 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
if (ECEntryIt != EdgeMaskCache.end())
return ECEntryIt->second;
- VectorParts SrcMask = createBlockInMask(Src);
+ VPValue *SrcMask = createBlockInMask(Src, Plan);
// The terminator has to be a branch inst!
BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
@@ -8020,23 +8102,20 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
if (!BI->isConditional())
return EdgeMaskCache[Edge] = SrcMask;
- VectorParts EdgeMask(UF);
- for (unsigned Part = 0; Part < UF; ++Part) {
- auto *EdgeMaskPart = getOrCreateVectorValue(BI->getCondition(), Part);
- if (BI->getSuccessor(0) != Dst)
- EdgeMaskPart = Builder.CreateNot(EdgeMaskPart);
+ VPValue *EdgeMask = Plan->getVPValue(BI->getCondition());
+ assert(EdgeMask && "No Edge Mask found for condition");
- if (SrcMask[Part]) // Otherwise block in-mask is all-one, no need to AND.
- EdgeMaskPart = Builder.CreateAnd(EdgeMaskPart, SrcMask[Part]);
+ if (BI->getSuccessor(0) != Dst)
+ EdgeMask = Builder.createNot(EdgeMask);
- EdgeMask[Part] = EdgeMaskPart;
- }
+ if (SrcMask) // Otherwise block in-mask is all-one, no need to AND.
+ EdgeMask = Builder.createAnd(EdgeMask, SrcMask);
return EdgeMaskCache[Edge] = EdgeMask;
}
-InnerLoopVectorizer::VectorParts
-InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
+VPValue *LoopVectorizationPlanner::createBlockInMask(BasicBlock *BB,
+ VPlanPtr &Plan) {
assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
// Look for cached value.
@@ -8046,9 +8125,7 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
// All-one mask is modelled as no-mask following the convention for masked
// load/store/gather/scatter. Initialize BlockMask to no-mask.
- VectorParts BlockMask(UF);
- for (unsigned Part = 0; Part < UF; ++Part)
- BlockMask[Part] = nullptr;
+ VPValue *BlockMask = nullptr;
// Loop incoming mask is all-one.
if (OrigLoop->getHeader() == BB)
@@ -8056,17 +8133,16 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
// This is the block mask. We OR all incoming edges.
for (auto *Predecessor : predecessors(BB)) {
- VectorParts EdgeMask = createEdgeMask(Predecessor, BB);
- if (!EdgeMask[0]) // Mask of predecessor is all-one so mask of block is too.
+ VPValue *EdgeMask = createEdgeMask(Predecessor, BB, Plan);
+ if (!EdgeMask) // Mask of predecessor is all-one so mask of block is too.
return BlockMaskCache[BB] = EdgeMask;
- if (!BlockMask[0]) { // BlockMask has its initialized nullptr value.
+ if (!BlockMask) { // BlockMask has its initialized nullptr value.
BlockMask = EdgeMask;
continue;
}
- for (unsigned Part = 0; Part < UF; ++Part)
- BlockMask[Part] = Builder.CreateOr(BlockMask[Part], EdgeMask[Part]);
+ BlockMask = Builder.createOr(BlockMask, EdgeMask);
}
return BlockMaskCache[BB] = BlockMask;
@@ -8100,7 +8176,8 @@ LoopVectorizationPlanner::tryToInterleaveMemory(Instruction *I,
}
VPWidenMemoryInstructionRecipe *
-LoopVectorizationPlanner::tryToWidenMemory(Instruction *I, VFRange &Range) {
+LoopVectorizationPlanner::tryToWidenMemory(Instruction *I, VFRange &Range,
+ VPlanPtr &Plan) {
if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
return nullptr;
@@ -8122,7 +8199,11 @@ LoopVectorizationPlanner::tryToWidenMemory(Instruction *I, VFRange &Range) {
if (!getDecisionAndClampRange(willWiden, Range))
return nullptr;
- return new VPWidenMemoryInstructionRecipe(*I);
+ VPValue *Mask = nullptr;
+ if (Legal->isMaskRequired(I))
+ Mask = createBlockInMask(I->getParent(), Plan);
+
+ return new VPWidenMemoryInstructionRecipe(*I, Mask);
}
VPWidenIntOrFpInductionRecipe *
@@ -8159,12 +8240,29 @@ LoopVectorizationPlanner::tryToOptimizeInduction(Instruction *I,
return nullptr;
}
-VPBlendRecipe *LoopVectorizationPlanner::tryToBlend(Instruction *I) {
+VPBlendRecipe *
+LoopVectorizationPlanner::tryToBlend(Instruction *I, VPlanPtr &Plan) {
PHINode *Phi = dyn_cast<PHINode>(I);
if (!Phi || Phi->getParent() == OrigLoop->getHeader())
return nullptr;
- return new VPBlendRecipe(Phi);
+ // We know that all PHIs in non-header blocks are converted into selects, so
+ // we don't have to worry about the insertion order and we can just use the
+ // builder. At this point we generate the predication tree. There may be
+ // duplications since this is a simple recursive scan, but future
+ // optimizations will clean it up.
+
+ SmallVector<VPValue *, 2> Masks;
+ unsigned NumIncoming = Phi->getNumIncomingValues();
+ for (unsigned In = 0; In < NumIncoming; In++) {
+ VPValue *EdgeMask =
+ createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), Plan);
+ assert((EdgeMask || NumIncoming == 1) &&
+ "Multiple predecessors with one having a full mask");
+ if (EdgeMask)
+ Masks.push_back(EdgeMask);
+ }
+ return new VPBlendRecipe(Phi, Masks);
}
bool LoopVectorizationPlanner::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
@@ -8245,13 +8343,10 @@ bool LoopVectorizationPlanner::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
return UseVectorIntrinsic || !NeedToScalarize;
}
if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
- LoopVectorizationCostModel::InstWidening Decision =
- CM.getWideningDecision(I, VF);
- assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
- "CM decision should be taken at this point.");
- assert(Decision != LoopVectorizationCostModel::CM_Interleave &&
- "Interleave memory opportunity should be caught earlier.");
- return Decision != LoopVectorizationCostModel::CM_Scalarize;
+ assert(CM.getWideningDecision(I, VF) ==
+ LoopVectorizationCostModel::CM_Scalarize &&
+ "Memory widening decisions should have been taken care by now");
+ return false;
}
return true;
};
@@ -8273,7 +8368,8 @@ bool LoopVectorizationPlanner::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
VPBasicBlock *LoopVectorizationPlanner::handleReplication(
Instruction *I, VFRange &Range, VPBasicBlock *VPBB,
- DenseMap<Instruction *, VPReplicateRecipe *> &PredInst2Recipe) {
+ DenseMap<Instruction *, VPReplicateRecipe *> &PredInst2Recipe,
+ VPlanPtr &Plan) {
bool IsUniform = getDecisionAndClampRange(
[&](unsigned VF) { return CM.isUniformAfterVectorization(I, VF); },
Range);
@@ -8300,20 +8396,25 @@ VPBasicBlock *LoopVectorizationPlanner::handleReplication(
"VPBB has successors when handling predicated replication.");
// Record predicated instructions for above packing optimizations.
PredInst2Recipe[I] = Recipe;
- VPBlockBase *Region = VPBB->setOneSuccessor(createReplicateRegion(I, Recipe));
+ VPBlockBase *Region =
+ VPBB->setOneSuccessor(createReplicateRegion(I, Recipe, Plan));
return cast<VPBasicBlock>(Region->setOneSuccessor(new VPBasicBlock()));
}
VPRegionBlock *
LoopVectorizationPlanner::createReplicateRegion(Instruction *Instr,
- VPRecipeBase *PredRecipe) {
+ VPRecipeBase *PredRecipe,
+ VPlanPtr &Plan) {
// Instructions marked for predication are replicated and placed under an
// if-then construct to prevent side-effects.
+ // Generate recipes to compute the block mask for this region.
+ VPValue *BlockInMask = createBlockInMask(Instr->getParent(), Plan);
+
// Build the triangular if-then region.
std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
assert(Instr->getParent() && "Predicated instruction not in any basic block");
- auto *BOMRecipe = new VPBranchOnMaskRecipe(Instr->getParent());
+ auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask);
auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe);
auto *PHIRecipe =
Instr->getType()->isVoidTy() ? nullptr : new VPPredInstPHIRecipe(Instr);
@@ -8329,7 +8430,11 @@ LoopVectorizationPlanner::createReplicateRegion(Instruction *Instr,
return Region;
}
-std::unique_ptr<VPlan> LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
+LoopVectorizationPlanner::VPlanPtr
+LoopVectorizationPlanner::buildVPlan(VFRange &Range,
+ const SmallPtrSetImpl<Value *> &NeedDef) {
+ EdgeMaskCache.clear();
+ BlockMaskCache.clear();
DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
DenseMap<Instruction *, Instruction *> SinkAfterInverse;
@@ -8351,6 +8456,10 @@ std::unique_ptr<VPlan> LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
auto Plan = llvm::make_unique<VPlan>(VPBB);
+ // Represent values that will have defs inside VPlan.
+ for (Value *V : NeedDef)
+ Plan->addVPValue(V);
+
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
LoopBlocksDFS DFS(OrigLoop);
@@ -8363,6 +8472,7 @@ std::unique_ptr<VPlan> LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
VPBB->setOneSuccessor(FirstVPBBForBB);
VPBB = FirstVPBBForBB;
+ Builder.setInsertPoint(VPBB);
std::vector<Instruction *> Ingredients;
@@ -8421,7 +8531,7 @@ std::unique_ptr<VPlan> LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
}
// Check if Instr is a memory operation that should be widened.
- if ((Recipe = tryToWidenMemory(Instr, Range))) {
+ if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) {
VPBB->appendRecipe(Recipe);
continue;
}
@@ -8431,7 +8541,7 @@ std::unique_ptr<VPlan> LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
VPBB->appendRecipe(Recipe);
continue;
}
- if ((Recipe = tryToBlend(Instr))) {
+ if ((Recipe = tryToBlend(Instr, Plan))) {
VPBB->appendRecipe(Recipe);
continue;
}
@@ -8449,7 +8559,7 @@ std::unique_ptr<VPlan> LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
// Otherwise, if all widening options failed, Instruction is to be
// replicated. This may create a successor for VPBB.
VPBasicBlock *NextVPBB =
- handleReplication(Instr, Range, VPBB, PredInst2Recipe);
+ handleReplication(Instr, Range, VPBB, PredInst2Recipe, Plan);
if (NextVPBB != VPBB) {
VPBB = NextVPBB;
VPBB->setName(BB->hasName() ? BB->getName() + "." + Twine(VPBBsForBB++)
@@ -8525,14 +8635,16 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
unsigned Part = State.Instance->Part;
unsigned Lane = State.Instance->Lane;
- auto Cond = State.ILV->createBlockInMask(MaskedBasicBlock);
-
- Value *ConditionBit = Cond[Part];
- if (!ConditionBit) // Block in mask is all-one.
+ Value *ConditionBit = nullptr;
+ if (!User) // Block in mask is all-one.
ConditionBit = State.Builder.getTrue();
- else if (ConditionBit->getType()->isVectorTy())
- ConditionBit = State.Builder.CreateExtractElement(
- ConditionBit, State.Builder.getInt32(Lane));
+ else {
+ VPValue *BlockInMask = User->getOperand(0);
+ ConditionBit = State.get(BlockInMask, Part);
+ if (ConditionBit->getType()->isVectorTy())
+ ConditionBit = State.Builder.CreateExtractElement(
+ ConditionBit, State.Builder.getInt32(Lane));
+ }
// Replace the temporary unreachable terminator with a new conditional branch,
// whose two destinations will be set later when they are created.
@@ -8542,9 +8654,6 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
CondBr->setSuccessor(0, nullptr);
ReplaceInstWithInst(CurrentTerminator, CondBr);
-
- DEBUG(dbgs() << "\nLV: vectorizing BranchOnMask recipe "
- << MaskedBasicBlock->getName());
}
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index f74426e5f30..5bbe5edb386 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -46,6 +46,14 @@ using namespace llvm;
#define DEBUG_TYPE "vplan"
+raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
+ if (const VPInstruction *Instr = dyn_cast<VPInstruction>(&V))
+ Instr->print(OS);
+ else
+ V.printAsOperand(OS);
+ return OS;
+}
+
/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
const VPBlockBase *Block = this;
@@ -212,10 +220,68 @@ void VPRegionBlock::execute(VPTransformState *State) {
State->Instance.reset();
}
+void VPInstruction::generateInstruction(VPTransformState &State,
+ unsigned Part) {
+ IRBuilder<> &Builder = State.Builder;
+
+ if (Instruction::isBinaryOp(getOpcode())) {
+ Value *A = State.get(getOperand(0), Part);
+ Value *B = State.get(getOperand(1), Part);
+ Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
+ State.set(this, V, Part);
+ return;
+ }
+
+ switch (getOpcode()) {
+ case VPInstruction::Not: {
+ Value *A = State.get(getOperand(0), Part);
+ Value *V = Builder.CreateNot(A);
+ State.set(this, V, Part);
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported opcode for instruction");
+ }
+}
+
+void VPInstruction::execute(VPTransformState &State) {
+ assert(!State.Instance && "VPInstruction executing an Instance");
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ generateInstruction(State, Part);
+}
+
+void VPInstruction::print(raw_ostream &O, const Twine &Indent) const {
+ O << " +\n" << Indent << "\"EMIT ";
+ print(O);
+ O << "\\l\"";
+}
+
+void VPInstruction::print(raw_ostream &O) const {
+ printAsOperand(O);
+ O << " = ";
+
+ switch (getOpcode()) {
+ case VPInstruction::Not:
+ O << "not";
+ break;
+ default:
+ O << Instruction::getOpcodeName(getOpcode());
+ }
+
+ for (const VPValue *Operand : operands()) {
+ O << " ";
+ Operand->printAsOperand(O);
+ }
+}
+
/// Generate the code inside the body of the vectorized loop. Assumes a single
/// LoopVectorBody basic-block was created for this. Introduce additional
/// basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
+ // 0. Set the reverse mapping from VPValues to Values for code generation.
+ for (auto &Entry : Value2VPValue)
+ State->VPValue2Value[Entry.second] = Entry.first;
+
BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
@@ -316,6 +382,14 @@ void VPlanPrinter::dump() {
OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
if (!Plan.getName().empty())
OS << "\\n" << DOT::EscapeString(Plan.getName());
+ if (!Plan.Value2VPValue.empty()) {
+ OS << ", where:";
+ for (auto Entry : Plan.Value2VPValue) {
+ OS << "\\n" << *Entry.second;
+ OS << DOT::EscapeString(" := ");
+ Entry.first->printAsOperand(OS, false);
+ }
+ }
OS << "\"]\n";
OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
OS << "edge [fontname=Courier, fontsize=30]\n";
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f039e21458a..a39eda0c08c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -15,8 +15,10 @@
/// treated as proper graphs for generic algorithms;
/// 3. Pure virtual VPRecipeBase serving as the base class for recipes contained
/// within VPBasicBlocks;
-/// 4. The VPlan class holding a candidate for vectorization;
-/// 5. The VPlanPrinter class providing a way to print a plan in dot format.
+/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
+/// instruction;
+/// 5. The VPlan class holding a candidate for vectorization;
+/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//
@@ -24,6 +26,7 @@
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
+#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/Optional.h"
@@ -39,6 +42,13 @@
#include <map>
#include <string>
+// The (re)use of existing LoopVectorize classes is subject to future VPlan
+// refactoring.
+namespace {
+class LoopVectorizationLegality;
+class LoopVectorizationCostModel;
+} // namespace
+
namespace llvm {
class BasicBlock;
@@ -82,6 +92,8 @@ struct VPIteration {
/// Entries from either map can be retrieved using the getVectorValue and
/// getScalarValue functions, which assert that the desired value exists.
struct VectorizerValueMap {
+ friend struct VPTransformState;
+
private:
/// The unroll factor. Each entry in the vector map contains UF vector values.
unsigned UF;
@@ -195,14 +207,21 @@ public:
}
};
+/// This class is used to enable the VPlan to invoke a method of ILV. This is
+/// needed until the method is refactored out of ILV and becomes reusable.
+struct VPCallback {
+ virtual ~VPCallback() {}
+ virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
+};
+
/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
VPTransformState(unsigned VF, unsigned UF, LoopInfo *LI, DominatorTree *DT,
IRBuilder<> &Builder, VectorizerValueMap &ValueMap,
- InnerLoopVectorizer *ILV)
- : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ValueMap(ValueMap),
- ILV(ILV) {}
+ InnerLoopVectorizer *ILV, VPCallback &Callback)
+ : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder),
+ ValueMap(ValueMap), ILV(ILV), Callback(Callback) {}
/// The chosen Vectorization and Unroll Factors of the loop being vectorized.
unsigned VF;
@@ -213,6 +232,37 @@ struct VPTransformState {
/// instructions.
Optional<VPIteration> Instance;
+ struct DataState {
+ /// A type for vectorized values in the new loop. Each value from the
+ /// original loop, when vectorized, is represented by UF vector values in
+ /// the new unrolled loop, where UF is the unroll factor.
+ typedef SmallVector<Value *, 2> PerPartValuesTy;
+
+ DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
+ } Data;
+
+ /// Get the generated Value for a given VPValue and a given Part. Note that
+ /// as some Defs are still created by ILV and managed in its ValueMap, this
+ /// method will delegate the call to ILV in such cases in order to provide
+ /// callers a consistent API.
+ /// \see set.
+ Value *get(VPValue *Def, unsigned Part) {
+ // If Values have been set for this Def return the one relevant for \p Part.
+ if (Data.PerPartOutput.count(Def))
+ return Data.PerPartOutput[Def][Part];
+ // Def is managed by ILV: bring the Values from ValueMap.
+ return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part);
+ }
+
+ /// Set the generated Value for a given VPValue and a given Part.
+ void set(VPValue *Def, Value *V, unsigned Part) {
+ if (!Data.PerPartOutput.count(Def)) {
+ DataState::PerPartValuesTy Entry(UF);
+ Data.PerPartOutput[Def] = Entry;
+ }
+ Data.PerPartOutput[Def][Part] = V;
+ }
+
/// Hold state information used when constructing the CFG of the output IR,
/// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
struct CFGState {
@@ -247,8 +297,14 @@ struct VPTransformState {
/// Values of the output IR.
VectorizerValueMap &ValueMap;
+ /// Hold a reference to a mapping between VPValues in VPlan and original
+ /// Values they correspond to.
+ VPValue2ValueTy VPValue2Value;
+
/// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
InnerLoopVectorizer *ILV;
+
+ VPCallback &Callback;
};
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
@@ -454,6 +510,7 @@ public:
using VPRecipeTy = enum {
VPBlendSC,
VPBranchOnMaskSC,
+ VPInstructionSC,
VPInterleaveSC,
VPPredInstPHISC,
VPReplicateSC,
@@ -483,6 +540,52 @@ public:
virtual void print(raw_ostream &O, const Twine &Indent) const = 0;
};
+/// This is a concrete Recipe that models a single VPlan-level instruction.
+/// While as any Recipe it may generate a sequence of IR instructions when
+/// executed, these instructions would always form a single-def expression as
+/// the VPInstruction is also a single def-use vertex.
+class VPInstruction : public VPUser, public VPRecipeBase {
+public:
+ /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
+ enum { Not = Instruction::OtherOpsEnd + 1 };
+
+private:
+ typedef unsigned char OpcodeTy;
+ OpcodeTy Opcode;
+
+ /// Utility method serving execute(): generates a single instance of the
+ /// modeled instruction.
+ void generateInstruction(VPTransformState &State, unsigned Part);
+
+public:
+ VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands)
+ : VPUser(VPValue::VPInstructionSC, Operands),
+ VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) {}
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPValue *V) {
+ return V->getVPValueID() == VPValue::VPInstructionSC;
+ }
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
+ }
+
+ unsigned getOpcode() const { return Opcode; }
+
+ /// Generate the instruction.
+ /// TODO: We currently execute only per-part unless a specific instance is
+ /// provided.
+ void execute(VPTransformState &State) override;
+
+ /// Print the Recipe.
+ void print(raw_ostream &O, const Twine &Indent) const override;
+
+ /// Print the VPInstruction.
+ void print(raw_ostream &O) const;
+};
+
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions.
@@ -539,15 +642,17 @@ public:
return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
}
- /// Augment the existing recipes of a VPBasicBlock with an additional
- /// \p Recipe as the last recipe.
- void appendRecipe(VPRecipeBase *Recipe) {
+ void insert(VPRecipeBase *Recipe, iterator InsertPt) {
assert(Recipe && "No recipe to append.");
assert(!Recipe->Parent && "Recipe already in VPlan");
Recipe->Parent = this;
- return Recipes.push_back(Recipe);
+ Recipes.insert(InsertPt, Recipe);
}
+ /// Augment the existing recipes of a VPBasicBlock with an additional
+ /// \p Recipe as the last recipe.
+ void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
+
/// The method which generates the output IR instructions that correspond to
/// this VPBasicBlock, thereby "executing" the VPlan.
void execute(struct VPTransformState *State) override;
@@ -620,6 +725,8 @@ public:
/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
/// VPBlock.
class VPlan {
+ friend class VPlanPrinter;
+
private:
/// Hold the single entry to the Hierarchical CFG of the VPlan.
VPBlockBase *Entry;
@@ -630,12 +737,18 @@ private:
/// Holds the name of the VPlan, for printing.
std::string Name;
+ /// Holds a mapping between Values and their corresponding VPValue inside
+ /// VPlan.
+ Value2VPValueTy Value2VPValue;
+
public:
VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {}
~VPlan() {
if (Entry)
VPBlockBase::deleteCFG(Entry);
+ for (auto &MapEntry : Value2VPValue)
+ delete MapEntry.second;
}
/// Generate the IR code for this VPlan.
@@ -654,6 +767,18 @@ public:
void setName(const Twine &newName) { Name = newName.str(); }
+ void addVPValue(Value *V) {
+ assert(V && "Trying to add a null Value to VPlan");
+ assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
+ Value2VPValue[V] = new VPValue();
+ }
+
+ VPValue *getVPValue(Value *V) {
+ assert(V && "Trying to get the VPValue of a null Value");
+ assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
+ return Value2VPValue[V];
+ }
+
private:
/// Add to the given dominator tree the header block and every new basic block
/// that was created between it and the latch block, inclusive.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanBuilder.h
new file mode 100644
index 00000000000..d6eb3397d04
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanBuilder.h
@@ -0,0 +1,61 @@
+//===- VPlanBuilder.h - A VPlan utility for constructing VPInstructions ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file provides a VPlan-based builder utility analogous to IRBuilder.
+/// It provides an instruction-level API for generating VPInstructions while
+/// abstracting away the Recipe manipulation details.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_BUILDER_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_BUILDER_H
+
+#include "VPlan.h"
+
+namespace llvm {
+
+class VPBuilder {
+private:
+ VPBasicBlock *BB = nullptr;
+ VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
+
+ VPInstruction *createInstruction(unsigned Opcode,
+ std::initializer_list<VPValue *> Operands) {
+ VPInstruction *Instr = new VPInstruction(Opcode, Operands);
+ BB->insert(Instr, InsertPt);
+ return Instr;
+ }
+
+public:
+ VPBuilder() {}
+
+ /// \brief This specifies that created VPInstructions should be appended to
+ /// the end of the specified block.
+ void setInsertPoint(VPBasicBlock *TheBB) {
+ assert(TheBB && "Attempting to set a null insert point");
+ BB = TheBB;
+ InsertPt = BB->end();
+ }
+
+ VPValue *createNot(VPValue *Operand) {
+ return createInstruction(VPInstruction::Not, {Operand});
+ }
+
+ VPValue *createAnd(VPValue *LHS, VPValue *RHS) {
+ return createInstruction(Instruction::BinaryOps::And, {LHS, RHS});
+ }
+
+ VPValue *createOr(VPValue *LHS, VPValue *RHS) {
+ return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS});
+ }
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_BUILDER_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
new file mode 100644
index 00000000000..50966891e0e
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -0,0 +1,146 @@
+//===- VPlanValue.h - Represent Values in Vectorizer Plan -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declarations of the entities induced by Vectorization
+/// Plans, e.g. the instructions the VPlan intends to generate if executed.
+/// VPlan models the following entities:
+/// VPValue
+/// |-- VPUser
+/// | |-- VPInstruction
+/// These are documented in docs/VectorizationPlan.rst.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_VALUE_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VALUE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+// Forward declarations.
+class VPUser;
+
+// This is the base class of the VPlan Def/Use graph, used for modeling the data
+// flow into, within and out of the VPlan. VPValues can stand for live-ins
+// coming from the input IR, instructions which VPlan will generate if executed
+// and live-outs which the VPlan will need to fix accordingly.
+class VPValue {
+private:
+ const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
+
+ SmallVector<VPUser *, 1> Users;
+
+protected:
+ VPValue(const unsigned char SC) : SubclassID(SC) {}
+
+public:
+ /// An enumeration for keeping track of the concrete subclass of VPValue that
+ /// are actually instantiated. Values of this enumeration are kept in the
+ /// SubclassID field of the VPValue objects. They are used for concrete
+ /// type identification.
+ enum { VPValueSC, VPUserSC, VPInstructionSC };
+
+ VPValue() : SubclassID(VPValueSC) {}
+ VPValue(const VPValue &) = delete;
+ VPValue &operator=(const VPValue &) = delete;
+
+ /// \return an ID for the concrete type of this object.
+ /// This is used to implement the classof checks. This should not be used
+ /// for any other purpose, as the values may change as LLVM evolves.
+ unsigned getVPValueID() const { return SubclassID; }
+
+ void printAsOperand(raw_ostream &OS) const {
+ OS << "%vp" << (unsigned short)(unsigned long long)this;
+ }
+
+ unsigned getNumUsers() const { return Users.size(); }
+ void addUser(VPUser &User) { Users.push_back(&User); }
+
+ typedef SmallVectorImpl<VPUser *>::iterator user_iterator;
+ typedef SmallVectorImpl<VPUser *>::const_iterator const_user_iterator;
+ typedef iterator_range<user_iterator> user_range;
+ typedef iterator_range<const_user_iterator> const_user_range;
+
+ user_iterator user_begin() { return Users.begin(); }
+ const_user_iterator user_begin() const { return Users.begin(); }
+ user_iterator user_end() { return Users.end(); }
+ const_user_iterator user_end() const { return Users.end(); }
+ user_range users() { return user_range(user_begin(), user_end()); }
+ const_user_range users() const {
+ return const_user_range(user_begin(), user_end());
+ }
+};
+
+typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
+typedef DenseMap<VPValue *, Value *> VPValue2ValueTy;
+
+raw_ostream &operator<<(raw_ostream &OS, const VPValue &V);
+
+/// This class augments VPValue with operands which provide the inverse def-use
+/// edges from VPValue's users to their defs.
+class VPUser : public VPValue {
+private:
+ SmallVector<VPValue *, 2> Operands;
+
+ void addOperand(VPValue *Operand) {
+ Operands.push_back(Operand);
+ Operand->addUser(*this);
+ }
+
+protected:
+ VPUser(const unsigned char SC) : VPValue(SC) {}
+ VPUser(const unsigned char SC, ArrayRef<VPValue *> Operands) : VPValue(SC) {
+ for (VPValue *Operand : Operands)
+ addOperand(Operand);
+ }
+
+public:
+ VPUser() : VPValue(VPValue::VPUserSC) {}
+ VPUser(ArrayRef<VPValue *> Operands) : VPUser(VPValue::VPUserSC, Operands) {}
+ VPUser(std::initializer_list<VPValue *> Operands)
+ : VPUser(ArrayRef<VPValue *>(Operands)) {}
+ VPUser(const VPUser &) = delete;
+ VPUser &operator=(const VPUser &) = delete;
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPValue *V) {
+ return V->getVPValueID() >= VPUserSC &&
+ V->getVPValueID() <= VPInstructionSC;
+ }
+
+ unsigned getNumOperands() const { return Operands.size(); }
+ inline VPValue *getOperand(unsigned N) const {
+ assert(N < Operands.size() && "Operand index out of bounds");
+ return Operands[N];
+ }
+
+ typedef SmallVectorImpl<VPValue *>::iterator operand_iterator;
+ typedef SmallVectorImpl<VPValue *>::const_iterator const_operand_iterator;
+ typedef iterator_range<operand_iterator> operand_range;
+ typedef iterator_range<const_operand_iterator> const_operand_range;
+
+ operand_iterator op_begin() { return Operands.begin(); }
+ const_operand_iterator op_begin() const { return Operands.begin(); }
+ operand_iterator op_end() { return Operands.end(); }
+ const_operand_iterator op_end() const { return Operands.end(); }
+ operand_range operands() { return operand_range(op_begin(), op_end()); }
+ const_operand_range operands() const {
+ return const_operand_range(op_begin(), op_end());
+ }
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_VALUE_H
OpenPOWER on IntegriCloud