diff options
author | Eugene Zelenko <eugene.zelenko@gmail.com> | 2017-09-01 21:37:29 +0000 |
---|---|---|
committer | Eugene Zelenko <eugene.zelenko@gmail.com> | 2017-09-01 21:37:29 +0000 |
commit | 75075efe5e8193afef83365f50980d773d150662 (patch) | |
tree | 9af419b6b24c37c18b1019e76825fcb82fc77078 /llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | |
parent | 924f20262ba939ca4e14b2cba59cc9c95a9c5e09 (diff) | |
download | bcm5719-llvm-75075efe5e8193afef83365f50980d773d150662.tar.gz bcm5719-llvm-75075efe5e8193afef83365f50980d773d150662.zip |
[Analysis, Transforms] Fix some Clang-tidy modernize and Include What You Use warnings; other minor fixes (NFC).
llvm-svn: 312383
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 233 |
1 files changed, 143 insertions, 90 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 147860db400..b147445d716 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15,38 +15,86 @@ // "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. // //===----------------------------------------------------------------------===// + #include "llvm/Transforms/Vectorize/SLPVectorizer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/IR/Verifier.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Vectorize.h" #include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> #include <memory> +#include <set> +#include <string> +#include <tuple> +#include <utility> +#include <vector> using namespace llvm; using namespace llvm::PatternMatch; @@ -382,7 +430,6 @@ static bool matchExtractIndex(Instruction *E, unsigned Idx, unsigned Opcode) { /// possible scalar operand in vectorized instruction. static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst, TargetLibraryInfo *TLI) { - unsigned Opcode = UserInst->getOpcode(); switch (Opcode) { case Instruction::Load: { @@ -427,24 +474,25 @@ static bool isSimple(Instruction *I) { } namespace llvm { + namespace slpvectorizer { + /// Bottom Up SLP Vectorizer. class BoUpSLP { public: - typedef SmallVector<Value *, 8> ValueList; - typedef SmallVector<Instruction *, 16> InstrList; - typedef SmallPtrSet<Value *, 16> ValueSet; - typedef SmallVector<StoreInst *, 8> StoreList; - typedef MapVector<Value *, SmallVector<Instruction *, 2>> - ExtraValueToDebugLocsMap; + using ValueList = SmallVector<Value *, 8>; + using InstrList = SmallVector<Instruction *, 16>; + using ValueSet = SmallPtrSet<Value *, 16>; + using StoreList = SmallVector<StoreInst *, 8>; + using ExtraValueToDebugLocsMap = + MapVector<Value *, SmallVector<Instruction *, 2>>; BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li, DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB, const DataLayout *DL, OptimizationRemarkEmitter *ORE) - : NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func), - SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), DB(DB), - DL(DL), ORE(ORE), Builder(Se->getContext()) { + : F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), + DB(DB), DL(DL), ORE(ORE), Builder(Se->getContext()) { CodeMetrics::collectEphemeralValues(F, AC, EphValues); // Use the vector register size specified by the target unless overridden // by a command-line option. @@ -466,6 +514,7 @@ public: /// \brief Vectorize the tree that starts with the elements in \p VL. /// Returns the vectorized root. Value *vectorizeTree(); + /// Vectorize the tree but with the list of externally used values \p /// ExternallyUsedValues. Values in this MapVector can be replaced but the /// generated extractvalue instructions. @@ -483,6 +532,7 @@ public: /// the purpose of scheduling and extraction in the \p UserIgnoreLst. void buildTree(ArrayRef<Value *> Roots, ArrayRef<Value *> UserIgnoreLst = None); + /// Construct a vectorizable tree that starts at \p Roots, ignoring users for /// the purpose of scheduling and extraction in the \p UserIgnoreLst taking /// into account (anf updating it, if required) list of externally used @@ -599,15 +649,14 @@ private: void reorderAltShuffleOperands(unsigned Opcode, ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left, SmallVectorImpl<Value *> &Right); + /// \reorder commutative operands to get better probability of /// generating vectorized code. void reorderInputsAccordingToOpcode(unsigned Opcode, ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left, SmallVectorImpl<Value *> &Right); struct TreeEntry { - TreeEntry(std::vector<TreeEntry> &Container) - : Scalars(), VectorizedValue(nullptr), NeedToGather(0), - Container(Container) {} + TreeEntry(std::vector<TreeEntry> &Container) : Container(Container) {} /// \returns true if the scalars in VL are equal to this entry. bool isSame(ArrayRef<Value *> VL) const { @@ -619,10 +668,10 @@ private: ValueList Scalars; /// The Scalars are vectorized into this value. It is initialized to Null. - Value *VectorizedValue; + Value *VectorizedValue = nullptr; /// Do we need to gather this sequence ? - bool NeedToGather; + bool NeedToGather = false; /// Points back to the VectorizableTree. /// @@ -686,16 +735,19 @@ private: /// This POD struct describes one external user in the vectorized tree. struct ExternalUser { - ExternalUser (Value *S, llvm::User *U, int L) : - Scalar(S), User(U), Lane(L){} + ExternalUser(Value *S, llvm::User *U, int L) + : Scalar(S), User(U), Lane(L) {} + // Which scalar in our function. Value *Scalar; + // Which user that uses the scalar. llvm::User *User; + // Which lane does the scalar belong to. int Lane; }; - typedef SmallVector<ExternalUser, 16> UserList; + using UserList = SmallVector<ExternalUser, 16>; /// Checks if two instructions may access the same memory. /// @@ -703,7 +755,6 @@ private: /// is invariant in the calling loop. bool isAliased(const MemoryLocation &Loc1, Instruction *Inst1, Instruction *Inst2) { - // First check if the result is already in the cache. AliasCacheKey key = std::make_pair(Inst1, Inst2); Optional<bool> &result = AliasCache[key]; @@ -721,7 +772,7 @@ private: return aliased; } - typedef std::pair<Instruction *, Instruction *> AliasCacheKey; + using AliasCacheKey = std::pair<Instruction *, Instruction *>; /// Cache for alias results. /// TODO: consider moving this to the AliasAnalysis itself. @@ -754,6 +805,7 @@ private: /// Holds all of the instructions that we gathered. SetVector<Instruction *> GatherSeq; + /// A list of blocks that we are going to CSE. SetVector<BasicBlock *> CSEBlocks; @@ -762,17 +814,11 @@ private: /// instruction bundle (= a group of instructions which is combined into a /// vector instruction). struct ScheduleData { - // The initial value for the dependency counters. It means that the // dependencies are not calculated yet. enum { InvalidDeps = -1 }; - ScheduleData() - : Inst(nullptr), FirstInBundle(nullptr), NextInBundle(nullptr), - NextLoadStore(nullptr), SchedulingRegionID(0), SchedulingPriority(0), - Dependencies(InvalidDeps), UnscheduledDeps(InvalidDeps), - UnscheduledDepsInBundle(InvalidDeps), IsScheduled(false), - OpValue(nullptr) {} + ScheduleData() = default; void init(int BlockSchedulingRegionID, Value *OpVal) { FirstInBundle = this; @@ -842,19 +888,19 @@ private: } } - Instruction *Inst; + Instruction *Inst = nullptr; /// Points to the head in an instruction bundle (and always to this for /// single instructions). - ScheduleData *FirstInBundle; + ScheduleData *FirstInBundle = nullptr; /// Single linked list of all instructions in a bundle. Null if it is a /// single instruction. - ScheduleData *NextInBundle; + ScheduleData *NextInBundle = nullptr; /// Single linked list of all memory instructions (e.g. load, store, call) /// in the block - until the end of the scheduling region. - ScheduleData *NextLoadStore; + ScheduleData *NextLoadStore = nullptr; /// The dependent memory instructions. /// This list is derived on demand in calculateDependencies(). @@ -862,34 +908,33 @@ private: /// This ScheduleData is in the current scheduling region if this matches /// the current SchedulingRegionID of BlockScheduling. - int SchedulingRegionID; + int SchedulingRegionID = 0; /// Used for getting a "good" final ordering of instructions. - int SchedulingPriority; + int SchedulingPriority = 0; /// The number of dependencies. Constitutes of the number of users of the /// instruction plus the number of dependent memory instructions (if any). /// This value is calculated on demand. /// If InvalidDeps, the number of dependencies is not calculated yet. - /// - int Dependencies; + int Dependencies = InvalidDeps; /// The number of dependencies minus the number of dependencies of scheduled /// instructions. As soon as this is zero, the instruction/bundle gets ready /// for scheduling. /// Note that this is negative as long as Dependencies is not calculated. - int UnscheduledDeps; + int UnscheduledDeps = InvalidDeps; /// The sum of UnscheduledDeps in a bundle. Equals to UnscheduledDeps for /// single instructions. - int UnscheduledDepsInBundle; + int UnscheduledDepsInBundle = InvalidDeps; /// True if this instruction is scheduled (or considered as scheduled in the /// dry-run). - bool IsScheduled; + bool IsScheduled = false; /// Opcode of the current instruction in the schedule data. - Value *OpValue; + Value *OpValue = nullptr; }; #ifndef NDEBUG @@ -903,18 +948,9 @@ private: friend struct DOTGraphTraits<BoUpSLP *>; /// Contains all scheduling data for a basic block. - /// struct BlockScheduling { - BlockScheduling(BasicBlock *BB) - : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize), - ScheduleStart(nullptr), ScheduleEnd(nullptr), - FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr), - ScheduleRegionSize(0), - ScheduleRegionSizeLimit(ScheduleRegionSizeBudget), - // Make sure that the initial SchedulingRegionID is greater than the - // initial SchedulingRegionID in ScheduleData (which is 0). - SchedulingRegionID(1) {} + : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize) {} void clear() { ReadyInsts.clear(); @@ -1090,28 +1126,30 @@ private: ReadyList ReadyInsts; /// The first instruction of the scheduling region. - Instruction *ScheduleStart; + Instruction *ScheduleStart = nullptr; /// The first instruction _after_ the scheduling region. - Instruction *ScheduleEnd; + Instruction *ScheduleEnd = nullptr; /// The first memory accessing instruction in the scheduling region /// (can be null). - ScheduleData *FirstLoadStoreInRegion; + ScheduleData *FirstLoadStoreInRegion = nullptr; /// The last memory accessing instruction in the scheduling region /// (can be null). - ScheduleData *LastLoadStoreInRegion; + ScheduleData *LastLoadStoreInRegion = nullptr; /// The current size of the scheduling region. - int ScheduleRegionSize; + int ScheduleRegionSize = 0; /// The maximum size allowed for the scheduling region. - int ScheduleRegionSizeLimit; + int ScheduleRegionSizeLimit = ScheduleRegionSizeBudget; /// The ID of the scheduling region. For a new vectorization iteration this /// is incremented which "removes" all ScheduleData from the region. - int SchedulingRegionID; + int SchedulingRegionID = 1; + // Make sure that the initial SchedulingRegionID is greater than the + // initial SchedulingRegionID in ScheduleData (which is 0). }; /// Attaches the BlockScheduling structures to basic blocks. @@ -1125,10 +1163,10 @@ private: ArrayRef<Value *> UserIgnoreList; // Number of load bundles that contain consecutive loads. - int NumLoadsWantToKeepOrder; + int NumLoadsWantToKeepOrder = 0; // Number of load bundles that contain consecutive loads in reversed order. - int NumLoadsWantToChangeOrder; + int NumLoadsWantToChangeOrder = 0; // Analysis and block reference. Function *F; @@ -1155,20 +1193,20 @@ private: /// original width. MapVector<Value *, std::pair<uint64_t, bool>> MinBWs; }; + } // end namespace slpvectorizer template <> struct GraphTraits<BoUpSLP *> { - typedef BoUpSLP::TreeEntry TreeEntry; + using TreeEntry = BoUpSLP::TreeEntry; /// NodeRef has to be a pointer per the GraphWriter. - typedef TreeEntry *NodeRef; + using NodeRef = TreeEntry *; /// \brief Add the VectorizableTree to the index iterator to be able to return /// TreeEntry pointers. struct ChildIteratorType : public iterator_adaptor_base<ChildIteratorType, SmallVector<int, 1>::iterator> { - std::vector<TreeEntry> &VectorizableTree; ChildIteratorType(SmallVector<int, 1>::iterator W, @@ -1183,17 +1221,19 @@ template <> struct GraphTraits<BoUpSLP *> { static ChildIteratorType child_begin(NodeRef N) { return {N->UserTreeIndices.begin(), N->Container}; } + static ChildIteratorType child_end(NodeRef N) { return {N->UserTreeIndices.end(), N->Container}; } /// For the node iterator we just need to turn the TreeEntry iterator into a /// TreeEntry* iterator so that it dereferences to NodeRef. - typedef pointer_iterator<std::vector<TreeEntry>::iterator> nodes_iterator; + using nodes_iterator = pointer_iterator<std::vector<TreeEntry>::iterator>; static nodes_iterator nodes_begin(BoUpSLP *R) { return nodes_iterator(R->VectorizableTree.begin()); } + static nodes_iterator nodes_end(BoUpSLP *R) { return nodes_iterator(R->VectorizableTree.end()); } @@ -1202,7 +1242,7 @@ template <> struct GraphTraits<BoUpSLP *> { }; template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits { - typedef BoUpSLP::TreeEntry TreeEntry; + using TreeEntry = BoUpSLP::TreeEntry; DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} @@ -1239,6 +1279,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ExtraValueToDebugLocsMap ExternallyUsedValues; buildTree(Roots, ExternallyUsedValues, UserIgnoreLst); } + void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ExtraValueToDebugLocsMap &ExternallyUsedValues, ArrayRef<Value *> UserIgnoreLst) { @@ -1627,7 +1668,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, case Instruction::AShr: case Instruction::And: case Instruction::Or: - case Instruction::Xor: { + case Instruction::Xor: newTreeEntry(VL, true, UserTreeIdx); DEBUG(dbgs() << "SLP: added a vector of bin op.\n"); @@ -1650,7 +1691,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, buildTree_rec(Operands, Depth + 1, UserTreeIdx); } return; - } + case Instruction::GetElementPtr: { // We don't combine GEPs with complicated (nested) indexing. for (unsigned j = 0; j < VL.size(); ++j) { @@ -1784,7 +1825,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, } return; } - case Instruction::ShuffleVector: { + case Instruction::ShuffleVector: // If this is not an alternate sequence of opcode like add-sub // then do not vectorize this instruction. if (!isAltShuffle) { @@ -1814,7 +1855,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, buildTree_rec(Operands, Depth + 1, UserTreeIdx); } return; - } + default: BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); @@ -1942,11 +1983,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { assert(Opcode && allSameType(VL) && allSameBlock(VL) && "Invalid VL"); Instruction *VL0 = cast<Instruction>(VL[0]); switch (Opcode) { - case Instruction::PHI: { + case Instruction::PHI: return 0; - } + case Instruction::ExtractValue: - case Instruction::ExtractElement: { + case Instruction::ExtractElement: if (canReuseExtract(VL, VL0)) { int DeadCost = 0; for (unsigned i = 0, e = VL.size(); i < e; ++i) { @@ -1962,7 +2003,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { return -DeadCost; } return getGatherCost(VecTy); - } + case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: @@ -2173,7 +2214,6 @@ bool BoUpSLP::isFullyVectorizableTinyTree() { } bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() { - // We can vectorize the tree if its size is greater than or equal to the // minimum size specified by the MinTreeSize command line option. if (VectorizableTree.size() >= MinTreeSize) @@ -2465,8 +2505,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(unsigned Opcode, ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left, SmallVectorImpl<Value *> &Right) { - - if (VL.size()) { + if (!VL.empty()) { // Peel the first iteration out of the loop since there's nothing // interesting to do anyway and it simplifies the checks in the loop. auto *I = cast<Instruction>(VL[0]); @@ -2556,14 +2595,13 @@ void BoUpSLP::reorderInputsAccordingToOpcode(unsigned Opcode, } void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL, Value *OpValue) { - // Get the basic block this bundle is in. All instructions in the bundle // should be in this block. auto *Front = cast<Instruction>(OpValue); auto *BB = Front->getParent(); const unsigned Opcode = cast<Instruction>(OpValue)->getOpcode(); const unsigned AltOpcode = getAltOpcode(Opcode); - assert(all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool { + assert(llvm::all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool { return !sameOpcodeOrAlt(Opcode, AltOpcode, cast<Instruction>(V)->getOpcode()) || cast<Instruction>(V)->getParent() == BB; @@ -3082,7 +3120,6 @@ Value *BoUpSLP::vectorizeTree() { Value * BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { - // All blocks must be scheduled before any instructions are inserted. for (auto &BSIter : BlocksSchedules) { scheduleBlock(BSIter.second.get()); @@ -3482,7 +3519,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V, BasicBlock::reverse_iterator UpperEnd = BB->rend(); BasicBlock::iterator DownIter = ScheduleEnd->getIterator(); BasicBlock::iterator LowerEnd = BB->end(); - for (;;) { + while (true) { if (++ScheduleRegionSize > ScheduleRegionSizeLimit) { DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n"); return false; @@ -3696,7 +3733,6 @@ void BoUpSLP::BlockScheduling::resetSchedule() { } void BoUpSLP::scheduleBlock(BlockScheduling *BS) { - if (!BS->ScheduleStart) return; @@ -3828,7 +3864,6 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) { static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr, SmallVectorImpl<Value *> &ToDemote, SmallVectorImpl<Value *> &Roots) { - // We can always demote constants. if (isa<Constant>(V)) { ToDemote.push_back(V); @@ -3971,7 +4006,7 @@ void BoUpSLP::computeMinimumValueSizes() { // Determine if the sign bit of all the roots is known to be zero. If not, // IsKnownPositive is set to False. - IsKnownPositive = all_of(TreeRoot, [&](Value *R) { + IsKnownPositive = llvm::all_of(TreeRoot, [&](Value *R) { KnownBits Known = computeKnownBits(R, *DL); return Known.isNonNegative(); }); @@ -3979,7 +4014,7 @@ void BoUpSLP::computeMinimumValueSizes() { // Determine the maximum number of bits required to store the scalar // values. for (auto *Scalar : ToDemote) { - auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, 0, DT); + auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, nullptr, DT); auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType()); MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth); } @@ -4024,6 +4059,7 @@ void BoUpSLP::computeMinimumValueSizes() { } namespace { + /// The SLPVectorizer Pass. struct SLPVectorizer : public FunctionPass { SLPVectorizerPass Impl; @@ -4035,7 +4071,6 @@ struct SLPVectorizer : public FunctionPass { initializeSLPVectorizerPass(*PassRegistry::getPassRegistry()); } - bool doInitialization(Module &M) override { return false; } @@ -4075,6 +4110,7 @@ struct SLPVectorizer : public FunctionPass { AU.setPreservesCFG(); } }; + } // end anonymous namespace PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) { @@ -4221,7 +4257,9 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R, DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); if (Cost < -SLPCostThreshold) { DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); + using namespace ore; + R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized", cast<StoreInst>(Chain[i])) << "Stores SLP vectorized with cost " << NV("Cost", Cost) @@ -4310,7 +4348,6 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores, } void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) { - // Initialize the collections. We will make a single pass over the block. Stores.clear(); GEPs.clear(); @@ -4319,7 +4356,6 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) { // Stores and GEPs according to the underlying objects of their pointer // operands. for (Instruction &I : *BB) { - // Ignore store instructions that are volatile or have a pointer operand // that doesn't point to a scalar type. if (auto *SI = dyn_cast<StoreInst>(&I)) { @@ -4557,6 +4593,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx, } namespace { + /// Model horizontal reductions. /// /// A horizontal reduction is a tree of reduction operations (currently add and @@ -4594,10 +4631,13 @@ class HorizontalReduction { struct OperationData { /// true if the operation is a reduced value, false if reduction operation. bool IsReducedValue = false; + /// Opcode of the instruction. unsigned Opcode = 0; + /// Left operand of the reduction operation. Value *LHS = nullptr; + /// Right operand of the reduction operation. Value *RHS = nullptr; @@ -4610,40 +4650,48 @@ class HorizontalReduction { public: explicit OperationData() = default; + /// Construction for reduced values. They are identified by opcode only and /// don't have associated LHS/RHS values. explicit OperationData(Value *V) : IsReducedValue(true) { if (auto *I = dyn_cast<Instruction>(V)) Opcode = I->getOpcode(); } + /// Constructor for binary reduction operations with opcode and its left and /// right operands. OperationData(unsigned Opcode, Value *LHS, Value *RHS) - : IsReducedValue(false), Opcode(Opcode), LHS(LHS), RHS(RHS) {} + : Opcode(Opcode), LHS(LHS), RHS(RHS) {} + explicit operator bool() const { return Opcode; } + /// Get the index of the first operand. unsigned getFirstOperandIndex() const { assert(!!*this && "The opcode is not set."); return 0; } + /// Total number of operands in the reduction operation. unsigned getNumberOfOperands() const { assert(!IsReducedValue && !!*this && LHS && RHS && "Expected reduction operation."); return 2; } + /// Expected number of uses for reduction operations/reduced values. unsigned getRequiredNumberOfUses() const { assert(!IsReducedValue && !!*this && LHS && RHS && "Expected reduction operation."); return 1; } + /// Checks if instruction is associative and can be vectorized. bool isAssociative(Instruction *I) const { assert(!IsReducedValue && *this && LHS && RHS && "Expected reduction operation."); return I->isAssociative(); } + /// Checks if the reduction operation can be vectorized. bool isVectorizable(Instruction *I) const { return isVectorizable() && isAssociative(I); @@ -4665,13 +4713,16 @@ class HorizontalReduction { LHS = nullptr; RHS = nullptr; } + /// Get the opcode of the reduction operation. unsigned getOpcode() const { assert(isVectorizable() && "Expected vectorizable operation."); return Opcode; } + Value *getLHS() const { return LHS; } Value *getRHS() const { return RHS; } + /// Creates reduction operation with the current opcode. Value *createOp(IRBuilder<> &Builder, const Twine &Name = "") const { assert(!IsReducedValue && @@ -4686,8 +4737,10 @@ class HorizontalReduction { /// The operation data of the reduction operation. OperationData ReductionData; + /// The operation data of the values we perform a reduction on. OperationData ReducedValueData; + /// Should we model this reduction as a pairwise reduction tree or a tree that /// splits the vector in halves and adds those halves. bool IsPairwiseReduction = false; @@ -5018,6 +5071,7 @@ private: return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); } }; + } // end anonymous namespace /// \brief Recognize construction of vectors like @@ -5425,7 +5479,6 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { auto Changed = false; for (auto &Entry : GEPs) { - // If the getelementptr list has fewer than two elements, there's nothing // to do. if (Entry.second.size() < 2) @@ -5530,7 +5583,9 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) { } char SLPVectorizer::ID = 0; + static const char lv_name[] = "SLP Vectorizer"; + INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) @@ -5541,6 +5596,4 @@ INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false) -namespace llvm { -Pass *createSLPVectorizerPass() { return new SLPVectorizer(); } -} +Pass *llvm::createSLPVectorizerPass() { return new SLPVectorizer(); } |