diff options
author | Sean Silva <chisophugis@gmail.com> | 2016-07-09 22:56:50 +0000 |
---|---|---|
committer | Sean Silva <chisophugis@gmail.com> | 2016-07-09 22:56:50 +0000 |
commit | db90d4d9c1b8fb7400fd328c81dcd147064cb6e2 (patch) | |
tree | 0f2dd310300926368bfa0a87e94ee23f1362f1a7 | |
parent | bf7103543815222234271dc80e05b9ef372e76f1 (diff) | |
download | bcm5719-llvm-db90d4d9c1b8fb7400fd328c81dcd147064cb6e2.tar.gz bcm5719-llvm-db90d4d9c1b8fb7400fd328c81dcd147064cb6e2.zip |
[PM] Port LoopVectorize to the new PM.
llvm-svn: 275000
-rw-r--r-- | llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h | 103 | ||||
-rw-r--r-- | llvm/lib/Passes/PassBuilder.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Passes/PassRegistry.def | 1 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 709 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/conditional-assignment.ll | 1 |
5 files changed, 477 insertions, 338 deletions
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h new file mode 100644 index 00000000000..e6d3e835330 --- /dev/null +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h @@ -0,0 +1,103 @@ +//===---- LoopVectorize.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops +// and generates target-independent LLVM-IR. +// The vectorizer uses the TargetTransformInfo analysis to estimate the costs +// of instructions in order to estimate the profitability of vectorization. +// +// The loop vectorizer combines consecutive loop iterations into a single +// 'wide' iteration. After this transformation the index is incremented +// by the SIMD vector width, and not by one. +// +// This pass has three parts: +// 1. The main loop pass that drives the different parts. +// 2. LoopVectorizationLegality - A unit that checks for the legality +// of the vectorization. +// 3. InnerLoopVectorizer - A unit that performs the actual +// widening of instructions. +// 4. LoopVectorizationCostModel - A unit that checks for the profitability +// of vectorization. It decides on the optimal vector width, which +// can be one, if vectorization is not profitable. +// +//===----------------------------------------------------------------------===// +// +// The reduction-variable vectorization is based on the paper: +// D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. +// +// Variable uniformity checks are inspired by: +// Karrenberg, R. and Hack, S. Whole Function Vectorization. +// +// The interleaved access vectorization is based on the paper: +// Dorit Nuzman, Ira Rosen and Ayal Zaks. Auto-Vectorization of Interleaved +// Data for SIMD +// +// Other ideas/concepts are from: +// A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. +// +// S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of +// Vectorizing Compilers. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H +#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPassManager.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include <functional> + +namespace llvm { + +/// The LoopVectorize Pass. +struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> { + bool DisableUnrolling = false; + /// If true, consider all loops for vectorization. + /// If false, only loops that explicitly request vectorization are + /// considered. + bool AlwaysVectorize = true; + + ScalarEvolution *SE; + LoopInfo *LI; + TargetTransformInfo *TTI; + DominatorTree *DT; + BlockFrequencyInfo *BFI; + TargetLibraryInfo *TLI; + DemandedBits *DB; + AliasAnalysis *AA; + AssumptionCache *AC; + std::function<const LoopAccessInfo &(Loop &)> *GetLAA; + + BlockFrequency ColdEntryFreq; + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + // Shim for old PM. + bool runImpl(Function &F, ScalarEvolution &SE_, LoopInfo &LI_, + TargetTransformInfo &TTI_, DominatorTree &DT_, + BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, + DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_, + std::function<const LoopAccessInfo &(Loop &)> &GetLAA_); + + bool processLoop(Loop *L); +}; +} + +#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 991ece34607..8292214c17f 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -106,6 +106,7 @@ #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/MemorySSA.h" #include "llvm/Transforms/Utils/SimplifyInstructions.h" +#include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" #include <type_traits> diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index ec3c206c54f..10cbf96284f 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -148,6 +148,7 @@ FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass()) FUNCTION_PASS("jump-threading", JumpThreadingPass()) FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) FUNCTION_PASS("lcssa", LCSSAPass()) +FUNCTION_PASS("loop-vectorize", LoopVectorizePass()) FUNCTION_PASS("print", PrintFunctionPass(dbgs())) FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs())) FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs())) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4808413cd40..797111276ed 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -46,6 +46,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" @@ -55,21 +56,13 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Constants.h" @@ -101,7 +94,6 @@ #include "llvm/Transforms/Utils/LoopVersioning.h" #include "llvm/Transforms/Vectorize.h" #include <algorithm> -#include <functional> #include <map> #include <tuple> @@ -1335,15 +1327,14 @@ static void emitMissedWarning(Function *F, Loop *L, /// induction variable and the different reduction variables. class LoopVectorizationLegality { public: - LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE, - DominatorTree *DT, TargetLibraryInfo *TLI, - AliasAnalysis *AA, Function *F, - const TargetTransformInfo *TTI, - LoopAccessLegacyAnalysis *LAA, LoopInfo *LI, - LoopVectorizationRequirements *R, - LoopVectorizeHints *H) + LoopVectorizationLegality( + Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, + TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F, + const TargetTransformInfo *TTI, + std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI, + LoopVectorizationRequirements *R, LoopVectorizeHints *H) : NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TheFunction(F), - TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), + TTI(TTI), DT(DT), GetLAA(GetLAA), LAI(nullptr), InterleaveInfo(PSE, L, DT, LI), Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false), Requirements(R), Hints(H) {} @@ -1536,7 +1527,7 @@ private: /// Dominator Tree. DominatorTree *DT; // LoopAccess analysis. - LoopAccessLegacyAnalysis *LAA; + std::function<const LoopAccessInfo &(Loop &)> *GetLAA; // And the loop-accesses info corresponding to this loop. This pointer is // null until canVectorizeMemory sets it up. const LoopAccessInfo *LAI; @@ -1788,336 +1779,35 @@ struct LoopVectorize : public FunctionPass { static char ID; explicit LoopVectorize(bool NoUnrolling = false, bool AlwaysVectorize = true) - : FunctionPass(ID), DisableUnrolling(NoUnrolling), - AlwaysVectorize(AlwaysVectorize) { + : FunctionPass(ID) { + Impl.DisableUnrolling = NoUnrolling; + Impl.AlwaysVectorize = AlwaysVectorize; initializeLoopVectorizePass(*PassRegistry::getPassRegistry()); } - ScalarEvolution *SE; - LoopInfo *LI; - TargetTransformInfo *TTI; - DominatorTree *DT; - BlockFrequencyInfo *BFI; - TargetLibraryInfo *TLI; - DemandedBits *DB; - AliasAnalysis *AA; - AssumptionCache *AC; - LoopAccessLegacyAnalysis *LAA; - bool DisableUnrolling; - bool AlwaysVectorize; - - BlockFrequency ColdEntryFreq; + LoopVectorizePass Impl; bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; - SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); + auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - TLI = TLIP ? &TLIP->getTLI() : nullptr; - AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - LAA = &getAnalysis<LoopAccessLegacyAnalysis>(); - DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits(); - - // Compute some weights outside of the loop over the loops. Compute this - // using a BranchProbability to re-use its scaling math. - const BranchProbability ColdProb(1, 5); // 20% - ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb; - - // Don't attempt if - // 1. the target claims to have no vector registers, and - // 2. interleaving won't help ILP. - // - // The second condition is necessary because, even if the target has no - // vector registers, loop vectorization may still enable scalar - // interleaving. - if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2) - return false; - - // Build up a worklist of inner-loops to vectorize. This is necessary as - // the act of vectorizing or partially unrolling a loop creates new loops - // and can invalidate iterators across the loops. - SmallVector<Loop *, 8> Worklist; - - for (Loop *L : *LI) - addInnerLoop(*L, Worklist); - - LoopsAnalyzed += Worklist.size(); - - // Now walk the identified inner loops. - bool Changed = false; - while (!Worklist.empty()) - Changed |= processLoop(Worklist.pop_back_val()); - - // Process each loop nest in the function. - return Changed; - } - - static void AddRuntimeUnrollDisableMetaData(Loop *L) { - SmallVector<Metadata *, 4> MDs; - // Reserve first location for self reference to the LoopID metadata node. - MDs.push_back(nullptr); - bool IsUnrollMetadata = false; - MDNode *LoopID = L->getLoopID(); - if (LoopID) { - // First find existing loop unrolling disable metadata. - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); - if (MD) { - const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); - IsUnrollMetadata = - S && S->getString().startswith("llvm.loop.unroll.disable"); - } - MDs.push_back(LoopID->getOperand(i)); - } - } + auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; + auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>(); + auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits(); - if (!IsUnrollMetadata) { - // Add runtime unroll disable metadata. - LLVMContext &Context = L->getHeader()->getContext(); - SmallVector<Metadata *, 1> DisableOperands; - DisableOperands.push_back( - MDString::get(Context, "llvm.loop.unroll.runtime.disable")); - MDNode *DisableNode = MDNode::get(Context, DisableOperands); - MDs.push_back(DisableNode); - MDNode *NewLoopID = MDNode::get(Context, MDs); - // Set operand 0 to refer to the loop id itself. - NewLoopID->replaceOperandWith(0, NewLoopID); - L->setLoopID(NewLoopID); - } - } - - bool processLoop(Loop *L) { - assert(L->empty() && "Only process inner loops."); - -#ifndef NDEBUG - const std::string DebugLocStr = getDebugLocString(L); -#endif /* NDEBUG */ - - DEBUG(dbgs() << "\nLV: Checking a loop in \"" - << L->getHeader()->getParent()->getName() << "\" from " - << DebugLocStr << "\n"); - - LoopVectorizeHints Hints(L, DisableUnrolling); - - DEBUG(dbgs() << "LV: Loop hints:" - << " force=" - << (Hints.getForce() == LoopVectorizeHints::FK_Disabled - ? "disabled" - : (Hints.getForce() == LoopVectorizeHints::FK_Enabled - ? "enabled" - : "?")) - << " width=" << Hints.getWidth() - << " unroll=" << Hints.getInterleave() << "\n"); - - // Function containing loop - Function *F = L->getHeader()->getParent(); - - // Looking at the diagnostic output is the only way to determine if a loop - // was vectorized (other than looking at the IR or machine code), so it - // is important to generate an optimization remark for each loop. Most of - // these messages are generated by emitOptimizationRemarkAnalysis. Remarks - // generated by emitOptimizationRemark and emitOptimizationRemarkMissed are - // less verbose reporting vectorized loops and unvectorized loops that may - // benefit from vectorization, respectively. - - if (!Hints.allowVectorization(F, L, AlwaysVectorize)) { - DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n"); - return false; - } - - // Check the loop for a trip count threshold: - // do not vectorize loops with a tiny trip count. - const unsigned TC = SE->getSmallConstantTripCount(L); - if (TC > 0u && TC < TinyTripCountVectorThreshold) { - DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " - << "This loop is not worth vectorizing."); - if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) - DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); - else { - DEBUG(dbgs() << "\n"); - emitAnalysisDiag(F, L, Hints, VectorizationReport() - << "vectorization is not beneficial " - "and is not explicitly forced"); - return false; - } - } + std::function<const LoopAccessInfo &(Loop &)> GetLAA = + [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); }; - PredicatedScalarEvolution PSE(*SE, *L); - - // Check if it is legal to vectorize the loop. - LoopVectorizationRequirements Requirements; - LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, LAA, LI, - &Requirements, &Hints); - if (!LVL.canVectorize()) { - DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); - emitMissedWarning(F, L, Hints); - return false; - } - - // Use the cost model. - LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F, - &Hints); - CM.collectValuesToIgnore(); - - // Check the function attributes to find out if this function should be - // optimized for size. - bool OptForSize = - Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize(); - - // Compute the weighted frequency of this loop being executed and see if it - // is less than 20% of the function entry baseline frequency. Note that we - // always have a canonical loop here because we think we *can* vectorize. - // FIXME: This is hidden behind a flag due to pervasive problems with - // exactly what block frequency models. - if (LoopVectorizeWithBlockFrequency) { - BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader()); - if (Hints.getForce() != LoopVectorizeHints::FK_Enabled && - LoopEntryFreq < ColdEntryFreq) - OptForSize = true; - } - - // Check the function attributes to see if implicit floats are allowed. - // FIXME: This check doesn't seem possibly correct -- what if the loop is - // an integer loop and the vector instructions selected are purely integer - // vector instructions? - if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { - DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" - "attribute is used.\n"); - emitAnalysisDiag( - F, L, Hints, - VectorizationReport() - << "loop not vectorized due to NoImplicitFloat attribute"); - emitMissedWarning(F, L, Hints); - return false; - } - - // Check if the target supports potentially unsafe FP vectorization. - // FIXME: Add a check for the type of safety issue (denormal, signaling) - // for the target we're vectorizing for, to make sure none of the - // additional fp-math flags can help. - if (Hints.isPotentiallyUnsafe() && - TTI->isFPVectorizationPotentiallyUnsafe()) { - DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n"); - emitAnalysisDiag(F, L, Hints, - VectorizationReport() - << "loop not vectorized due to unsafe FP support."); - emitMissedWarning(F, L, Hints); - return false; - } - - // Select the optimal vectorization factor. - const LoopVectorizationCostModel::VectorizationFactor VF = - CM.selectVectorizationFactor(OptForSize); - - // Select the interleave count. - unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost); - - // Get user interleave count. - unsigned UserIC = Hints.getInterleave(); - - // Identify the diagnostic messages that should be produced. - std::string VecDiagMsg, IntDiagMsg; - bool VectorizeLoop = true, InterleaveLoop = true; - - if (Requirements.doesNotMeet(F, L, Hints)) { - DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization " - "requirements.\n"); - emitMissedWarning(F, L, Hints); - return false; - } - - if (VF.Width == 1) { - DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); - VecDiagMsg = - "the cost-model indicates that vectorization is not beneficial"; - VectorizeLoop = false; - } - - if (IC == 1 && UserIC <= 1) { - // Tell the user interleaving is not beneficial. - DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n"); - IntDiagMsg = - "the cost-model indicates that interleaving is not beneficial"; - InterleaveLoop = false; - if (UserIC == 1) - IntDiagMsg += - " and is explicitly disabled or interleave count is set to 1"; - } else if (IC > 1 && UserIC == 1) { - // Tell the user interleaving is beneficial, but it explicitly disabled. - DEBUG(dbgs() - << "LV: Interleaving is beneficial but is explicitly disabled."); - IntDiagMsg = "the cost-model indicates that interleaving is beneficial " - "but is explicitly disabled or interleave count is set to 1"; - InterleaveLoop = false; - } - - // Override IC if user provided an interleave count. - IC = UserIC > 0 ? UserIC : IC; - - // Emit diagnostic messages, if any. - const char *VAPassName = Hints.vectorizeAnalysisPassName(); - if (!VectorizeLoop && !InterleaveLoop) { - // Do not vectorize or interleaving the loop. - emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, - L->getStartLoc(), VecDiagMsg); - emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, - L->getStartLoc(), IntDiagMsg); - return false; - } else if (!VectorizeLoop && InterleaveLoop) { - DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); - emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, - L->getStartLoc(), VecDiagMsg); - } else if (VectorizeLoop && !InterleaveLoop) { - DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " - << DebugLocStr << '\n'); - emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, - L->getStartLoc(), IntDiagMsg); - } else if (VectorizeLoop && InterleaveLoop) { - DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " - << DebugLocStr << '\n'); - DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); - } - - if (!VectorizeLoop) { - assert(IC > 1 && "interleave count should not be 1 or 0"); - // If we decided that it is not legal to vectorize the loop, then - // interleave it. - InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, IC); - Unroller.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore); - - emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), - Twine("interleaved loop (interleaved count: ") + - Twine(IC) + ")"); - } else { - // If we decided that it is *legal* to vectorize the loop, then do it. - InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, VF.Width, IC); - LB.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore); - ++LoopsVectorized; - - // Add metadata to disable runtime unrolling a scalar loop when there are - // no runtime checks about strides and memory. A scalar loop that is - // rarely used is not worth unrolling. - if (!LB.areSafetyChecksAdded()) - AddRuntimeUnrollDisableMetaData(L); - - // Report the vectorization decision. - emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), - Twine("vectorized loop (vectorization width: ") + - Twine(VF.Width) + ", interleaved count: " + - Twine(IC) + ")"); - } - - // Mark the loop as already vectorized to avoid vectorizing again. - Hints.setAlreadyVectorized(); - - DEBUG(verifyFunction(*L->getHeader()->getParent())); - return true; + return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC, + GetLAA); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -5126,7 +4816,7 @@ void LoopVectorizationLegality::collectLoopUniforms() { } bool LoopVectorizationLegality::canVectorizeMemory() { - LAI = &LAA->getInfo(TheLoop); + LAI = &(*GetLAA)(*TheLoop); InterleaveInfo.setLAI(LAI); auto &OptionalReport = LAI->getReport(); if (OptionalReport) @@ -6663,3 +6353,346 @@ Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) { Constant *C = ConstantInt::get(ITy, StartIdx); return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction"); } + +static void AddRuntimeUnrollDisableMetaData(Loop *L) { + SmallVector<Metadata *, 4> MDs; + // Reserve first location for self reference to the LoopID metadata node. + MDs.push_back(nullptr); + bool IsUnrollMetadata = false; + MDNode *LoopID = L->getLoopID(); + if (LoopID) { + // First find existing loop unrolling disable metadata. + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); + if (MD) { + const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); + IsUnrollMetadata = + S && S->getString().startswith("llvm.loop.unroll.disable"); + } + MDs.push_back(LoopID->getOperand(i)); + } + } + + if (!IsUnrollMetadata) { + // Add runtime unroll disable metadata. + LLVMContext &Context = L->getHeader()->getContext(); + SmallVector<Metadata *, 1> DisableOperands; + DisableOperands.push_back( + MDString::get(Context, "llvm.loop.unroll.runtime.disable")); + MDNode *DisableNode = MDNode::get(Context, DisableOperands); + MDs.push_back(DisableNode); + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + L->setLoopID(NewLoopID); + } +} + +bool LoopVectorizePass::processLoop(Loop *L) { + assert(L->empty() && "Only process inner loops."); + +#ifndef NDEBUG + const std::string DebugLocStr = getDebugLocString(L); +#endif /* NDEBUG */ + + DEBUG(dbgs() << "\nLV: Checking a loop in \"" + << L->getHeader()->getParent()->getName() << "\" from " + << DebugLocStr << "\n"); + + LoopVectorizeHints Hints(L, DisableUnrolling); + + DEBUG(dbgs() << "LV: Loop hints:" + << " force=" + << (Hints.getForce() == LoopVectorizeHints::FK_Disabled + ? "disabled" + : (Hints.getForce() == LoopVectorizeHints::FK_Enabled + ? "enabled" + : "?")) + << " width=" << Hints.getWidth() + << " unroll=" << Hints.getInterleave() << "\n"); + + // Function containing loop + Function *F = L->getHeader()->getParent(); + + // Looking at the diagnostic output is the only way to determine if a loop + // was vectorized (other than looking at the IR or machine code), so it + // is important to generate an optimization remark for each loop. Most of + // these messages are generated by emitOptimizationRemarkAnalysis. Remarks + // generated by emitOptimizationRemark and emitOptimizationRemarkMissed are + // less verbose reporting vectorized loops and unvectorized loops that may + // benefit from vectorization, respectively. + + if (!Hints.allowVectorization(F, L, AlwaysVectorize)) { + DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n"); + return false; + } + + // Check the loop for a trip count threshold: + // do not vectorize loops with a tiny trip count. + const unsigned TC = SE->getSmallConstantTripCount(L); + if (TC > 0u && TC < TinyTripCountVectorThreshold) { + DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " + << "This loop is not worth vectorizing."); + if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) + DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); + else { + DEBUG(dbgs() << "\n"); + emitAnalysisDiag(F, L, Hints, VectorizationReport() + << "vectorization is not beneficial " + "and is not explicitly forced"); + return false; + } + } + + PredicatedScalarEvolution PSE(*SE, *L); + + // Check if it is legal to vectorize the loop. + LoopVectorizationRequirements Requirements; + LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI, + &Requirements, &Hints); + if (!LVL.canVectorize()) { + DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); + emitMissedWarning(F, L, Hints); + return false; + } + + // Use the cost model. + LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F, + &Hints); + CM.collectValuesToIgnore(); + + // Check the function attributes to find out if this function should be + // optimized for size. + bool OptForSize = + Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize(); + + // Compute the weighted frequency of this loop being executed and see if it + // is less than 20% of the function entry baseline frequency. Note that we + // always have a canonical loop here because we think we *can* vectorize. + // FIXME: This is hidden behind a flag due to pervasive problems with + // exactly what block frequency models. + if (LoopVectorizeWithBlockFrequency) { + BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader()); + if (Hints.getForce() != LoopVectorizeHints::FK_Enabled && + LoopEntryFreq < ColdEntryFreq) + OptForSize = true; + } + + // Check the function attributes to see if implicit floats are allowed. + // FIXME: This check doesn't seem possibly correct -- what if the loop is + // an integer loop and the vector instructions selected are purely integer + // vector instructions? + if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { + DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" + "attribute is used.\n"); + emitAnalysisDiag( + F, L, Hints, + VectorizationReport() + << "loop not vectorized due to NoImplicitFloat attribute"); + emitMissedWarning(F, L, Hints); + return false; + } + + // Check if the target supports potentially unsafe FP vectorization. + // FIXME: Add a check for the type of safety issue (denormal, signaling) + // for the target we're vectorizing for, to make sure none of the + // additional fp-math flags can help. + if (Hints.isPotentiallyUnsafe() && + TTI->isFPVectorizationPotentiallyUnsafe()) { + DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n"); + emitAnalysisDiag(F, L, Hints, + VectorizationReport() + << "loop not vectorized due to unsafe FP support."); + emitMissedWarning(F, L, Hints); + return false; + } + + // Select the optimal vectorization factor. + const LoopVectorizationCostModel::VectorizationFactor VF = + CM.selectVectorizationFactor(OptForSize); + + // Select the interleave count. + unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost); + + // Get user interleave count. + unsigned UserIC = Hints.getInterleave(); + + // Identify the diagnostic messages that should be produced. + std::string VecDiagMsg, IntDiagMsg; + bool VectorizeLoop = true, InterleaveLoop = true; + + if (Requirements.doesNotMeet(F, L, Hints)) { + DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization " + "requirements.\n"); + emitMissedWarning(F, L, Hints); + return false; + } + + if (VF.Width == 1) { + DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); + VecDiagMsg = + "the cost-model indicates that vectorization is not beneficial"; + VectorizeLoop = false; + } + + if (IC == 1 && UserIC <= 1) { + // Tell the user interleaving is not beneficial. + DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n"); + IntDiagMsg = + "the cost-model indicates that interleaving is not beneficial"; + InterleaveLoop = false; + if (UserIC == 1) + IntDiagMsg += + " and is explicitly disabled or interleave count is set to 1"; + } else if (IC > 1 && UserIC == 1) { + // Tell the user interleaving is beneficial, but it explicitly disabled. + DEBUG(dbgs() + << "LV: Interleaving is beneficial but is explicitly disabled."); + IntDiagMsg = "the cost-model indicates that interleaving is beneficial " + "but is explicitly disabled or interleave count is set to 1"; + InterleaveLoop = false; + } + + // Override IC if user provided an interleave count. + IC = UserIC > 0 ? UserIC : IC; + + // Emit diagnostic messages, if any. + const char *VAPassName = Hints.vectorizeAnalysisPassName(); + if (!VectorizeLoop && !InterleaveLoop) { + // Do not vectorize or interleaving the loop. + emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, + L->getStartLoc(), VecDiagMsg); + emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, + L->getStartLoc(), IntDiagMsg); + return false; + } else if (!VectorizeLoop && InterleaveLoop) { + DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, + L->getStartLoc(), VecDiagMsg); + } else if (VectorizeLoop && !InterleaveLoop) { + DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " + << DebugLocStr << '\n'); + emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, + L->getStartLoc(), IntDiagMsg); + } else if (VectorizeLoop && InterleaveLoop) { + DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " + << DebugLocStr << '\n'); + DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + } + + if (!VectorizeLoop) { + assert(IC > 1 && "interleave count should not be 1 or 0"); + // If we decided that it is not legal to vectorize the loop, then + // interleave it. + InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, IC); + Unroller.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore); + + emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), + Twine("interleaved loop (interleaved count: ") + + Twine(IC) + ")"); + } else { + // If we decided that it is *legal* to vectorize the loop, then do it. + InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, VF.Width, IC); + LB.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore); + ++LoopsVectorized; + + // Add metadata to disable runtime unrolling a scalar loop when there are + // no runtime checks about strides and memory. A scalar loop that is + // rarely used is not worth unrolling. + if (!LB.areSafetyChecksAdded()) + AddRuntimeUnrollDisableMetaData(L); + + // Report the vectorization decision. + emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), + Twine("vectorized loop (vectorization width: ") + + Twine(VF.Width) + ", interleaved count: " + + Twine(IC) + ")"); + } + + // Mark the loop as already vectorized to avoid vectorizing again. + Hints.setAlreadyVectorized(); + + DEBUG(verifyFunction(*L->getHeader()->getParent())); + return true; +} + +bool LoopVectorizePass::runImpl( + Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_, + DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, + DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_, + std::function<const LoopAccessInfo &(Loop &)> &GetLAA_) { + + SE = &SE_; + LI = &LI_; + TTI = &TTI_; + DT = &DT_; + BFI = &BFI_; + TLI = TLI_; + AA = &AA_; + AC = &AC_; + GetLAA = &GetLAA_; + DB = &DB_; + + // Compute some weights outside of the loop over the loops. Compute this + // using a BranchProbability to re-use its scaling math. + const BranchProbability ColdProb(1, 5); // 20% + ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb; + + // Don't attempt if + // 1. the target claims to have no vector registers, and + // 2. interleaving won't help ILP. + // + // The second condition is necessary because, even if the target has no + // vector registers, loop vectorization may still enable scalar + // interleaving. + if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2) + return false; + + // Build up a worklist of inner-loops to vectorize. This is necessary as + // the act of vectorizing or partially unrolling a loop creates new loops + // and can invalidate iterators across the loops. + SmallVector<Loop *, 8> Worklist; + + for (Loop *L : *LI) + addInnerLoop(*L, Worklist); + + LoopsAnalyzed += Worklist.size(); + + // Now walk the identified inner loops. + bool Changed = false; + while (!Worklist.empty()) + Changed |= processLoop(Worklist.pop_back_val()); + + // Process each loop nest in the function. + return Changed; + +} + + +PreservedAnalyses LoopVectorizePass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &TTI = AM.getResult<TargetIRAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &BFI = AM.getResult<BlockFrequencyAnalysis>(F); + auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F); + auto &AA = AM.getResult<AAManager>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + auto &DB = AM.getResult<DemandedBitsAnalysis>(F); + + auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); + std::function<const LoopAccessInfo &(Loop &)> GetLAA = + [&](Loop &L) -> const LoopAccessInfo & { + return LAM.getResult<LoopAccessAnalysis>(L); + }; + bool Changed = runImpl(F, SE, LI, TTI, DT, BFI, TLI, DB, AA, AC, GetLAA); + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<LoopAnalysis>(); + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<BasicAA>(); + PA.preserve<GlobalsAA>(); + return PA; +} diff --git a/llvm/test/Transforms/LoopVectorize/conditional-assignment.ll b/llvm/test/Transforms/LoopVectorize/conditional-assignment.ll index a60e8a20cd4..0115b09582f 100644 --- a/llvm/test/Transforms/LoopVectorize/conditional-assignment.ll +++ b/llvm/test/Transforms/LoopVectorize/conditional-assignment.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s +; RUN: opt < %s -passes=loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s ; CHECK: remark: source.c:2:8: loop not vectorized: store that is conditionally executed prevents vectorization |