diff options
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopDistribute.cpp | 50 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp | 77 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 41 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/Scalar.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp | 144 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopUnroll.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp | 15 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 35 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopUtils.cpp | 263 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 73 |
13 files changed, 633 insertions, 83 deletions
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 4a6b4aa1d56..e625433a8e4 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -702,6 +702,8 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLICMPass()); } + MPM.add(createWarnMissedTransformationsPass()); + // After vectorization and unrolling, assume intrinsics may tell us more // about pointer alignments. MPM.add(createAlignmentFromAssumptionsPass()); @@ -877,6 +879,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { if (!DisableUnrollLoops) PM.add(createLoopUnrollPass(OptLevel)); + PM.add(createWarnMissedTransformationsPass()); + // Now that we've optimized loops (in particular loop induction variables), // we may have exposed more scalar opportunities. Run parts of the scalar // optimizer again at this point. diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index 1dcbaae5343..e3548ce5cd0 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -69,6 +69,7 @@ add_llvm_library(LLVMScalarOpts StraightLineStrengthReduce.cpp StructurizeCFG.cpp TailRecursionElimination.cpp + WarnMissedTransforms.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp index 06083a4f508..d797c9dc9e7 100644 --- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp @@ -78,6 +78,18 @@ using namespace llvm; #define LDIST_NAME "loop-distribute" #define DEBUG_TYPE LDIST_NAME +/// @{ +/// Metadata attribute names +static const char *const LLVMLoopDistributeFollowupAll = + "llvm.loop.distribute.followup_all"; +static const char *const LLVMLoopDistributeFollowupCoincident = + "llvm.loop.distribute.followup_coincident"; +static const char *const LLVMLoopDistributeFollowupSequential = + "llvm.loop.distribute.followup_sequential"; +static const char *const LLVMLoopDistributeFollowupFallback = + "llvm.loop.distribute.followup_fallback"; +/// @} + static cl::opt<bool> LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification " @@ -186,7 +198,7 @@ public: /// Returns the loop where this partition ends up after distribution. /// If this partition is mapped to the original loop then use the block from /// the loop. - const Loop *getDistributedLoop() const { + Loop *getDistributedLoop() const { return ClonedLoop ? ClonedLoop : OrigLoop; } @@ -443,6 +455,9 @@ public: assert(&*OrigPH->begin() == OrigPH->getTerminator() && "preheader not empty"); + // Preserve the original loop ID for use after the transformation. + MDNode *OrigLoopID = L->getLoopID(); + // Create a loop for each partition except the last. Clone the original // loop before PH along with adding a preheader for the cloned loop. Then // update PH to point to the newly added preheader. @@ -457,9 +472,13 @@ public: Part->getVMap()[ExitBlock] = TopPH; Part->remapInstructions(); + setNewLoopID(OrigLoopID, Part); } Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH); + // Also set a new loop ID for the last loop. + setNewLoopID(OrigLoopID, &PartitionContainer.back()); + // Now go in forward order and update the immediate dominator for the // preheaders with the exiting block of the previous loop. Dominance // within the loop is updated in cloneLoopWithPreheader. @@ -575,6 +594,19 @@ private: } } } + + /// Assign new LoopIDs for the partition's cloned loop. + void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) { + Optional<MDNode *> PartitionID = makeFollowupLoopID( + OrigLoopID, + {LLVMLoopDistributeFollowupAll, + Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential + : LLVMLoopDistributeFollowupCoincident}); + if (PartitionID.hasValue()) { + Loop *NewLoop = Part->getDistributedLoop(); + NewLoop->setLoopID(PartitionID.getValue()); + } + } }; /// For each memory instruction, this class maintains difference of the @@ -743,6 +775,9 @@ public: return fail("TooManySCEVRuntimeChecks", "too many SCEV run-time checks needed.\n"); + if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L)) + return fail("HeuristicDisabled", "distribution heuristic disabled"); + LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n"); // We're done forming the partitions set up the reverse mapping from // instructions to partitions. @@ -762,6 +797,8 @@ public: RtPtrChecking); if (!Pred.isAlwaysTrue() || !Checks.empty()) { + MDNode *OrigLoopID = L->getLoopID(); + LLVM_DEBUG(dbgs() << "\nPointers:\n"); LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks)); LoopVersioning LVer(*LAI, L, LI, DT, SE, false); @@ -769,6 +806,17 @@ public: LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate()); LVer.versionLoop(DefsUsedOutside); LVer.annotateLoopWithNoAlias(); + + // The unversioned loop will not be changed, so we inherit all attributes + // from the original loop, but remove the loop distribution metadata to + // avoid to distribute it again. + MDNode *UnversionedLoopID = + makeFollowupLoopID(OrigLoopID, + {LLVMLoopDistributeFollowupAll, + LLVMLoopDistributeFollowupFallback}, + "llvm.loop.distribute.", true) + .getValue(); + LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID); } // Create identical copies of the original loop for each partition and hook diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 30dfb9b5dd2..da46210b6fd 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -56,6 +56,20 @@ using namespace llvm; #define DEBUG_TYPE "loop-unroll-and-jam" +/// @{ +/// Metadata attribute names +static const char *const LLVMLoopUnrollAndJamFollowupAll = + "llvm.loop.unroll_and_jam.followup_all"; +static const char *const LLVMLoopUnrollAndJamFollowupInner = + "llvm.loop.unroll_and_jam.followup_inner"; +static const char *const LLVMLoopUnrollAndJamFollowupOuter = + "llvm.loop.unroll_and_jam.followup_outer"; +static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner = + "llvm.loop.unroll_and_jam.followup_remainder_inner"; +static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter = + "llvm.loop.unroll_and_jam.followup_remainder_outer"; +/// @} + static cl::opt<bool> AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, cl::desc("Allows loops to be unroll-and-jammed.")); @@ -112,11 +126,6 @@ static bool HasUnrollAndJamEnablePragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable"); } -// Returns true if the loop has an unroll_and_jam(disable) pragma. -static bool HasUnrollAndJamDisablePragma(const Loop *L) { - return GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.disable"); -} - // If loop has an unroll_and_jam_count pragma return the (necessarily // positive) value from the pragma. Otherwise return 0. static unsigned UnrollAndJamCountPragmaValue(const Loop *L) { @@ -299,13 +308,16 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); + TransformationMode EnableMode = hasUnrollAndJamTransformation(L); + if (EnableMode & TM_Disable) + return LoopUnrollResult::Unmodified; + // A loop with any unroll pragma (enabling/disabling/count/etc) is left for // the unroller, so long as it does not explicitly have unroll_and_jam // metadata. This means #pragma nounroll will disable unroll and jam as well // as unrolling - if (HasUnrollAndJamDisablePragma(L) || - (HasAnyUnrollPragma(L, "llvm.loop.unroll.") && - !HasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam."))) { + if (HasAnyUnrollPragma(L, "llvm.loop.unroll.") && + !HasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) { LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n"); return LoopUnrollResult::Unmodified; } @@ -344,6 +356,19 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, return LoopUnrollResult::Unmodified; } + // Save original loop IDs for after the transformation. + MDNode *OrigOuterLoopID = L->getLoopID(); + MDNode *OrigSubLoopID = SubLoop->getLoopID(); + + // To assign the loop id of the epilogue, assign it before unrolling it so it + // is applied to every inner loop of the epilogue. We later apply the loop ID + // for the jammed inner loop. + Optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID( + OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, + LLVMLoopUnrollAndJamFollowupRemainderInner}); + if (NewInnerEpilogueLoopID.hasValue()) + SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue()); + // Find trip count and trip multiple unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch); unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch); @@ -359,9 +384,39 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, if (OuterTripCount && UP.Count > OuterTripCount) UP.Count = OuterTripCount; - LoopUnrollResult UnrollResult = - UnrollAndJamLoop(L, UP.Count, OuterTripCount, OuterTripMultiple, - UP.UnrollRemainder, LI, &SE, &DT, &AC, &ORE); + Loop *EpilogueOuterLoop = nullptr; + LoopUnrollResult UnrollResult = UnrollAndJamLoop( + L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI, + &SE, &DT, &AC, &ORE, &EpilogueOuterLoop); + + // Assign new loop attributes. + if (EpilogueOuterLoop) { + Optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID( + OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, + LLVMLoopUnrollAndJamFollowupRemainderOuter}); + if (NewOuterEpilogueLoopID.hasValue()) + EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue()); + } + + Optional<MDNode *> NewInnerLoopID = + makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, + LLVMLoopUnrollAndJamFollowupInner}); + if (NewInnerLoopID.hasValue()) + SubLoop->setLoopID(NewInnerLoopID.getValue()); + else + SubLoop->setLoopID(OrigSubLoopID); + + if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) { + Optional<MDNode *> NewOuterLoopID = makeFollowupLoopID( + OrigOuterLoopID, + {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter}); + if (NewOuterLoopID.hasValue()) { + L->setLoopID(NewOuterLoopID.getValue()); + + // Do not setLoopAlreadyUnrolled if a followup was given. + return UnrollResult; + } + } // If loop has an unroll count pragma or unrolled by explicitly set count // mark loop as unrolled to prevent unrolling beyond that requested. diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index d10dae124a7..b7baba6b928 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -661,11 +661,6 @@ static bool HasUnrollEnablePragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable"); } -// Returns true if the loop has an unroll(disable) pragma. -static bool HasUnrollDisablePragma(const Loop *L) { - return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable"); -} - // Returns true if the loop has an runtime unroll(disable) pragma. static bool HasRuntimeUnrollDisablePragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable"); @@ -713,12 +708,19 @@ static uint64_t getUnrolledLoopSize( // Returns true if unroll count was set explicitly. // Calculates unroll count and writes it to UP.Count. +// Unless IgnoreUser is true, will also use metadata and command-line options +// that are specific to to the LoopUnroll pass (which, for instance, are +// irrelevant for the LoopUnrollAndJam pass). +// FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes +// many LoopUnroll-specific options. The shared functionality should be +// refactored into it own function. bool llvm::computeUnrollCount( Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) { + // Check for explicit Count. // 1st priority is unroll count set by "unroll-count" option. bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; @@ -969,7 +971,7 @@ static LoopUnrollResult tryToUnrollLoop( LLVM_DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); - if (HasUnrollDisablePragma(L)) + if (hasUnrollTransformation(L) & TM_Disable) return LoopUnrollResult::Unmodified; if (!L->isLoopSimplifyForm()) { LLVM_DEBUG( @@ -1066,14 +1068,39 @@ static LoopUnrollResult tryToUnrollLoop( if (TripCount && UP.Count > TripCount) UP.Count = TripCount; + // Save loop properties before it is transformed. + MDNode *OrigLoopID = L->getLoopID(); + // Unroll the loop. + Loop *RemainderLoop = nullptr; LoopUnrollResult UnrollResult = UnrollLoop( L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder, - LI, &SE, &DT, &AC, &ORE, PreserveLCSSA); + LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop); if (UnrollResult == LoopUnrollResult::Unmodified) return LoopUnrollResult::Unmodified; + if (RemainderLoop) { + Optional<MDNode *> RemainderLoopID = + makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll, + LLVMLoopUnrollFollowupRemainder}); + if (RemainderLoopID.hasValue()) + RemainderLoop->setLoopID(RemainderLoopID.getValue()); + } + + if (UnrollResult != LoopUnrollResult::FullyUnrolled) { + Optional<MDNode *> NewLoopID = + makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll, + LLVMLoopUnrollFollowupUnrolled}); + if (NewLoopID.hasValue()) { + L->setLoopID(NewLoopID.getValue()); + + // Do not setLoopAlreadyUnrolled if loop attributes have been specified + // explicitly. + return UnrollResult; + } + } + // If loop has an unroll count pragma or unrolled by explicitly set count // mark loop as unrolled to prevent unrolling beyond that requested. // If the loop was peeled, we already "used up" the profile information diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index 06e86081e8a..c0c59d24dff 100644 --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -594,6 +594,11 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipLoop(L)) return false; + + // Do not do the transformation if disabled by metadata. + if (hasLICMVersioningTransformation(L) & TM_Disable) + return false; + // Get Analysis information. AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 4a965e8df83..976daf4c78c 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -75,6 +75,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopUnrollPass(Registry); initializeLoopUnrollAndJamPass(Registry); initializeLoopUnswitchPass(Registry); + initializeWarnMissedTransformationsLegacyPass(Registry); initializeLoopVersioningLICMPass(Registry); initializeLoopIdiomRecognizeLegacyPassPass(Registry); initializeLowerAtomicLegacyPassPass(Registry); diff --git a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp new file mode 100644 index 00000000000..d06dceecefa --- /dev/null +++ b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp @@ -0,0 +1,144 @@ +//===- LoopTransformWarning.cpp - ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Emit warnings if forced code transformations have not been performed. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/WarnMissedTransforms.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "transform-warning" + +/// Emit warnings for forced (i.e. user-defined) loop transformations which have +/// still not been performed. +static void warnAboutLeftoverTransformations(Loop *L, + OptimizationRemarkEmitter *ORE) { + if (hasUnrollTransformation(L) == TM_ForcedByUser) { + LLVM_DEBUG(dbgs() << "Leftover unroll transformation\n"); + ORE->emit( + DiagnosticInfoOptimizationFailure(DEBUG_TYPE, + "FailedRequestedUnrolling", + L->getStartLoc(), L->getHeader()) + << "loop not unrolled: the optimizer was unable to perform the " + "requested transformation; the transformation might be disabled or " + "specified as part of an unsupported transformation ordering"); + } + + if (hasUnrollAndJamTransformation(L) == TM_ForcedByUser) { + LLVM_DEBUG(dbgs() << "Leftover unroll-and-jam transformation\n"); + ORE->emit( + DiagnosticInfoOptimizationFailure(DEBUG_TYPE, + "FailedRequestedUnrollAndJamming", + L->getStartLoc(), L->getHeader()) + << "loop not unroll-and-jammed: the optimizer was unable to perform " + "the requested transformation; the transformation might be disabled " + "or specified as part of an unsupported transformation ordering"); + } + + if (hasVectorizeTransformation(L) == TM_ForcedByUser) { + LLVM_DEBUG(dbgs() << "Leftover vectorization transformation\n"); + Optional<int> VectorizeWidth = + getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width"); + Optional<int> InterleaveCount = + getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count"); + + if (VectorizeWidth.getValueOr(0) != 1) + ORE->emit( + DiagnosticInfoOptimizationFailure(DEBUG_TYPE, + "FailedRequestedVectorization", + L->getStartLoc(), L->getHeader()) + << "loop not vectorized: the optimizer was unable to perform the " + "requested transformation; the transformation might be disabled " + "or specified as part of an unsupported transformation ordering"); + else if (InterleaveCount.getValueOr(0) != 1) + ORE->emit( + DiagnosticInfoOptimizationFailure(DEBUG_TYPE, + "FailedRequestedInterleaving", + L->getStartLoc(), L->getHeader()) + << "loop not interleaved: the optimizer was unable to perform the " + "requested transformation; the transformation might be disabled " + "or specified as part of an unsupported transformation ordering"); + } + + if (hasDistributeTransformation(L) == TM_ForcedByUser) { + LLVM_DEBUG(dbgs() << "Leftover distribute transformation\n"); + ORE->emit( + DiagnosticInfoOptimizationFailure(DEBUG_TYPE, + "FailedRequestedDistribution", + L->getStartLoc(), L->getHeader()) + << "loop not distributed: the optimizer was unable to perform the " + "requested transformation; the transformation might be disabled or " + "specified as part of an unsupported transformation ordering"); + } +} + +static void warnAboutLeftoverTransformations(Function *F, LoopInfo *LI, + OptimizationRemarkEmitter *ORE) { + for (auto *L : LI->getLoopsInPreorder()) + warnAboutLeftoverTransformations(L, ORE); +} + +// New pass manager boilerplate +PreservedAnalyses +WarnMissedTransformationsPass::run(Function &F, FunctionAnalysisManager &AM) { + auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); + auto &LI = AM.getResult<LoopAnalysis>(F); + + warnAboutLeftoverTransformations(&F, &LI, &ORE); + + return PreservedAnalyses::all(); +} + +// Legacy pass manager boilerplate +namespace { +class WarnMissedTransformationsLegacy : public FunctionPass { +public: + static char ID; + + explicit WarnMissedTransformationsLegacy() : FunctionPass(ID) { + initializeWarnMissedTransformationsLegacyPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + + auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + + warnAboutLeftoverTransformations(&F, &LI, &ORE); + return false; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + + AU.setPreservesAll(); + } +}; +} // end anonymous namespace + +char WarnMissedTransformationsLegacy::ID = 0; + +INITIALIZE_PASS_BEGIN(WarnMissedTransformationsLegacy, "transform-warning", + "Warn about non-applied transformations", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) +INITIALIZE_PASS_END(WarnMissedTransformationsLegacy, "transform-warning", + "Warn about non-applied transformations", false, false) + +Pass *llvm::createWarnMissedTransformationsPass() { + return new WarnMissedTransformationsLegacy(); +} diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 877e0e4dcf9..efd8b92e814 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -329,12 +329,15 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, /// /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and /// DominatorTree if they are non-null. +/// +/// If RemainderLoop is non-null, it will receive the remainder loop (if +/// required and not fully unrolled). LoopUnrollResult llvm::UnrollLoop( Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, - OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) { + OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { @@ -468,7 +471,7 @@ LoopUnrollResult llvm::UnrollLoop( if (RuntimeTripCount && TripMultiple % Count != 0 && !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount, EpilogProfitability, UnrollRemainder, LI, SE, - DT, AC, PreserveLCSSA)) { + DT, AC, PreserveLCSSA, RemainderLoop)) { if (Force) RuntimeTripCount = false; else { diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp index 8949c603a84..b5d80f669fb 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -167,12 +167,14 @@ static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header, isSafeToUnrollAndJam should be used prior to calling this to make sure the unrolling will be valid. Checking profitablility is also advisable. + + If EpilogueLoop is non-null, it receives the epilogue loop (if it was + necessary to create one and not fully unrolled). */ -LoopUnrollResult -llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, - unsigned TripMultiple, bool UnrollRemainder, - LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, OptimizationRemarkEmitter *ORE) { +LoopUnrollResult llvm::UnrollAndJamLoop( + Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, + bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) { // When we enter here we should have already checked that it is safe BasicBlock *Header = L->getHeader(); @@ -196,7 +198,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, if (TripMultiple == 1 || TripMultiple % Count != 0) { if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false, /*UseEpilogRemainder*/ true, - UnrollRemainder, LI, SE, DT, AC, true)) { + UnrollRemainder, LI, SE, DT, AC, true, + EpilogueLoop)) { LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be " "generated when assuming runtime trip count\n"); return LoopUnrollResult::Unmodified; diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 3361883acd0..3606ec4b9fc 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -380,6 +380,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, } if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; + MDNode *LoopID = NewLoop->getLoopID(); assert(NewLoop && "L should have been cloned"); // Only add loop metadata if the loop is not going to be completely @@ -387,6 +388,16 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, if (UnrollRemainder) return NewLoop; + Optional<MDNode *> NewLoopID = makeFollowupLoopID( + LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); + if (NewLoopID.hasValue()) { + NewLoop->setLoopID(NewLoopID.getValue()); + + // Do not setLoopAlreadyUnrolled if loop attributes have been defined + // explicitly. + return NewLoop; + } + // Add unroll disable metadata to disable future unrolling for this loop. NewLoop->setLoopAlreadyUnrolled(); return NewLoop; @@ -525,10 +536,10 @@ static bool canProfitablyUnrollMultiExitLoop( bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, - bool UnrollRemainder, - LoopInfo *LI, ScalarEvolution *SE, - DominatorTree *DT, AssumptionCache *AC, - bool PreserveLCSSA) { + bool UnrollRemainder, LoopInfo *LI, + ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, bool PreserveLCSSA, + Loop **ResultLoop) { LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); LLVM_DEBUG(L->dump()); LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" @@ -911,16 +922,20 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA); } + auto UnrollResult = LoopUnrollResult::Unmodified; if (remainderLoop && UnrollRemainder) { LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n"); - UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1, - /*Force*/ false, /*AllowRuntime*/ false, - /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, - /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, - /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC, - /*ORE*/ nullptr, PreserveLCSSA); + UnrollResult = + UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1, + /*Force*/ false, /*AllowRuntime*/ false, + /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, + /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, + /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC, + /*ORE*/ nullptr, PreserveLCSSA); } + if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled) + *ResultLoop = remainderLoop; NumRuntimeUnrolled++; return true; } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 249869e1bde..388553b1783 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -42,6 +42,8 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-utils" +static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; + bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { bool Changed = false; @@ -183,14 +185,8 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) { INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) } -/// Find string metadata for loop -/// -/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an -/// operand or null otherwise. If the string metadata is not found return -/// Optional's not-a-value. -Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, - StringRef Name) { - MDNode *LoopID = TheLoop->getLoopID(); +static Optional<MDNode *> findOptionMDForLoopID(MDNode *LoopID, + StringRef Name) { // Return none if LoopID is false. if (!LoopID) return None; @@ -209,18 +205,253 @@ Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, continue; // Return true if MDString holds expected MetaData. if (Name.equals(S->getString())) - switch (MD->getNumOperands()) { - case 1: - return nullptr; - case 2: - return &MD->getOperand(1); - default: - llvm_unreachable("loop metadata has 0 or 1 operand"); - } + return MD; } return None; } +static Optional<MDNode *> findOptionMDForLoop(const Loop *TheLoop, + StringRef Name) { + return findOptionMDForLoopID(TheLoop->getLoopID(), Name); +} + +/// Find string metadata for loop +/// +/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an +/// operand or null otherwise. If the string metadata is not found return +/// Optional's not-a-value. +Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, + StringRef Name) { + auto MD = findOptionMDForLoop(TheLoop, Name).getValueOr(nullptr); + if (!MD) + return None; + switch (MD->getNumOperands()) { + case 1: + return nullptr; + case 2: + return &MD->getOperand(1); + default: + llvm_unreachable("loop metadata has 0 or 1 operand"); + } +} + +static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop, + StringRef Name) { + Optional<MDNode *> MD = findOptionMDForLoop(TheLoop, Name); + if (!MD.hasValue()) + return None; + MDNode *OptionNode = MD.getValue(); + if (OptionNode == nullptr) + return None; + switch (OptionNode->getNumOperands()) { + case 1: + // When the value is absent it is interpreted as 'attribute set'. + return true; + case 2: + return mdconst::extract_or_null<ConstantInt>( + OptionNode->getOperand(1).get()); + } + llvm_unreachable("unexpected number of options"); +} + +static bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) { + return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false); +} + +llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop, + StringRef Name) { + const MDOperand *AttrMD = + findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr); + if (!AttrMD) + return None; + + ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get()); + if (!IntMD) + return None; + + return IntMD->getSExtValue(); +} + +Optional<MDNode *> llvm::makeFollowupLoopID( + MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions, + const char *InheritOptionsExceptPrefix, bool AlwaysNew) { + if (!OrigLoopID) { + if (AlwaysNew) + return nullptr; + return None; + } + + assert(OrigLoopID->getOperand(0) == OrigLoopID); + + bool InheritAllAttrs = !InheritOptionsExceptPrefix; + bool InheritSomeAttrs = + InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0'; + SmallVector<Metadata *, 8> MDs; + MDs.push_back(nullptr); + + bool Changed = false; + if (InheritAllAttrs || InheritSomeAttrs) { + for (const MDOperand &Existing : drop_begin(OrigLoopID->operands(), 1)) { + MDNode *Op = cast<MDNode>(Existing.get()); + + auto InheritThisAttribute = [InheritSomeAttrs, + InheritOptionsExceptPrefix](MDNode *Op) { + if (!InheritSomeAttrs) + return false; + + // Skip malformatted attribute metadata nodes. + if (Op->getNumOperands() == 0) + return true; + Metadata *NameMD = Op->getOperand(0).get(); + if (!isa<MDString>(NameMD)) + return true; + StringRef AttrName = cast<MDString>(NameMD)->getString(); + + // Do not inherit excluded attributes. + return !AttrName.startswith(InheritOptionsExceptPrefix); + }; + + if (InheritThisAttribute(Op)) + MDs.push_back(Op); + else + Changed = true; + } + } else { + // Modified if we dropped at least one attribute. + Changed = OrigLoopID->getNumOperands() > 1; + } + + bool HasAnyFollowup = false; + for (StringRef OptionName : FollowupOptions) { + MDNode *FollowupNode = + findOptionMDForLoopID(OrigLoopID, OptionName).getValueOr(nullptr); + if (!FollowupNode) + continue; + + HasAnyFollowup = true; + for (const MDOperand &Option : drop_begin(FollowupNode->operands(), 1)) { + MDs.push_back(Option.get()); + Changed = true; + } + } + + // Attributes of the followup loop not specified explicity, so signal to the + // transformation pass to add suitable attributes. + if (!AlwaysNew && !HasAnyFollowup) + return None; + + // If no attributes were added or remove, the previous loop Id can be reused. + if (!AlwaysNew && !Changed) + return OrigLoopID; + + // No attributes is equivalent to having no !llvm.loop metadata at all. + if (MDs.size() == 1) + return nullptr; + + // Build the new loop ID. + MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs); + FollowupLoopID->replaceOperandWith(0, FollowupLoopID); + return FollowupLoopID; +} + +bool llvm::hasDisableAllTransformsHint(const Loop *L) { + return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced); +} + +TransformationMode llvm::hasUnrollTransformation(Loop *L) { + if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) + return TM_SuppressedByUser; + + Optional<int> Count = + getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count"); + if (Count.hasValue()) + return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser; + + if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")) + return TM_ForcedByUser; + + if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full")) + return TM_ForcedByUser; + + if (hasDisableAllTransformsHint(L)) + return TM_Disable; + + return TM_Unspecified; +} + +TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) { + if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable")) + return TM_SuppressedByUser; + + Optional<int> Count = + getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count"); + if (Count.hasValue()) + return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser; + + if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable")) + return TM_ForcedByUser; + + if (hasDisableAllTransformsHint(L)) + return TM_Disable; + + return TM_Unspecified; +} + +TransformationMode llvm::hasVectorizeTransformation(Loop *L) { + Optional<bool> Enable = + getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable"); + + if (Enable == false) + return TM_SuppressedByUser; + + Optional<int> VectorizeWidth = + getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width"); + Optional<int> InterleaveCount = + getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count"); + + if (Enable == true) { + // 'Forcing' vector width and interleave count to one effectively disables + // this tranformation. + if (VectorizeWidth == 1 && InterleaveCount == 1) + return TM_SuppressedByUser; + return TM_ForcedByUser; + } + + if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized")) + return TM_Disable; + + if (VectorizeWidth == 1 && InterleaveCount == 1) + return TM_Disable; + + if (VectorizeWidth > 1 || InterleaveCount > 1) + return TM_Enable; + + if (hasDisableAllTransformsHint(L)) + return TM_Disable; + + return TM_Unspecified; +} + +TransformationMode llvm::hasDistributeTransformation(Loop *L) { + if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable")) + return TM_ForcedByUser; + + if (hasDisableAllTransformsHint(L)) + return TM_Disable; + + return TM_Unspecified; +} + +TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) { + if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable")) + return TM_SuppressedByUser; + + if (hasDisableAllTransformsHint(L)) + return TM_Disable; + + return TM_Unspecified; +} + /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. SmallVector<DomTreeNode *, 16> diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9c46eee9567..0341cce214a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -152,6 +152,16 @@ using namespace llvm; #define LV_NAME "loop-vectorize" #define DEBUG_TYPE LV_NAME +/// @{ +/// Metadata attribute names +static const char *const LLVMLoopVectorizeFollowupAll = + "llvm.loop.vectorize.followup_all"; +static const char *const LLVMLoopVectorizeFollowupVectorized = + "llvm.loop.vectorize.followup_vectorized"; +static const char *const LLVMLoopVectorizeFollowupEpilogue = + "llvm.loop.vectorize.followup_epilogue"; +/// @} + STATISTIC(LoopsVectorized, "Number of loops vectorized"); STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization"); @@ -796,27 +806,6 @@ void InnerLoopVectorizer::addMetadata(ArrayRef<Value *> To, } } -static void emitMissedWarning(Function *F, Loop *L, - const LoopVectorizeHints &LH, - OptimizationRemarkEmitter *ORE) { - LH.emitRemarkWithHints(); - - if (LH.getForce() == LoopVectorizeHints::FK_Enabled) { - if (LH.getWidth() != 1) - ORE->emit(DiagnosticInfoOptimizationFailure( - DEBUG_TYPE, "FailedRequestedVectorization", - L->getStartLoc(), L->getHeader()) - << "loop not vectorized: " - << "failed explicitly specified loop vectorization"); - else if (LH.getInterleave() != 1) - ORE->emit(DiagnosticInfoOptimizationFailure( - DEBUG_TYPE, "FailedRequestedInterleaving", L->getStartLoc(), - L->getHeader()) - << "loop not interleaved: " - << "failed explicitly specified loop interleaving"); - } -} - namespace llvm { /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -1377,7 +1366,7 @@ static bool isExplicitVecOuterLoop(Loop *OuterLp, if (!Hints.getWidth()) { LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No user vector width.\n"); - emitMissedWarning(Fn, OuterLp, Hints, ORE); + Hints.emitRemarkWithHints(); return false; } @@ -1385,7 +1374,7 @@ static bool isExplicitVecOuterLoop(Loop *OuterLp, // TODO: Interleave support is future work. LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for " "outer loops.\n"); - emitMissedWarning(Fn, OuterLp, Hints, ORE); + Hints.emitRemarkWithHints(); return false; } @@ -2739,6 +2728,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { BasicBlock *OldBasicBlock = OrigLoop->getHeader(); BasicBlock *VectorPH = OrigLoop->getLoopPreheader(); BasicBlock *ExitBlock = OrigLoop->getExitBlock(); + MDNode *OrigLoopID = OrigLoop->getLoopID(); assert(VectorPH && "Invalid loop structure"); assert(ExitBlock && "Must have an exit block"); @@ -2882,6 +2872,17 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { LoopVectorBody = VecBody; LoopScalarBody = OldBasicBlock; + Optional<MDNode *> VectorizedLoopID = + makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, + LLVMLoopVectorizeFollowupVectorized}); + if (VectorizedLoopID.hasValue()) { + Lp->setLoopID(VectorizedLoopID.getValue()); + + // Do not setAlreadyVectorized if loop attributes have been defined + // explicitly. + return LoopVectorPreHeader; + } + // Keep all loop hints from the original loop on the vector loop (we'll // replace the vectorizer-specific hints below). if (MDNode *LID = OrigLoop->getLoopID()) @@ -7177,7 +7178,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { &Requirements, &Hints, DB, AC); if (!LVL.canVectorize(EnableVPlanNativePath)) { LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); - emitMissedWarning(F, L, Hints, ORE); + Hints.emitRemarkWithHints(); return false; } @@ -7250,7 +7251,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { ORE->emit(createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "NoImplicitFloat", L) << "loop not vectorized due to NoImplicitFloat attribute"); - emitMissedWarning(F, L, Hints, ORE); + Hints.emitRemarkWithHints(); return false; } @@ -7265,7 +7266,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { ORE->emit( createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L) << "loop not vectorized due to unsafe FP support."); - emitMissedWarning(F, L, Hints, ORE); + Hints.emitRemarkWithHints(); return false; } @@ -7307,7 +7308,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { if (Requirements.doesNotMeet(F, L, Hints)) { LLVM_DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization " "requirements.\n"); - emitMissedWarning(F, L, Hints, ORE); + Hints.emitRemarkWithHints(); return false; } @@ -7384,6 +7385,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { LVP.setBestPlan(VF.Width, IC); using namespace ore; + bool DisableRuntimeUnroll = false; + MDNode *OrigLoopID = L->getLoopID(); if (!VectorizeLoop) { assert(IC > 1 && "interleave count should not be 1 or 0"); @@ -7410,7 +7413,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // no runtime checks about strides and memory. A scalar loop that is // rarely used is not worth unrolling. if (!LB.areSafetyChecksAdded()) - AddRuntimeUnrollDisableMetaData(L); + DisableRuntimeUnroll = true; // Report the vectorization decision. ORE->emit([&]() { @@ -7422,8 +7425,18 @@ bool LoopVectorizePass::processLoop(Loop *L) { }); } - // Mark the loop as already vectorized to avoid vectorizing again. - Hints.setAlreadyVectorized(); + Optional<MDNode *> RemainderLoopID = + makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, + LLVMLoopVectorizeFollowupEpilogue}); + if (RemainderLoopID.hasValue()) { + L->setLoopID(RemainderLoopID.getValue()); + } else { + if (DisableRuntimeUnroll) + AddRuntimeUnrollDisableMetaData(L); + + // Mark the loop as already vectorized to avoid vectorizing again. + Hints.setAlreadyVectorized(); + } LLVM_DEBUG(verifyFunction(*L->getHeader()->getParent())); return true; |