From 0aca09fc6cdcda65a44dc084b20d7056912977ef Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Thu, 11 May 2017 17:06:17 +0000 Subject: [SLP] Emit optimization remarks The approach I followed was to emit the remark after getTreeCost concludes that SLP is profitable. I initially tried emitting them after the vectorizeRootInstruction calls in vectorizeChainsInBlock but I vaguely remember missing a few cases for example in HorizontalReduction::tryToReduce. ORE is placed in BoUpSLP so that it's available from everywhere (notably HorizontalReduction::tryToReduce). We use the first instruction in the root bundle as the locator for the remark. In order to get a sense how far the tree is spanning I've include the size of the tree in the remark. This is not perfect of course but it gives you at least a rough idea about the tree. Then you can follow up with -view-slp-tree to really see the actual tree. llvm-svn: 302811 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 42 +++++++++++++++++++++---- 1 file changed, 36 insertions(+), 6 deletions(-) (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp') diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 960019f41df..f6334eb1410 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -299,10 +299,10 @@ public: BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li, DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB, - const DataLayout *DL) + const DataLayout *DL, OptimizationRemarkEmitter *ORE) : NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), DB(DB), - DL(DL), Builder(Se->getContext()) { + DL(DL), ORE(ORE), Builder(Se->getContext()) { CodeMetrics::collectEphemeralValues(F, AC, EphValues); // Use the vector register size specified by the target unless overridden // by a command-line option. @@ -361,6 +361,8 @@ public: MinBWs.clear(); } + unsigned getTreeSize() const { return VectorizableTree.size(); } + /// \brief Perform LICM and CSE on the newly generated gather sequences. void optimizeGatherSequence(); @@ -399,6 +401,8 @@ public: /// vectorizable. We do not vectorize such trees. bool isTreeTinyAndNotFullyVectorizable(); + OptimizationRemarkEmitter *getORE() { return ORE; } + private: struct TreeEntry; @@ -928,6 +932,8 @@ private: AssumptionCache *AC; DemandedBits *DB; const DataLayout *DL; + OptimizationRemarkEmitter *ORE; + unsigned MaxVecRegSize; // This is set by TTI or overridden by cl::opt. unsigned MinVecRegSize; // Set by cl::opt (default: 128). /// Instruction builder to construct the vectorized tree. @@ -3772,8 +3778,9 @@ struct SLPVectorizer : public FunctionPass { auto *DT = &getAnalysis().getDomTree(); auto *AC = &getAnalysis().getAssumptionCache(F); auto *DB = &getAnalysis().getDemandedBits(); + auto *ORE = &getAnalysis().getORE(); - return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB); + return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -3785,6 +3792,7 @@ struct SLPVectorizer : public FunctionPass { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); @@ -3803,8 +3811,9 @@ PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &A auto *DT = &AM.getResult(F); auto *AC = &AM.getResult(F); auto *DB = &AM.getResult(F); + auto *ORE = &AM.getResult(F); - bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB); + bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE); if (!Changed) return PreservedAnalyses::all(); @@ -3819,7 +3828,8 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, TargetTransformInfo *TTI_, TargetLibraryInfo *TLI_, AliasAnalysis *AA_, LoopInfo *LI_, DominatorTree *DT_, - AssumptionCache *AC_, DemandedBits *DB_) { + AssumptionCache *AC_, DemandedBits *DB_, + OptimizationRemarkEmitter *ORE_) { SE = SE_; TTI = TTI_; TLI = TLI_; @@ -3847,7 +3857,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, // Use the bottom up slp vectorizer to construct chains that start with // store instructions. - BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL); + BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL, ORE_); // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to // delete instructions. @@ -3936,6 +3946,13 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); if (Cost < -SLPCostThreshold) { DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); + using namespace ore; + R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized", + cast(Chain[i])) + << "Stores SLP vectorized with cost " << NV("Cost", Cost) + << " and with tree size " + << NV("TreeSize", R.getTreeSize())); + R.vectorizeTree(); // Move to the next bundle. @@ -4149,6 +4166,12 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, if (Cost < -SLPCostThreshold) { DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); + R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList", + cast(Ops[0])) + << "SLP vectorized with cost " << ore::NV("Cost", Cost) + << " and with tree size " + << ore::NV("TreeSize", R.getTreeSize())); + Value *VectorizedRoot = R.vectorizeTree(); // Reconstruct the build vector by extracting the vectorized root. This @@ -4492,6 +4515,12 @@ public: DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost << ". (HorRdx)\n"); + auto *I0 = cast(VL[0]); + V.getORE()->emit( + OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction", I0) + << "Vectorized horizontal reduction with cost " + << ore::NV("Cost", Cost) << " and with tree size " + << ore::NV("TreeSize", V.getTreeSize())); // Vectorize a tree. DebugLoc Loc = cast(ReducedVals[i])->getDebugLoc(); @@ -5146,6 +5175,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false) namespace llvm { -- cgit v1.2.3