From c31032d607c25d8bc53d59b68b797285f2736ea1 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Wed, 9 Mar 2016 18:47:11 +0000 Subject: InstCombine: Restrict computeKnownBits() on all Values to OptLevel > 2 As part of r251146 InstCombine was extended to call computeKnownBits on every value in the function to determine whether it happens to be constant. This increases typical compiletime by 1-3% (5% in irgen+opt time) in my measurements. On the other hand this case did not trigger once in the whole llvm-testsuite. This patch introduces the notion of ExpensiveCombines which are only enabled for OptLevel > 2. I removed the check in InstructionSimplify as that is called from various places where the OptLevel is not known but given the rarity of the situation I think a check in InstCombine is enough. Differential Revision: http://reviews.llvm.org/D16835 llvm-svn: 263047 --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 44 +++++++++++++--------- .../Transforms/InstCombine/InstCombineInternal.h | 7 +++- .../InstCombine/InstructionCombining.cpp | 26 ++++++++----- 3 files changed, 48 insertions(+), 29 deletions(-) (limited to 'llvm/lib/Transforms') diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index c490e060f20..ae4d802722a 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -185,6 +185,12 @@ void PassManagerBuilder::addInitialAliasAnalysisPasses( PM.add(createScopedNoAliasAAWrapperPass()); } +void PassManagerBuilder::addInstructionCombiningPass( + legacy::PassManagerBase &PM) const { + bool ExpensiveCombines = OptLevel > 2; + PM.add(createInstructionCombiningPass(ExpensiveCombines)); +} + void PassManagerBuilder::populateFunctionPassManager( legacy::FunctionPassManager &FPM) { addExtensionsToPM(EP_EarlyAsPossible, FPM); @@ -230,7 +236,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createJumpThreadingPass()); // Thread 
jumps. MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createInstructionCombiningPass()); // Combine silly seq's + // Combine silly seq's + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); MPM.add(createTailCallEliminationPass()); // Eliminate tail calls @@ -238,7 +245,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createReassociatePass()); // Reassociate expressions if (PrepareForThinLTO) { MPM.add(createAggressiveDCEPass()); // Delete dead instructions - MPM.add(createInstructionCombiningPass()); // Combine silly seq's + addInstructionCombiningPass(MPM); // Combine silly seq's return; } // Rotate Loop - disable header duplication at -Oz @@ -246,7 +253,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createLICMPass()); // Hoist loop invariants MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); MPM.add(createCFGSimplificationPass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createLoopDeletionPass()); // Delete dead loops @@ -273,7 +280,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. 
- MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); @@ -290,7 +297,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( if (BBVectorize) { MPM.add(createBBVectorizePass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); if (OptLevel > 1 && UseGVNAfterVectorization) MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies @@ -308,7 +315,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createInstructionCombiningPass()); // Clean up after everything. + // Clean up after everything. + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); } @@ -359,7 +367,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createDeadArgEliminationPass()); // Dead argument elimination - MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE + addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE } @@ -480,7 +488,7 @@ void PassManagerBuilder::populateModulePassManager( // on -O1 and no #pragma is found). Would be good to have these two passes // as function calls, so that we can only pass them when the vectorizer // changed the code. - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); if (OptLevel > 1 && ExtraVectorizerPasses) { // At higher optimization levels, try to clean up any runtime overlap and // alignment checks inserted by the vectorizer. 
We want to track correllated @@ -490,11 +498,11 @@ void PassManagerBuilder::populateModulePassManager( // dead (or speculatable) control flows or more combining opportunities. MPM.add(createEarlyCSEPass()); MPM.add(createCorrelatedValuePropagationPass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); MPM.add(createLICMPass()); MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); MPM.add(createCFGSimplificationPass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); } if (RunSLPAfterLoopVectorization) { @@ -507,7 +515,7 @@ void PassManagerBuilder::populateModulePassManager( if (BBVectorize) { MPM.add(createBBVectorizePass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); if (OptLevel > 1 && UseGVNAfterVectorization) MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies @@ -522,13 +530,13 @@ void PassManagerBuilder::populateModulePassManager( addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); if (!DisableUnrollLoops) { MPM.add(createLoopUnrollPass()); // Unroll small loops // LoopUnroll may generate some redundency to cleanup. - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); // Runtime unrolling will introduce runtime check in loop prologue. If the // unrolled loop is a inner loop, then the prologue will be inside the @@ -595,7 +603,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // simplification opportunities, and both can propagate functions through // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. 
- PM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(PM); addExtensionsToPM(EP_Peephole, PM); // Inline small functions @@ -617,7 +625,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createArgumentPromotionPass()); // The IPO passes may leave cruft around. Clean up after them. - PM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(PM); addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); @@ -656,10 +664,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Now that we've optimized loops (in particular loop induction variables), // we may have exposed more scalar opportunities. Run parts of the scalar // optimizer again at this point. - PM.add(createInstructionCombiningPass()); // Initial cleanup + addInstructionCombiningPass(PM); // Initial cleanup PM.add(createCFGSimplificationPass()); // if-convert PM.add(createSCCPPass()); // Propagate exposed constants - PM.add(createInstructionCombiningPass()); // Clean up again + addInstructionCombiningPass(PM); // Clean up again PM.add(createBitTrackingDCEPass()); // More scalar chains could be vectorized due to more alias information @@ -675,7 +683,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createLoadCombinePass()); // Cleanup and simplify the code after the scalar optimizations. - PM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(PM); addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index c251683d514..1909cbaea64 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -177,6 +177,8 @@ public: private: // Mode in which we are running the combiner. 
const bool MinimizeSize; + /// Enable combines that trigger rarely but are costly in compiletime. + const bool ExpensiveCombines; AliasAnalysis *AA; @@ -195,11 +197,12 @@ private: public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, - bool MinimizeSize, AliasAnalysis *AA, + bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, AssumptionCache *AC, TargetLibraryInfo *TLI, DominatorTree *DT, const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), - AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {} + ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), + DL(DL), LI(LI), MadeIRChange(false) {} /// \brief Run the combiner over the entire worklist until it is empty. /// diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index af8d5690ff6..d728f1acc75 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -78,6 +78,10 @@ STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc , "Number of reassociations"); +static cl::opt<bool> +EnableExpensiveCombines("expensive-combines", + cl::desc("Enable expensive instruction combines")); + Value *InstCombiner::EmitGEPOffset(User *GEP) { return llvm::EmitGEPOffset(Builder, DL, GEP); } @@ -2770,9 +2774,9 @@ bool InstCombiner::run() { } } - // In general, it is possible for computeKnownBits to determine all bits in a - // value even when the operands are not all constants. - if (!I->use_empty() && I->getType()->isIntegerTy()) { + // In general, it is possible for computeKnownBits to determine all bits in + // a value even when the operands are not all constants. 
+ if (ExpensiveCombines && !I->use_empty() && I->getType()->isIntegerTy()) { unsigned BitWidth = I->getType()->getScalarSizeInBits(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); @@ -3043,8 +3047,10 @@ static bool combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, + bool ExpensiveCombines = true, LoopInfo *LI = nullptr) { auto &DL = F.getParent()->getDataLayout(); + ExpensiveCombines |= EnableExpensiveCombines; /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. @@ -3064,8 +3070,8 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, bool Changed = prepareICWorklistFromFunction(F, DL, &TLI, Worklist); - InstCombiner IC(Worklist, &Builder, F.optForMinSize(), AA, &AC, &TLI, &DT, - DL, LI); + InstCombiner IC(Worklist, &Builder, F.optForMinSize(), ExpensiveCombines, + AA, &AC, &TLI, &DT, DL, LI); Changed |= IC.run(); if (!Changed) @@ -3084,7 +3090,8 @@ PreservedAnalyses InstCombinePass::run(Function &F, auto *LI = AM->getCachedResult<LoopAnalysis>(F); // FIXME: The AliasAnalysis is not yet supported in the new pass manager - if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, DT, LI)) + if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, DT, + ExpensiveCombines, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3121,7 +3128,8 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); auto *LI = LIWP ? 
&LIWP->getLoopInfo() : nullptr; - return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, LI); + return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, + ExpensiveCombines, LI); } char InstructionCombiningPass::ID = 0; @@ -3144,6 +3152,6 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) { initializeInstructionCombiningPassPass(*unwrap(R)); } -FunctionPass *llvm::createInstructionCombiningPass() { - return new InstructionCombiningPass(); +FunctionPass *llvm::createInstructionCombiningPass(bool ExpensiveCombines) { + return new InstructionCombiningPass(ExpensiveCombines); } -- cgit v1.2.3