diff options
author | Sanjay Patel <spatel@rotateright.com> | 2017-12-14 22:05:20 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2017-12-14 22:05:20 +0000 |
commit | 0ab0c1a201ece292fdced1f913fa257bdcb5280f (patch) | |
tree | e896fb6f22c7ce9f2106707b0796e7cab2c2c59e /llvm/lib | |
parent | 34ccadcea9eba33d2e410b2af843a3da602611bf (diff) | |
download | bcm5719-llvm-0ab0c1a201ece292fdced1f913fa257bdcb5280f.tar.gz bcm5719-llvm-0ab0c1a201ece292fdced1f913fa257bdcb5280f.zip |
[SimplifyCFG] don't sink common insts too soon (PR34603)
This should solve:
https://bugs.llvm.org/show_bug.cgi?id=34603
...by preventing SimplifyCFG from altering redundant instructions before early-cse has a chance to run.
It changes the default (canonical-forming) behavior of SimplifyCFG, so we're only doing the
sinking transform later in the optimization pipeline.
Differential Revision: https://reviews.llvm.org/D38566
llvm-svn: 320749
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Passes/PassBuilder.cpp | 19 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMTargetMachine.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2 |
6 files changed, 36 insertions, 15 deletions
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 56eba691041..d33c4df70c6 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -747,21 +747,24 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // Cleanup after the loop optimization passes. OptimizePM.addPass(InstCombinePass()); - // Now that we've formed fast to execute loop structures, we do further // optimizations. These are run afterward as they might block doing complex // analyses and transforms such as what are needed for loop vectorization. - // Optimize parallel scalar instruction chains into SIMD instructions. - OptimizePM.addPass(SLPVectorizerPass()); - - // Cleanup after all of the vectorizers. Simplification passes like CVP and + // Cleanup after loop vectorization, etc. Simplification passes like CVP and // GVN, loop transforms, and others have already run, so it's now better to // convert to more optimized IR using more aggressive simplify CFG options. + // The extra sinking transform can create larger basic blocks, so do this + // before SLP vectorization. OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions(). - forwardSwitchCondToPhi(true). - convertSwitchToLookupTable(true). - needCanonicalLoops(false))); + forwardSwitchCondToPhi(true). + convertSwitchToLookupTable(true). + needCanonicalLoops(false). + sinkCommonInsts(true))); + + // Optimize parallel scalar instruction chains into SIMD instructions. + OptimizePM.addPass(SLPVectorizerPass()); + OptimizePM.addPass(InstCombinePass()); // Unroll small loops to hide loop backedge latency and saturate any parallel diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 21b8bfff52c..64583ead73f 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -365,7 +365,7 @@ void AArch64PassConfig::addIRPasses() { // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass(1, true, true, false)); + addPass(createCFGSimplificationPass(1, true, true, false, true)); // Run LoopDataPrefetch // diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 007dc2be16e..51982b2dab1 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -385,7 +385,7 @@ void ARMPassConfig::addIRPasses() { // ldrex/strex loops to simplify this, but it needs tidying up. if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass( - 1, false, false, true, [this](const Function &F) { + 1, false, false, true, true, [this](const Function &F) { const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F); return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); })); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index b5d32a1ca57..3855e6245d8 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -630,6 +630,13 @@ void PassManagerBuilder::populateModulePassManager( addInstructionCombiningPass(MPM); } + // Cleanup after loop vectorization, etc. Simplification passes like CVP and + // GVN, loop transforms, and others have already run, so it's now better to + // convert to more optimized IR using more aggressive simplify CFG options. + // The extra sinking transform can create larger basic blocks, so do this + // before SLP vectorization. + MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + if (RunSLPAfterLoopVectorization && SLPVectorize) { MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. if (OptLevel > 1 && ExtraVectorizerPasses) { @@ -638,9 +645,6 @@ void PassManagerBuilder::populateModulePassManager( } addExtensionsToPM(EP_Peephole, MPM); - // Switches to lookup tables and other transforms that may not be considered - // canonical by other IR passes. - MPM.add(createCFGSimplificationPass(1, true, true, false)); addInstructionCombiningPass(MPM); if (!DisableUnrollLoops) { diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 789e0a47793..1522170dc3b 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -61,6 +61,11 @@ static cl::opt<bool> UserForwardSwitchCond( "forward-switch-cond", cl::Hidden, cl::init(false), cl::desc("Forward switch condition to phi ops (default = false)")); +static cl::opt<bool> UserSinkCommonInsts( + "sink-common-insts", cl::Hidden, cl::init(false), + cl::desc("Sink common instructions (default = false)")); + + STATISTIC(NumSimpl, "Number of blocks simplified"); /// If we have more than one empty (other than phi node) return blocks, @@ -205,6 +210,9 @@ SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) { Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences() ? UserKeepLoops : Opts.NeedCanonicalLoop; + Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences() + ? UserSinkCommonInsts + : Opts.SinkCommonInsts; } PreservedAnalyses SimplifyCFGPass::run(Function &F, @@ -226,6 +234,7 @@ struct CFGSimplifyPass : public FunctionPass { CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false, bool ConvertSwitch = false, bool KeepLoops = true, + bool SinkCommon = false, std::function<bool(const Function &)> Ftor = nullptr) : FunctionPass(ID), PredicateFtor(std::move(Ftor)) { @@ -246,6 +255,10 @@ struct CFGSimplifyPass : public FunctionPass { Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops; + + Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences() + ? UserSinkCommonInsts + : SinkCommon; } bool runOnFunction(Function &F) override { @@ -276,7 +289,8 @@ INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, FunctionPass * llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond, bool ConvertSwitch, bool KeepLoops, + bool SinkCommon, std::function<bool(const Function &)> Ftor) { return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch, - KeepLoops, std::move(Ftor)); + KeepLoops, SinkCommon, std::move(Ftor)); } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 394c951630c..f02f80cc1b7 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5728,7 +5728,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, BasicBlock *BB = BI->getParent(); BasicBlock *Succ = BI->getSuccessor(0); - if (SinkCommon && SinkThenElseCodeToEnd(BI)) + if (SinkCommon && Options.SinkCommonInsts && SinkThenElseCodeToEnd(BI)) return true; // If the Terminator is the only non-phi instruction, simplify the block. |