[SimplifyCFG] don't sink common insts too soon (PR34603)

This should solve https://bugs.llvm.org/show_bug.cgi?id=34603 by preventing SimplifyCFG from altering redundant instructions before early-cse has a chance to run. It changes the default (canonical-forming) behavior of SimplifyCFG, so the sinking transform is now only done later in the optimization pipeline. Differential Revision: https://reviews.llvm.org/D38566 llvm-svn: 320749
author: Sanjay Patel <spatel@rotateright.com> 2017-12-14 22:05:20 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2017-12-14 22:05:20 +0000
commit: 0ab0c1a201ece292fdced1f913fa257bdcb5280f (patch)
tree: e896fb6f22c7ce9f2106707b0796e7cab2c2c59e /llvm/lib
parent: 34ccadcea9eba33d2e410b2af843a3da602611bf (diff)
download: bcm5719-llvm-0ab0c1a201ece292fdced1f913fa257bdcb5280f.tar.gz
bcm5719-llvm-0ab0c1a201ece292fdced1f913fa257bdcb5280f.zip
6 files changed, 36 insertions, 15 deletions
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 56eba691041..d33c4df70c6 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -747,21 +747,24 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   // Cleanup after the loop optimization passes.
   OptimizePM.addPass(InstCombinePass());
 
-
   // Now that we've formed fast to execute loop structures, we do further
   // optimizations. These are run afterward as they might block doing complex
   // analyses and transforms such as what are needed for loop vectorization.
 
-  // Optimize parallel scalar instruction chains into SIMD instructions.
-  OptimizePM.addPass(SLPVectorizerPass());
-
-  // Cleanup after all of the vectorizers. Simplification passes like CVP and
+  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
   // GVN, loop transforms, and others have already run, so it's now better to
   // convert to more optimized IR using more aggressive simplify CFG options.
+  // The extra sinking transform can create larger basic blocks, so do this
+  // before SLP vectorization.
   OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions().
-                                         forwardSwitchCondToPhi(true).
-                                         convertSwitchToLookupTable(true).
-                                         needCanonicalLoops(false)));
+                                     forwardSwitchCondToPhi(true).
+                                     convertSwitchToLookupTable(true).
+                                     needCanonicalLoops(false).
+                                     sinkCommonInsts(true)));
+
+  // Optimize parallel scalar instruction chains into SIMD instructions.
+  OptimizePM.addPass(SLPVectorizerPass());
+
   OptimizePM.addPass(InstCombinePass());
 
   // Unroll small loops to hide loop backedge latency and saturate any parallel
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 21b8bfff52c..64583ead73f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -365,7 +365,7 @@ void AArch64PassConfig::addIRPasses() {
   // determine whether it succeeded. We can exploit existing control-flow in
   // ldrex/strex loops to simplify this, but it needs tidying up.
   if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
-    addPass(createCFGSimplificationPass(1, true, true, false));
+    addPass(createCFGSimplificationPass(1, true, true, false, true));
 
   // Run LoopDataPrefetch
   //
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 007dc2be16e..51982b2dab1 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -385,7 +385,7 @@ void ARMPassConfig::addIRPasses() {
   // ldrex/strex loops to simplify this, but it needs tidying up.
   if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
     addPass(createCFGSimplificationPass(
-        1, false, false, true, [this](const Function &F) {
+        1, false, false, true, true, [this](const Function &F) {
           const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
           return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
         }));
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index b5d32a1ca57..3855e6245d8 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -630,6 +630,13 @@ void PassManagerBuilder::populateModulePassManager(
     addInstructionCombiningPass(MPM);
   }
 
+  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+  // GVN, loop transforms, and others have already run, so it's now better to
+  // convert to more optimized IR using more aggressive simplify CFG options.
+  // The extra sinking transform can create larger basic blocks, so do this
+  // before SLP vectorization.
+  MPM.add(createCFGSimplificationPass(1, true, true, false, true));
+
   if (RunSLPAfterLoopVectorization && SLPVectorize) {
     MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
     if (OptLevel > 1 && ExtraVectorizerPasses) {
@@ -638,9 +645,6 @@ void PassManagerBuilder::populateModulePassManager(
   }
 
   addExtensionsToPM(EP_Peephole, MPM);
-  // Switches to lookup tables and other transforms that may not be considered
-  // canonical by other IR passes.
-  MPM.add(createCFGSimplificationPass(1, true, true, false));
   addInstructionCombiningPass(MPM);
 
   if (!DisableUnrollLoops) {
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 789e0a47793..1522170dc3b 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -61,6 +61,11 @@ static cl::opt<bool> UserForwardSwitchCond(
     "forward-switch-cond", cl::Hidden, cl::init(false),
     cl::desc("Forward switch condition to phi ops (default = false)"));
 
+static cl::opt<bool> UserSinkCommonInsts(
+    "sink-common-insts", cl::Hidden, cl::init(false),
+    cl::desc("Sink common instructions (default = false)"));
+
+
 STATISTIC(NumSimpl, "Number of blocks simplified");
 
 /// If we have more than one empty (other than phi node) return blocks,
@@ -205,6 +210,9 @@ SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) {
   Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences()
                                   ? UserKeepLoops
                                   : Opts.NeedCanonicalLoop;
+  Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
+                                ? UserSinkCommonInsts
+                                : Opts.SinkCommonInsts;
 }
 
 PreservedAnalyses SimplifyCFGPass::run(Function &F,
@@ -226,6 +234,7 @@ struct CFGSimplifyPass : public FunctionPass {
 
   CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false,
                   bool ConvertSwitch = false, bool KeepLoops = true,
+                  bool SinkCommon = false,
                   std::function<bool(const Function &)> Ftor = nullptr)
       : FunctionPass(ID), PredicateFtor(std::move(Ftor)) {
 
@@ -246,6 +255,10 @@ struct CFGSimplifyPass : public FunctionPass {
 
     Options.NeedCanonicalLoop =
         UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops;
+
+    Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
+                                  ? UserSinkCommonInsts
+                                  : SinkCommon;
   }
 
   bool runOnFunction(Function &F) override {
@@ -276,7 +289,8 @@ INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
 FunctionPass *
 llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond,
                                   bool ConvertSwitch, bool KeepLoops,
+                                  bool SinkCommon,
                                   std::function<bool(const Function &)> Ftor) {
   return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch,
-                             KeepLoops, std::move(Ftor));
+                             KeepLoops, SinkCommon, std::move(Ftor));
 }
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 394c951630c..f02f80cc1b7 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5728,7 +5728,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
   BasicBlock *BB = BI->getParent();
   BasicBlock *Succ = BI->getSuccessor(0);
 
-  if (SinkCommon && SinkThenElseCodeToEnd(BI))
+  if (SinkCommon && Options.SinkCommonInsts && SinkThenElseCodeToEnd(BI))
     return true;
 
   // If the Terminator is the only non-phi instruction, simplify the block.
author	Sanjay Patel <spatel@rotateright.com>	2017-12-14 22:05:20 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2017-12-14 22:05:20 +0000
commit	0ab0c1a201ece292fdced1f913fa257bdcb5280f (patch)
tree	e896fb6f22c7ce9f2106707b0796e7cab2c2c59e /llvm/lib
parent	34ccadcea9eba33d2e410b2af843a3da602611bf (diff)
download	bcm5719-llvm-0ab0c1a201ece292fdced1f913fa257bdcb5280f.tar.gz bcm5719-llvm-0ab0c1a201ece292fdced1f913fa257bdcb5280f.zip