1 files changed, 11 insertions, 8 deletions
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 56eba691041..d33c4df70c6 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -747,21 +747,24 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   // Cleanup after the loop optimization passes.
   OptimizePM.addPass(InstCombinePass());
 
-
   // Now that we've formed fast to execute loop structures, we do further
   // optimizations. These are run afterward as they might block doing complex
   // analyses and transforms such as what are needed for loop vectorization.
 
-  // Optimize parallel scalar instruction chains into SIMD instructions.
-  OptimizePM.addPass(SLPVectorizerPass());
-
-  // Cleanup after all of the vectorizers. Simplification passes like CVP and
+  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
   // GVN, loop transforms, and others have already run, so it's now better to
   // convert to more optimized IR using more aggressive simplify CFG options.
+  // The extra sinking transform can create larger basic blocks, so do this
+  // before SLP vectorization.
   OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions().
-                                         forwardSwitchCondToPhi(true).
-                                         convertSwitchToLookupTable(true).
-                                         needCanonicalLoops(false)));
+                                     forwardSwitchCondToPhi(true).
+                                     convertSwitchToLookupTable(true).
+                                     needCanonicalLoops(false).
+                                     sinkCommonInsts(true)));
+
+  // Optimize parallel scalar instruction chains into SIMD instructions.
+  OptimizePM.addPass(SLPVectorizerPass());
+
   OptimizePM.addPass(InstCombinePass());
 
   // Unroll small loops to hide loop backedge latency and saturate any parallel