diff options
| -rw-r--r-- | llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 14 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/early-inline.ll | 2 | ||||
| -rw-r--r-- | llvm/test/Other/pass-pipelines.ll | 2 |
3 files changed, 17 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index d23892292c3..1a17508d2b2 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -489,9 +489,11 @@ void PassManagerBuilder::populateModulePassManager( // Start of CallGraph SCC passes. if (!DisableUnitAtATime) MPM.add(createPruneEHPass()); // Remove dead EH info + bool RunInliner = false; if (Inliner) { MPM.add(Inliner); Inliner = nullptr; + RunInliner = true; } if (!DisableUnitAtATime) MPM.add(createPostOrderFunctionAttrsLegacyPass()); @@ -505,6 +507,18 @@ void PassManagerBuilder::populateModulePassManager( // pass manager that we are specifically trying to avoid. To prevent this // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + + // The inliner performs some kind of dead code elimination as it goes, + // but there are cases that are not really caught by it. We might + // at some point consider teaching the inliner about them, but it + // is OK for now to run GlobalOpt + GlobalDCE in tandem as their + // benefits generally outweigh the cost, making the whole pipeline + // faster. 
+ if (RunInliner) { + MPM.add(createGlobalOptimizerPass()); + MPM.add(createGlobalDCEPass()); + } + if (RunPartialInlining) MPM.add(createPartialInliningPass()); diff --git a/llvm/test/CodeGen/AMDGPU/early-inline.ll b/llvm/test/CodeGen/AMDGPU/early-inline.ll index c871d54bec7..a4f970ee238 100644 --- a/llvm/test/CodeGen/AMDGPU/early-inline.ll +++ b/llvm/test/CodeGen/AMDGPU/early-inline.ll @@ -1,6 +1,5 @@ ; RUN: opt -mtriple=amdgcn-- -O1 -S -inline-threshold=1 -amdgpu-early-inline-all %s | FileCheck %s -; CHECK: @c_alias @c_alias = alias i32 (i32), i32 (i32)* @callee define i32 @callee(i32 %x) { @@ -17,6 +16,7 @@ entry: ; CHECK: mul i32 ; CHECK-NOT: call i32 +; CHECK: define i32 @c_alias define amdgpu_kernel void @caller(i32 %x) { entry: %res = call i32 @callee(i32 %x) diff --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll index d47c02ee7a4..9ecfc4f6201 100644 --- a/llvm/test/Other/pass-pipelines.ll +++ b/llvm/test/Other/pass-pipelines.ll @@ -56,6 +56,8 @@ ; a barrier pass. ; CHECK-O2: A No-Op Barrier Pass ; Reduce the size of the IR ASAP after the inliner. +; CHECK-O2-NEXT: Global Variable Optimizer +; CHECK-O2: Dead Global Elimination ; CHECK-O2-NEXT: Eliminate Available Externally ; Inferring function attribute should be right after the CGSCC pipeline, before ; any other optimizations/analyses. |

