summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/IPO/PassManagerBuilder.cpp14
-rw-r--r--llvm/test/CodeGen/AMDGPU/early-inline.ll2
-rw-r--r--llvm/test/Other/pass-pipelines.ll2
3 files changed, 17 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index d23892292c3..1a17508d2b2 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -489,9 +489,11 @@ void PassManagerBuilder::populateModulePassManager(
// Start of CallGraph SCC passes.
if (!DisableUnitAtATime)
MPM.add(createPruneEHPass()); // Remove dead EH info
+ bool RunInliner = false;
if (Inliner) {
MPM.add(Inliner);
Inliner = nullptr;
+ RunInliner = true;
}
if (!DisableUnitAtATime)
MPM.add(createPostOrderFunctionAttrsLegacyPass());
@@ -505,6 +507,18 @@ void PassManagerBuilder::populateModulePassManager(
// pass manager that we are specifically trying to avoid. To prevent this
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+
+ // The inliner performs some kind of dead code elimination as it goes,
+ // but there are cases that are not really caught by it. We might
+ // at some point consider teaching the inliner about them, but it
+ // is OK for now to run GlobalOpt + GlobalDCE in tandem as their
+ // benefits generally outweigh the cost, making the whole pipeline
+ // faster.
+ if (RunInliner) {
+ MPM.add(createGlobalOptimizerPass());
+ MPM.add(createGlobalDCEPass());
+ }
+
if (RunPartialInlining)
MPM.add(createPartialInliningPass());
diff --git a/llvm/test/CodeGen/AMDGPU/early-inline.ll b/llvm/test/CodeGen/AMDGPU/early-inline.ll
index c871d54bec7..a4f970ee238 100644
--- a/llvm/test/CodeGen/AMDGPU/early-inline.ll
+++ b/llvm/test/CodeGen/AMDGPU/early-inline.ll
@@ -1,6 +1,5 @@
; RUN: opt -mtriple=amdgcn-- -O1 -S -inline-threshold=1 -amdgpu-early-inline-all %s | FileCheck %s
-; CHECK: @c_alias
@c_alias = alias i32 (i32), i32 (i32)* @callee
define i32 @callee(i32 %x) {
@@ -17,6 +16,7 @@ entry:
; CHECK: mul i32
; CHECK-NOT: call i32
+; CHECK: define i32 @c_alias
define amdgpu_kernel void @caller(i32 %x) {
entry:
%res = call i32 @callee(i32 %x)
diff --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll
index d47c02ee7a4..9ecfc4f6201 100644
--- a/llvm/test/Other/pass-pipelines.ll
+++ b/llvm/test/Other/pass-pipelines.ll
@@ -56,6 +56,8 @@
; a barrier pass.
; CHECK-O2: A No-Op Barrier Pass
; Reduce the size of the IR ASAP after the inliner.
+; CHECK-O2-NEXT: Global Variable Optimizer
+; CHECK-O2: Dead Global Elimination
; CHECK-O2-NEXT: Eliminate Available Externally
; Inferring function attribute should be right after the CGSCC pipeline, before
; any other optimizations/analyses.
OpenPOWER on IntegriCloud