| author | Chandler Carruth <chandlerc@gmail.com> | 2017-08-02 02:09:22 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2017-08-02 02:09:22 +0000 |
| commit | 95055d8f8b0757cec2c9078e6eca982d7b0f997c (patch) | |
| tree | 993392fde9872877bb051bbefdb5aadd59b4ba98 /llvm/test/Transforms/Inline | |
| parent | 8e8f8f43b043b1839973fcc28694ca8d220a2137 (diff) | |
[PM] Fix a bug where, through CGSCC iteration, we can get infinite inlining
across multiple runs of the inliner, by keeping a tiny history of
internal-to-SCC inlining decisions.
This is still a bit gross, but I don't yet have any fundamentally better
ideas, and numerous people are blocked on this to use the new PM and ThinLTO
together.
The core of the idea is to detect when we are about to do an inline that
has a chance of re-splitting an SCC which we have split before with
a similar inlining step. Such a re-split is a critical component of the
inlining forming a cycle, and so far the check detects all of the various
cyclic patterns I can come up with, as well as the original real-world test
case (which comes from a ThinLTO build of libunwind).
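The inliner and CGSCC changes themselves are not part of the diff shown below (which only adds the test), so here is a minimal standalone C++ sketch of the kind of history check described above. Every type and function name in it is an illustrative placeholder, not the actual LLVM API: the idea is simply to remember which internal-to-SCC inlining decisions have already re-split an SCC and refuse to repeat them.

```cpp
#include <set>
#include <string>
#include <utility>

// Illustrative stand-ins for call-graph entities; these are not the LLVM classes.
struct Function { std::string Name; };
struct SCC { int ID; };

// Tiny history of internal-to-SCC inlining decisions: each entry records that
// inlining starting from the given caller, performed inside the given SCC,
// has already split that SCC once before.
using InlineHistory = std::set<std::pair<const Function *, const SCC *>>;

// Decide whether inlining Callee into Caller should be attempted. Cross-SCC
// inlines are always fine; an inline that stays inside the SCC is refused if
// the history says a similar step has already re-split this SCC, since that
// is the signature of the cyclic pattern the commit guards against.
bool shouldAttemptInline(const Function &Caller, const Function & /*Callee*/,
                         const SCC &CallerSCC, const SCC &CalleeSCC,
                         const InlineHistory &History) {
  if (&CallerSCC != &CalleeSCC)
    return true;
  return History.count({&Caller, &CallerSCC}) == 0;
}

// After an internal inline actually causes the SCC to be re-split, remember
// the decision so later iterations over the re-formed SCC do not repeat it.
void recordInternalInline(const Function &Caller, const SCC &OldSCC,
                          InlineHistory &History) {
  History.insert({&Caller, &OldSCC});
}
```

In the real patch this history lives in the CGSCC pass manager's update state rather than inside the inliner itself, which is the layering concern discussed below.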
I've added some tests that I think really demonstrate what is going on
here. They are essentially state machines that march the inliner through
various steps of a cycle and check that we stop when the cycle is closed
and that we actually did do inlining to form that cycle.
A lot of thanks go to Eric Christopher and Sanjoy Das for their help in
understanding this issue and improving the test cases.
The biggest "yuck" here is the layering issue -- the CGSCC pass manager
is providing somewhat magical state to the inliner for it to use to make
itself converge. This isn't great, but I don't honestly have a lot of
better ideas yet, and the approach at least seems nicely isolated.
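To make the layering concern concrete, here is a small sketch, again with made-up names rather than the real LLVM types, of how a driver-owned update structure could carry the history across repeated visits to an SCC so that the inliner can consult and extend it:

```cpp
#include <set>
#include <utility>

struct Function;
struct SCC;

// Hypothetical update state owned by the CGSCC pass manager driver and passed
// by reference into every CGSCC pass run. Because the driver owns it, the set
// survives across repeated visits to the same (re-formed) SCC -- this is the
// "somewhat magical state" the commit message refers to.
struct CGSCCUpdateState {
  std::set<std::pair<const Function *, const SCC *>> InlinedInternalEdges;
};

// The inliner is the only pass that reads and writes the history; every other
// CGSCC pass simply ignores the extra state it is handed.
void runInlinerOnSCC(SCC &CurrentSCC, CGSCCUpdateState &UR) {
  // ...walk the calls in CurrentSCC, skipping any candidate whose
  // (caller, SCC) pair is already in UR.InlinedInternalEdges, and inserting a
  // new pair whenever an internal inline ends up re-splitting CurrentSCC...
  (void)CurrentSCC;
  (void)UR;
}
```

In this sketch the coupling is explicit: the pass manager has to carry an inliner-specific detail, which is exactly the isolation trade-off acknowledged above.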
I have tested this patch, and it doesn't block *any* inlining on the
entire LLVM test suite and SPEC, so it seems sufficiently narrowly
targeted to the issue at hand.
We have come up with hypothetical issues that this patch doesn't cover,
but so far none of them are practical, and we don't yet have a viable
solution that covers the hypothetical cases, so we are proceeding with this
in the interim. This is definitely an area that we will come back to and
revisit in the future.
Differential Revision: https://reviews.llvm.org/D36188
llvm-svn: 309784
Diffstat (limited to 'llvm/test/Transforms/Inline')
| -rw-r--r-- | llvm/test/Transforms/Inline/cgscc-cycle.ll | 125 |
1 file changed, 125 insertions, 0 deletions
diff --git a/llvm/test/Transforms/Inline/cgscc-cycle.ll b/llvm/test/Transforms/Inline/cgscc-cycle.ll
new file mode 100644
index 00000000000..69874c3ef2f
--- /dev/null
+++ b/llvm/test/Transforms/Inline/cgscc-cycle.ll
@@ -0,0 +1,125 @@
+; This test contains extremely tricky call graph structures for the inliner to
+; handle correctly. They form cycles where the inliner introduces code that is
+; immediately or can eventually be transformed back into the original code. And
+; each step changes the call graph and so will trigger iteration. This requires
+; some out-of-band way to prevent infinitely re-inlining and re-transforming the
+; code.
+;
+; RUN: opt < %s -passes='cgscc(inline,function(sroa,instcombine))' -S | FileCheck %s
+
+
+; The `test1_*` collection of functions form a directly cycling pattern.
+
+define void @test1_a(i8** %ptr) {
+; CHECK-LABEL: define void @test1_a(
+entry:
+  call void @test1_b(i8* bitcast (void (i8*, i1, i32)* @test1_b to i8*), i1 false, i32 0)
+; Inlining and simplifying this call will reliably produce the exact same call,
+; over and over again. However, each inlining increments the count, and so we
+; expect this test case to stop after one round of inlining with a final
+; argument of '1'.
+; CHECK-NOT: call
+; CHECK: call void @test1_b(i8* bitcast (void (i8*, i1, i32)* @test1_b to i8*), i1 false, i32 1)
+; CHECK-NOT: call
+
+  ret void
+}
+
+define void @test1_b(i8* %arg, i1 %flag, i32 %inline_count) {
+; CHECK-LABEL: define void @test1_b(
+entry:
+  %a = alloca i8*
+  store i8* %arg, i8** %a
+; This alloca and store should remain through any optimization.
+; CHECK: %[[A:.*]] = alloca
+; CHECK: store i8* %arg, i8** %[[A]]
+
+  br i1 %flag, label %bb1, label %bb2
+
+bb1:
+  call void @test1_a(i8** %a) noinline
+  br label %bb2
+
+bb2:
+  %cast = bitcast i8** %a to void (i8*, i1, i32)**
+  %p = load void (i8*, i1, i32)*, void (i8*, i1, i32)** %cast
+  %inline_count_inc = add i32 %inline_count, 1
+  call void %p(i8* %arg, i1 %flag, i32 %inline_count_inc)
+; And we should continue to load and call indirectly through optimization.
+; CHECK: %[[CAST:.*]] = bitcast i8** %[[A]] to void (i8*, i1, i32)**
+; CHECK: %[[P:.*]] = load void (i8*, i1, i32)*, void (i8*, i1, i32)** %[[CAST]]
+; CHECK: call void %[[P]](
+
+  ret void
+}
+
+define void @test2_a(i8** %ptr) {
+; CHECK-LABEL: define void @test2_a(
+entry:
+  call void @test2_b(i8* bitcast (void (i8*, i8*, i1, i32)* @test2_b to i8*), i8* bitcast (void (i8*, i8*, i1, i32)* @test2_c to i8*), i1 false, i32 0)
+; Inlining and simplifying this call will reliably produce the exact same call,
+; but only after doing two rounds of inlining, first from @test2_b then
+; @test2_c. We check the exact number of inlining rounds before we cut off to
+; break the cycle by inspecting the last parameter that gets incremented with
+; each inlined function body.
+; CHECK-NOT: call
+; CHECK: call void @test2_b(i8* bitcast (void (i8*, i8*, i1, i32)* @test2_b to i8*), i8* bitcast (void (i8*, i8*, i1, i32)* @test2_c to i8*), i1 false, i32 2)
+; CHECK-NOT: call
+  ret void
+}
+
+define void @test2_b(i8* %arg1, i8* %arg2, i1 %flag, i32 %inline_count) {
+; CHECK-LABEL: define void @test2_b(
+entry:
+  %a = alloca i8*
+  store i8* %arg2, i8** %a
+; This alloca and store should remain through any optimization.
+; CHECK: %[[A:.*]] = alloca
+; CHECK: store i8* %arg2, i8** %[[A]]
+
+  br i1 %flag, label %bb1, label %bb2
+
+bb1:
+  call void @test2_a(i8** %a) noinline
+  br label %bb2
+
+bb2:
+  %p = load i8*, i8** %a
+  %cast = bitcast i8* %p to void (i8*, i8*, i1, i32)*
+  %inline_count_inc = add i32 %inline_count, 1
+  call void %cast(i8* %arg1, i8* %arg2, i1 %flag, i32 %inline_count_inc)
+; And we should continue to load and call indirectly through optimization.
+; CHECK: %[[CAST:.*]] = bitcast i8** %[[A]] to void (i8*, i8*, i1, i32)**
+; CHECK: %[[P:.*]] = load void (i8*, i8*, i1, i32)*, void (i8*, i8*, i1, i32)** %[[CAST]]
+; CHECK: call void %[[P]](
+
+  ret void
+}
+
+define void @test2_c(i8* %arg1, i8* %arg2, i1 %flag, i32 %inline_count) {
+; CHECK-LABEL: define void @test2_c(
+entry:
+  %a = alloca i8*
+  store i8* %arg1, i8** %a
+; This alloca and store should remain through any optimization.
+; CHECK: %[[A:.*]] = alloca
+; CHECK: store i8* %arg1, i8** %[[A]]
+
+  br i1 %flag, label %bb1, label %bb2
+
+bb1:
+  call void @test2_a(i8** %a) noinline
+  br label %bb2
+
+bb2:
+  %p = load i8*, i8** %a
+  %cast = bitcast i8* %p to void (i8*, i8*, i1, i32)*
+  %inline_count_inc = add i32 %inline_count, 1
+  call void %cast(i8* %arg1, i8* %arg2, i1 %flag, i32 %inline_count_inc)
+; And we should continue to load and call indirectly through optimization.
+; CHECK: %[[CAST:.*]] = bitcast i8** %[[A]] to void (i8*, i8*, i1, i32)**
+; CHECK: %[[P:.*]] = load void (i8*, i8*, i1, i32)*, void (i8*, i8*, i1, i32)** %[[CAST]]
+; CHECK: call void %[[P]](
+
+  ret void
+}

