summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/Inline
diff options
context:
space:
mode:
authorEaswaran Raman <eraman@google.com>2017-01-20 22:44:04 +0000
committerEaswaran Raman <eraman@google.com>2017-01-20 22:44:04 +0000
commit12585b0148a104dad35feed91f440836b2a97623 (patch)
treea006d63085616b0253c1e68b99116de08fce9c95 /llvm/test/Transforms/Inline
parent760ad4da6006422c5a6097232b5ffc72d6b906dd (diff)
downloadbcm5719-llvm-12585b0148a104dad35feed91f440836b2a97623.tar.gz
bcm5719-llvm-12585b0148a104dad35feed91f440836b2a97623.zip
Improve PGO support for the new inliner
This adds the following to the new PM based inliner in PGO mode: * Use block frequency analysis to derive callsite's profile count and use that to adjust thresholds of hot and cold callsites. * Incrementally update the BFI of the caller after a callee gets inlined into it. This incremental update is only within an invocation of the run method - BFI is not preserved across calls to run. Update the function entry count of the callee after inlining it into a caller. * I've tuned the thresholds for the hot and cold callsites using a hacked up version of the old inliner that explicitly computes BFI on a set of internal benchmarks and spec. Once the new PM based pipeline stabilizes (IIRC Chandler mentioned there are known issues) I'll benchmark this again and adjust the thresholds if required. Inliner PGO support. Differential revision: https://reviews.llvm.org/D28331 llvm-svn: 292666
Diffstat (limited to 'llvm/test/Transforms/Inline')
-rw-r--r--llvm/test/Transforms/Inline/function-count-update-2.ll33
-rw-r--r--llvm/test/Transforms/Inline/function-count-update-3.ll78
-rw-r--r--llvm/test/Transforms/Inline/function-count-update.ll50
-rw-r--r--llvm/test/Transforms/Inline/inline-cold-callee.ll1
-rw-r--r--llvm/test/Transforms/Inline/inline-cold-callsite.ll54
-rw-r--r--llvm/test/Transforms/Inline/inline-hot-callsite-2.ll56
-rw-r--r--llvm/test/Transforms/Inline/inline-hot-callsite.ll2
7 files changed, 272 insertions, 2 deletions
diff --git a/llvm/test/Transforms/Inline/function-count-update-2.ll b/llvm/test/Transforms/Inline/function-count-update-2.ll
new file mode 100644
index 00000000000..702fa6292c2
--- /dev/null
+++ b/llvm/test/Transforms/Inline/function-count-update-2.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
+
+; This tests that the function count of a callee gets correctly updated after it
+; has been inlined into two callsites.
+
+; CHECK: @callee() !prof [[COUNT:![0-9]+]]
+define i32 @callee() !prof !1 {
+ ret i32 0
+}
+
+define i32 @caller1() !prof !2 {
+; CHECK-LABEL: @caller1
+; CHECK-NOT: callee
+; CHECK: ret
+ %i = call i32 @callee()
+ ret i32 %i
+}
+
+define i32 @caller2() !prof !3 {
+; CHECK-LABEL: @caller2
+; CHECK-NOT: callee
+; CHECK: ret
+ %i = call i32 @callee()
+ ret i32 %i
+}
+
+!llvm.module.flags = !{!0}
+; CHECK: [[COUNT]] = !{!"function_entry_count", i64 0}
+!0 = !{i32 1, !"MaxFunctionCount", i32 1000}
+!1 = !{!"function_entry_count", i64 1000}
+!2 = !{!"function_entry_count", i64 600}
+!3 = !{!"function_entry_count", i64 400}
+
diff --git a/llvm/test/Transforms/Inline/function-count-update-3.ll b/llvm/test/Transforms/Inline/function-count-update-3.ll
new file mode 100644
index 00000000000..215d64175fa
--- /dev/null
+++ b/llvm/test/Transforms/Inline/function-count-update-3.ll
@@ -0,0 +1,78 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S -inline-threshold=50 | FileCheck %s
+
+; This tests that the function count of a function gets properly scaled after
+; inlining a call chain leading to the function.
+; Function a calls c with count 200 (C1)
+; Function c calls e with count 250 (C2)
+; Entry count of e is 500 (C3)
+; Entry count of c is 500 (C4)
+; Function b calls c with count 300 (C5)
+; c->e inlining does not happen since the cost exceeds threshold.
+; c is then inlined into a.
+; e now gets inlined into a (through c) since the branch condition in e is now
+; known and hence the cost gets reduced.
+; Estimated count of a->e callsite = C2 * (C1 / C4)
+; Estimated count of a->e callsite = 250 * (200 / 500) = 100
+; Remaining count of e = C3 - 100 = 500 - 100 = 400
+; Remaining count of c = C4 - C1 - C5 = 500 - 200 - 300 = 0
+
+@data = external global i32
+
+define i32 @a(i32 %a1) !prof !1 {
+ %a2 = call i32 @c(i32 %a1, i32 1)
+ ret i32 %a2
+}
+
+define i32 @b(i32 %b1) !prof !2 {
+ %b2 = call i32 @c(i32 %b1, i32 %b1)
+ ret i32 %b2
+}
+
+declare void @ext();
+
+; CHECK: @c(i32 %c1, i32 %c100) !prof [[COUNT1:![0-9]+]]
+define i32 @c(i32 %c1, i32 %c100) !prof !3 {
+ call void @ext()
+ %cond = icmp sle i32 %c1, 1
+ br i1 %cond, label %cond_true, label %cond_false
+
+cond_false:
+ ret i32 0
+
+cond_true:
+ %c11 = call i32 @e(i32 %c100)
+ ret i32 %c11
+}
+
+
+; CHECK: @e(i32 %c1) !prof [[COUNT2:![0-9]+]]
+define i32 @e(i32 %c1) !prof !4 {
+ %cond = icmp sle i32 %c1, 1
+ br i1 %cond, label %cond_true, label %cond_false
+
+cond_false:
+ call void @ext()
+ %c2 = load i32, i32* @data, align 4
+ %c3 = add i32 %c1, %c2
+ %c4 = mul i32 %c3, %c2
+ %c5 = add i32 %c4, %c2
+ %c6 = mul i32 %c5, %c2
+ %c7 = add i32 %c6, %c2
+ %c8 = mul i32 %c7, %c2
+ %c9 = add i32 %c8, %c2
+ %c10 = mul i32 %c9, %c2
+ ret i32 %c10
+
+cond_true:
+ ret i32 0
+}
+
+!llvm.module.flags = !{!0}
+; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 0}
+; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 400}
+!0 = !{i32 1, !"MaxFunctionCount", i32 5000}
+!1 = !{!"function_entry_count", i64 200}
+!2 = !{!"function_entry_count", i64 300}
+!3 = !{!"function_entry_count", i64 500}
+!4 = !{!"function_entry_count", i64 500}
+
diff --git a/llvm/test/Transforms/Inline/function-count-update.ll b/llvm/test/Transforms/Inline/function-count-update.ll
new file mode 100644
index 00000000000..094ad5a2ae6
--- /dev/null
+++ b/llvm/test/Transforms/Inline/function-count-update.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
+
+; This tests that the function count of two callees get correctly updated after
+; they have been inlined into two back-to-back callsites in a single basic block
+; in the caller. The callees have the alwaysinline attribute and so they get
+; inlined both with the regular inliner pass and the always inline pass. In
+; both cases, the new count of each callee is the original count minus callsite
+; count which is 200 (since the caller's entry count is 400 and the block
+; containing the calls has a relative block frequency of 0.5).
+
+; CHECK: @callee1(i32 %n) #0 !prof [[COUNT1:![0-9]+]]
+define i32 @callee1(i32 %n) #0 !prof !1 {
+ %cond = icmp sle i32 %n, 10
+ br i1 %cond, label %cond_true, label %cond_false
+
+cond_true:
+ %r1 = add i32 %n, 1
+ ret i32 %r1
+cond_false:
+ %r2 = add i32 %n, 2
+ ret i32 %r2
+}
+
+; CHECK: @callee2(i32 %n) #0 !prof [[COUNT2:![0-9]+]]
+define i32 @callee2(i32 %n) #0 !prof !2 {
+ %r1 = add i32 %n, 1
+ ret i32 %r1
+}
+
+define i32 @caller(i32 %n) !prof !3 {
+ %cond = icmp sle i32 %n, 100
+ br i1 %cond, label %cond_true, label %cond_false
+
+cond_true:
+ %i = call i32 @callee1(i32 %n)
+ %j = call i32 @callee2(i32 %i)
+ ret i32 %j
+cond_false:
+ ret i32 0
+}
+
+!llvm.module.flags = !{!0}
+; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 800}
+; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 1800}
+!0 = !{i32 1, !"MaxFunctionCount", i32 1000}
+!1 = !{!"function_entry_count", i64 1000}
+!2 = !{!"function_entry_count", i64 2000}
+!3 = !{!"function_entry_count", i64 400}
+attributes #0 = { alwaysinline }
+
diff --git a/llvm/test/Transforms/Inline/inline-cold-callee.ll b/llvm/test/Transforms/Inline/inline-cold-callee.ll
index 153f446c5c2..404c537b297 100644
--- a/llvm/test/Transforms/Inline/inline-cold-callee.ll
+++ b/llvm/test/Transforms/Inline/inline-cold-callee.ll
@@ -1,5 +1,4 @@
; RUN: opt < %s -inline -inlinecold-threshold=0 -S | FileCheck %s
-; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inlinecold-threshold=0 -S | FileCheck %s
; This tests that a cold callee gets the (lower) inlinecold-threshold even without
; Cold hint and does not get inlined because the cost exceeds the inlinecold-threshold.
diff --git a/llvm/test/Transforms/Inline/inline-cold-callsite.ll b/llvm/test/Transforms/Inline/inline-cold-callsite.ll
new file mode 100644
index 00000000000..26ea8e50eaf
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-cold-callsite.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s
+
+; This tests that a cold callsite gets the inline-cold-callsite-threshold
+; and does not get inlined. Another callsite to an identical callee that
+; is not cold gets inlined because cost is below the inline-threshold.
+
+define i32 @callee1(i32 %x) !prof !21 {
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+ call void @extern()
+ ret i32 %x3
+}
+
+define i32 @caller(i32 %n) !prof !22 {
+; CHECK-LABEL: @caller(
+ %cond = icmp sle i32 %n, 100
+ br i1 %cond, label %cond_true, label %cond_false, !prof !0
+
+cond_true:
+; CHECK-LABEL: cond_true:
+; CHECK-NOT: call i32 @callee1
+; CHECK: ret i32 %x3.i
+ %i = call i32 @callee1(i32 %n)
+ ret i32 %i
+cond_false:
+; CHECK-LABEL: cond_false:
+; CHECK: call i32 @callee1
+; CHECK: ret i32 %j
+ %j = call i32 @callee1(i32 %n)
+ ret i32 %j
+}
+declare void @extern()
+
+!0 = !{!"branch_weights", i32 200, i32 1}
+
+!llvm.module.flags = !{!1}
+!21 = !{!"function_entry_count", i64 200}
+!22 = !{!"function_entry_count", i64 200}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 1000, i32 1}
+!13 = !{i32 999000, i64 1000, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
diff --git a/llvm/test/Transforms/Inline/inline-hot-callsite-2.ll b/llvm/test/Transforms/Inline/inline-hot-callsite-2.ll
new file mode 100644
index 00000000000..ccfe2f0b5de
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-hot-callsite-2.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=0 -inlinehint-threshold=0 -hot-callsite-threshold=100 -S | FileCheck %s
+
+; This tests that a callsite which is determined to be hot based on the caller's
+; entry count and the callsite block frequency gets the hot-callsite-threshold.
+; Another callsite with the same callee that is not hot does not get inlined
+; because cost exceeds the inline-threshold. inlinehint-threshold is set to 0
+; to ensure callee's hotness is not used to boost the threshold.
+
+define i32 @callee1(i32 %x) !prof !21 {
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+ call void @extern()
+ ret i32 %x3
+}
+
+define i32 @caller(i32 %n) !prof !22 {
+; CHECK-LABEL: @caller(
+ %cond = icmp sle i32 %n, 100
+ br i1 %cond, label %cond_true, label %cond_false, !prof !0
+
+cond_true:
+; CHECK-LABEL: cond_true:
+; CHECK-NOT: call i32 @callee1
+; CHECK: ret i32 %x3.i
+ %i = call i32 @callee1(i32 %n)
+ ret i32 %i
+cond_false:
+; CHECK-LABEL: cond_false:
+; CHECK: call i32 @callee1
+; CHECK: ret i32 %j
+ %j = call i32 @callee1(i32 %n)
+ ret i32 %j
+}
+declare void @extern()
+
+!0 = !{!"branch_weights", i32 64, i32 4}
+
+!llvm.module.flags = !{!1}
+!21 = !{!"function_entry_count", i64 200}
+!22 = !{!"function_entry_count", i64 200}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
diff --git a/llvm/test/Transforms/Inline/inline-hot-callsite.ll b/llvm/test/Transforms/Inline/inline-hot-callsite.ll
index bdd7175b3ee..ebf4030d3d1 100644
--- a/llvm/test/Transforms/Inline/inline-hot-callsite.ll
+++ b/llvm/test/Transforms/Inline/inline-hot-callsite.ll
@@ -41,7 +41,7 @@ declare void @extern()
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
-!3 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 1000}
!6 = !{!"MaxInternalCount", i64 1}
OpenPOWER on IntegriCloud