summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/SampleProfile
diff options
context:
space:
mode:
authorWei Mi <wmi@google.com>2018-05-10 23:02:27 +0000
committerWei Mi <wmi@google.com>2018-05-10 23:02:27 +0000
commit0c2f6be662d8b034b7f694df360a216463494fca (patch)
tree53a7688efab782f4ae9b77c81b6f55e44950eb7a /llvm/test/Transforms/SampleProfile
parente0b5f86b3083747beaf5d7639333af0109c9e6ef (diff)
downloadbcm5719-llvm-0c2f6be662d8b034b7f694df360a216463494fca.tar.gz
bcm5719-llvm-0c2f6be662d8b034b7f694df360a216463494fca.zip
[SampleFDO] Don't treat warm callsite with inline instance in the profile as cold
While triaging a regression, we found that the current SampleFDO implementation has a performance issue. For a callsite with an inline instance in the profile, even if the hot callsite inliner cannot inline it, the callsite may still execute often enough that it should not be treated as cold by the regular inliner later. However, currently, if such a callsite is not inlined by the hot callsite inliner, and the basic block containing the callsite does not get samples from any other instruction inside it, the callsite will have no profile metadata annotated. In the regular inliner's cost analysis, a callsite that has no profile annotation while its caller has profile information is treated as cold. The fix changes the isCallsiteHot check to compare CallsiteTotalSamples with the hot cutoff value computed by ProfileSummaryInfo. Differential Revision: https://reviews.llvm.org/D45377 llvm-svn: 332058
Diffstat (limited to 'llvm/test/Transforms/SampleProfile')
-rw-r--r--llvm/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof11
-rw-r--r--llvm/test/Transforms/SampleProfile/function_metadata.ll2
-rw-r--r--llvm/test/Transforms/SampleProfile/inline.ll4
-rw-r--r--llvm/test/Transforms/SampleProfile/warm-inline-instance.ll115
4 files changed, 129 insertions, 3 deletions
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof b/llvm/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof
new file mode 100644
index 00000000000..a1b0e27dd58
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof
@@ -0,0 +1,11 @@
+main:2257150:0
+ 2.1: 5553
+ 3: 5391
+ 3.1: foo:5860
+ 0: 5279
+ 1: 5279
+ 2: 5279
+ 4.1: goo:60
+ 0: 20
+ 1: 20
+ 2: 20
diff --git a/llvm/test/Transforms/SampleProfile/function_metadata.ll b/llvm/test/Transforms/SampleProfile/function_metadata.ll
index e6e57dba984..41cff6e7977 100644
--- a/llvm/test/Transforms/SampleProfile/function_metadata.ll
+++ b/llvm/test/Transforms/SampleProfile/function_metadata.ll
@@ -28,7 +28,7 @@ define void @test_liveness() !dbg !12 {
; GUIDs of foo, bar, foo1, foo2 and foo3 should be included in the metadata to
; make sure hot inline stacks are imported.
-; CHECK: ![[ENTRY_TEST]] = !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -7908226060800700466, i64 -2012135647395072713}
+; CHECK: ![[ENTRY_TEST]] = !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7546896869197086323, i64 7682762345278052905, i64 -7908226060800700466, i64 -2012135647395072713}
; Check GUIDs for both foo and foo_available are included in the metadata to
; make sure the liveness analysis can capture the dependency from test_liveness
diff --git a/llvm/test/Transforms/SampleProfile/inline.ll b/llvm/test/Transforms/SampleProfile/inline.ll
index 17e0c5990f4..bd7b024c553 100644
--- a/llvm/test/Transforms/SampleProfile/inline.ll
+++ b/llvm/test/Transforms/SampleProfile/inline.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -sample-profile-inline-hot-threshold=1 -S | FileCheck %s
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -sample-profile-inline-hot-threshold=1 -S | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s
; Original C++ test case
;
diff --git a/llvm/test/Transforms/SampleProfile/warm-inline-instance.ll b/llvm/test/Transforms/SampleProfile/warm-inline-instance.ll
new file mode 100644
index 00000000000..622db49b707
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/warm-inline-instance.ll
@@ -0,0 +1,115 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/warm-inline-instance.prof -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/warm-inline-instance.prof -S | FileCheck %s
+
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @foo(i32 %x, i32 %y) !dbg !4 {
+entry:
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %t0 = load i32, i32* %x.addr, align 4, !dbg !11
+ %t1 = load i32, i32* %y.addr, align 4, !dbg !11
+ %add = add nsw i32 %t0, %t1, !dbg !11
+ ret i32 %add, !dbg !11
+}
+
+define i32 @goo(i32 %x, i32 %y) {
+entry:
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %t0 = load i32, i32* %x.addr, align 4, !dbg !11
+ %t1 = load i32, i32* %y.addr, align 4, !dbg !11
+ %add = add nsw i32 %t0, %t1, !dbg !11
+ ret i32 %add, !dbg !11
+}
+
+; Function Attrs: uwtable
+define i32 @main() !dbg !7 {
+entry:
+ %retval = alloca i32, align 4
+ %s = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ store i32 0, i32* %i, align 4, !dbg !12
+ br label %while.cond, !dbg !13
+
+while.cond: ; preds = %if.end, %entry
+ %t0 = load i32, i32* %i, align 4, !dbg !14
+ %inc = add nsw i32 %t0, 1, !dbg !14
+ store i32 %inc, i32* %i, align 4, !dbg !14
+ %cmp = icmp slt i32 %t0, 400000000, !dbg !14
+ br i1 %cmp, label %while.body, label %while.end, !dbg !14
+
+while.body: ; preds = %while.cond
+ %t1 = load i32, i32* %i, align 4, !dbg !16
+ %cmp1 = icmp ne i32 %t1, 100, !dbg !16
+ br i1 %cmp1, label %if.then, label %if.else, !dbg !16
+
+if.then: ; preds = %while.body
+ %t2 = load i32, i32* %i, align 4, !dbg !18
+ %t3 = load i32, i32* %s, align 4, !dbg !18
+; Although the ratio of total samples of @foo vs total samples of @main is
+; small, since the total samples count is larger than hot cutoff computed by
+; ProfileSummaryInfo, we will still regard the callsite of foo as hot and
+; early inlining will inline it.
+; CHECK-LABEL: @main(
+; CHECK-NOT: call i32 @foo(i32 %t2, i32 %t3)
+ %call1 = call i32 @foo(i32 %t2, i32 %t3), !dbg !18
+ store i32 %call1, i32* %s, align 4, !dbg !18
+ br label %if.end, !dbg !18
+
+if.else: ; preds = %while.body
+; The basic block containing the call to @goo doesn't get any samples, so no profile will be annotated.
+; CHECK: call i32 @goo(i32 2, i32 3), !dbg !{{[0-9]+}}
+; CHECK-NOT: !prof
+; CHECK-SAME: {{$}}
+ %call2 = call i32 @goo(i32 2, i32 3), !dbg !26
+ store i32 %call2, i32* %s, align 4, !dbg !20
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ br label %while.cond, !dbg !22
+
+while.end: ; preds = %while.cond
+ %t4 = load i32, i32* %s, align 4, !dbg !24
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %t4), !dbg !24
+ ret i32 0, !dbg !25
+}
+
+declare i32 @printf(i8*, ...) #2
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "calls.cc", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!11 = !DILocation(line: 4, scope: !4)
+!12 = !DILocation(line: 8, scope: !7)
+!13 = !DILocation(line: 9, scope: !7)
+!14 = !DILocation(line: 9, scope: !15)
+!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7)
+!16 = !DILocation(line: 10, scope: !17)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !19)
+!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17)
+!20 = !DILocation(line: 10, scope: !21)
+!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17)
+!22 = !DILocation(line: 10, scope: !23)
+!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17)
+!24 = !DILocation(line: 11, scope: !7)
+!25 = !DILocation(line: 12, scope: !7)
+!26 = !DILocation(line: 11, scope: !19)
OpenPOWER on IntegriCloud