summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWei Mi <wmi@google.com>2019-09-23 22:11:35 +0000
committerWei Mi <wmi@google.com>2019-09-23 22:11:35 +0000
commit22fd88530b11af92e1c2658c755d3915538a844d (patch)
tree4ac6500419878ada1ffd254d8c369d7b57da616e
parent28b38c277a2941e9e891b2db30652cfd962f070b (diff)
downloadbcm5719-llvm-22fd88530b11af92e1c2658c755d3915538a844d.tar.gz
bcm5719-llvm-22fd88530b11af92e1c2658c755d3915538a844d.zip
[SampleFDO] Treat names in profile as not cold only when profile symbol list
is available In rL372232, we treated names showing up in profile as not cold when profile-sample-accurate is enabled. This caused 70k size regression in Chrome/Android. The patch put a guard and only enable the change when profile symbol list is available, i.e., keep the old behavior when profile symbol list is not available. Differential Revision: https://reviews.llvm.org/D67931 llvm-svn: 372665
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp45
-rw-r--r--llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll28
2 files changed, 44 insertions, 29 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 9c9d851a470..b647818345d 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1770,27 +1770,32 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
if (ProfSampleAccEnabled) {
// PSL -- profile symbol list include all the symbols in sampled binary.
// It is used to prevent new functions to be treated as cold.
- // If ProfileSampleAccurate is true or F has profile-sample-accurate
- // attribute, and if there is no profile symbol list read in, initialize
- // all the function entry counts to 0; if there is profile symbol list, only
- // initialize the entry count to 0 when current function is in the list.
- if (!PSL || PSL->contains(F.getName()))
+ if (PSL) {
+ // Profile symbol list is available, initialize the entry count to 0
+ // only for functions in the list.
+ if (PSL->contains(F.getName()))
+ initialEntryCount = 0;
+
+ // When ProfileSampleAccurate is true, function without sample will be
+ // regarded as cold. To minimize the potential negative performance
+ // impact it could have, we want to be a little conservative here
+ // saying if a function shows up in the profile, no matter as outline
+ // function, inline instance or call targets, treat the function as not
+ // being cold. This will handle the cases such as most callsites of a
+ // function are inlined in sampled binary but not inlined in current
+ // build (because of source code drift, imprecise debug information, or
+ // the callsites are all cold individually but not cold
+ // accumulatively...), so the outline function showing up as cold in
+ // sampled binary will actually not be cold after current build.
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ if (NamesInProfile.count(CanonName))
+ initialEntryCount = -1;
+ } else {
+ // If there is no profile symbol list available, initialize all the
+ // function entry counts to 0. It means all the functions without
+ // profile will be regarded as cold.
initialEntryCount = 0;
-
- // When ProfileSampleAccurate is true, function without sample will be
- // regarded as cold. To minimize the potential negative performance
- // impact it could have, we want to be a little conservative here
- // saying if a function shows up in the profile, no matter as outline
- // function, inline instance or call targets, treat the function as not
- // being cold. This will handle the cases such as most callsites of a
- // function are inlined in sampled binary but not inlined in current
- // build (because of source code drift, imprecise debug information, or
- // the callsites are all cold individually but not cold accumulatively...),
- // so the outline function showing up as cold in sampled binary will
- // actually not be cold after current build.
- StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
- if (NamesInProfile.count(CanonName))
- initialEntryCount = -1;
+ }
}
F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
diff --git a/llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll b/llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll
index 31b2d363e6d..a469537e0ee 100644
--- a/llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll
@@ -1,11 +1,9 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s
-; With the hot cutoff being set to 600000, the inline instance of _Z3sumii
-; in main is neither hot nor cold. Check it will still be inlined when
-; profile-sample-accurate is enabled, and check _Z3sumii's function entry
-; count won't be initialized to 0 because it shows up in the profile as
-; inline instance.
-
+; RUN: llvm-profdata merge -sample -extbinary -prof-sym-list=%S/Inputs/profile-symbol-list.text %S/Inputs/profsampleacc.extbinary.afdo -o %t.symlist.afdo
+; RUN: opt < %s -sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=PROFSYMLIST
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=PROFSYMLIST
+;
; Original C++ test case
;
; #include <stdio.h>
@@ -24,7 +22,13 @@
;
@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
-; CHECK: define i32 @_Z3sumii{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]]
+; Check _Z3sumii's function entry count will be intialized to 0 if no profile
+; symbol list is available.
+; If symbol list is available, _Z3sumii's function entry count will be
+; initialized to -1 if it shows up in the profile.
+;
+; CHECK: define i32 @_Z3sumii{{.*}}!prof ![[ZERO_ID:[0-9]+]]
+; PROFSYMLIST: define i32 @_Z3sumii{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]]
; Function Attrs: nounwind uwtable
define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 {
entry:
@@ -60,10 +64,15 @@ while.body: ; preds = %while.cond
%cmp1 = icmp ne i32 %1, 100, !dbg !16
br i1 %cmp1, label %if.then, label %if.else, !dbg !16
-; Check _Z3sumii is inlined at this callsite.
+; With the hot cutoff being set to 600000, the inline instance of _Z3sumii
+; in main is neither hot nor cold. Check it will still be inlined when
+; profile-sample-accurate is enabled.
; CHECK: if.then:
; CHECK-NOT: call i32 @_Z3sumii
; CHECK: if.else:
+; PROFSYMLIST: if.then:
+; PROFSYMLIST-NOT: call i32 @_Z3sumii
+; PROFSYMLIST: if.else:
if.then: ; preds = %while.body
%2 = load i32, i32* %i, align 4, !dbg !18
%3 = load i32, i32* %s, align 4, !dbg !18
@@ -90,7 +99,8 @@ declare i32 @printf(i8*, ...) #2
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-; CHECK: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1}
+; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
+; PROFSYMLIST: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "calls.cc", directory: ".")
!2 = !{}
OpenPOWER on IntegriCloud