summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/ProfileData/SampleProf.h5
-rw-r--r--llvm/include/llvm/ProfileData/SampleProfReader.h9
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp28
-rw-r--r--llvm/test/CodeGen/X86/insert-prefetch-inline.afdo2
-rw-r--r--llvm/test/Transforms/SampleProfile/Inputs/einline.prof4
-rw-r--r--llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof2
-rw-r--r--llvm/test/Transforms/SampleProfile/Inputs/inline-mergeprof.prof13
-rw-r--r--llvm/test/Transforms/SampleProfile/inline-mergeprof.ll97
8 files changed, 152 insertions, 8 deletions
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 55418d9d0f9..f8be89c569b 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -387,7 +387,10 @@ public:
if (FS != iter->second.end())
return &FS->second;
// If we cannot find exact match of the callee name, return the FS with
- // the max total count.
+ // the max total count. Only do this when CalleeName is not provided,
+ // i.e., only for indirect calls.
+ if (!CalleeName.empty())
+ return nullptr;
uint64_t MaxTotalSamples = 0;
const FunctionSamples *R = nullptr;
for (const auto &NameFS : iter->second)
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 5a5d4cfde22..72b178edc26 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -358,6 +358,15 @@ public:
return getSamplesFor(CanonName);
}
+ /// Return the samples collected for function \p F, creating an empty
+ /// FunctionSamples entry in Profiles if none exists yet.
+ FunctionSamples *getOrCreateSamplesFor(const Function &F) {
+ std::string FGUID; // scratch string passed to getRepInFormat below
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ CanonName = getRepInFormat(CanonName, getFormat(), FGUID);
+ return &Profiles[CanonName]; // NOTE(review): presumably operator[] inserts a default entry when absent — confirm
+ }
+
/// Return the samples collected for function \p F.
virtual FunctionSamples *getSamplesFor(StringRef Fname) {
if (Remapper) {
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 21461a609c9..0a3e6ada58b 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -137,6 +137,11 @@ static cl::opt<bool> ProfileAccurateForSymsInList(
cl::desc("For symbols in profile symbol list, regard their profiles to "
"be accurate. It may be overriden by profile-sample-accurate. "));
+static cl::opt<bool> ProfileMergeInlinee(
+ "sample-profile-merge-inlinee", cl::Hidden, cl::init(false),
+ cl::desc("Merge past inlinee's profile to outline version if sample "
+ "profile loader decided not to inline a call site."));
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -1008,9 +1013,26 @@ bool SampleProfileLoader::inlineHotFunctions(
if (!Callee || Callee->isDeclaration())
continue;
const FunctionSamples *FS = Pair.getSecond();
- auto pair =
- notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
- pair.first->second.entryCount += FS->getEntrySamples();
+ if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
+ continue;
+ }
+
+ if (ProfileMergeInlinee) {
+ // Use entry samples as head samples during the merge, as inlinees
+ // don't have head samples.
+ assert(FS->getHeadSamples() == 0 && "Expect 0 head sample for inlinee");
+ const_cast<FunctionSamples *>(FS)->addHeadSamples(FS->getEntrySamples());
+
+ // Note that we have to do the merge right after processing function.
+ // This allows OutlineFS's profile to be used for annotation during
+ // top-down processing of functions' annotation.
+ FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
+ OutlineFS->merge(*FS);
+ } else {
+ auto pair =
+ notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
+ pair.first->second.entryCount += FS->getEntrySamples();
+ }
}
return Changed;
}
diff --git a/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo b/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo
index 83b30f6e210..935b707ff10 100644
--- a/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo
+++ b/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo
@@ -1,4 +1,4 @@
caller:0:0
- 2:sum:0
+ 2: sum:0
3: 0 __prefetch_nta_0:23456
3.1: 0 __prefetch_nta_0:8764 __prefetch_nta_1:64 \ No newline at end of file
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/einline.prof b/llvm/test/Transforms/SampleProfile/Inputs/einline.prof
index 624990b47ef..1b0f3922658 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/einline.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/einline.prof
@@ -1,7 +1,7 @@
_Z3foov:200:100
- 1: _Z3barv:0
+ 1: _ZL3barv:0
2: no_inline:100
- 3: _Z3barv:100
+ 3: _ZL3barv:100
recursive:200:100
1: recursive:100
2: recursive:100
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof
index b33f2bf998b..ecd93127472 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof
@@ -4,7 +4,7 @@ sample_loader_inlinee:3000:0
1: direct_leaf_func:35000
11: 3000
test_cgscc_inline:63067:0
- 1: sample_loader_inlinee:1
+ 1: cgscc_inlinee:1
cgscc_inlinee:3000:0
1: direct_leaf_func:35000
11: 3000 \ No newline at end of file
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-mergeprof.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-mergeprof.prof
new file mode 100644
index 00000000000..96ac2decb7c
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-mergeprof.prof
@@ -0,0 +1,13 @@
+main:225715:0
+ 2.1: 5553
+ 3: 5391
+ 3.1: _Z3sumii:46
+ 1: 23
+ 2: _Z3subii:2
+ 1: 2
+ 3: 21
+
+_Z3sumii:11:22
+ 1: 11
+ 2: 10 _Z3subii:10
+ 3: 1 \ No newline at end of file
diff --git a/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll
new file mode 100644
index 00000000000..8b5989f7a62
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll
@@ -0,0 +1,97 @@
+; Test we lose details of not inlined profile without '-sample-profile-merge-inlinee'
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -S | FileCheck -check-prefix=SCALE %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -S | FileCheck -check-prefix=SCALE %s
+
+; Test we properly merge not inlined profile with '-sample-profile-merge-inlinee'
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee -S | FileCheck -check-prefix=MERGE %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee -S | FileCheck -check-prefix=MERGE %s
+
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+define i32 @main() !dbg !6 {
+entry:
+ %retval = alloca i32, align 4
+ %s = alloca i32, align 4
+ %i = alloca i32, align 4
+ %tmp = load i32, i32* %i, align 4, !dbg !8
+ %tmp1 = load i32, i32* %s, align 4, !dbg !8
+ %call = call i32 @_Z3sumii(i32 %tmp, i32 %tmp1), !dbg !8
+; SCALE: call i32 @_Z3sumii
+; MERGE: call i32 @_Z3sumii
+ store i32 %call, i32* %s, align 4, !dbg !8
+ ret i32 0, !dbg !11
+}
+
+define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !12 {
+entry:
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %tmp = load i32, i32* %x.addr, align 4, !dbg !13
+ %tmp1 = load i32, i32* %y.addr, align 4, !dbg !13
+ %add = add nsw i32 %tmp, %tmp1, !dbg !13
+ %tmp2 = load i32, i32* %x.addr, align 4, !dbg !13
+ %tmp3 = load i32, i32* %y.addr, align 4, !dbg !13
+ %cmp1 = icmp ne i32 %tmp3, 100, !dbg !13
+ br i1 %cmp1, label %if.then, label %if.else, !dbg !13
+
+if.then: ; preds = %entry
+ %call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !14
+ ret i32 %add, !dbg !14
+
+if.else: ; preds = %entry
+ ret i32 %add, !dbg !15
+}
+
+define i32 @_Z3subii(i32 %x, i32 %y) !dbg !16 {
+entry:
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %tmp = load i32, i32* %x.addr, align 4, !dbg !17
+ %tmp1 = load i32, i32* %y.addr, align 4, !dbg !17
+ %add = sub nsw i32 %tmp, %tmp1, !dbg !17
+ ret i32 %add, !dbg !18
+}
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 1, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 3.5 "}
+!6 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 10, scope: !9)
+!9 = !DILexicalBlockFile(scope: !10, file: !1, discriminator: 2)
+!10 = distinct !DILexicalBlock(scope: !6, file: !1, line: 10)
+!11 = !DILocation(line: 12, scope: !6)
+!12 = distinct !DISubprogram(name: "sum", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!13 = !DILocation(line: 4, scope: !12)
+!14 = !DILocation(line: 5, scope: !12)
+!15 = !DILocation(line: 6, scope: !12)
+!16 = distinct !DISubprogram(name: "sub", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!17 = !DILocation(line: 20, scope: !16)
+!18 = !DILocation(line: 21, scope: !16)
+
+; SCALE: name: "sum"
+; SCALE-NEXT: {!"function_entry_count", i64 46}
+; SCALE: !{!"branch_weights", i32 11, i32 2}
+; SCALE: !{!"branch_weights", i64 20}
+; SCALE: name: "sub"
+; SCALE-NEXT: {!"function_entry_count", i64 -1}
+
+; MERGE: name: "sum"
+; MERGE-NEXT: {!"function_entry_count", i64 46}
+; MERGE: !{!"branch_weights", i32 11, i32 23}
+; MERGE: !{!"branch_weights", i32 10}
+; MERGE: name: "sub"
+; MERGE-NEXT: {!"function_entry_count", i64 3} \ No newline at end of file
OpenPOWER on IntegriCloud