diff options
author | Piotr Padlewski <piotr.padlewski@gmail.com> | 2016-09-26 20:37:32 +0000 |
---|---|---|
committer | Piotr Padlewski <piotr.padlewski@gmail.com> | 2016-09-26 20:37:32 +0000 |
commit | d9830eb79fdc42368d370abeab9a3b56c08e3963 (patch) | |
tree | 20fab6b2cd9ae3a74f212b7d4bd4814727a91717 /llvm/lib/Bitcode/Reader/BitcodeReader.cpp | |
parent | 2cd84c905df4fe9a6005320a45e98cd4b8ade045 (diff) | |
download | bcm5719-llvm-d9830eb79fdc42368d370abeab9a3b56c08e3963.tar.gz bcm5719-llvm-d9830eb79fdc42368d370abeab9a3b56c08e3963.zip |
[thinlto] Basic thinlto fdo heuristic
Summary:
This patch improves the thinlto importer
by importing 3x larger functions that are called from a hot block.
I compared performance with the trunk on spec, and there
was an improvement of about 2% on povray and 3.33% on milc. These results seem
to be consistent and match the results Teresa got with her simple
heuristic. Some benchmarks got slower but I think they are just
noisy (mcf, xalancbmk, omnetpp) - I am running the benchmarks again with
more iterations to confirm. Geomean of all benchmarks including the noisy ones
was about +0.02%.
I see a much better improvement on the google branch with Easwaran's patch
for pgo callsite inlining (the inliner actually inlines those big functions).
Overall I see a +0.5% improvement, and I get +8.65% on povray.
So I guess we will see a much bigger change when Easwaran's patch lands
(it depends on the new pass manager), but it is still worth putting this into trunk
before it.
Implementation detail changes:
- Removed CallsiteCount.
- ProfileCount got replaced by Hotness
- hot-import-multiplier is set to 3.0 for now,
I didn't have time to tune it, but I see that we get most of the interesting
functions with 3, so there is not much performance difference with higher values, and
binary size doesn't grow as much as with 10.0.
Reviewers: eraman, mehdi_amini, tejohnson
Subscribers: mehdi_amini, llvm-commits
Differential Revision: https://reviews.llvm.org/D24638
llvm-svn: 282437
Diffstat (limited to 'llvm/lib/Bitcode/Reader/BitcodeReader.cpp')
-rw-r--r-- | llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 57 |
1 files changed, 38 insertions, 19 deletions
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 49bf8136f4e..101e8eba6b1 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -651,6 +651,9 @@ private: std::pair<GlobalValue::GUID, GlobalValue::GUID> getGUIDFromValueId(unsigned ValueId); + std::pair<GlobalValue::GUID, CalleeInfo::HotnessType> + readCallGraphEdge(const SmallVector<uint64_t, 64> &Record, unsigned int &I, + bool IsOldProfileFormat, bool HasProfile); }; } // end anonymous namespace @@ -6218,8 +6221,10 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { return error("Invalid Summary Block: version expected"); } const uint64_t Version = Record[0]; - if (Version != 1) - return error("Invalid summary version " + Twine(Version) + ", 1 expected"); + const bool IsOldProfileFormat = Version == 1; + if (!IsOldProfileFormat && Version != 2) + return error("Invalid summary version " + Twine(Version) + + ", 1 or 2 expected"); Record.clear(); // Keep around the last seen summary to be used when we see an optional @@ -6264,10 +6269,10 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { default: // Default behavior: ignore. break; // FS_PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid, - // n x (valueid, callsitecount)] + // n x (valueid)] // FS_PERMODULE_PROFILE: [valueid, flags, instcount, numrefs, // numrefs x valueid, - // n x (valueid, callsitecount, profilecount)] + // n x (valueid, hotness)] case bitc::FS_PERMODULE: case bitc::FS_PERMODULE_PROFILE: { unsigned ValueID = Record[0]; @@ -6296,12 +6301,11 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { bool HasProfile = (BitCode == bitc::FS_PERMODULE_PROFILE); for (unsigned I = CallGraphEdgeStartIndex, E = Record.size(); I != E; ++I) { - unsigned CalleeValueId = Record[I]; - unsigned CallsiteCount = Record[++I]; - uint64_t ProfileCount = HasProfile ? 
Record[++I] : 0; - GlobalValue::GUID CalleeGUID = getGUIDFromValueId(CalleeValueId).first; - FS->addCallGraphEdge(CalleeGUID, - CalleeInfo(CallsiteCount, ProfileCount)); + CalleeInfo::HotnessType Hotness; + GlobalValue::GUID CalleeGUID; + std::tie(CalleeGUID, Hotness) = + readCallGraphEdge(Record, I, IsOldProfileFormat, HasProfile); + FS->addCallGraphEdge(CalleeGUID, CalleeInfo(Hotness)); } auto GUID = getGUIDFromValueId(ValueID); FS->setOriginalName(GUID.second); @@ -6356,10 +6360,9 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { break; } // FS_COMBINED: [valueid, modid, flags, instcount, numrefs, - // numrefs x valueid, n x (valueid, callsitecount)] + // numrefs x valueid, n x (valueid)] // FS_COMBINED_PROFILE: [valueid, modid, flags, instcount, numrefs, - // numrefs x valueid, - // n x (valueid, callsitecount, profilecount)] + // numrefs x valueid, n x (valueid, hotness)] case bitc::FS_COMBINED: case bitc::FS_COMBINED_PROFILE: { unsigned ValueID = Record[0]; @@ -6385,12 +6388,11 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { bool HasProfile = (BitCode == bitc::FS_COMBINED_PROFILE); for (unsigned I = CallGraphEdgeStartIndex, E = Record.size(); I != E; ++I) { - unsigned CalleeValueId = Record[I]; - unsigned CallsiteCount = Record[++I]; - uint64_t ProfileCount = HasProfile ? 
Record[++I] : 0; - GlobalValue::GUID CalleeGUID = getGUIDFromValueId(CalleeValueId).first; - FS->addCallGraphEdge(CalleeGUID, - CalleeInfo(CallsiteCount, ProfileCount)); + CalleeInfo::HotnessType Hotness; + GlobalValue::GUID CalleeGUID; + std::tie(CalleeGUID, Hotness) = + readCallGraphEdge(Record, I, IsOldProfileFormat, HasProfile); + FS->addCallGraphEdge(CalleeGUID, CalleeInfo(Hotness)); } GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first; TheIndex->addGlobalValueSummary(GUID, std::move(FS)); @@ -6456,6 +6458,23 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { llvm_unreachable("Exit infinite loop"); } +std::pair<GlobalValue::GUID, CalleeInfo::HotnessType> +ModuleSummaryIndexBitcodeReader::readCallGraphEdge( + const SmallVector<uint64_t, 64> &Record, unsigned int &I, + const bool IsOldProfileFormat, const bool HasProfile) { + + auto Hotness = CalleeInfo::HotnessType::Unknown; + unsigned CalleeValueId = Record[I]; + GlobalValue::GUID CalleeGUID = getGUIDFromValueId(CalleeValueId).first; + if (IsOldProfileFormat) { + I += 1; // Skip old callsitecount field + if (HasProfile) + I += 1; // Skip old profilecount field + } else if (HasProfile) + Hotness = static_cast<CalleeInfo::HotnessType>(Record[++I]); + return {CalleeGUID, Hotness}; +} + // Parse the module string table block into the Index. // This populates the ModulePathStringTable map in the index. std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { |