diff options
| author | Reid Kleckner <rnk@google.com> | 2019-11-25 11:36:47 -0800 |
|---|---|---|
| committer | Reid Kleckner <rnk@google.com> | 2020-01-02 16:10:36 -0800 |
| commit | 783db7883562b8edd59bf35788057c02ddb7bdfb (patch) | |
| tree | 1f6b160db10f930f0ecdd6d5e8b16acf2d5af627 | |
| parent | 355983103f008b094b5cdd26233eb0ed7113e7ec (diff) | |
| download | bcm5719-llvm-783db7883562b8edd59bf35788057c02ddb7bdfb.tar.gz bcm5719-llvm-783db7883562b8edd59bf35788057c02ddb7bdfb.zip | |
[PDB] Print the most redundant type record indices with /summary
Summary:
I used this information to motivate splitting up the Intrinsic::ID enum
(5d986953c8b917bacfaa1f800fc1e242559f76be) and adding a key method to
clang::Sema (586f65d31f32ca6bc8cfdb8a4f61bee5057bf6c8) which saved a
fair amount of object file size.
Example output for clang.pdb:
Top 10 types responsible for the most TPI input:
index total bytes count size
0x3890: 8,671,220 = 1,805 * 4,804
0xE13BE: 5,634,720 = 252 * 22,360
0x6874C: 5,181,600 = 408 * 12,700
0x2A1F: 4,520,528 = 1,574 * 2,872
0x64BFF: 4,024,020 = 469 * 8,580
0x1123: 4,012,020 = 2,157 * 1,860
0x6952: 3,753,792 = 912 * 4,116
0xC16F: 3,630,888 = 633 * 5,736
0x69DD: 3,601,160 = 985 * 3,656
0x678D: 3,577,904 = 319 * 11,216
In this case, we can see that record 0x3890 is responsible for ~8MB of
total object file size for objects in clang.
The user can then use llvm-pdbutil to find out what the record is:
$ llvm-pdbutil dump -types -type-index 0x3890 bin/clang.pdb
Types (TPI Stream)
============================================================
Showing 1 records.
0x3890 | LF_FIELDLIST [size = 4804]
- LF_STMEMBER [name = `WORDTYPE_MAX`, type = 0x1001, attrs = public]
- LF_MEMBER [name = `U`, Type = 0x37F0, offset = 0, attrs = private]
- LF_MEMBER [name = `BitWidth`, Type = 0x0075 (unsigned), offset = 8, attrs = private]
- LF_METHOD [name = `APInt`, # overloads = 8, overload list = 0x3805]
...
In this case, we can see that these are members of the APInt class,
which is emitted in 1805 object files.
The next largest type is ASTContext:
$ llvm-pdbutil dump -types -type-index 0xE13BE bin/clang.pdb
0xE13BE | LF_FIELDLIST [size = 22360]
- LF_BCLASS
type = 0x653EA, offset = 0, attrs = public
- LF_MEMBER [name = `Types`, Type = 0x653EB, offset = 8, attrs = private]
- LF_MEMBER [name = `ExtQualNodes`, Type = 0x653EC, offset = 24, attrs = private]
- LF_MEMBER [name = `ComplexTypes`, Type = 0x653ED, offset = 48, attrs = private]
- LF_MEMBER [name = `PointerTypes`, Type = 0x653EE, offset = 72, attrs = private]
...
ASTContext only appears 252 times, but the list of members is long, and
must be repeated everywhere it is used.
This was the output before I split Intrinsic::ID:
Top 10 types responsible for the most TPI input:
0x686C: 69,823,920 = 1,070 * 65,256
0x686D: 69,819,640 = 1,070 * 65,252
0x686E: 69,819,640 = 1,070 * 65,252
0x686B: 16,371,000 = 1,070 * 15,300
...
These records were all lists of intrinsic enums.
Reviewers: MaskRay, ruiu
Subscribers: mgrang, zturner, thakis, hans, akhuang, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71437
| -rw-r--r-- | lld/COFF/PDB.cpp | 91 | ||||
| -rw-r--r-- | lld/test/COFF/pdb-type-server-simple.test | 14 | ||||
| -rw-r--r-- | llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h | 2 | ||||
| -rw-r--r-- | llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h | 37 | ||||
| -rw-r--r-- | llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp | 16 |
5 files changed, 133 insertions, 27 deletions
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index 6ba4d394094..f68c60a1325 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -16,8 +16,8 @@ #include "TypeMerger.h" #include "Writer.h" #include "lld/Common/ErrorHandler.h" -#include "lld/Common/Timer.h" #include "lld/Common/Threads.h" +#include "lld/Common/Timer.h" #include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" @@ -30,6 +30,7 @@ #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" +#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MSFCommon.h" @@ -54,6 +55,7 @@ #include "llvm/Support/CRC.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" #include "llvm/Support/ScopedPrinter.h" @@ -189,6 +191,11 @@ private: uint64_t globalSymbols = 0; uint64_t moduleSymbols = 0; uint64_t publicSymbols = 0; + + // When showSummary is enabled, these are histograms of TPI and IPI records + // keyed by type index. + SmallVector<uint32_t, 0> tpiCounts; + SmallVector<uint32_t, 0> ipiCounts; }; class DebugSHandler { @@ -415,6 +422,27 @@ PDBLinker::mergeDebugT(ObjFile *file, CVIndexMap *objectIndexMap) { fatal("codeview::mergeTypeAndIdRecords failed: " + toString(std::move(err))); } + + if (config->showSummary) { + // Count how many times we saw each type record in our input. This + // calculation requires a second pass over the type records to classify each + // record as a type or index. This is slow, but this code executes when + // collecting statistics. 
+ tpiCounts.resize(tMerger.getTypeTable().size()); + ipiCounts.resize(tMerger.getIDTable().size()); + uint32_t srcIdx = 0; + for (CVType &ty : types) { + TypeIndex dstIdx = objectIndexMap->tpiMap[srcIdx++]; + // Type merging may fail, so a complex source type may become the simple + // NotTranslated type, which cannot be used as an array index. + if (dstIdx.isSimple()) + continue; + SmallVectorImpl<uint32_t> &counts = + isIdRecord(ty.kind()) ? ipiCounts : tpiCounts; + ++counts[dstIdx.toArrayIndex()]; + } + } + return *objectIndexMap; } @@ -482,6 +510,20 @@ Expected<const CVIndexMap &> PDBLinker::maybeMergeTypeServerPDB(ObjFile *file) { } } + if (config->showSummary) { + // Count how many times we saw each type record in our input. If a + // destination type index is present in the source to destination type index + // map, that means we saw it once in the input. Add it to our histogram. + tpiCounts.resize(tMerger.getTypeTable().size()); + ipiCounts.resize(tMerger.getIDTable().size()); + for (TypeIndex ti : indexMap.tpiMap) + if (!ti.isSimple()) + ++tpiCounts[ti.toArrayIndex()]; + for (TypeIndex ti : indexMap.ipiMap) + if (!ti.isSimple()) + ++ipiCounts[ti.toArrayIndex()]; + } + return indexMap; } @@ -1334,6 +1376,53 @@ void PDBLinker::printStats() { print(moduleSymbols, "Module symbol records"); print(publicSymbols, "Public symbol records"); + auto printLargeInputTypeRecs = [&](StringRef name, + ArrayRef<uint32_t> recCounts, + TypeCollection &records) { + // Figure out which type indices were responsible for the most duplicate + // bytes in the input files. These should be frequently emitted LF_CLASS and + // LF_FIELDLIST records. 
+ struct TypeSizeInfo { + uint32_t typeSize; + uint32_t dupCount; + TypeIndex typeIndex; + uint64_t totalInputSize() const { return uint64_t(dupCount) * typeSize; } + bool operator<(const TypeSizeInfo &rhs) const { + return totalInputSize() < rhs.totalInputSize(); + } + }; + SmallVector<TypeSizeInfo, 0> tsis; + for (auto e : enumerate(recCounts)) { + TypeIndex typeIndex = TypeIndex::fromArrayIndex(e.index()); + uint32_t typeSize = records.getType(typeIndex).length(); + uint32_t dupCount = e.value(); + tsis.push_back({typeSize, dupCount, typeIndex}); + } + + if (!tsis.empty()) { + stream << "\nTop 10 types responsible for the most " << name + << " input:\n"; + stream << " index total bytes count size\n"; + llvm::sort(tsis); + unsigned i = 0; + for (const auto &tsi : reverse(tsis)) { + stream << formatv(" {0,10:X}: {1,14:N} = {2,5:N} * {3,6:N}\n", + tsi.typeIndex.getIndex(), tsi.totalInputSize(), + tsi.dupCount, tsi.typeSize); + if (++i >= 10) + break; + } + stream + << "Run llvm-pdbutil to print details about a particular record:\n"; + stream << formatv("llvm-pdbutil dump -{0}s -{0}-index {1:X} {2}\n", + (name == "TPI" ? "type" : "id"), + tsis.back().typeIndex.getIndex(), config->pdbPath); + } + }; + + printLargeInputTypeRecs("TPI", tpiCounts, tMerger.getTypeTable()); + printLargeInputTypeRecs("IPI", ipiCounts, tMerger.getIDTable()); + message(buffer); } diff --git a/lld/test/COFF/pdb-type-server-simple.test b/lld/test/COFF/pdb-type-server-simple.test index 6243f5b2055..bcba6da28b6 100644 --- a/lld/test/COFF/pdb-type-server-simple.test +++ b/lld/test/COFF/pdb-type-server-simple.test @@ -105,4 +105,16 @@ SUMMARY-NEXT: 25 Merged TPI records SUMMARY-NEXT: 3 Output PDB strings SUMMARY-NEXT: 4 Global symbol records SUMMARY-NEXT: 14 Module symbol records -SUMMARY-NEXT: 2 Public symbol records
\ No newline at end of file +SUMMARY-NEXT: 2 Public symbol records + +SUMMARY: Top 10 types responsible for the most TPI input: +SUMMARY-NEXT: index total bytes count size +SUMMARY-NEXT: 0x1006: 36 = 1 * 36 +SUMMARY: Run llvm-pdbutil to print details about a particular record: +SUMMARY-NEXT: llvm-pdbutil dump -types -type-index 0x1006 t.pdb + +SUMMARY: Top 10 types responsible for the most IPI input: +SUMMARY-NEXT: index total bytes count size +SUMMARY-NEXT: 0x1006: 256 = 1 * 256 +SUMMARY: Run llvm-pdbutil to print details about a particular record: +SUMMARY-NEXT: llvm-pdbutil dump -ids -id-index 0x1006 t.pdb diff --git a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h index a43ce20edde..3b103c22770 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h +++ b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h @@ -43,7 +43,7 @@ class GlobalTypeTableBuilder : public TypeCollection { /// Contains a list of all records indexed by TypeIndex.toArrayIndex(). SmallVector<ArrayRef<uint8_t>, 2> SeenRecords; - /// Contains a list of all hash values inexed by TypeIndex.toArrayIndex(). + /// Contains a list of all hash values indexed by TypeIndex.toArrayIndex(). SmallVector<GloballyHashedType, 2> SeenHashes; public: diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h b/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h index e84704d99dd..19492b93681 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h @@ -12,16 +12,35 @@ #include "llvm/DebugInfo/CodeView/TypeRecord.h" namespace llvm { - namespace codeview { - /// Given an arbitrary codeview type, determine if it is an LF_STRUCTURE, - /// LF_CLASS, LF_INTERFACE, LF_UNION, or LF_ENUM with the forward ref class - /// option. 
- bool isUdtForwardRef(CVType CVT); - - /// Given a CVType which is assumed to be an LF_MODIFIER, return the - /// TypeIndex of the type that the LF_MODIFIER modifies. - TypeIndex getModifiedType(const CVType &CVT); +namespace codeview { + +/// Given an arbitrary codeview type, determine if it is an LF_STRUCTURE, +/// LF_CLASS, LF_INTERFACE, LF_UNION, or LF_ENUM with the forward ref class +/// option. +bool isUdtForwardRef(CVType CVT); + +/// Given a CVType which is assumed to be an LF_MODIFIER, return the +/// TypeIndex of the type that the LF_MODIFIER modifies. +TypeIndex getModifiedType(const CVType &CVT); + +/// Return true if this record should be in the IPI stream of a PDB. In an +/// object file, these record kinds will appear mixed into the .debug$T section. +inline bool isIdRecord(TypeLeafKind K) { + switch (K) { + case TypeLeafKind::LF_FUNC_ID: + case TypeLeafKind::LF_MFUNC_ID: + case TypeLeafKind::LF_STRING_ID: + case TypeLeafKind::LF_SUBSTR_LIST: + case TypeLeafKind::LF_BUILDINFO: + case TypeLeafKind::LF_UDT_SRC_LINE: + case TypeLeafKind::LF_UDT_MOD_SRC_LINE: + return true; + default: + return false; } } +} // namespace codeview +} // namespace llvm + #endif diff --git a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp index aba0e96d606..f9fca74a219 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -15,6 +15,7 @@ #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" #include "llvm/Support/Error.h" using namespace llvm; @@ -202,21 +203,6 @@ private: const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated); -static bool isIdRecord(TypeLeafKind K) { - switch (K) { - case TypeLeafKind::LF_FUNC_ID: - case TypeLeafKind::LF_MFUNC_ID: - case TypeLeafKind::LF_STRING_ID: - 
case TypeLeafKind::LF_SUBSTR_LIST: - case TypeLeafKind::LF_BUILDINFO: - case TypeLeafKind::LF_UDT_SRC_LINE: - case TypeLeafKind::LF_UDT_MOD_SRC_LINE: - return true; - default: - return false; - } -} - void TypeStreamMerger::addMapping(TypeIndex Idx) { if (!IsSecondPass) { assert(IndexMap.size() == slotForIndex(CurIndex) && |

