summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Reid Kleckner <rnk@google.com> 2019-11-25 11:36:47 -0800
committer Reid Kleckner <rnk@google.com> 2020-01-02 16:10:36 -0800
commit 783db7883562b8edd59bf35788057c02ddb7bdfb (patch)
tree 1f6b160db10f930f0ecdd6d5e8b16acf2d5af627
parent 355983103f008b094b5cdd26233eb0ed7113e7ec (diff)
download bcm5719-llvm-783db7883562b8edd59bf35788057c02ddb7bdfb.tar.gz
bcm5719-llvm-783db7883562b8edd59bf35788057c02ddb7bdfb.zip
[PDB] Print the most redundant type record indices with /summary
Summary:
I used this information to motivate splitting up the Intrinsic::ID enum (5d986953c8b917bacfaa1f800fc1e242559f76be) and adding a key method to clang::Sema (586f65d31f32ca6bc8cfdb8a4f61bee5057bf6c8) which saved a fair amount of object file size.

Example output for clang.pdb:

  Top 10 types responsible for the most TPI input bytes:
    index     total bytes    count     size
    0x3890:     8,671,220 =  1,805 *  4,804
    0xE13BE:    5,634,720 =    252 * 22,360
    0x6874C:    5,181,600 =    408 * 12,700
    0x2A1F:     4,520,528 =  1,574 *  2,872
    0x64BFF:    4,024,020 =    469 *  8,580
    0x1123:     4,012,020 =  2,157 *  1,860
    0x6952:     3,753,792 =    912 *  4,116
    0xC16F:     3,630,888 =    633 *  5,736
    0x69DD:     3,601,160 =    985 *  3,656
    0x678D:     3,577,904 =    319 * 11,216

In this case, we can see that record 0x3890 is responsible for ~8MB of total object file size for objects in clang.

The user can then use llvm-pdbutil to find out what the record is:

  $ llvm-pdbutil dump -types -type-index 0x3890
  Types (TPI Stream)
  ============================================================
  Showing 1 records.
  0x3890 | LF_FIELDLIST [size = 4804]
    - LF_STMEMBER [name = `WORDTYPE_MAX`, type = 0x1001, attrs = public]
    - LF_MEMBER [name = `U`, Type = 0x37F0, offset = 0, attrs = private]
    - LF_MEMBER [name = `BitWidth`, Type = 0x0075 (unsigned), offset = 8, attrs = private]
    - LF_METHOD [name = `APInt`, # overloads = 8, overload list = 0x3805]
    ...

In this case, we can see that these are members of the APInt class, which is emitted in 1805 object files.

The next largest type is ASTContext:

  $ llvm-pdbutil dump -types -type-index 0xE13BE bin/clang.pdb
  0xE13BE | LF_FIELDLIST [size = 22360]
    - LF_BCLASS type = 0x653EA, offset = 0, attrs = public
    - LF_MEMBER [name = `Types`, Type = 0x653EB, offset = 8, attrs = private]
    - LF_MEMBER [name = `ExtQualNodes`, Type = 0x653EC, offset = 24, attrs = private]
    - LF_MEMBER [name = `ComplexTypes`, Type = 0x653ED, offset = 48, attrs = private]
    - LF_MEMBER [name = `PointerTypes`, Type = 0x653EE, offset = 72, attrs = private]
    ...
ASTContext only appears 252 times, but the list of members is long, and must be repeated everywhere it is used.

This was the output before I split Intrinsic::ID:

  Top 10 types responsible for the most TPI input:
    0x686C: 69,823,920 = 1,070 * 65,256
    0x686D: 69,819,640 = 1,070 * 65,252
    0x686E: 69,819,640 = 1,070 * 65,252
    0x686B: 16,371,000 = 1,070 * 15,300
    ...

These records were all lists of intrinsic enums.

Reviewers: MaskRay, ruiu
Subscribers: mgrang, zturner, thakis, hans, akhuang, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71437
-rw-r--r-- lld/COFF/PDB.cpp 91
-rw-r--r-- lld/test/COFF/pdb-type-server-simple.test 14
-rw-r--r-- llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h 2
-rw-r--r-- llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h 37
-rw-r--r-- llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp 16
5 files changed, 133 insertions, 27 deletions
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
index 6ba4d394094..f68c60a1325 100644
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -16,8 +16,8 @@
#include "TypeMerger.h"
#include "Writer.h"
#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Timer.h"
#include "lld/Common/Threads.h"
+#include "lld/Common/Timer.h"
#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
@@ -30,6 +30,7 @@
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
@@ -54,6 +55,7 @@
#include "llvm/Support/CRC.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
+#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -189,6 +191,11 @@ private:
uint64_t globalSymbols = 0;
uint64_t moduleSymbols = 0;
uint64_t publicSymbols = 0;
+
+ // When showSummary is enabled, these are histograms of TPI and IPI records
+ // keyed by type index.
+ SmallVector<uint32_t, 0> tpiCounts;
+ SmallVector<uint32_t, 0> ipiCounts;
};
class DebugSHandler {
@@ -415,6 +422,27 @@ PDBLinker::mergeDebugT(ObjFile *file, CVIndexMap *objectIndexMap) {
fatal("codeview::mergeTypeAndIdRecords failed: " +
toString(std::move(err)));
}
+
+ if (config->showSummary) {
+ // Count how many times we saw each type record in our input. This
+ // calculation requires a second pass over the type records to classify each
+ // record as a type or index. This is slow, but this code executes when
+ // collecting statistics.
+ tpiCounts.resize(tMerger.getTypeTable().size());
+ ipiCounts.resize(tMerger.getIDTable().size());
+ uint32_t srcIdx = 0;
+ for (CVType &ty : types) {
+ TypeIndex dstIdx = objectIndexMap->tpiMap[srcIdx++];
+ // Type merging may fail, so a complex source type may become the simple
+ // NotTranslated type, which cannot be used as an array index.
+ if (dstIdx.isSimple())
+ continue;
+ SmallVectorImpl<uint32_t> &counts =
+ isIdRecord(ty.kind()) ? ipiCounts : tpiCounts;
+ ++counts[dstIdx.toArrayIndex()];
+ }
+ }
+
return *objectIndexMap;
}
@@ -482,6 +510,20 @@ Expected<const CVIndexMap &> PDBLinker::maybeMergeTypeServerPDB(ObjFile *file) {
}
}
+ if (config->showSummary) {
+ // Count how many times we saw each type record in our input. If a
+ // destination type index is present in the source to destination type index
+ // map, that means we saw it once in the input. Add it to our histogram.
+ tpiCounts.resize(tMerger.getTypeTable().size());
+ ipiCounts.resize(tMerger.getIDTable().size());
+ for (TypeIndex ti : indexMap.tpiMap)
+ if (!ti.isSimple())
+ ++tpiCounts[ti.toArrayIndex()];
+ for (TypeIndex ti : indexMap.ipiMap)
+ if (!ti.isSimple())
+ ++ipiCounts[ti.toArrayIndex()];
+ }
+
return indexMap;
}
@@ -1334,6 +1376,53 @@ void PDBLinker::printStats() {
print(moduleSymbols, "Module symbol records");
print(publicSymbols, "Public symbol records");
+ auto printLargeInputTypeRecs = [&](StringRef name, // name: label printed in the report header; "TPI" selects the "type" flags in the hint below, anything else selects "id"
+ ArrayRef<uint32_t> recCounts, // recCounts: one occurrence count per record, positioned by array index
+ TypeCollection &records) { // records: collection queried for the length of each record
+ // Figure out which type indices were responsible for the most duplicate
+ // bytes in the input files. These should be frequently emitted LF_CLASS and
+ // LF_FIELDLIST records.
+ struct TypeSizeInfo { // One report row: record length, occurrence count, and the record's type index.
+ uint32_t typeSize;
+ uint32_t dupCount;
+ TypeIndex typeIndex;
+ uint64_t totalInputSize() const { return uint64_t(dupCount) * typeSize; } // dupCount is widened to 64 bits before the multiply
+ bool operator<(const TypeSizeInfo &rhs) const { // Orders rows by total input bytes only.
+ return totalInputSize() < rhs.totalInputSize();
+ }
+ };
+ SmallVector<TypeSizeInfo, 0> tsis;
+ for (auto e : enumerate(recCounts)) { // Build one row per entry of recCounts, including entries whose count is zero.
+ TypeIndex typeIndex = TypeIndex::fromArrayIndex(e.index());
+ uint32_t typeSize = records.getType(typeIndex).length();
+ uint32_t dupCount = e.value();
+ tsis.push_back({typeSize, dupCount, typeIndex});
+ }
+
+ if (!tsis.empty()) { // Nothing at all is printed when recCounts is empty.
+ stream << "\nTop 10 types responsible for the most " << name
+ << " input:\n";
+ stream << " index total bytes count size\n";
+ llvm::sort(tsis); // Ascending by totalInputSize(), so the largest rows end up at the back.
+ unsigned i = 0;
+ for (const auto &tsi : reverse(tsis)) { // Walk from the largest row towards the smallest.
+ stream << formatv(" {0,10:X}: {1,14:N} = {2,5:N} * {3,6:N}\n",
+ tsi.typeIndex.getIndex(), tsi.totalInputSize(),
+ tsi.dupCount, tsi.typeSize);
+ if (++i >= 10) // Stop once ten rows have been printed.
+ break;
+ }
+ stream
+ << "Run llvm-pdbutil to print details about a particular record:\n";
+ stream << formatv("llvm-pdbutil dump -{0}s -{0}-index {1:X} {2}\n",
+ (name == "TPI" ? "type" : "id"),
+ tsis.back().typeIndex.getIndex(), config->pdbPath); // back() is the largest row after the sort above, i.e. the first row printed.
+ }
+ };
+
+ printLargeInputTypeRecs("TPI", tpiCounts, tMerger.getTypeTable());
+ printLargeInputTypeRecs("IPI", ipiCounts, tMerger.getIDTable());
+
message(buffer);
}
diff --git a/lld/test/COFF/pdb-type-server-simple.test b/lld/test/COFF/pdb-type-server-simple.test
index 6243f5b2055..bcba6da28b6 100644
--- a/lld/test/COFF/pdb-type-server-simple.test
+++ b/lld/test/COFF/pdb-type-server-simple.test
@@ -105,4 +105,16 @@ SUMMARY-NEXT: 25 Merged TPI records
SUMMARY-NEXT: 3 Output PDB strings
SUMMARY-NEXT: 4 Global symbol records
SUMMARY-NEXT: 14 Module symbol records
-SUMMARY-NEXT: 2 Public symbol records \ No newline at end of file
+SUMMARY-NEXT: 2 Public symbol records
+
+SUMMARY: Top 10 types responsible for the most TPI input:
+SUMMARY-NEXT: index total bytes count size
+SUMMARY-NEXT: 0x1006: 36 = 1 * 36
+SUMMARY: Run llvm-pdbutil to print details about a particular record:
+SUMMARY-NEXT: llvm-pdbutil dump -types -type-index 0x1006 t.pdb
+
+SUMMARY: Top 10 types responsible for the most IPI input:
+SUMMARY-NEXT: index total bytes count size
+SUMMARY-NEXT: 0x1006: 256 = 1 * 256
+SUMMARY: Run llvm-pdbutil to print details about a particular record:
+SUMMARY-NEXT: llvm-pdbutil dump -ids -id-index 0x1006 t.pdb
diff --git a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
index a43ce20edde..3b103c22770 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
@@ -43,7 +43,7 @@ class GlobalTypeTableBuilder : public TypeCollection {
/// Contains a list of all records indexed by TypeIndex.toArrayIndex().
SmallVector<ArrayRef<uint8_t>, 2> SeenRecords;
- /// Contains a list of all hash values inexed by TypeIndex.toArrayIndex().
+ /// Contains a list of all hash values indexed by TypeIndex.toArrayIndex().
SmallVector<GloballyHashedType, 2> SeenHashes;
public:
diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h b/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
index e84704d99dd..19492b93681 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
@@ -12,16 +12,35 @@
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
namespace llvm {
- namespace codeview {
- /// Given an arbitrary codeview type, determine if it is an LF_STRUCTURE,
- /// LF_CLASS, LF_INTERFACE, LF_UNION, or LF_ENUM with the forward ref class
- /// option.
- bool isUdtForwardRef(CVType CVT);
-
- /// Given a CVType which is assumed to be an LF_MODIFIER, return the
- /// TypeIndex of the type that the LF_MODIFIER modifies.
- TypeIndex getModifiedType(const CVType &CVT);
+namespace codeview {
+
+/// Given an arbitrary codeview type, determine if it is an LF_STRUCTURE,
+/// LF_CLASS, LF_INTERFACE, LF_UNION, or LF_ENUM with the forward ref class
+/// option.
+bool isUdtForwardRef(CVType CVT);
+
+/// Given a CVType which is assumed to be an LF_MODIFIER, return the
+/// TypeIndex of the type that the LF_MODIFIER modifies.
+TypeIndex getModifiedType(const CVType &CVT);
+
+/// Return true if this record should be in the IPI stream of a PDB. In an
+/// object file, these record kinds will appear mixed into the .debug$T section.
+inline bool isIdRecord(TypeLeafKind K) { // Defined inline because this definition lives in a header.
+ switch (K) {
+ case TypeLeafKind::LF_FUNC_ID:
+ case TypeLeafKind::LF_MFUNC_ID:
+ case TypeLeafKind::LF_STRING_ID:
+ case TypeLeafKind::LF_SUBSTR_LIST:
+ case TypeLeafKind::LF_BUILDINFO:
+ case TypeLeafKind::LF_UDT_SRC_LINE:
+ case TypeLeafKind::LF_UDT_MOD_SRC_LINE:
+ return true; // All seven kinds listed above share this result.
+ default: // Every leaf kind not listed above is reported as a non-ID record.
+ return false;
 }
}
+} // namespace codeview
+} // namespace llvm
+
#endif
diff --git a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index aba0e96d606..f9fca74a219 100644
--- a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -15,6 +15,7 @@
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/Support/Error.h"
using namespace llvm;
@@ -202,21 +203,6 @@ private:
const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
-static bool isIdRecord(TypeLeafKind K) {
- switch (K) {
- case TypeLeafKind::LF_FUNC_ID:
- case TypeLeafKind::LF_MFUNC_ID:
- case TypeLeafKind::LF_STRING_ID:
- case TypeLeafKind::LF_SUBSTR_LIST:
- case TypeLeafKind::LF_BUILDINFO:
- case TypeLeafKind::LF_UDT_SRC_LINE:
- case TypeLeafKind::LF_UDT_MOD_SRC_LINE:
- return true;
- default:
- return false;
- }
-}
-
void TypeStreamMerger::addMapping(TypeIndex Idx) {
if (!IsSecondPass) {
assert(IndexMap.size() == slotForIndex(CurIndex) &&
OpenPOWER on IntegriCloud