diff options
author | Duncan P. N. Exon Smith <dexonsmith@apple.com> | 2016-03-27 23:17:54 +0000 |
---|---|---|
committer | Duncan P. N. Exon Smith <dexonsmith@apple.com> | 2016-03-27 23:17:54 +0000 |
commit | 6565a0d4b2c98722eb8fee9093cdde4f37928986 (patch) | |
tree | 9a98af7c4407ac1b6d74a183c4cf30bec6919fc4 /llvm/lib/Bitcode/Reader/BitcodeReader.cpp | |
parent | 376fa2606069cdd5840fd035312bad027d8b2428 (diff) | |
download | bcm5719-llvm-6565a0d4b2c98722eb8fee9093cdde4f37928986.tar.gz bcm5719-llvm-6565a0d4b2c98722eb8fee9093cdde4f37928986.zip |
Reapply ~"Bitcode: Collect all MDString records into a single blob"
Spiritually reapply commit r264409 (reverted in r264410), albeit with a
bit of a redesign.
Firstly, avoid splitting the big blob into multiple chunks of strings.
r264409 imposed an arbitrary limit to avoid a massive allocation on the
shared 'Record' SmallVector. The bug with that commit only reproduced
when there were more than "chunk-size" strings. A test for this would
have been useless long-term, since we're liable to adjust the chunk-size
in the future.
Thus, eliminate the motivation for chunk-ing by storing the string sizes
in the blob. Here's the layout:
vbr6: # of strings
vbr6: offset-to-blob
blob:
[vbr6]: string lengths
[char]: concatenated strings
Secondly, make the output of llvm-bcanalyzer readable.
I noticed when debugging r264409 that llvm-bcanalyzer was outputting a
massive blob all in one line. Past a small number, the strings were
impossible to split in my head, and the lines were way too long. This
version adds support in llvm-bcanalyzer for pretty-printing.
<STRINGS abbrevid=4 op0=3 op1=9/> num-strings = 3 {
'abc'
'def'
'ghi'
}
From the original commit:
Inspired by Mehdi's similar patch, http://reviews.llvm.org/D18342, this
should (a) slightly reduce bitcode size, since there is less record
overhead, and (b) greatly improve reading speed, since blobs are super
cheap to deserialize.
llvm-svn: 264551
Diffstat (limited to 'llvm/lib/Bitcode/Reader/BitcodeReader.cpp')
-rw-r--r-- | llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 54 |
1 files changed, 52 insertions, 2 deletions
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index bb479dbcedc..cb2784ff204 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -396,6 +396,9 @@ private: std::error_code globalCleanup(); std::error_code resolveGlobalAndAliasInits(); std::error_code parseMetadata(bool ModuleLevel = false); + std::error_code parseMetadataStrings(ArrayRef<uint64_t> Record, + StringRef Blob, + unsigned &NextMetadataNo); std::error_code parseMetadataKinds(); std::error_code parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record); std::error_code parseMetadataAttachment(Function &F); @@ -1883,6 +1886,47 @@ BitcodeReader::parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record) { static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; } +std::error_code BitcodeReader::parseMetadataStrings(ArrayRef<uint64_t> Record, + StringRef Blob, + unsigned &NextMetadataNo) { + // All the MDStrings in the block are emitted together in a single + // record. The strings are concatenated and stored in a blob along with + // their sizes. + if (Record.size() != 2) + return error("Invalid record: metadata strings layout"); + + unsigned NumStrings = Record[0]; + unsigned StringsOffset = Record[1]; + if (!NumStrings) + return error("Invalid record: metadata strings with no strings"); + if (StringsOffset >= Blob.size()) + return error("Invalid record: metadata strings corrupt offset"); + + StringRef Lengths = Blob.slice(0, StringsOffset); + SimpleBitstreamCursor R(*StreamFile); + R.jumpToPointer(Lengths.begin()); + + // Ensure that Blob doesn't get invalidated, even if this is reading from + // a StreamingMemoryObject with corrupt data. + R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset); + + StringRef Strings = Blob.drop_front(StringsOffset); + do { + if (R.AtEndOfStream()) + return error("Invalid record: metadata strings bad length"); + + unsigned Size = R.ReadVBR(6); + if (Strings.size() < Size) + return error("Invalid record: metadata strings truncated chars"); + + MetadataList.assignValue(MDString::get(Context, Strings.slice(0, Size)), + NextMetadataNo++); + Strings = Strings.drop_front(Size); + } while (--NumStrings); + + return std::error_code(); +} + /// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing /// module level metadata. std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { @@ -1929,7 +1973,8 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { // Read a record. Record.clear(); - unsigned Code = Stream.readRecord(Entry.ID, Record); + StringRef Blob; + unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob); bool IsDistinct = false; switch (Code) { default: // Default behavior: ignore. @@ -2363,7 +2408,7 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { NextMetadataNo++); break; } - case bitc::METADATA_STRING: { + case bitc::METADATA_STRING_OLD: { std::string String(Record.begin(), Record.end()); // Test for upgrading !llvm.loop. @@ -2373,6 +2418,11 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { MetadataList.assignValue(MD, NextMetadataNo++); break; } + case bitc::METADATA_STRINGS: + if (std::error_code EC = + parseMetadataStrings(Record, Blob, NextMetadataNo)) + return EC; + break; case bitc::METADATA_KIND: { // Support older bitcode files that had METADATA_KIND records in a // block with METADATA_BLOCK_ID. |