From fdbf0a5af805b764927bd8b38da89ddffc67f531 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 25 Mar 2016 14:40:18 +0000 Subject: Bitcode: Collect all MDString records into a single blob Optimize output of MDStrings in bitcode. This emits them in big blocks (currently 1024) in a pair of records: - BULK_STRING_SIZES: the sizes of the strings in the block, and - BULK_STRING_DATA: a single blob, which is the concatenation of all the strings. Inspired by Mehdi's similar patch, http://reviews.llvm.org/D18342, this should (a) slightly reduce bitcode size, since there is less record overhead, and (b) greatly improve reading speed, since blobs are super cheap to deserialize. I needed to add support for blobs to streaming input to get the test suite passing. - StreamingMemoryObject::getPointer reads ahead and returns the address of the blob. - To avoid a possible reallocation of StreamingMemoryObject::Bytes, BitstreamCursor::readRecord needs to move the call to JumpToEnd forward so that getPointer is the last bitstream operation. llvm-svn: 264409 --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 34 ++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'llvm/lib/Bitcode/Reader/BitcodeReader.cpp') diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index bb479dbcedc..b849db4427a 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2363,7 +2363,7 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { NextMetadataNo++); break; } - case bitc::METADATA_STRING: { + case bitc::METADATA_STRING_OLD: { std::string String(Record.begin(), Record.end()); // Test for upgrading !llvm.loop. 
@@ -2373,6 +2373,38 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { MetadataList.assignValue(MD, NextMetadataNo++); break; } + case bitc::METADATA_BULK_STRING_SIZES: { + // This is a pair of records for an MDString block: SIZES, which is a + // list of string lengths; and DATA, which is a blob with all the strings + // concatenated together. + // + // Note: since this record type was introduced after the upgrade for + // !llvm.loop, we don't need to change HasSeenOldLoopTags. + if (Record.empty()) + return error("Invalid record: missing bulk metadata string sizes"); + + StringRef Blob; + SmallVector<uint64_t, 1> BlobRecord; + Code = Stream.ReadCode(); + unsigned BlobCode = Stream.readRecord(Code, BlobRecord, &Blob); + if (BlobCode != bitc::METADATA_BULK_STRING_DATA) + return error("Invalid record: missing bulk metadata string data"); + if (!BlobRecord.empty()) + return error("Invalid record: unexpected bulk metadata arguments"); + + for (uint64_t Size : Record) { + if (Blob.size() < Size) + return error("Invalid record: not enough bulk metadata string bytes"); + + // Extract the current string. + MetadataList.assignValue(MDString::get(Context, Blob.slice(0, Size)), + NextMetadataNo++); + Blob = Blob.drop_front(Size); + } + if (!Blob.empty()) + return error("Invalid record: too many bulk metadata string bytes"); + break; + } case bitc::METADATA_KIND: { // Support older bitcode files that had METADATA_KIND records in a // block with METADATA_BLOCK_ID. -- cgit v1.2.3