Reapply ~"Bitcode: Collect all MDString records into a single blob"

Spiritually reapply commit r264409 (reverted in r264410), albeit with a bit of a redesign. Firstly, avoid splitting the big blob into multiple chunks of strings. r264409 imposed an arbitrary limit to avoid a massive allocation on the shared 'Record' SmallVector. The bug with that commit only reproduced when there were more than "chunk-size" strings. A test for this would have been useless long-term, since we're liable to adjust the chunk-size in the future. Thus, eliminate the motivation for chunk-ing by storing the string sizes in the blob. Here's the layout: vbr6: # of strings vbr6: offset-to-blob blob: [vbr6]: string lengths [char]: concatenated strings Secondly, make the output of llvm-bcanalyzer readable. I noticed when debugging r264409 that llvm-bcanalyzer was outputting a massive blob all in one line. Past a small number, the strings were impossible to split in my head, and the lines were way too long. This version adds support in llvm-bcanalyzer for pretty-printing. <STRINGS abbrevid=4 op0=3 op1=9/> num-strings = 3 { 'abc' 'def' 'ghi' } From the original commit: Inspired by Mehdi's similar patch, http://reviews.llvm.org/D18342, this should (a) slightly reduce bitcode size, since there is less record overhead, and (b) greatly improve reading speed, since blobs are super cheap to deserialize. llvm-svn: 264551
author: Duncan P. N. Exon Smith <dexonsmith@apple.com> 2016-03-27 23:17:54 +0000
committer: Duncan P. N. Exon Smith <dexonsmith@apple.com> 2016-03-27 23:17:54 +0000
commit: 6565a0d4b2c98722eb8fee9093cdde4f37928986 (patch)
tree: 9a98af7c4407ac1b6d74a183c4cf30bec6919fc4 /llvm/lib/Bitcode/Reader/BitcodeReader.cpp
parent: 376fa2606069cdd5840fd035312bad027d8b2428 (diff)
download: bcm5719-llvm-6565a0d4b2c98722eb8fee9093cdde4f37928986.tar.gz
bcm5719-llvm-6565a0d4b2c98722eb8fee9093cdde4f37928986.zip
1 files changed, 52 insertions, 2 deletions
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index bb479dbcedc..cb2784ff204 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -396,6 +396,9 @@ private:
   std::error_code globalCleanup();
   std::error_code resolveGlobalAndAliasInits();
   std::error_code parseMetadata(bool ModuleLevel = false);
+  std::error_code parseMetadataStrings(ArrayRef<uint64_t> Record,
+                                       StringRef Blob,
+                                       unsigned &NextMetadataNo);
   std::error_code parseMetadataKinds();
   std::error_code parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record);
   std::error_code parseMetadataAttachment(Function &F);
@@ -1883,6 +1886,47 @@ BitcodeReader::parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record) {
 
 static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
 
+std::error_code BitcodeReader::parseMetadataStrings(ArrayRef<uint64_t> Record,
+                                                    StringRef Blob,
+                                                    unsigned &NextMetadataNo) {
+  // All the MDStrings in the block are emitted together in a single
+  // record.  The strings are concatenated and stored in a blob along with
+  // their sizes.
+  if (Record.size() != 2)
+    return error("Invalid record: metadata strings layout");
+
+  unsigned NumStrings = Record[0];
+  unsigned StringsOffset = Record[1];
+  if (!NumStrings)
+    return error("Invalid record: metadata strings with no strings");
+  if (StringsOffset >= Blob.size())
+    return error("Invalid record: metadata strings corrupt offset");
+
+  StringRef Lengths = Blob.slice(0, StringsOffset);
+  SimpleBitstreamCursor R(*StreamFile);
+  R.jumpToPointer(Lengths.begin());
+
+  // Ensure that Blob doesn't get invalidated, even if this is reading from
+  // a StreamingMemoryObject with corrupt data.
+  R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset);
+
+  StringRef Strings = Blob.drop_front(StringsOffset);
+  do {
+    if (R.AtEndOfStream())
+      return error("Invalid record: metadata strings bad length");
+
+    unsigned Size = R.ReadVBR(6);
+    if (Strings.size() < Size)
+      return error("Invalid record: metadata strings truncated chars");
+
+    MetadataList.assignValue(MDString::get(Context, Strings.slice(0, Size)),
+                             NextMetadataNo++);
+    Strings = Strings.drop_front(Size);
+  } while (--NumStrings);
+
+  return std::error_code();
+}
+
 /// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing
 /// module level metadata.
 std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
@@ -1929,7 +1973,8 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
 
     // Read a record.
     Record.clear();
-    unsigned Code = Stream.readRecord(Entry.ID, Record);
+    StringRef Blob;
+    unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
     bool IsDistinct = false;
     switch (Code) {
     default:  // Default behavior: ignore.
@@ -2363,7 +2408,7 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
           NextMetadataNo++);
       break;
     }
-    case bitc::METADATA_STRING: {
+    case bitc::METADATA_STRING_OLD: {
       std::string String(Record.begin(), Record.end());
 
       // Test for upgrading !llvm.loop.
@@ -2373,6 +2418,11 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
       MetadataList.assignValue(MD, NextMetadataNo++);
       break;
     }
+    case bitc::METADATA_STRINGS:
+      if (std::error_code EC =
+              parseMetadataStrings(Record, Blob, NextMetadataNo))
+        return EC;
+      break;
     case bitc::METADATA_KIND: {
       // Support older bitcode files that had METADATA_KIND records in a
       // block with METADATA_BLOCK_ID.
author	Duncan P. N. Exon Smith <dexonsmith@apple.com>	2016-03-27 23:17:54 +0000
committer	Duncan P. N. Exon Smith <dexonsmith@apple.com>	2016-03-27 23:17:54 +0000
commit	6565a0d4b2c98722eb8fee9093cdde4f37928986 (patch)
tree	9a98af7c4407ac1b6d74a183c4cf30bec6919fc4 /llvm/lib/Bitcode/Reader/BitcodeReader.cpp
parent	376fa2606069cdd5840fd035312bad027d8b2428 (diff)
download	bcm5719-llvm-6565a0d4b2c98722eb8fee9093cdde4f37928986.tar.gz bcm5719-llvm-6565a0d4b2c98722eb8fee9093cdde4f37928986.zip