diff options
author | Duncan P. N. Exon Smith <dexonsmith@apple.com> | 2016-03-25 14:40:18 +0000 |
---|---|---|
committer | Duncan P. N. Exon Smith <dexonsmith@apple.com> | 2016-03-25 14:40:18 +0000 |
commit | fdbf0a5af805b764927bd8b38da89ddffc67f531 (patch) | |
tree | 5f0610fb25f0524b13474cf0c1a339893c424687 /llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | |
parent | 59bcbba6b4cf513068cef5fb7cd3d7aad8f5fb5e (diff) | |
download | bcm5719-llvm-fdbf0a5af805b764927bd8b38da89ddffc67f531.tar.gz bcm5719-llvm-fdbf0a5af805b764927bd8b38da89ddffc67f531.zip |
Bitcode: Collect all MDString records into a single blob
Optimize output of MDStrings in bitcode. This emits them in big blocks
(currently up to 512 strings each) in a pair of records:
- BULK_STRING_SIZES: the sizes of the strings in the block, and
- BULK_STRING_DATA: a single blob, which is the concatenation of all
the strings.
Inspired by Mehdi's similar patch, http://reviews.llvm.org/D18342, this
should (a) slightly reduce bitcode size, since there is less record
overhead, and (b) greatly improve reading speed, since blobs are super
cheap to deserialize.
I needed to add support for blobs to streaming input to get the test
suite passing.
- StreamingMemoryObject::getPointer reads ahead and returns the
address of the blob.
- To avoid a possible reallocation of StreamingMemoryObject::Bytes,
BitstreamCursor::readRecord needs to move the call to JumpToEnd
forward so that getPointer is the last bitstream operation.
llvm-svn: 264409
Diffstat (limited to 'llvm/lib/Bitcode/Writer/BitcodeWriter.cpp')
-rw-r--r-- | llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 85 |
1 files changed, 61 insertions, 24 deletions
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 5d051649699..0108667c9e2 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1347,31 +1347,78 @@ static void writeNamedMetadata(const Module &M, const ValueEnumerator &VE, } } +static unsigned createMDStringDataAbbrev(BitstreamWriter &Stream) { + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_BULK_STRING_DATA)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); + return Stream.EmitAbbrev(Abbv); +} + +static void emitMDStringBlob(unsigned DataAbbrev, + ArrayRef<const Metadata *> Strings, + BitstreamWriter &Stream, + SmallVectorImpl<uint64_t> &Record, + SmallString<4096> &Blob) { + for (const Metadata *MD : Strings) { + StringRef S = cast<MDString>(MD)->getString(); + Record.push_back(S.size()); + Blob.append(S.begin(), S.end()); + } + + Stream.EmitRecord(bitc::METADATA_BULK_STRING_SIZES, Record); + Record.clear(); + + Record.push_back(bitc::METADATA_BULK_STRING_DATA); + Stream.EmitRecordWithBlob(DataAbbrev, Record, Blob); + Record.clear(); +} + +/// Write out a section of records for MDString. +/// +/// All the MDString elements in a metadata block are emitted in bulk. They're +/// grouped into blocks, and each block is emitted with pair of records: +/// +/// - SIZES: a list of the sizes of the strings in the block. +/// - DATA: the blob itself. +static void writeMetadataStrings(ArrayRef<const Metadata *> Strings, + BitstreamWriter &Stream, + SmallVectorImpl<uint64_t> &Record) { + if (Strings.empty()) + return; + + // Emit strings in large blocks to reduce record overhead. Somewhat + // arbitrarily, limit this to 512 strings per blob: + // - big enough to eliminate overhead; + // - small enough that the reader's SIZES record will stay within a page. 
+ const size_t NumStringsPerBlob = 512; + Record.reserve(std::min(NumStringsPerBlob, Strings.size())); + + unsigned DataAbbrev = createMDStringDataAbbrev(Stream); + SmallString<4096> Blob; + while (Strings.size() > NumStringsPerBlob) { + emitMDStringBlob(DataAbbrev, Strings.slice(0, NumStringsPerBlob), Stream, + Record, Blob); + Strings = Strings.slice(NumStringsPerBlob); + } + if (!Strings.empty()) + emitMDStringBlob(DataAbbrev, Strings, Stream, Record, Blob); +} + static void WriteModuleMetadata(const Module &M, const ValueEnumerator &VE, BitstreamWriter &Stream) { - const auto &MDs = VE.getMDs(); - if (MDs.empty() && M.named_metadata_empty()) + if (VE.getMDs().empty() && M.named_metadata_empty()) return; Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); - unsigned MDSAbbrev = 0; - if (VE.hasMDString()) { - // Abbrev for METADATA_STRING. - BitCodeAbbrev *Abbv = new BitCodeAbbrev(); - Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); - MDSAbbrev = Stream.EmitAbbrev(Abbv); - } - // Initialize MDNode abbreviations. #define HANDLE_MDNODE_LEAF(CLASS) unsigned CLASS##Abbrev = 0; #include "llvm/IR/Metadata.def" SmallVector<uint64_t, 64> Record; - for (const Metadata *MD : MDs) { + writeMetadataStrings(VE.getMDStrings(), Stream, Record); + for (const Metadata *MD : VE.getNonMDStrings()) { if (const MDNode *N = dyn_cast<MDNode>(MD)) { assert(N->isResolved() && "Expected forward references to be resolved"); @@ -1385,17 +1432,7 @@ static void WriteModuleMetadata(const Module &M, #include "llvm/IR/Metadata.def" } } - if (const auto *MDC = dyn_cast<ConstantAsMetadata>(MD)) { - WriteValueAsMetadata(MDC, VE, Stream, Record); - continue; - } - const MDString *MDS = cast<MDString>(MD); - // Code: [strchar x N] - Record.append(MDS->bytes_begin(), MDS->bytes_end()); - - // Emit the finished record. 
- Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev); - Record.clear(); + WriteValueAsMetadata(cast<ConstantAsMetadata>(MD), VE, Stream, Record); } writeNamedMetadata(M, VE, Stream, Record); |