From 6565a0d4b2c98722eb8fee9093cdde4f37928986 Mon Sep 17 00:00:00 2001
From: "Duncan P. N. Exon Smith"
Date: Sun, 27 Mar 2016 23:17:54 +0000
Subject: Reapply ~"Bitcode: Collect all MDString records into a single blob"

Spiritually reapply commit r264409 (reverted in r264410), albeit with a bit of a redesign.

Firstly, avoid splitting the big blob into multiple chunks of strings. r264409 imposed an arbitrary limit to avoid a massive allocation on the shared 'Record' SmallVector. The bug with that commit only reproduced when there were more than "chunk-size" strings. A test for this would have been useless long-term, since we're liable to adjust the chunk-size in the future. Thus, eliminate the motivation for chunk-ing by storing the string sizes in the blob. Here's the layout:

    vbr6: # of strings
    vbr6: offset-to-blob
    blob:
       [vbr6]: string lengths
       [char]: concatenated strings

Secondly, make the output of llvm-bcanalyzer readable. I noticed when debugging r264409 that llvm-bcanalyzer was outputting a massive blob all in one line. Past a small number, the strings were impossible to split in my head, and the lines were way too long. This version adds support in llvm-bcanalyzer for pretty-printing.

    num-strings = 3 {
      'abc'
      'def'
      'ghi'
    }

From the original commit:

Inspired by Mehdi's similar patch, http://reviews.llvm.org/D18342, this should (a) slightly reduce bitcode size, since there is less record overhead, and (b) greatly improve reading speed, since blobs are super cheap to deserialize.
llvm-svn: 264551 --- llvm/lib/Bitcode/Writer/ValueEnumerator.cpp | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'llvm/lib/Bitcode/Writer/ValueEnumerator.cpp') diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index 08b5e45703a..69cafb7eeee 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -280,8 +280,7 @@ static bool isIntOrIntVectorValue(const std::pair &V) { ValueEnumerator::ValueEnumerator(const Module &M, bool ShouldPreserveUseListOrder) - : HasMDString(false), - ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { + : ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { if (ShouldPreserveUseListOrder) UseListOrders = predictUseListOrder(M); @@ -375,6 +374,9 @@ ValueEnumerator::ValueEnumerator(const Module &M, // Optimize constant ordering. OptimizeConstants(FirstConstant, Values.size()); + + // Organize metadata ordering. + organizeMetadata(); } unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { @@ -530,8 +532,8 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) { EnumerateMDNodeOperands(N); else if (auto *C = dyn_cast(MD)) EnumerateValue(C->getValue()); - - HasMDString |= isa(MD); + else + ++NumMDStrings; // Replace the dummy ID inserted above with the correct one. MetadataMap may // have changed by inserting operands, so we need a fresh lookup here. @@ -557,6 +559,19 @@ void ValueEnumerator::EnumerateFunctionLocalMetadata( FunctionLocalMDs.push_back(Local); } +void ValueEnumerator::organizeMetadata() { + if (!NumMDStrings) + return; + + // Put the strings first. + std::stable_partition(MDs.begin(), MDs.end(), + [](const Metadata *MD) { return isa(MD); }); + + // Renumber. 
+ for (unsigned I = 0, E = MDs.size(); I != E; ++I) + MetadataMap[MDs[I]] = I + 1; +} + void ValueEnumerator::EnumerateValue(const Value *V) { assert(!V->getType()->isVoidTy() && "Can't insert void values!"); assert(!isa(V) && "EnumerateValue doesn't handle Metadata!"); -- cgit v1.2.3