From 6565a0d4b2c98722eb8fee9093cdde4f37928986 Mon Sep 17 00:00:00 2001
From: "Duncan P. N. Exon Smith"
Date: Sun, 27 Mar 2016 23:17:54 +0000
Subject: Reapply ~"Bitcode: Collect all MDString records into a single blob"

Spiritually reapply commit r264409 (reverted in r264410), albeit with a
bit of a redesign.

Firstly, avoid splitting the big blob into multiple chunks of strings.

r264409 imposed an arbitrary limit to avoid a massive allocation on the
shared 'Record' SmallVector. The bug with that commit only reproduced
when there were more than "chunk-size" strings. A test for this would
have been useless long-term, since we're liable to adjust the chunk-size
in the future.

Thus, eliminate the motivation for chunking by storing the string sizes
in the blob. Here's the layout:

    vbr6: # of strings
    vbr6: offset-to-blob
    blob:
       [vbr6]: string lengths
       [char]: concatenated strings

Secondly, make the output of llvm-bcanalyzer readable.

I noticed when debugging r264409 that llvm-bcanalyzer was outputting a
massive blob all in one line. Past a small number, the strings were
impossible to split in my head, and the lines were way too long. This
version adds support in llvm-bcanalyzer for pretty-printing.

    num-strings = 3 {
      'abc'
      'def'
      'ghi'
    }

From the original commit:

Inspired by Mehdi's similar patch, http://reviews.llvm.org/D18342, this
should (a) slightly reduce bitcode size, since there is less record
overhead, and (b) greatly improve reading speed, since blobs are super
cheap to deserialize.

llvm-svn: 264551 --- llvm/lib/Bitcode/Writer/ValueEnumerator.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'llvm/lib/Bitcode/Writer/ValueEnumerator.h') diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/llvm/lib/Bitcode/Writer/ValueEnumerator.h index 7665210d014..fd09a695a99 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.h +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.h @@ -66,7 +66,7 @@ private: SmallVector FunctionLocalMDs; typedef DenseMap MetadataMapType; MetadataMapType MetadataMap; - bool HasMDString; + unsigned NumMDStrings = 0; bool ShouldPreserveUseListOrder; typedef DenseMap AttributeGroupMapType; @@ -121,8 +121,6 @@ public: } unsigned numMDs() const { return MDs.size(); } - bool hasMDString() const { return HasMDString; } - bool shouldPreserveUseListOrder() const { return ShouldPreserveUseListOrder; } unsigned getTypeID(Type *T) const { @@ -157,9 +155,16 @@ public: const ValueList &getValues() const { return Values; } const std::vector &getMDs() const { return MDs; } + ArrayRef getMDStrings() const { + return makeArrayRef(MDs).slice(0, NumMDStrings); + } + ArrayRef getNonMDStrings() const { + return makeArrayRef(MDs).slice(NumMDStrings); + } const SmallVectorImpl &getFunctionLocalMDs() const { return FunctionLocalMDs; } + const TypeList &getTypes() const { return Types; } const std::vector &getBasicBlocks() const { return BasicBlocks; @@ -189,6 +194,10 @@ public: private: void OptimizeConstants(unsigned CstStart, unsigned CstEnd); + // Reorder the reachable metadata. This is not just an optimization, but is + // mandatory for emitting MDString correctly. + void organizeMetadata(); + void EnumerateMDNodeOperands(const MDNode *N); void EnumerateMetadata(const Metadata *MD); void EnumerateFunctionLocalMetadata(const LocalAsMetadata *Local); -- cgit v1.2.3