From 5d303285b9825a44f233102766be39bd1cff4de9 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 26 Oct 2015 18:37:00 +0000 Subject: Add an (optional) identification block in the bitcode Processing bitcode from a different LLVM version can lead to unexpected behavior. The LLVM project guarantees autoupdating bitcode from a previous minor revision for the same major, but can't make any promise when reading bitcode generated from a either a non-released LLVM, a vendor toolchain, or a "future" LLVM release. This patch aims at being more user-friendly and allows a bitcode produce to emit an optional block at the beginning of the bitcode that will contains an opaque string intended to describe the bitcode producer information. The bitcode reader will dump this information alongside any error it reports. The optional block also includes an "epoch" number, monotonically increasing when incompatible changes are made to the bitcode. The reader will reject bitcode whose epoch is different from the one expected. Differential Revision: http://reviews.llvm.org/D13666 From: Mehdi Amini llvm-svn: 251325 --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 67 +++++++++++++++++++++++++++++++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 25 ++++++++++++ 2 files changed, 92 insertions(+) (limited to 'llvm/lib/Bitcode') diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index e8d45790e3b..d926f324145 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -152,6 +152,8 @@ class BitcodeReader : public GVMaterializer { uint64_t LastFunctionBlockBit = 0; bool SeenValueSymbolTable = false; unsigned VSTOffset = 0; + // Contains an arbitrary and optional string identifying the bitcode producer + std::string ProducerIdentification; std::vector TypeList; BitcodeReaderValueList ValueList; @@ -273,6 +275,11 @@ public: void setStripDebugInfo() override; private: + /// Parse the "IDENTIFICATION_BLOCK_ID" block, populate the + // ProducerIdentification data member, and do some basic enforcement on the + // "epoch" encoded in the bitcode. + std::error_code parseBitcodeVersion(); + std::vector IdentifiedStructTypes; StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name); StructType *createIdentifiedStructType(LLVMContext &Context); @@ -518,10 +525,21 @@ static std::error_code error(DiagnosticHandlerFunction DiagnosticHandler, } std::error_code BitcodeReader::error(BitcodeError E, const Twine &Message) { + if (!ProducerIdentification.empty()) { + Twine MsgWithID = Message + " (Producer: '" + ProducerIdentification + + "' Reader: 'LLVM " + LLVM_VERSION_STRING "')"; + return ::error(DiagnosticHandler, make_error_code(E), MsgWithID); + } return ::error(DiagnosticHandler, make_error_code(E), Message); } std::error_code BitcodeReader::error(const Twine &Message) { + if (!ProducerIdentification.empty()) { + Twine MsgWithID = Message + " (Producer: '" + ProducerIdentification + + "' Reader: 'LLVM " + LLVM_VERSION_STRING "')"; + return ::error(DiagnosticHandler, + make_error_code(BitcodeError::CorruptedBitcode), MsgWithID); + } return ::error(DiagnosticHandler, make_error_code(BitcodeError::CorruptedBitcode), Message); } @@ -3061,6 +3079,50 @@ std::error_code BitcodeReader::rememberAndSkipFunctionBodies() { } } +std::error_code BitcodeReader::parseBitcodeVersion() { + if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID)) + return error("Invalid record"); + + // Read all the records. + SmallVector Record; + while (1) { + BitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + default: + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + unsigned BitCode = Stream.readRecord(Entry.ID, Record); + switch (BitCode) { + default: // Default behavior: reject + return error("Invalid value"); + case bitc::IDENTIFICATION_CODE_STRING: { // IDENTIFICATION: [strchr x + // N] + convertToString(Record, 0, ProducerIdentification); + break; + } + case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#] + unsigned epoch = (unsigned)Record[0]; + if (epoch != bitc::BITCODE_CURRENT_EPOCH) { + auto BitcodeEpoch = std::to_string(epoch); + auto CurrentEpoch = std::to_string(bitc::BITCODE_CURRENT_EPOCH); + return error(Twine("Incompatible epoch: Bitcode '") + BitcodeEpoch + + "' vs current: '" + CurrentEpoch + "'"); + } + } + } + } +} + std::error_code BitcodeReader::parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata) { if (ResumeBit) @@ -3552,6 +3614,11 @@ BitcodeReader::parseBitcodeInto(std::unique_ptr Streamer, if (Entry.Kind != BitstreamEntry::SubBlock) return error("Malformed block"); + if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) { + parseBitcodeVersion(); + continue; + } + if (Entry.ID == bitc::MODULE_BLOCK_ID) return parseModule(0, ShouldLazyLoadMetadata); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 5b50a905e1b..5e90cd1bf10 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -2829,6 +2829,29 @@ static void WriteCombinedFunctionSummary(const FunctionInfoIndex &I, Stream.ExitBlock(); } +// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the +// current llvm version, and a record for the epoch number. +static void WriteIdentificationBlock(const Module *M, BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5); + + // Write the "user readable" string identifying the bitcode producer + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_STRING)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + auto StringAbbrev = Stream.EmitAbbrev(Abbv); + WriteStringRecord(bitc::IDENTIFICATION_CODE_STRING, + "LLVM" LLVM_VERSION_STRING, StringAbbrev, Stream); + + // Write the epoch version + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_EPOCH)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + SmallVector Vals = {bitc::BITCODE_CURRENT_EPOCH}; + Stream.EmitRecord(bitc::IDENTIFICATION_CODE_EPOCH, Vals); + Stream.ExitBlock(); +} + /// WriteModule - Emit the specified module to the bitstream. static void WriteModule(const Module *M, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, @@ -3000,6 +3023,8 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, // Emit the file header. WriteBitcodeHeader(Stream); + WriteIdentificationBlock(M, Stream); + // Emit the module. WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit, EmitFunctionSummary); -- cgit v1.2.3