diff options
author | Teresa Johnson <tejohnson@google.com> | 2019-12-12 11:59:36 -0800 |
---|---|---|
committer | Teresa Johnson <tejohnson@google.com> | 2019-12-12 12:34:19 -0800 |
commit | c8e0bb3b2c24ef59556d81a275fb1f5db64899d3 (patch) | |
tree | 9f700b857676d676ff03dea2a7d4f68c9f5940a6 /llvm/lib/Bitcode/Writer | |
parent | 0ee89c1bad8cef81725ef892d60b4aa254d84744 (diff) | |
download | bcm5719-llvm-c8e0bb3b2c24ef59556d81a275fb1f5db64899d3.tar.gz bcm5719-llvm-c8e0bb3b2c24ef59556d81a275fb1f5db64899d3.zip |
[LTO] Support for embedding bitcode section during LTO
Summary:
This adds support for embedding bitcode in a binary during LTO. The libLTO gains supports the `-lto-embed-bitcode` flag. The option allows users of the LTO library to embed a bitcode section. For example, LLD can pass the option via `ld.lld -mllvm=-lto-embed-bitcode`.
This feature allows doing something comparable to `clang -c -fembed-bitcode`, but on the (LTO) linker level. Having bitcode alongside native code has many use-cases. To give an example, the MacOS linker can create a `-bitcode_bundle` section containing bitcode. Also, having this feature built into LLVM is an alternative to 3rd party tools such as [[ https://github.com/travitch/whole-program-llvm | wllvm ]] or [[ https://github.com/SRI-CSL/gllvm | gllvm ]]. As with these tools, this feature simplifies creating "whole-program" llvm bitcode files, but in contrast to wllvm/gllvm it does not rely on a specific llvm frontend/driver.
Patch by Josef Eisl <josef.eisl@oracle.com>
Reviewers: #llvm, #clang, rsmith, pcc, alexshap, tejohnson
Reviewed By: tejohnson
Subscribers: tejohnson, mehdi_amini, inglorion, hiraditya, aheejin, steven_wu, dexonsmith, dang, cfe-commits, llvm-commits, #llvm, #clang
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D68213
Diffstat (limited to 'llvm/lib/Bitcode/Writer')
-rw-r--r-- | llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 125 |
1 files changed, 124 insertions, 1 deletions
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index fca2cc2c300..dcff7c421fc 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -24,9 +24,10 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/Bitstream/BitCodes.h" #include "llvm/Bitstream/BitstreamWriter.h" -#include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -4677,3 +4678,125 @@ void llvm::WriteThinLinkBitcodeToFile(const Module &M, raw_ostream &Out, Out.write((char *)&Buffer.front(), Buffer.size()); } + +static const char *getSectionNameForBitcode(const Triple &T) { + switch (T.getObjectFormat()) { + case Triple::MachO: + return "__LLVM,__bitcode"; + case Triple::COFF: + case Triple::ELF: + case Triple::Wasm: + case Triple::UnknownObjectFormat: + return ".llvmbc"; + case Triple::XCOFF: + llvm_unreachable("XCOFF is not yet implemented"); + break; + } + llvm_unreachable("Unimplemented ObjectFormatType"); +} + +static const char *getSectionNameForCommandline(const Triple &T) { + switch (T.getObjectFormat()) { + case Triple::MachO: + return "__LLVM,__cmdline"; + case Triple::COFF: + case Triple::ELF: + case Triple::Wasm: + case Triple::UnknownObjectFormat: + return ".llvmcmd"; + case Triple::XCOFF: + llvm_unreachable("XCOFF is not yet implemented"); + break; + } + llvm_unreachable("Unimplemented ObjectFormatType"); +} + +void llvm::EmbedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, + bool EmbedBitcode, bool EmbedMarker, + const std::vector<uint8_t> *CmdArgs) { + // Save llvm.compiler.used and remove it. + SmallVector<Constant *, 2> UsedArray; + SmallPtrSet<GlobalValue *, 4> UsedGlobals; + Type *UsedElementType = Type::getInt8Ty(M.getContext())->getPointerTo(0); + GlobalVariable *Used = collectUsedGlobalVariables(M, UsedGlobals, true); + for (auto *GV : UsedGlobals) { + if (GV->getName() != "llvm.embedded.module" && + GV->getName() != "llvm.cmdline") + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + } + if (Used) + Used->eraseFromParent(); + + // Embed the bitcode for the llvm module. + std::string Data; + ArrayRef<uint8_t> ModuleData; + Triple T(M.getTargetTriple()); + // Create a constant that contains the bitcode. + // In case of embedding a marker, ignore the input Buf and use the empty + // ArrayRef. It is also legal to create a bitcode marker even Buf is empty. + if (EmbedBitcode) { + if (!isBitcode((const unsigned char *)Buf.getBufferStart(), + (const unsigned char *)Buf.getBufferEnd())) { + // If the input is LLVM Assembly, bitcode is produced by serializing + // the module. Use-lists order need to be preserved in this case. + llvm::raw_string_ostream OS(Data); + llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true); + ModuleData = + ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size()); + } else + // If the input is LLVM bitcode, write the input byte stream directly. + ModuleData = ArrayRef<uint8_t>((const uint8_t *)Buf.getBufferStart(), + Buf.getBufferSize()); + } + llvm::Constant *ModuleConstant = + llvm::ConstantDataArray::get(M.getContext(), ModuleData); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage, + ModuleConstant); + GV->setSection(getSectionNameForBitcode(T)); + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + if (llvm::GlobalVariable *Old = + M.getGlobalVariable("llvm.embedded.module", true)) { + assert(Old->hasOneUse() && + "llvm.embedded.module can only be used once in llvm.compiler.used"); + GV->takeName(Old); + Old->eraseFromParent(); + } else { + GV->setName("llvm.embedded.module"); + } + + // Skip if only bitcode needs to be embedded. + if (EmbedMarker) { + // Embed command-line options. + ArrayRef<uint8_t> CmdData(const_cast<uint8_t *>(CmdArgs->data()), + CmdArgs->size()); + llvm::Constant *CmdConstant = + llvm::ConstantDataArray::get(M.getContext(), CmdData); + GV = new llvm::GlobalVariable(M, CmdConstant->getType(), true, + llvm::GlobalValue::PrivateLinkage, + CmdConstant); + GV->setSection(getSectionNameForCommandline(T)); + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.cmdline", true)) { + assert(Old->hasOneUse() && + "llvm.cmdline can only be used once in llvm.compiler.used"); + GV->takeName(Old); + Old->eraseFromParent(); + } else { + GV->setName("llvm.cmdline"); + } + } + + if (UsedArray.empty()) + return; + + // Recreate llvm.compiler.used. + ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size()); + auto *NewUsed = new GlobalVariable( + M, ATy, false, llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used"); + NewUsed->setSection("llvm.metadata"); +} |