//===------- OrcTargetSupport.cpp - Target support utilities for Orc ------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "llvm/ADT/Triple.h" #include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h" #include "llvm/Support/Process.h" #include using namespace llvm::orc; namespace { uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM, TargetAddress CallbackID) { return JCBM->executeCompileCallback(CallbackID); } } namespace llvm { namespace orc { const char* OrcX86_64::ResolverBlockName = "orc_resolver_block"; void OrcX86_64::insertResolverBlock( Module &M, JITCompileCallbackManagerBase &JCBM) { // Trampoline code-sequence length, used to get trampoline address from return // address. const unsigned X86_64_TrampolineLength = 6; // List of x86-64 GPRs to save. Note - RBP saved separately below. std::array GPRs = {{ "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }}; // Address of the executeCompileCallback function. uint64_t CallbackAddr = static_cast( reinterpret_cast(executeCompileCallback)); std::ostringstream AsmStream; Triple TT(M.getTargetTriple()); // Switch to text section. if (TT.getOS() == Triple::Darwin) AsmStream << ".section __TEXT,__text,regular,pure_instructions\n" << ".align 4, 0x90\n"; else AsmStream << ".text\n" << ".align 16, 0x90\n"; // Bake in a pointer to the callback manager immediately before the // start of the resolver function. AsmStream << "jit_callback_manager_addr:\n" << " .quad " << &JCBM << "\n"; // Start the resolver function. AsmStream << ResolverBlockName << ":\n" << " pushq %rbp\n" << " movq %rsp, %rbp\n"; // Store the GPRs. for (const auto &GPR : GPRs) AsmStream << " pushq %" << GPR << "\n"; // Store floating-point state with FXSAVE. // Note: We need to keep the stack 16-byte aligned, so if we've emitted an odd // number of 64-bit pushes so far (GPRs.size() plus 1 for RBP) then add // an extra 64 bits of padding to the FXSave area. unsigned Padding = (GPRs.size() + 1) % 2 ? 8 : 0; unsigned FXSaveSize = 512 + Padding; AsmStream << " subq $" << FXSaveSize << ", %rsp\n" << " fxsave64 (%rsp)\n" // Load callback manager address, compute trampoline address, call JIT. << " lea jit_callback_manager_addr(%rip), %rdi\n" << " movq (%rdi), %rdi\n" << " movq 0x8(%rbp), %rsi\n" << " subq $" << X86_64_TrampolineLength << ", %rsi\n" << " movabsq $" << CallbackAddr << ", %rax\n" << " callq *%rax\n" // Replace the return to the trampoline with the return address of the // compiled function body. << " movq %rax, 0x8(%rbp)\n" // Restore the floating point state. << " fxrstor64 (%rsp)\n" << " addq $" << FXSaveSize << ", %rsp\n"; for (const auto &GPR : make_range(GPRs.rbegin(), GPRs.rend())) AsmStream << " popq %" << GPR << "\n"; // Restore original RBP and return to compiled function body. AsmStream << " popq %rbp\n" << " retq\n"; M.appendModuleInlineAsm(AsmStream.str()); } OrcX86_64::LabelNameFtor OrcX86_64::insertCompileCallbackTrampolines(Module &M, TargetAddress ResolverBlockAddr, unsigned NumCalls, unsigned StartIndex) { const char *ResolverBlockPtrName = "Lorc_resolve_block_addr"; std::ostringstream AsmStream; Triple TT(M.getTargetTriple()); if (TT.getOS() == Triple::Darwin) AsmStream << ".section __TEXT,__text,regular,pure_instructions\n" << ".align 4, 0x90\n"; else AsmStream << ".text\n" << ".align 16, 0x90\n"; AsmStream << ResolverBlockPtrName << ":\n" << " .quad " << ResolverBlockAddr << "\n"; auto GetLabelName = [=](unsigned I) { std::ostringstream LabelStream; LabelStream << "orc_jcc_" << (StartIndex + I); return LabelStream.str(); }; for (unsigned I = 0; I < NumCalls; ++I) AsmStream << GetLabelName(I) << ":\n" << " callq *" << ResolverBlockPtrName << "(%rip)\n"; M.appendModuleInlineAsm(AsmStream.str()); return GetLabelName; } std::error_code OrcX86_64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, unsigned MinStubs, void *InitialPtrVal) { // Stub format is: // // .section __orc_stubs // stub1: // jmpq *ptr1(%rip) // .byte 0xC4 ; <- Invalid opcode padding. // .byte 0xF1 // stub2: // jmpq *ptr2(%rip) // // ... // // .section __orc_ptrs // ptr1: // .quad 0x0 // ptr2: // .quad 0x0 // // ... const unsigned StubSize = IndirectStubsInfo::StubSize; // Emit at least MinStubs, rounded up to fill the pages allocated. unsigned PageSize = sys::Process::getPageSize(); unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; unsigned NumStubs = (NumPages * PageSize) / StubSize; // Allocate memory for stubs and pointers in one call. std::error_code EC; auto StubsMem = sys::OwningMemoryBlock( sys::Memory::allocateMappedMemory(2 * NumPages * PageSize, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); if (EC) return EC; // Create separate MemoryBlocks representing the stubs and pointers. sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); sys::MemoryBlock PtrsBlock(static_cast(StubsMem.base()) + NumPages * PageSize, NumPages * PageSize); // Populate the stubs page stubs and mark it executable. uint64_t *Stub = reinterpret_cast(StubsBlock.base()); uint64_t PtrOffsetField = static_cast(NumPages * PageSize - 6) << 16; for (unsigned I = 0; I < NumStubs; ++I) Stub[I] = 0xF1C40000000025ff | PtrOffsetField; if (auto EC = sys::Memory::protectMappedMemory(StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) return EC; // Initialize all pointers to point at FailureAddress. void **Ptr = reinterpret_cast(PtrsBlock.base()); for (unsigned I = 0; I < NumStubs; ++I) Ptr[I] = InitialPtrVal; StubsInfo.NumStubs = NumStubs; StubsInfo.StubsMem = std::move(StubsMem); return std::error_code(); } } // End namespace orc. } // End namespace llvm.