summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLang Hames <lhames@gmail.com>2016-04-29 21:32:00 +0000
committerLang Hames <lhames@gmail.com>2016-04-29 21:32:00 +0000
commitf74e93b60004ecc23832492264116c38249a87c7 (patch)
treeb02cb58b844159bcf1c5b1e96b878be2f646f7e5
parentae643ee2d1e637643b365e077ff0def93cf3e0b2 (diff)
downloadbcm5719-llvm-f74e93b60004ecc23832492264116c38249a87c7.tar.gz
bcm5719-llvm-f74e93b60004ecc23832492264116c38249a87c7.zip
[Orc] Add ORC lazy-compilation support for AArch64.
The ORC compile callbacks and indirect stubs APIs will now work for AArch64, allowing functions to be lazily compiled and/or updated. llvm-svn: 268112
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h31
-rw-r--r--llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp144
2 files changed, 175 insertions, 0 deletions
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h
index c8e4e26d56d..3adee86082c 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h
@@ -107,6 +107,37 @@ private:
sys::OwningMemoryBlock StubsMem;
};
+class OrcAArch64 {
+public:
+ static const unsigned PointerSize = 8;
+ static const unsigned TrampolineSize = 12;
+ static const unsigned ResolverCodeSize = 0x6C;
+
+ typedef GenericIndirectStubsInfo<8> IndirectStubsInfo;
+
+ typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
+
+ /// @brief Write the resolver code into the given memory. The user is be
+ /// responsible for allocating the memory and setting permissions.
+ static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
+ void *CallbackMgr);
+
+ /// @brief Write the requsted number of trampolines into the given memory,
+ /// which must be big enough to hold 1 pointer, plus NumTrampolines
+ /// trampolines.
+ static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+ unsigned NumTrampolines);
+
+ /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to
+ /// the nearest page size.
+ ///
+ /// E.g. Asking for 4 stubs on x86-64, where stubs are 8-bytes, with 4k
+ /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
+ /// will return a block of 1024 (2-pages worth).
+ static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+ unsigned MinStubs, void *InitialPtrVal);
+};
+
/// @brief X86_64 support.
///
/// X86_64 supports lazy JITing.
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp
index b51e3cd002d..c92a8d251d2 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp
@@ -14,6 +14,150 @@
namespace llvm {
namespace orc {
+void OrcAArch64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
+ void *CallbackMgr) {
+
+ const uint32_t ResolverCode[] = {
+ // resolver_entry:
+ 0xa9bf47fd, // 0x00: stp x29, x17, [sp, #-16]!
+ 0x910003fd, // 0x04: mov x29, sp
+ 0xa9bf73fb, // 0x08: stp x27, x28, [sp, #-16]!
+ 0xa9bf6bf9, // 0x0C: stp x25, x26, [sp, #-16]!
+ 0xa9bf63f7, // 0x10: stp x23, x24, [sp, #-16]!
+ 0xa9bf5bf5, // 0x14: stp x21, x22, [sp, #-16]!
+ 0xa9bf53f3, // 0x18: stp x19, x20, [sp, #-16]!
+ 0xa9bf3fee, // 0x1C: stp x14, x15, [sp, #-16]!
+ 0xa9bf37ec, // 0x20: stp x12, x13, [sp, #-16]!
+ 0xa9bf2fea, // 0x24: stp x10, x11, [sp, #-16]!
+ 0xa9bf27e8, // 0x28: stp x8, x9, [sp, #-16]!
+ 0xa9bf1fe6, // 0x2C: stp x6, x7, [sp, #-16]!
+ 0xa9bf17e4, // 0x30: stp x4, x5, [sp, #-16]!
+ 0xa9bf0fe2, // 0x34: stp x2, x3, [sp, #-16]!
+ 0xa9bf07e0, // 0x38: stp x0, x1, [sp, #-16]!
+ 0x580002e0, // 0x3C: ldr x0, Lcallback_mgr
+ 0xaa1e03e1, // 0x40: mov x1, x30
+ 0xd1003021, // 0x44: sub x1, x1, #12
+ 0x58000242, // 0x48: ldr x2, Lreentry_fn
+ 0xd63f0040, // 0x4C: blr x2
+ 0xaa0003f1, // 0x50: mov x17, x0
+ 0xa8c107e0, // 0x54: ldp x0, x1, [sp], #16
+ 0xa8c10fe2, // 0x58: ldp x2, x3, [sp], #16
+ 0xa8c117e4, // 0x5C: ldp x4, x5, [sp], #16
+ 0xa8c11fe6, // 0x60: ldp x6, x7, [sp], #16
+ 0xa8c127e8, // 0x64: ldp x8, x9, [sp], #16
+ 0xa8c12fea, // 0x68: ldp x10, x11, [sp], #16
+ 0xa8c137ec, // 0x6C: ldp x12, x13, [sp], #16
+ 0xa8c13fee, // 0x70: ldp x14, x15, [sp], #16
+ 0xa8c153f3, // 0x74: ldp x19, x20, [sp], #16
+ 0xa8c15bf5, // 0x78: ldp x21, x22, [sp], #16
+ 0xa8c163f7, // 0x7C: ldp x23, x24, [sp], #16
+ 0xa8c16bf9, // 0x80: ldp x25, x26, [sp], #16
+ 0xa8c173fb, // 0x84: ldp x27, x28, [sp], #16
+ 0xa8c17bfd, // 0x88: ldp x29, x30, [sp], #16
+ 0xd65f0220, // 0x8C: ret x17
+ 0x00000000, // 0x90: Lresolver_fn:
+ 0x00000000, // .quad resolver_fn
+ 0x00000000, // 0x98: Lcallback_mgr:
+ 0x00000000, // .quad callback_mgr
+ };
+
+ const unsigned ReentryFnAddrOffset = 0x90;
+ const unsigned CallbackMgrAddrOffset = 0x98;
+
+ memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode));
+ memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn));
+ memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr,
+ sizeof(CallbackMgr));
+}
+
+void OrcAArch64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+ unsigned NumTrampolines) {
+
+ unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
+
+ memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void *));
+
+ // OffsetToPtr is actually the offset from the PC for the 2nd instruction, so
+ // subtract 32-bits.
+ OffsetToPtr -= 4;
+
+ uint32_t *Trampolines = reinterpret_cast<uint32_t *>(TrampolineMem);
+
+ for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
+ Trampolines[3 * I + 0] = 0xaa1e03f1; // mov x17, x30
+ Trampolines[3 * I + 1] = 0x58000010 | (OffsetToPtr << 3); // mov x16, Lptr
+ Trampolines[3 * I + 2] = 0xd63f0200; // blr x16
+ }
+
+}
+
+Error OrcAArch64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+ unsigned MinStubs,
+ void *InitialPtrVal) {
+ // Stub format is:
+ //
+ // .section __orc_stubs
+ // stub1:
+ // ldr x0, ptr1 ; PC-rel load of ptr1
+ // br x0 ; Jump to resolver
+ // stub2:
+ // ldr x0, ptr2 ; PC-rel load of ptr2
+ // br x0 ; Jump to resolver
+ //
+ // ...
+ //
+ // .section __orc_ptrs
+ // ptr1:
+ // .quad 0x0
+ // ptr2:
+ // .quad 0x0
+ //
+ // ...
+
+ const unsigned StubSize = IndirectStubsInfo::StubSize;
+
+ // Emit at least MinStubs, rounded up to fill the pages allocated.
+ unsigned PageSize = sys::Process::getPageSize();
+ unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
+ unsigned NumStubs = (NumPages * PageSize) / StubSize;
+
+ // Allocate memory for stubs and pointers in one call.
+ std::error_code EC;
+ auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
+ 2 * NumPages * PageSize, nullptr,
+ sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
+
+ if (EC)
+ return errorCodeToError(EC);
+
+ // Create separate MemoryBlocks representing the stubs and pointers.
+ sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize);
+ sys::MemoryBlock PtrsBlock(static_cast<char *>(StubsMem.base()) +
+ NumPages * PageSize,
+ NumPages * PageSize);
+
+ // Populate the stubs page stubs and mark it executable.
+ uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlock.base());
+ uint64_t PtrOffsetField = static_cast<uint64_t>(NumPages * PageSize)
+ << 3;
+
+ for (unsigned I = 0; I < NumStubs; ++I)
+ Stub[I] = 0xd61f020058000010 | PtrOffsetField;
+
+ if (auto EC = sys::Memory::protectMappedMemory(
+ StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC))
+ return errorCodeToError(EC);
+
+ // Initialize all pointers to point at FailureAddress.
+ void **Ptr = reinterpret_cast<void **>(PtrsBlock.base());
+ for (unsigned I = 0; I < NumStubs; ++I)
+ Ptr[I] = InitialPtrVal;
+
+ StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem));
+
+ return Error::success();
+}
+
void OrcX86_64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
void *CallbackMgr) {
OpenPOWER on IntegriCloud