diff options
author | Renato Golin <renato.golin@linaro.org> | 2016-09-08 17:13:15 +0000 |
---|---|---|
committer | Renato Golin <renato.golin@linaro.org> | 2016-09-08 17:13:15 +0000 |
commit | 6f605133dd72d7ae404e10e2721c2165d0965cb4 (patch) | |
tree | 0ab33d4f6251151b2951e1f30a5f888067cab55a /compiler-rt/lib/xray/xray_interface.cc | |
parent | 0f1fcd6fc61f6ea12a9aea897b61e01e0fd57c63 (diff) | |
download | bcm5719-llvm-6f605133dd72d7ae404e10e2721c2165d0965cb4.tar.gz bcm5719-llvm-6f605133dd72d7ae404e10e2721c2165d0965cb4.zip |
Revert "[XRay] ARM 32-bit no-Thumb support in compiler-rt"
This reverts commit r280890, as the related LLVM commit broke the thumb bots.
llvm-svn: 280969
Diffstat (limited to 'compiler-rt/lib/xray/xray_interface.cc')
-rw-r--r-- | compiler-rt/lib/xray/xray_interface.cc | 141 |
1 files changed, 111 insertions, 30 deletions
diff --git a/compiler-rt/lib/xray/xray_interface.cc b/compiler-rt/lib/xray/xray_interface.cc index 360a6ad5b0a..5ef3fc7aac9 100644 --- a/compiler-rt/lib/xray/xray_interface.cc +++ b/compiler-rt/lib/xray/xray_interface.cc @@ -26,15 +26,6 @@ namespace __xray { -#if defined(__x86_64__) - // FIXME: The actual length is 11 bytes. Why was length 12 passed to mprotect() ? - static const int16_t cSledLength = 12; -#elif defined(__arm__) - static const int16_t cSledLength = 28; -#else - #error "Unsupported CPU Architecture" -#endif /* CPU architecture */ - // This is the function to call when we encounter the entry or exit sleds. std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction{nullptr}; @@ -73,6 +64,13 @@ public: } // namespace __xray +extern "C" { +// The following functions have to be defined in assembler, on a per-platform +// basis. See xray_trampoline_*.s files for implementations. +extern void __xray_FunctionEntry(); +extern void __xray_FunctionExit(); +} + extern std::atomic<bool> XRayInitialized; extern std::atomic<__xray::XRaySledMap> XRayInstrMap; @@ -135,13 +133,12 @@ XRayPatchingStatus ControlPatching(bool Enable) { if (InstrMap.Entries == 0) return XRayPatchingStatus::NOT_INITIALIZED; - const uint64_t PageSize = GetPageSizeCached(); - if((PageSize == 0) || ( (PageSize & (PageSize-1)) != 0) ) { - Report("System page size is not a power of two: %lld", PageSize); - return XRayPatchingStatus::FAILED; - } - - uint32_t FuncId = 1; + int32_t FuncId = 1; + static constexpr uint8_t CallOpCode = 0xe8; + static constexpr uint16_t MovR10Seq = 0xba41; + static constexpr uint16_t Jmp9Seq = 0x09eb; + static constexpr uint8_t JmpOpCode = 0xe9; + static constexpr uint8_t RetOpCode = 0xc3; uint64_t CurFun = 0; for (std::size_t I = 0; I < InstrMap.Entries; I++) { auto Sled = InstrMap.Sleds[I]; @@ -156,28 +153,112 @@ XRayPatchingStatus ControlPatching(bool Enable) { // While we're here, we should patch the nop sled. To do that we mprotect // the page containing the function to be writeable. void *PageAlignedAddr = - reinterpret_cast<void *>(Sled.Address & ~(PageSize-1)); + reinterpret_cast<void *>(Sled.Address & ~((2 << 16) - 1)); std::size_t MProtectLen = - (Sled.Address + cSledLength) - reinterpret_cast<uint64_t>(PageAlignedAddr); + (Sled.Address + 12) - reinterpret_cast<uint64_t>(PageAlignedAddr); MProtectHelper Protector(PageAlignedAddr, MProtectLen); if (Protector.MakeWriteable() == -1) { printf("Failed mprotect: %d\n", errno); return XRayPatchingStatus::FAILED; } - bool Success = false; - switch(Sled.Kind) { - case XRayEntryType::ENTRY: - Success = patchFunctionEntry(Enable, FuncId, Sled); - break; - case XRayEntryType::EXIT: - Success = patchFunctionExit(Enable, FuncId, Sled); - break; - default: - Report("Unsupported sled kind: %d", int(Sled.Kind)); - continue; + static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()}; + static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()}; + if (Sled.Kind == XRayEntryType::ENTRY) { + // FIXME: Implement this in a more extensible manner, per-platform. + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // jmp +9 + // <9 byte nop> + // + // With the following: + // + // mov r10d, <function id> + // call <relative 32bit offset to entry trampoline> + // + // We need to do this in the following order: + // + // 1. Put the function id first, 2 bytes from the start of the sled (just + // after the 2-byte jmp instruction). + // 2. Put the call opcode 6 bytes from the start of the sled. + // 3. Put the relative offset 7 bytes from the start of the sled. + // 4. Do an atomic write over the jmp instruction for the "mov r10d" + // opcode and first operand. + // + // Prerequisite is to compute the relative offset to the + // __xray_FunctionEntry function's address. + int64_t TrampolineOffset = + reinterpret_cast<int64_t>(__xray_FunctionEntry) - + (static_cast<int64_t>(Sled.Address) + 11); + if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { + Report("XRay Entry trampoline (%p) too far from sled (%p); distance = " + "%ld\n", + __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address), + TrampolineOffset); + continue; + } + if (Enable) { + *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId; + *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode; + *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset; + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq, + std::memory_order_release); + // FIXME: Write out the nops still? + } + } + + if (Sled.Kind == XRayEntryType::EXIT) { + // FIXME: Implement this in a more extensible manner, per-platform. + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // ret + // <10 byte nop> + // + // With the following: + // + // mov r10d, <function id> + // jmp <relative 32bit offset to exit trampoline> + // + // 1. Put the function id first, 2 bytes from the start of the sled (just + // after the 1-byte ret instruction). + // 2. Put the jmp opcode 6 bytes from the start of the sled. + // 3. Put the relative offset 7 bytes from the start of the sled. + // 4. Do an atomic write over the jmp instruction for the "mov r10d" + // opcode and first operand. + // + // Prerequisite is to compute the relative offset fo the + // __xray_FunctionExit function's address. + int64_t TrampolineOffset = + reinterpret_cast<int64_t>(__xray_FunctionExit) - + (static_cast<int64_t>(Sled.Address) + 11); + if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { + Report("XRay Exit trampoline (%p) too far from sled (%p); distance = " + "%ld\n", + __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address), + TrampolineOffset); + continue; + } + if (Enable) { + *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId; + *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode; + *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset; + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode, + std::memory_order_release); + // FIXME: Write out the nops still? + } } - (void)Success; } XRayPatching.store(false, std::memory_order_release); PatchingSuccess = true; |