diff options
author | Tim Shen <timshen91@gmail.com> | 2017-02-14 02:05:47 +0000 |
---|---|---|
committer | Tim Shen <timshen91@gmail.com> | 2017-02-14 02:05:47 +0000 |
commit | b94588eb88bad5db1522e0f22f0ddca4a6d9e3c4 (patch) | |
tree | 7894dc5a4507b5dd2fe95baf3b9b9109b1d7de6e /compiler-rt | |
parent | c27b3d7623cfa6476c8db5957d3c7f937603ec44 (diff) | |
download | bcm5719-llvm-b94588eb88bad5db1522e0f22f0ddca4a6d9e3c4.tar.gz bcm5719-llvm-b94588eb88bad5db1522e0f22f0ddca4a6d9e3c4.zip |
Re-commit r294826 and r294781, with a fix on the cmake file to only
compile on powerpc64le.
I cannot locally reproduce this test failure:
http://lab.llvm.org:8011/builders/sanitizer-ppc64le-linux/builds/1363/steps/test%20standalone%20compiler-rt/logs/stdio
Let's see how the buildbot goes.
Differential Revision: https://reviews.llvm.org/D29742
llvm-svn: 295017
Diffstat (limited to 'compiler-rt')
-rw-r--r-- | compiler-rt/cmake/config-ix.cmake | 2 | ||||
-rw-r--r-- | compiler-rt/lib/xray/CMakeLists.txt | 6 | ||||
-rw-r--r-- | compiler-rt/lib/xray/xray_interface.cc | 2 | ||||
-rw-r--r-- | compiler-rt/lib/xray/xray_powerpc64.cc | 95 | ||||
-rw-r--r-- | compiler-rt/lib/xray/xray_powerpc64.inc | 37 | ||||
-rw-r--r-- | compiler-rt/lib/xray/xray_trampoline_powerpc64.S | 171 | ||||
-rw-r--r-- | compiler-rt/lib/xray/xray_trampoline_powerpc64.cc | 15 | ||||
-rw-r--r-- | compiler-rt/lib/xray/xray_tsc.h | 2 |
8 files changed, 329 insertions, 1 deletions
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 493fdf47939..f2b78808502 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -183,7 +183,7 @@ set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64}) set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64}) set(ALL_ESAN_SUPPORTED_ARCH ${X86_64} ${MIPS64}) set(ALL_SCUDO_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}) -set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64}) +set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} powerpc64le) if(APPLE) include(CompilerRTDarwinUtils) diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt index 581c4063ee3..97cb28f32eb 100644 --- a/compiler-rt/lib/xray/CMakeLists.txt +++ b/compiler-rt/lib/xray/CMakeLists.txt @@ -29,6 +29,12 @@ set(aarch64_SOURCES xray_trampoline_AArch64.S ${XRAY_SOURCES}) +set(powerpc64le_SOURCES + xray_powerpc64.cc + xray_trampoline_powerpc64.cc + xray_trampoline_powerpc64.S + ${XRAY_SOURCES}) + include_directories(..) include_directories(../../include) diff --git a/compiler-rt/lib/xray/xray_interface.cc b/compiler-rt/lib/xray/xray_interface.cc index 0b13983e58d..09be20a8ab7 100644 --- a/compiler-rt/lib/xray/xray_interface.cc +++ b/compiler-rt/lib/xray/xray_interface.cc @@ -35,6 +35,8 @@ static const int16_t cSledLength = 12; static const int16_t cSledLength = 32; #elif defined(__arm__) static const int16_t cSledLength = 28; +#elif defined(__powerpc64__) +static const int16_t cSledLength = 8; #else #error "Unsupported CPU Architecture" #endif /* CPU architecture */ diff --git a/compiler-rt/lib/xray/xray_powerpc64.cc b/compiler-rt/lib/xray/xray_powerpc64.cc new file mode 100644 index 00000000000..e0b62905e82 --- /dev/null +++ b/compiler-rt/lib/xray/xray_powerpc64.cc @@ -0,0 +1,95 @@ +//===-- xray_powerpc64.cc ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of powerpc64 and powerpc64le routines. +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include "xray_utils.h" +#include <atomic> +#include <cassert> +#include <cstring> + +#ifndef __LITTLE_ENDIAN__ +#error powerpc64 big endian is not supported for now. +#endif + +namespace { + +constexpr unsigned long long JumpOverInstNum = 7; + +void clearCache(void *Addr, size_t Len) { + const size_t LineSize = 32; + + const intptr_t Mask = ~(LineSize - 1); + const intptr_t StartLine = ((intptr_t)Addr) & Mask; + const intptr_t EndLine = ((intptr_t)Addr + Len + LineSize - 1) & Mask; + + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("dcbf 0, %0" : : "r"(Line)); + asm volatile("sync"); + + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("icbi 0, %0" : : "r"(Line)); + asm volatile("isync"); +} + +} // namespace + +extern "C" void __clear_cache(void *start, void *end); + +namespace __xray { + +bool patchFunctionEntry(const bool Enable, uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + if (Enable) { + // lis 0, FuncId[16..32] + // li 0, FuncId[0..15] + *reinterpret_cast<uint64_t *>(Sled.Address) = + (0x3c000000ull + (FuncId >> 16)) + + ((0x60000000ull + (FuncId & 0xffff)) << 32); + } else { + // b +JumpOverInstNum instructions. + *reinterpret_cast<uint32_t *>(Sled.Address) = + 0x48000000ull + (JumpOverInstNum << 2); + } + clearCache(reinterpret_cast<void *>(Sled.Address), 8); + return true; +} + +bool patchFunctionExit(const bool Enable, uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + if (Enable) { + // lis 0, FuncId[16..32] + // li 0, FuncId[0..15] + *reinterpret_cast<uint64_t *>(Sled.Address) = + (0x3c000000ull + (FuncId >> 16)) + + ((0x60000000ull + (FuncId & 0xffff)) << 32); + } else { + // Copy the blr/b instruction after JumpOverInstNum instructions. + *reinterpret_cast<uint32_t *>(Sled.Address) = + *(reinterpret_cast<uint32_t *>(Sled.Address) + JumpOverInstNum); + } + clearCache(reinterpret_cast<void *>(Sled.Address), 8); + return true; +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchFunctionExit(Enable, FuncId, Sled); +} + +// FIXME: Maybe implement this better? +bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } + +} // namespace __xray diff --git a/compiler-rt/lib/xray/xray_powerpc64.inc b/compiler-rt/lib/xray/xray_powerpc64.inc new file mode 100644 index 00000000000..c1a1bac1ad0 --- /dev/null +++ b/compiler-rt/lib/xray/xray_powerpc64.inc @@ -0,0 +1,37 @@ +//===-- xray_powerpc64.inc --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +//===----------------------------------------------------------------------===// + +#include <cstdint> +#include <mutex> +#include <sys/platform/ppc.h> + +#include "xray_defs.h" + +namespace __xray { + +ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT { + CPU = 0; + return __ppc_get_timebase(); +} + +inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { + static std::mutex M; + std::lock_guard<std::mutex> Guard(M); + return __ppc_get_timebase_freq(); +} + +inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { + return true; +} + +} // namespace __xray diff --git a/compiler-rt/lib/xray/xray_trampoline_powerpc64.S b/compiler-rt/lib/xray/xray_trampoline_powerpc64.S new file mode 100644 index 00000000000..d43231ead22 --- /dev/null +++ b/compiler-rt/lib/xray/xray_trampoline_powerpc64.S @@ -0,0 +1,171 @@ + .text + .abiversion 2 + .globl __xray_FunctionEntry + .p2align 4 +__xray_FunctionEntry: + std 0, 16(1) + stdu 1, -408(1) +# Spill r3-r10, f1-f13, and vsr34-vsr45, which are parameter registers. +# If this appears to be slow, the caller needs to pass in number of generic, +# floating point, and vector parameters, so that we only spill those live ones. + std 3, 32(1) + ld 3, 400(1) # FuncId + std 4, 40(1) + std 5, 48(1) + std 6, 56(1) + std 7, 64(1) + std 8, 72(1) + std 9, 80(1) + std 10, 88(1) + addi 4, 1, 96 + stxsdx 1, 0, 4 + addi 4, 1, 104 + stxsdx 2, 0, 4 + addi 4, 1, 112 + stxsdx 3, 0, 4 + addi 4, 1, 120 + stxsdx 4, 0, 4 + addi 4, 1, 128 + stxsdx 5, 0, 4 + addi 4, 1, 136 + stxsdx 6, 0, 4 + addi 4, 1, 144 + stxsdx 7, 0, 4 + addi 4, 1, 152 + stxsdx 8, 0, 4 + addi 4, 1, 160 + stxsdx 9, 0, 4 + addi 4, 1, 168 + stxsdx 10, 0, 4 + addi 4, 1, 176 + stxsdx 11, 0, 4 + addi 4, 1, 184 + stxsdx 12, 0, 4 + addi 4, 1, 192 + stxsdx 13, 0, 4 + addi 4, 1, 200 + stxvd2x 34, 0, 4 + addi 4, 1, 216 + stxvd2x 35, 0, 4 + addi 4, 1, 232 + stxvd2x 36, 0, 4 + addi 4, 1, 248 + stxvd2x 37, 0, 4 + addi 4, 1, 264 + stxvd2x 38, 0, 4 + addi 4, 1, 280 + stxvd2x 39, 0, 4 + addi 4, 1, 296 + stxvd2x 40, 0, 4 + addi 4, 1, 312 + stxvd2x 41, 0, 4 + addi 4, 1, 328 + stxvd2x 42, 0, 4 + addi 4, 1, 344 + stxvd2x 43, 0, 4 + addi 4, 1, 360 + stxvd2x 44, 0, 4 + addi 4, 1, 376 + stxvd2x 45, 0, 4 + std 2, 392(1) + mflr 0 + std 0, 400(1) + + li 4, 0 + bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType + nop + + addi 4, 1, 96 + lxsdx 1, 0, 4 + addi 4, 1, 104 + lxsdx 2, 0, 4 + addi 4, 1, 112 + lxsdx 3, 0, 4 + addi 4, 1, 120 + lxsdx 4, 0, 4 + addi 4, 1, 128 + lxsdx 5, 0, 4 + addi 4, 1, 136 + lxsdx 6, 0, 4 + addi 4, 1, 144 + lxsdx 7, 0, 4 + addi 4, 1, 152 + lxsdx 8, 0, 4 + addi 4, 1, 160 + lxsdx 9, 0, 4 + addi 4, 1, 168 + lxsdx 10, 0, 4 + addi 4, 1, 176 + lxsdx 11, 0, 4 + addi 4, 1, 184 + lxsdx 12, 0, 4 + addi 4, 1, 192 + lxsdx 13, 0, 4 + addi 4, 1, 200 + lxvd2x 34, 0, 4 + addi 4, 1, 216 + lxvd2x 35, 0, 4 + addi 4, 1, 232 + lxvd2x 36, 0, 4 + addi 4, 1, 248 + lxvd2x 37, 0, 4 + addi 4, 1, 264 + lxvd2x 38, 0, 4 + addi 4, 1, 280 + lxvd2x 39, 0, 4 + addi 4, 1, 296 + lxvd2x 40, 0, 4 + addi 4, 1, 312 + lxvd2x 41, 0, 4 + addi 4, 1, 328 + lxvd2x 42, 0, 4 + addi 4, 1, 344 + lxvd2x 43, 0, 4 + addi 4, 1, 360 + lxvd2x 44, 0, 4 + addi 4, 1, 376 + lxvd2x 45, 0, 4 + ld 0, 400(1) + mtlr 0 + ld 2, 392(1) + ld 3, 32(1) + ld 4, 40(1) + ld 5, 48(1) + ld 6, 56(1) + ld 7, 64(1) + ld 8, 72(1) + ld 9, 80(1) + ld 10, 88(1) + + addi 1, 1, 408 + ld 0, 16(1) + blr + + .globl __xray_FunctionExit + .p2align 4 +__xray_FunctionExit: + std 0, 16(1) + ld 0, -8(1) # FuncId + stdu 1, -72(1) +# Spill r3, f1, and vsr34, the return value registers. + std 3, 32(1) + mr 3, 0 + addi 4, 1, 40 + stxsdx 1, 0, 4 + addi 4, 1, 48 + stxvd2x 34, 0, 4 + mflr 0 + std 0, 64(1) + li 4, 1 + bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType + nop + ld 0, 64(1) + mtlr 0 + ld 3, 32(1) + addi 4, 1, 40 + lxsdx 1, 0, 4 + addi 4, 1, 48 + lxvd2x 34, 0, 4 + addi 1, 1, 72 + ld 0, 16(1) + blr diff --git a/compiler-rt/lib/xray/xray_trampoline_powerpc64.cc b/compiler-rt/lib/xray/xray_trampoline_powerpc64.cc new file mode 100644 index 00000000000..878c46930fe --- /dev/null +++ b/compiler-rt/lib/xray/xray_trampoline_powerpc64.cc @@ -0,0 +1,15 @@ +#include <atomic> +#include <xray/xray_interface.h> + +namespace __xray { + +extern std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction; + +// Implement this in C++ instead of assembly, to avoid dealing with ToC by hand. +void CallXRayPatchedFunction(int32_t FuncId, XRayEntryType Type) { + auto fptr = __xray::XRayPatchedFunction.load(); + if (fptr != nullptr) + (*fptr)(FuncId, Type); +} + +} // namespace __xray diff --git a/compiler-rt/lib/xray/xray_tsc.h b/compiler-rt/lib/xray/xray_tsc.h index 941f18877e2..5f07ea378c0 100644 --- a/compiler-rt/lib/xray/xray_tsc.h +++ b/compiler-rt/lib/xray/xray_tsc.h @@ -15,6 +15,8 @@ #if defined(__x86_64__) #include "xray_x86_64.inc" +#elif defined(__powerpc64__) +#include "xray_powerpc64.inc" #elif defined(__arm__) || defined(__aarch64__) // Emulated TSC. // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does |