diff options
author | Dean Michael Berris <dberris@google.com> | 2017-12-05 12:21:14 +0000 |
---|---|---|
committer | Dean Michael Berris <dberris@google.com> | 2017-12-05 12:21:14 +0000 |
commit | 52517d7cf70da666f98e72f926411c503f29ee50 (patch) | |
tree | 561c23d60c3b18474e73cb7985553824b9b0000d /compiler-rt | |
parent | cec8f47e77f1cb917ba9af24b2517b985a649202 (diff) | |
download | bcm5719-llvm-52517d7cf70da666f98e72f926411c503f29ee50.tar.gz bcm5719-llvm-52517d7cf70da666f98e72f926411c503f29ee50.zip |
[XRay][compiler-rt] Implement XRay Basic Mode Filtering
Summary:
This change implements the basic mode filtering similar to what we do in
FDR mode. The implementation is slightly simpler in basic-mode filtering
because we have less details to remember, but the idea is the same. At a
high level, we do the following to decide when to filter function call
records:
- We maintain a per-thread "shadow stack" which keeps track of the
XRay instrumented functions we've encountered in a thread's
execution.
- We push an entry onto the stack when we enter an XRay instrumented
function, and note the CPU, TSC, and type of entry (whether we have
payload or not when entering).
- When we encounter an exit event, we determine whether the function
being exited is the same function we've entered recently, was
executing in the same CPU, and the delta of the recent TSC and the
recorded TSC at the top of the stack is less than the equivalent
amount of microseconds we're configured to ignore -- then we un-wind
the record offset an appropriate number of times (so we can
overwrite the records later).
We also support limiting the stack depth of the recorded functions,
so that we don't arbitrarily write deep function call stacks.
Reviewers: eizan, pelikan, kpw, dblaikie
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40828
llvm-svn: 319762
Diffstat (limited to 'compiler-rt')
-rw-r--r-- | compiler-rt/lib/xray/xray_inmemory_log.cc | 215 | ||||
-rw-r--r-- | compiler-rt/test/xray/TestCases/Linux/basic-filtering.cc | 51 |
2 files changed, 212 insertions, 54 deletions
diff --git a/compiler-rt/lib/xray/xray_inmemory_log.cc b/compiler-rt/lib/xray/xray_inmemory_log.cc index 74789ebac1c..b70d6ecd05a 100644 --- a/compiler-rt/lib/xray/xray_inmemory_log.cc +++ b/compiler-rt/lib/xray/xray_inmemory_log.cc @@ -42,19 +42,25 @@ __sanitizer::SpinMutex LogMutex; // We use elements of this type to record the entry TSC of every function ID we // see as we're tracing a particular thread's execution. -struct StackEntry { +struct alignas(16) StackEntry { int32_t FuncId; + uint16_t Type; + uint8_t CPU; + uint8_t Padding; uint64_t TSC; }; +static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry"); + struct alignas(64) ThreadLocalData { - XRayRecord *InMemoryBuffer = nullptr; + void *InMemoryBuffer = nullptr; size_t BufferSize = 0; size_t BufferOffset = 0; - StackEntry *ShadowStack = nullptr; + void *ShadowStack = nullptr; size_t StackSize = 0; size_t StackEntries = 0; int Fd = -1; + pid_t TID = 0; }; static pthread_key_t PThreadKey; @@ -63,7 +69,16 @@ static __sanitizer::atomic_uint8_t BasicInitialized{0}; BasicLoggingOptions GlobalOptions; -thread_local volatile bool RecusionGuard = false; +thread_local volatile bool RecursionGuard = false; + +static uint64_t thresholdTicks() XRAY_NEVER_INSTRUMENT { + static uint64_t TicksPerSec = probeRequiredCPUFeatures() + ? getTSCFrequency() + : __xray::NanosecondsPerSecond; + static const uint64_t ThresholdTicks = + TicksPerSec * GlobalOptions.DurationFilterMicros / 1000000; + return ThresholdTicks; +} static int openLogFile() XRAY_NEVER_INSTRUMENT { int F = getLogFD(); @@ -92,11 +107,6 @@ static int openLogFile() XRAY_NEVER_INSTRUMENT { return F; } -pid_t getTId() XRAY_NEVER_INSTRUMENT { - thread_local pid_t TId = syscall(SYS_gettid); - return TId; -} - int getGlobalFd() XRAY_NEVER_INSTRUMENT { static int Fd = openLogFile(); return Fd; @@ -105,8 +115,12 @@ int getGlobalFd() XRAY_NEVER_INSTRUMENT { ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { thread_local ThreadLocalData TLD; thread_local bool UNUSED TOnce = [] { - if (GlobalOptions.ThreadBufferSize == 0) + if (GlobalOptions.ThreadBufferSize == 0) { + if (__sanitizer::Verbosity()) + Report("Not initializing TLD since ThreadBufferSize == 0.\n"); return false; + } + TLD.TID = __sanitizer::GetTid(); pthread_setspecific(PThreadKey, &TLD); TLD.Fd = getGlobalFd(); TLD.InMemoryBuffer = reinterpret_cast<XRayRecord *>( @@ -114,13 +128,26 @@ ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { nullptr, alignof(XRayRecord))); TLD.BufferSize = GlobalOptions.ThreadBufferSize; TLD.BufferOffset = 0; - if (GlobalOptions.MaxStackDepth == 0) + if (GlobalOptions.MaxStackDepth == 0) { + if (__sanitizer::Verbosity()) + Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n"); + TLD.StackSize = 0; + TLD.StackEntries = 0; + TLD.ShadowStack = nullptr; return false; + } TLD.ShadowStack = reinterpret_cast<StackEntry *>( InternalAlloc(sizeof(StackEntry) * GlobalOptions.MaxStackDepth, nullptr, alignof(StackEntry))); TLD.StackSize = GlobalOptions.MaxStackDepth; TLD.StackEntries = 0; + if (__sanitizer::Verbosity() >= 2) { + static auto UNUSED Once = [] { + auto ticks = thresholdTicks(); + Report("Ticks threshold: %d\n", ticks); + return false; + }(); + } return false; }(); return TLD; @@ -131,7 +158,6 @@ void InMemoryRawLog(int32_t FuncId, XRayEntryType Type, RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { auto &TLD = getThreadLocalData(); auto &InMemoryBuffer = TLD.InMemoryBuffer; - auto &Offset = TLD.BufferOffset; int Fd = getGlobalFd(); if (Fd == -1) return; @@ -139,27 +165,92 @@ void InMemoryRawLog(int32_t FuncId, XRayEntryType Type, // Use a simple recursion guard, to handle cases where we're already logging // and for one reason or another, this function gets called again in the same // thread. - if (RecusionGuard) + if (RecursionGuard) return; - RecusionGuard = true; + RecursionGuard = true; + auto ExitGuard = __sanitizer::at_scope_exit([] { RecursionGuard = false; }); + + uint8_t CPU = 0; + uint64_t TSC = ReadTSC(CPU); + + switch (Type) { + case XRayEntryType::ENTRY: + case XRayEntryType::LOG_ARGS_ENTRY: { + // Short circuit if we've reached the maximum depth of the stack. + if (TLD.StackEntries++ >= TLD.StackSize) + return; + + // When we encounter an entry event, we keep track of the TSC and the CPU, + // and put it in the stack. + StackEntry E; + E.FuncId = FuncId; + E.CPU = CPU; + E.Type = Type; + E.TSC = TSC; + auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) + + (sizeof(StackEntry) * (TLD.StackEntries - 1)); + __sanitizer::internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry)); + break; + } + case XRayEntryType::EXIT: + case XRayEntryType::TAIL: { + if (TLD.StackEntries == 0) + break; + + if (--TLD.StackEntries >= TLD.StackSize) + return; + + // When we encounter an exit event, we check whether all the following are + // true: + // + // - The Function ID is the same as the most recent entry in the stack. + // - The CPU is the same as the most recent entry in the stack. + // - The Delta of the TSCs is less than the threshold amount of time we're + // looking to record. + // + // If all of these conditions are true, we pop the stack and don't write a + // record and move the record offset back. + StackEntry StackTop; + auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) + + (sizeof(StackEntry) * TLD.StackEntries); + __sanitizer::internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry)); + if (StackTop.FuncId == FuncId && StackTop.CPU == CPU && + StackTop.TSC < TSC) { + auto Delta = TSC - StackTop.TSC; + if (Delta < thresholdTicks()) { + assert(TLD.BufferOffset > 0); + TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2; + return; + } + } + break; + } + default: + // Should be unreachable. + assert(false && "Unsupported XRayEntryType encountered."); + break; + } - // First we get the useful data, and stuff it into the already aligned buffer - // through a pointer offset. - auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset]; + // First determine whether the delta between the function's enter record and + // the exit record is higher than the threshold. + __xray::XRayRecord R; R.RecordType = RecordTypes::NORMAL; - R.TSC = ReadTSC(R.CPU); - R.TId = getTId(); + R.CPU = CPU; + R.TSC = TSC; + R.TId = TLD.TID; R.Type = Type; R.FuncId = FuncId; - if (++Offset == TLD.BufferSize) { + auto EntryPtr = static_cast<char *>(InMemoryBuffer) + + (sizeof(__xray::XRayRecord) * TLD.BufferOffset); + __sanitizer::internal_memcpy(EntryPtr, &R, sizeof(R)); + if (++TLD.BufferOffset == TLD.BufferSize) { __sanitizer::SpinMutexLock L(&LogMutex); auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer); retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer), - reinterpret_cast<char *>(RecordBuffer + Offset)); - Offset = 0; + reinterpret_cast<char *>(RecordBuffer + TLD.BufferOffset)); + TLD.BufferOffset = 0; + TLD.StackEntries = 0; } - - RecusionGuard = false; } template <class RDTSC> @@ -182,21 +273,22 @@ void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1, retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer), reinterpret_cast<char *>(RecordBuffer + Offset)); Offset = 0; + TLD.StackEntries = 0; } // Then we write the "we have an argument" record. InMemoryRawLog(FuncId, Type, ReadTSC); - if (RecusionGuard) + if (RecursionGuard) return; - - RecusionGuard = true; + RecursionGuard = true; + auto ExitGuard = __sanitizer::at_scope_exit([] { RecursionGuard = false; }); // And from here on write the arg payload. __xray::XRayArgPayload R; R.RecordType = RecordTypes::ARG_PAYLOAD; R.FuncId = FuncId; - R.TId = getTId(); + R.TId = TLD.TID; R.Arg = Arg1; auto EntryPtr = &reinterpret_cast<__xray::XRayArgPayload *>(&InMemoryBuffer)[Offset]; @@ -207,9 +299,8 @@ void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1, retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer), reinterpret_cast<char *>(RecordBuffer + Offset)); Offset = 0; + TLD.StackEntries = 0; } - - RecusionGuard = false; } void basicLoggingHandleArg0RealTSC(int32_t FuncId, @@ -251,34 +342,44 @@ void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type, }); } +static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT { + ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P); + auto ExitGuard = __sanitizer::at_scope_exit([&TLD] { + // Clean up dynamic resources. + if (TLD.InMemoryBuffer) + InternalFree(TLD.InMemoryBuffer); + if (TLD.ShadowStack) + InternalFree(TLD.ShadowStack); + if (__sanitizer::Verbosity()) + Report("Cleaned up log for TID: %d\n", TLD.TID); + }); + + if (TLD.Fd == -1 || TLD.BufferOffset == 0) { + if (__sanitizer::Verbosity()) + Report("Skipping buffer for TID: %d; Fd = %d; Offset = %llu\n", TLD.TID, + TLD.Fd, TLD.BufferOffset); + return; + } + + { + __sanitizer::SpinMutexLock L(&LogMutex); + retryingWriteAll(TLD.Fd, reinterpret_cast<char *>(TLD.InMemoryBuffer), + reinterpret_cast<char *>(TLD.InMemoryBuffer) + + (sizeof(__xray::XRayRecord) * TLD.BufferOffset)); + } + + // Because this thread's exit could be the last one trying to write to + // the file and that we're not able to close out the file properly, we + // sync instead and hope that the pending writes are flushed as the + // thread exits. + fsync(TLD.Fd); +} + XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax, void *Options, size_t OptionsSize) XRAY_NEVER_INSTRUMENT { static bool UNUSED Once = [] { - pthread_key_create(&PThreadKey, +[](void *P) { - ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P); - if (TLD.Fd == -1 || TLD.BufferOffset == 0) - return; - - { - __sanitizer::SpinMutexLock L(&LogMutex); - retryingWriteAll( - TLD.Fd, reinterpret_cast<char *>(TLD.InMemoryBuffer), - reinterpret_cast<char *>(TLD.InMemoryBuffer + TLD.BufferOffset)); - } - - // Because this thread's exit could be the last one trying to write to - // the file and that we're not able to close out the file properly, we - // sync instead and hope that the pending writes are flushed as the - // thread exits. - fsync(TLD.Fd); - - // Clean up dynamic resources. - if (TLD.InMemoryBuffer) - InternalFree(TLD.InMemoryBuffer); - if (TLD.ShadowStack) - InternalFree(TLD.ShadowStack); - }); + pthread_key_create(&PThreadKey, TLDDestructor); return false; }(); @@ -307,6 +408,7 @@ XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax, __xray_set_handler(UseRealTSC ? basicLoggingHandleArg0RealTSC : basicLoggingHandleArg0EmulateTSC); __xray_remove_customevent_handler(); + return XRayLogInitStatus::XRAY_LOG_INITIALIZED; } @@ -355,6 +457,11 @@ bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { Options.ThreadBufferSize = flags()->xray_naive_log_thread_buffer_size; __xray_log_init(flags()->xray_naive_log_thread_buffer_size, 0, &Options, sizeof(BasicLoggingOptions)); + static auto UNUSED Once = [] { + static auto UNUSED &TLD = getThreadLocalData(); + atexit(+[] { TLDDestructor(&TLD); }); + return false; + }(); } return true; } diff --git a/compiler-rt/test/xray/TestCases/Linux/basic-filtering.cc b/compiler-rt/test/xray/TestCases/Linux/basic-filtering.cc new file mode 100644 index 00000000000..b758859cf6c --- /dev/null +++ b/compiler-rt/test/xray/TestCases/Linux/basic-filtering.cc @@ -0,0 +1,51 @@ +// Check to make sure that we are actually filtering records from the basic mode +// logging implementation. + +// RUN: %clangxx_xray -std=c++11 %s -o %t -g +// RUN: rm basic-filtering-* || true +// RUN: XRAY_OPTIONS="patch_premain=true xray_naive_log=true verbosity=1 \ +// RUN: xray_logfile_base=basic-filtering- \ +// RUN: xray_naive_log_func_duration_threshold_us=1000 \ +// RUN: xray_naive_log_max_stack_depth=2" %run %t 2>&1 | \ +// RUN: FileCheck %s +// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \ +// RUN: "`ls basic-filtering-* | head -1`" | \ +// RUN: FileCheck %s --check-prefix TRACE +// RUN: rm basic-filtering-* || true +// +// REQUIRES: x86_64-linux +// REQUIRES: built-in-llvm-tree + +#include <cstdio> +#include <time.h> + +[[clang::xray_always_instrument]] void __attribute__((noinline)) filtered() { + printf("filtered was called.\n"); +} + +[[clang::xray_always_instrument]] void __attribute__((noinline)) beyond_stack() { + printf("beyond stack was called.\n"); +} + +[[clang::xray_always_instrument]] void __attribute__((noinline)) +always_shows() { + struct timespec sleep; + sleep.tv_nsec = 2000000; + sleep.tv_sec = 0; + struct timespec rem; + while (nanosleep(&sleep, &rem) == -1) + sleep = rem; + printf("always_shows was called.\n"); + beyond_stack(); +} + +[[clang::xray_always_instrument]] int main(int argc, char *argv[]) { + filtered(); // CHECK: filtered was called. + always_shows(); // CHECK: always_shows was called. + // CHECK: beyond stack was called. +} + +// TRACE-NOT: - { type: 0, func-id: {{.*}}, function: {{.*filtered.*}}, {{.*}} } +// TRACE-NOT: - { type: 0, func-id: {{.*}}, function: {{.*beyond_stack.*}}, {{.*}} } +// TRACE-DAG: - { type: 0, func-id: [[FID:[0-9]+]], function: {{.*always_shows.*}}, cpu: {{.*}}, thread: {{.*}}, kind: function-enter, tsc: {{[0-9]+}} } +// TRACE-DAG: - { type: 0, func-id: [[FID]], function: {{.*always_shows.*}}, cpu: {{.*}}, thread: {{.*}}, kind: function-{{exit|tail-exit}}, tsc: {{[0-9]+}} } |