summaryrefslogtreecommitdiffstats
path: root/compiler-rt
diff options
context:
space:
mode:
authorDean Michael Berris <dberris@google.com>2017-12-05 12:21:14 +0000
committerDean Michael Berris <dberris@google.com>2017-12-05 12:21:14 +0000
commit52517d7cf70da666f98e72f926411c503f29ee50 (patch)
tree561c23d60c3b18474e73cb7985553824b9b0000d /compiler-rt
parentcec8f47e77f1cb917ba9af24b2517b985a649202 (diff)
downloadbcm5719-llvm-52517d7cf70da666f98e72f926411c503f29ee50.tar.gz
bcm5719-llvm-52517d7cf70da666f98e72f926411c503f29ee50.zip
[XRay][compiler-rt] Implement XRay Basic Mode Filtering
Summary: This change implements the basic mode filtering similar to what we do in FDR mode. The implementation is slightly simpler in basic-mode filtering because we have less details to remember, but the idea is the same. At a high level, we do the following to decide when to filter function call records: - We maintain a per-thread "shadow stack" which keeps track of the XRay instrumented functions we've encountered in a thread's execution. - We push an entry onto the stack when we enter an XRay instrumented function, and note the CPU, TSC, and type of entry (whether we have payload or not when entering). - When we encounter an exit event, we determine whether the function being exited is the same function we've entered recently, was executing in the same CPU, and the delta of the recent TSC and the recorded TSC at the top of the stack is less than the equivalent amount of microseconds we're configured to ignore -- then we un-wind the record offset an appropriate number of times (so we can overwrite the records later). We also support limiting the stack depth of the recorded functions, so that we don't arbitrarily write deep function call stacks. Reviewers: eizan, pelikan, kpw, dblaikie Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D40828 llvm-svn: 319762
Diffstat (limited to 'compiler-rt')
-rw-r--r--compiler-rt/lib/xray/xray_inmemory_log.cc215
-rw-r--r--compiler-rt/test/xray/TestCases/Linux/basic-filtering.cc51
2 files changed, 212 insertions, 54 deletions
diff --git a/compiler-rt/lib/xray/xray_inmemory_log.cc b/compiler-rt/lib/xray/xray_inmemory_log.cc
index 74789ebac1c..b70d6ecd05a 100644
--- a/compiler-rt/lib/xray/xray_inmemory_log.cc
+++ b/compiler-rt/lib/xray/xray_inmemory_log.cc
@@ -42,19 +42,25 @@ __sanitizer::SpinMutex LogMutex;
// We use elements of this type to record the entry TSC of every function ID we
// see as we're tracing a particular thread's execution.
-struct StackEntry {
+struct alignas(16) StackEntry {
int32_t FuncId;
+ uint16_t Type;
+ uint8_t CPU;
+ uint8_t Padding;
uint64_t TSC;
};
+static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry");
+
struct alignas(64) ThreadLocalData {
- XRayRecord *InMemoryBuffer = nullptr;
+ void *InMemoryBuffer = nullptr;
size_t BufferSize = 0;
size_t BufferOffset = 0;
- StackEntry *ShadowStack = nullptr;
+ void *ShadowStack = nullptr;
size_t StackSize = 0;
size_t StackEntries = 0;
int Fd = -1;
+ pid_t TID = 0;
};
static pthread_key_t PThreadKey;
@@ -63,7 +69,16 @@ static __sanitizer::atomic_uint8_t BasicInitialized{0};
BasicLoggingOptions GlobalOptions;
-thread_local volatile bool RecusionGuard = false;
+thread_local volatile bool RecursionGuard = false;
+
+static uint64_t thresholdTicks() XRAY_NEVER_INSTRUMENT {
+ static uint64_t TicksPerSec = probeRequiredCPUFeatures()
+ ? getTSCFrequency()
+ : __xray::NanosecondsPerSecond;
+ static const uint64_t ThresholdTicks =
+ TicksPerSec * GlobalOptions.DurationFilterMicros / 1000000;
+ return ThresholdTicks;
+}
static int openLogFile() XRAY_NEVER_INSTRUMENT {
int F = getLogFD();
@@ -92,11 +107,6 @@ static int openLogFile() XRAY_NEVER_INSTRUMENT {
return F;
}
-pid_t getTId() XRAY_NEVER_INSTRUMENT {
- thread_local pid_t TId = syscall(SYS_gettid);
- return TId;
-}
-
int getGlobalFd() XRAY_NEVER_INSTRUMENT {
static int Fd = openLogFile();
return Fd;
@@ -105,8 +115,12 @@ int getGlobalFd() XRAY_NEVER_INSTRUMENT {
ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
thread_local ThreadLocalData TLD;
thread_local bool UNUSED TOnce = [] {
- if (GlobalOptions.ThreadBufferSize == 0)
+ if (GlobalOptions.ThreadBufferSize == 0) {
+ if (__sanitizer::Verbosity())
+ Report("Not initializing TLD since ThreadBufferSize == 0.\n");
return false;
+ }
+ TLD.TID = __sanitizer::GetTid();
pthread_setspecific(PThreadKey, &TLD);
TLD.Fd = getGlobalFd();
TLD.InMemoryBuffer = reinterpret_cast<XRayRecord *>(
@@ -114,13 +128,26 @@ ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
nullptr, alignof(XRayRecord)));
TLD.BufferSize = GlobalOptions.ThreadBufferSize;
TLD.BufferOffset = 0;
- if (GlobalOptions.MaxStackDepth == 0)
+ if (GlobalOptions.MaxStackDepth == 0) {
+ if (__sanitizer::Verbosity())
+ Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n");
+ TLD.StackSize = 0;
+ TLD.StackEntries = 0;
+ TLD.ShadowStack = nullptr;
return false;
+ }
TLD.ShadowStack = reinterpret_cast<StackEntry *>(
InternalAlloc(sizeof(StackEntry) * GlobalOptions.MaxStackDepth, nullptr,
alignof(StackEntry)));
TLD.StackSize = GlobalOptions.MaxStackDepth;
TLD.StackEntries = 0;
+ if (__sanitizer::Verbosity() >= 2) {
+ static auto UNUSED Once = [] {
+ auto ticks = thresholdTicks();
+ Report("Ticks threshold: %d\n", ticks);
+ return false;
+ }();
+ }
return false;
}();
return TLD;
@@ -131,7 +158,6 @@ void InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
auto &TLD = getThreadLocalData();
auto &InMemoryBuffer = TLD.InMemoryBuffer;
- auto &Offset = TLD.BufferOffset;
int Fd = getGlobalFd();
if (Fd == -1)
return;
@@ -139,27 +165,92 @@ void InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
// Use a simple recursion guard, to handle cases where we're already logging
// and for one reason or another, this function gets called again in the same
// thread.
- if (RecusionGuard)
+ if (RecursionGuard)
return;
- RecusionGuard = true;
+ RecursionGuard = true;
+ auto ExitGuard = __sanitizer::at_scope_exit([] { RecursionGuard = false; });
+
+ uint8_t CPU = 0;
+ uint64_t TSC = ReadTSC(CPU);
+
+ switch (Type) {
+ case XRayEntryType::ENTRY:
+ case XRayEntryType::LOG_ARGS_ENTRY: {
+ // Short circuit if we've reached the maximum depth of the stack.
+ if (TLD.StackEntries++ >= TLD.StackSize)
+ return;
+
+ // When we encounter an entry event, we keep track of the TSC and the CPU,
+ // and put it in the stack.
+ StackEntry E;
+ E.FuncId = FuncId;
+ E.CPU = CPU;
+ E.Type = Type;
+ E.TSC = TSC;
+ auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
+ (sizeof(StackEntry) * (TLD.StackEntries - 1));
+ __sanitizer::internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry));
+ break;
+ }
+ case XRayEntryType::EXIT:
+ case XRayEntryType::TAIL: {
+ if (TLD.StackEntries == 0)
+ break;
+
+ if (--TLD.StackEntries >= TLD.StackSize)
+ return;
+
+ // When we encounter an exit event, we check whether all the following are
+ // true:
+ //
+ // - The Function ID is the same as the most recent entry in the stack.
+ // - The CPU is the same as the most recent entry in the stack.
+ // - The Delta of the TSCs is less than the threshold amount of time we're
+ // looking to record.
+ //
+ // If all of these conditions are true, we pop the stack and don't write a
+ // record and move the record offset back.
+ StackEntry StackTop;
+ auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
+ (sizeof(StackEntry) * TLD.StackEntries);
+ __sanitizer::internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry));
+ if (StackTop.FuncId == FuncId && StackTop.CPU == CPU &&
+ StackTop.TSC < TSC) {
+ auto Delta = TSC - StackTop.TSC;
+ if (Delta < thresholdTicks()) {
+ assert(TLD.BufferOffset > 0);
+ TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2;
+ return;
+ }
+ }
+ break;
+ }
+ default:
+ // Should be unreachable.
+ assert(false && "Unsupported XRayEntryType encountered.");
+ break;
+ }
- // First we get the useful data, and stuff it into the already aligned buffer
- // through a pointer offset.
- auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
+ // First determine whether the delta between the function's enter record and
+ // the exit record is higher than the threshold.
+ __xray::XRayRecord R;
R.RecordType = RecordTypes::NORMAL;
- R.TSC = ReadTSC(R.CPU);
- R.TId = getTId();
+ R.CPU = CPU;
+ R.TSC = TSC;
+ R.TId = TLD.TID;
R.Type = Type;
R.FuncId = FuncId;
- if (++Offset == TLD.BufferSize) {
+ auto EntryPtr = static_cast<char *>(InMemoryBuffer) +
+ (sizeof(__xray::XRayRecord) * TLD.BufferOffset);
+ __sanitizer::internal_memcpy(EntryPtr, &R, sizeof(R));
+ if (++TLD.BufferOffset == TLD.BufferSize) {
__sanitizer::SpinMutexLock L(&LogMutex);
auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
- reinterpret_cast<char *>(RecordBuffer + Offset));
- Offset = 0;
+ reinterpret_cast<char *>(RecordBuffer + TLD.BufferOffset));
+ TLD.BufferOffset = 0;
+ TLD.StackEntries = 0;
}
-
- RecusionGuard = false;
}
template <class RDTSC>
@@ -182,21 +273,22 @@ void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1,
retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
reinterpret_cast<char *>(RecordBuffer + Offset));
Offset = 0;
+ TLD.StackEntries = 0;
}
// Then we write the "we have an argument" record.
InMemoryRawLog(FuncId, Type, ReadTSC);
- if (RecusionGuard)
+ if (RecursionGuard)
return;
-
- RecusionGuard = true;
+ RecursionGuard = true;
+ auto ExitGuard = __sanitizer::at_scope_exit([] { RecursionGuard = false; });
// And from here on write the arg payload.
__xray::XRayArgPayload R;
R.RecordType = RecordTypes::ARG_PAYLOAD;
R.FuncId = FuncId;
- R.TId = getTId();
+ R.TId = TLD.TID;
R.Arg = Arg1;
auto EntryPtr =
&reinterpret_cast<__xray::XRayArgPayload *>(&InMemoryBuffer)[Offset];
@@ -207,9 +299,8 @@ void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1,
retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
reinterpret_cast<char *>(RecordBuffer + Offset));
Offset = 0;
+ TLD.StackEntries = 0;
}
-
- RecusionGuard = false;
}
void basicLoggingHandleArg0RealTSC(int32_t FuncId,
@@ -251,34 +342,44 @@ void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type,
});
}
+static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT {
+ ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P);
+ auto ExitGuard = __sanitizer::at_scope_exit([&TLD] {
+ // Clean up dynamic resources.
+ if (TLD.InMemoryBuffer)
+ InternalFree(TLD.InMemoryBuffer);
+ if (TLD.ShadowStack)
+ InternalFree(TLD.ShadowStack);
+ if (__sanitizer::Verbosity())
+ Report("Cleaned up log for TID: %d\n", TLD.TID);
+ });
+
+ if (TLD.Fd == -1 || TLD.BufferOffset == 0) {
+ if (__sanitizer::Verbosity())
+ Report("Skipping buffer for TID: %d; Fd = %d; Offset = %llu\n", TLD.TID,
+ TLD.Fd, TLD.BufferOffset);
+ return;
+ }
+
+ {
+ __sanitizer::SpinMutexLock L(&LogMutex);
+ retryingWriteAll(TLD.Fd, reinterpret_cast<char *>(TLD.InMemoryBuffer),
+ reinterpret_cast<char *>(TLD.InMemoryBuffer) +
+ (sizeof(__xray::XRayRecord) * TLD.BufferOffset));
+ }
+
+ // Because this thread's exit could be the last one trying to write to
+ // the file and that we're not able to close out the file properly, we
+ // sync instead and hope that the pending writes are flushed as the
+ // thread exits.
+ fsync(TLD.Fd);
+}
+
XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax,
void *Options,
size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
static bool UNUSED Once = [] {
- pthread_key_create(&PThreadKey, +[](void *P) {
- ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P);
- if (TLD.Fd == -1 || TLD.BufferOffset == 0)
- return;
-
- {
- __sanitizer::SpinMutexLock L(&LogMutex);
- retryingWriteAll(
- TLD.Fd, reinterpret_cast<char *>(TLD.InMemoryBuffer),
- reinterpret_cast<char *>(TLD.InMemoryBuffer + TLD.BufferOffset));
- }
-
- // Because this thread's exit could be the last one trying to write to
- // the file and that we're not able to close out the file properly, we
- // sync instead and hope that the pending writes are flushed as the
- // thread exits.
- fsync(TLD.Fd);
-
- // Clean up dynamic resources.
- if (TLD.InMemoryBuffer)
- InternalFree(TLD.InMemoryBuffer);
- if (TLD.ShadowStack)
- InternalFree(TLD.ShadowStack);
- });
+ pthread_key_create(&PThreadKey, TLDDestructor);
return false;
}();
@@ -307,6 +408,7 @@ XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax,
__xray_set_handler(UseRealTSC ? basicLoggingHandleArg0RealTSC
: basicLoggingHandleArg0EmulateTSC);
__xray_remove_customevent_handler();
+
return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}
@@ -355,6 +457,11 @@ bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
Options.ThreadBufferSize = flags()->xray_naive_log_thread_buffer_size;
__xray_log_init(flags()->xray_naive_log_thread_buffer_size, 0, &Options,
sizeof(BasicLoggingOptions));
+ static auto UNUSED Once = [] {
+ static auto UNUSED &TLD = getThreadLocalData();
+ atexit(+[] { TLDDestructor(&TLD); });
+ return false;
+ }();
}
return true;
}
diff --git a/compiler-rt/test/xray/TestCases/Linux/basic-filtering.cc b/compiler-rt/test/xray/TestCases/Linux/basic-filtering.cc
new file mode 100644
index 00000000000..b758859cf6c
--- /dev/null
+++ b/compiler-rt/test/xray/TestCases/Linux/basic-filtering.cc
@@ -0,0 +1,51 @@
+// Check to make sure that we are actually filtering records from the basic mode
+// logging implementation.
+
+// RUN: %clangxx_xray -std=c++11 %s -o %t -g
+// RUN: rm basic-filtering-* || true
+// RUN: XRAY_OPTIONS="patch_premain=true xray_naive_log=true verbosity=1 \
+// RUN: xray_logfile_base=basic-filtering- \
+// RUN: xray_naive_log_func_duration_threshold_us=1000 \
+// RUN: xray_naive_log_max_stack_depth=2" %run %t 2>&1 | \
+// RUN: FileCheck %s
+// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \
+// RUN: "`ls basic-filtering-* | head -1`" | \
+// RUN: FileCheck %s --check-prefix TRACE
+// RUN: rm basic-filtering-* || true
+//
+// REQUIRES: x86_64-linux
+// REQUIRES: built-in-llvm-tree
+
+#include <cstdio>
+#include <time.h>
+
+[[clang::xray_always_instrument]] void __attribute__((noinline)) filtered() {
+ printf("filtered was called.\n");
+}
+
+[[clang::xray_always_instrument]] void __attribute__((noinline)) beyond_stack() {
+ printf("beyond stack was called.\n");
+}
+
+[[clang::xray_always_instrument]] void __attribute__((noinline))
+always_shows() {
+ struct timespec sleep;
+ sleep.tv_nsec = 2000000;
+ sleep.tv_sec = 0;
+ struct timespec rem;
+ while (nanosleep(&sleep, &rem) == -1)
+ sleep = rem;
+ printf("always_shows was called.\n");
+ beyond_stack();
+}
+
+[[clang::xray_always_instrument]] int main(int argc, char *argv[]) {
+ filtered(); // CHECK: filtered was called.
+ always_shows(); // CHECK: always_shows was called.
+ // CHECK: beyond stack was called.
+}
+
+// TRACE-NOT: - { type: 0, func-id: {{.*}}, function: {{.*filtered.*}}, {{.*}} }
+// TRACE-NOT: - { type: 0, func-id: {{.*}}, function: {{.*beyond_stack.*}}, {{.*}} }
+// TRACE-DAG: - { type: 0, func-id: [[FID:[0-9]+]], function: {{.*always_shows.*}}, cpu: {{.*}}, thread: {{.*}}, kind: function-enter, tsc: {{[0-9]+}} }
+// TRACE-DAG: - { type: 0, func-id: [[FID]], function: {{.*always_shows.*}}, cpu: {{.*}}, thread: {{.*}}, kind: function-{{exit|tail-exit}}, tsc: {{[0-9]+}} }
OpenPOWER on IntegriCloud