diff options
| | | |
|---|---|---|
| author | Dean Michael Berris <dberris@google.com> | 2018-07-13 04:04:18 +0000 |
| committer | Dean Michael Berris <dberris@google.com> | 2018-07-13 04:04:18 +0000 |
| commit | 5d92d3e5be7e4a25e20cc77194723da0a2740eb8 (patch) | |
| tree | b432896ded6bec57c08ef59a1ea60aa5e44a5490 /compiler-rt | |
| parent | 00712cb749b47ef1e1c71b06958101743f8f3408 (diff) | |
| download | bcm5719-llvm-5d92d3e5be7e4a25e20cc77194723da0a2740eb8.tar.gz bcm5719-llvm-5d92d3e5be7e4a25e20cc77194723da0a2740eb8.zip | |
[XRay][compiler-rt] Profiling Mode: Flush logs on exit
Summary:
This change adds support for writing out profiles at program exit.
Depends on D48653.
Reviewers: kpw, eizan
Reviewed By: kpw
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D48956
llvm-svn: 336969
Diffstat (limited to 'compiler-rt')
5 files changed, 81 insertions, 36 deletions
diff --git a/compiler-rt/lib/xray/xray_profile_collector.cc b/compiler-rt/lib/xray/xray_profile_collector.cc index 5da8073f5c2..97264919335 100644 --- a/compiler-rt/lib/xray/xray_profile_collector.cc +++ b/compiler-rt/lib/xray/xray_profile_collector.cc @@ -30,13 +30,11 @@ struct ThreadTrie { tid_t TId; FunctionCallTrie *Trie; }; -Vector<ThreadTrie> ThreadTries; struct ProfileBuffer { void *Data; size_t Size; }; -Vector<ProfileBuffer> ProfileBuffers; struct BlockHeader { u32 BlockSize; @@ -44,6 +42,10 @@ struct BlockHeader { u64 ThreadId; }; +// These need to be pointers that point to heap/internal-allocator-allocated +// objects because these are accessed even at program exit. +Vector<ThreadTrie> *ThreadTries = nullptr; +Vector<ProfileBuffer> *ProfileBuffers = nullptr; FunctionCallTrie::Allocators *GlobalAllocators = nullptr; } // namespace @@ -57,8 +59,16 @@ void post(const FunctionCallTrie &T, tid_t TId) { new (GlobalAllocators) FunctionCallTrie::Allocators(); *GlobalAllocators = FunctionCallTrie::InitAllocatorsCustom( profilingFlags()->global_allocator_max); + ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>( + InternalAlloc(sizeof(Vector<ThreadTrie>))); + new (ThreadTries) Vector<ThreadTrie>(); + ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>( + InternalAlloc(sizeof(Vector<ProfileBuffer>))); + new (ProfileBuffers) Vector<ProfileBuffer>(); }); DCHECK_NE(GlobalAllocators, nullptr); + DCHECK_NE(ThreadTries, nullptr); + DCHECK_NE(ProfileBuffers, nullptr); ThreadTrie *Item = nullptr; { @@ -66,7 +76,7 @@ void post(const FunctionCallTrie &T, tid_t TId) { if (GlobalAllocators == nullptr) return; - Item = ThreadTries.PushBack(); + Item = ThreadTries->PushBack(); Item->TId = TId; // Here we're using the internal allocator instead of the managed allocator @@ -188,15 +198,15 @@ void serialize() { SpinMutexLock Lock(&GlobalMutex); // Clear out the global ProfileBuffers. 
- for (uptr I = 0; I < ProfileBuffers.Size(); ++I) - InternalFree(ProfileBuffers[I].Data); - ProfileBuffers.Reset(); + for (uptr I = 0; I < ProfileBuffers->Size(); ++I) + InternalFree((*ProfileBuffers)[I].Data); + ProfileBuffers->Reset(); - if (ThreadTries.Size() == 0) + if (ThreadTries->Size() == 0) return; // Then repopulate the global ProfileBuffers. - for (u32 I = 0; I < ThreadTries.Size(); ++I) { + for (u32 I = 0; I < ThreadTries->Size(); ++I) { using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType; ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max, 0); ProfileRecord::PathAllocator PathAlloc( @@ -207,7 +217,7 @@ void serialize() { // use a local allocator and an __xray::Array<...> to store the intermediary // data, then compute the size as we're going along. Then we'll allocate the // contiguous space to contain the thread buffer data. - const auto &Trie = *ThreadTries[I].Trie; + const auto &Trie = *(*ThreadTries)[I].Trie; if (Trie.getRoots().empty()) continue; populateRecords(ProfileRecords, PathAlloc, Trie); @@ -227,8 +237,8 @@ void serialize() { for (const auto &Record : ProfileRecords) CumulativeSizes += 20 + (4 * Record.Path->size()); - BlockHeader Header{16 + CumulativeSizes, I, ThreadTries[I].TId}; - auto Buffer = ProfileBuffers.PushBack(); + BlockHeader Header{16 + CumulativeSizes, I, (*ThreadTries)[I].TId}; + auto Buffer = ProfileBuffers->PushBack(); Buffer->Size = sizeof(Header) + CumulativeSizes; Buffer->Data = InternalAlloc(Buffer->Size, nullptr, 64); DCHECK_NE(Buffer->Data, nullptr); @@ -244,18 +254,26 @@ void serialize() { void reset() { SpinMutexLock Lock(&GlobalMutex); - // Clear out the profile buffers that have been serialized. - for (uptr I = 0; I < ProfileBuffers.Size(); ++I) - InternalFree(ProfileBuffers[I].Data); - ProfileBuffers.Reset(); - - // Clear out the function call tries per thread. 
- for (uptr I = 0; I < ThreadTries.Size(); ++I) { - auto &T = ThreadTries[I]; - T.Trie->~FunctionCallTrie(); - InternalFree(T.Trie); + if (ProfileBuffers != nullptr) { + // Clear out the profile buffers that have been serialized. + for (uptr I = 0; I < ProfileBuffers->Size(); ++I) + InternalFree((*ProfileBuffers)[I].Data); + ProfileBuffers->Reset(); + InternalFree(ProfileBuffers); + ProfileBuffers = nullptr; + } + + if (ThreadTries != nullptr) { + // Clear out the function call tries per thread. + for (uptr I = 0; I < ThreadTries->Size(); ++I) { + auto &T = (*ThreadTries)[I]; + T.Trie->~FunctionCallTrie(); + InternalFree(T.Trie); + } + ThreadTries->Reset(); + InternalFree(ThreadTries); + ThreadTries = nullptr; } - ThreadTries.Reset(); // Reset the global allocators. if (GlobalAllocators != nullptr) { @@ -267,18 +285,29 @@ void reset() { InternalAlloc(sizeof(FunctionCallTrie::Allocators))); new (GlobalAllocators) FunctionCallTrie::Allocators(); *GlobalAllocators = FunctionCallTrie::InitAllocators(); + ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>( + InternalAlloc(sizeof(Vector<ThreadTrie>))); + new (ThreadTries) Vector<ThreadTrie>(); + ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>( + InternalAlloc(sizeof(Vector<ProfileBuffer>))); + new (ProfileBuffers) Vector<ProfileBuffer>(); } XRayBuffer nextBuffer(XRayBuffer B) { SpinMutexLock Lock(&GlobalMutex); - if (B.Data == nullptr && ProfileBuffers.Size()) - return {ProfileBuffers[0].Data, ProfileBuffers[0].Size}; + + if (ProfileBuffers == nullptr || ProfileBuffers->Size() == 0) + return {nullptr, 0}; + + if (B.Data == nullptr) + return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size}; BlockHeader Header; internal_memcpy(&Header, B.Data, sizeof(BlockHeader)); auto NextBlock = Header.BlockNum + 1; - if (NextBlock < ProfileBuffers.Size()) - return {ProfileBuffers[NextBlock].Data, ProfileBuffers[NextBlock].Size}; + if (NextBlock < ProfileBuffers->Size()) + return {(*ProfileBuffers)[NextBlock].Data, + 
(*ProfileBuffers)[NextBlock].Size}; return {nullptr, 0}; } diff --git a/compiler-rt/lib/xray/xray_profiling.cc b/compiler-rt/lib/xray/xray_profiling.cc index fa60263c2b3..2c5b8295993 100644 --- a/compiler-rt/lib/xray/xray_profiling.cc +++ b/compiler-rt/lib/xray/xray_profiling.cc @@ -277,7 +277,7 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options, // We need to reset the profile data collection implementation now. profileCollectorService::reset(); - // We need to set up the at-thread-exit handler. + // We need to set up the exit handlers. static pthread_once_t Once = PTHREAD_ONCE_INIT; pthread_once(&Once, +[] { pthread_key_create(&ProfilingKey, +[](void *P) { @@ -288,6 +288,19 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options, postCurrentThreadFCT(TLD); }); + + // We also need to set up an exit handler, so that we can get the profile + // information at exit time. We use the C API to do this, to not rely on C++ + // ABI functions for registering exit handlers. + Atexit(+[] { + // Finalize and flush. 
+ if (profilingFinalize() != XRAY_LOG_FINALIZED) + return; + if (profilingFlush() != XRAY_LOG_FLUSHED) + return; + if (Verbosity()) + Report("XRay Profile flushed at exit."); + }); }); __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer); @@ -321,13 +334,16 @@ bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT { profilingFlush, }; auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl); - if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && - Verbosity()) - Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = " - "%d\n", - RegistrationResult); + if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) { + if (Verbosity()) + Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = " + "%d\n", + RegistrationResult); + return false; + } + if (!internal_strcmp(flags()->xray_mode, "xray-profiling")) - __xray_set_log_impl(Impl); + __xray_log_select_mode("xray_profiling"); return true; } diff --git a/compiler-rt/lib/xray/xray_profiling_flags.inc b/compiler-rt/lib/xray/xray_profiling_flags.inc index 616bc83adf4..04ccd459d8a 100644 --- a/compiler-rt/lib/xray/xray_profiling_flags.inc +++ b/compiler-rt/lib/xray/xray_profiling_flags.inc @@ -20,7 +20,7 @@ XRAY_FLAG(uptr, global_allocator_max, 2 << 24, "Maximum size of the global allocator for profile storage.") XRAY_FLAG(uptr, stack_allocator_max, 2 << 24, "Maximum size of the traversal stack allocator.") -XRAY_FLAG(int, grace_period_ms, 100, +XRAY_FLAG(int, grace_period_ms, 1, "Profile collection will wait this much time in milliseconds before " "resetting the global state. 
This gives a chance to threads to " "notice that the profiler has been finalized and clean up.") diff --git a/compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cc b/compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cc index 7ccad1bac1f..f4caadb2808 100644 --- a/compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cc +++ b/compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cc @@ -8,7 +8,7 @@ // RUN: XRAY_PROFILING_OPTIONS=no_flush=1 %run %t // RUN: XRAY_OPTIONS=verbosity=1 %run %t // RUN: PROFILES=`ls xray-log.profiling-multi-* | wc -l` -// RUN: [ $PROFILES -eq 1 ] +// RUN: [ $PROFILES -ge 1 ] // RUN: rm -f xray-log.profiling-multi-* // // REQUIRES: x86_64-target-arch diff --git a/compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cc b/compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cc index fd508b1acd1..32002c69921 100644 --- a/compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cc +++ b/compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cc @@ -8,7 +8,7 @@ // RUN: XRAY_PROFILING_OPTIONS=no_flush=true %run %t // RUN: XRAY_OPTIONS=verbosity=1 %run %t // RUN: PROFILES=`ls xray-log.profiling-single-* | wc -l` -// RUN: [ $PROFILES -eq 2 ] +// RUN: [ $PROFILES -ge 2 ] // RUN: rm -f xray-log.profiling-single-* // // REQUIRES: x86_64-target-arch |