diff options
author | Dean Michael Berris <dberris@google.com> | 2018-12-07 06:23:06 +0000 |
---|---|---|
committer | Dean Michael Berris <dberris@google.com> | 2018-12-07 06:23:06 +0000 |
commit | 25d505953a34b526d9985f2e631128069a9c22d7 (patch) | |
tree | 6e0d03fe0c2d18b21eae96c4145ef6feb1f08159 /compiler-rt/lib/xray/tests/unit/profile_collector_test.cc | |
parent | a523a211754514ef1c5c84778e38e0a8da335500 (diff) | |
download | bcm5719-llvm-25d505953a34b526d9985f2e631128069a9c22d7.tar.gz bcm5719-llvm-25d505953a34b526d9985f2e631128069a9c22d7.zip |
[XRay] Use preallocated memory for XRay profiling
Summary:
This change builds upon D54989, which removes memory allocation from the
critical path of the profiling implementation. This also changes the API
of the profile collection service so that it takes ownership of the
per-thread memory and associated data structures.
The consolidation of the memory allocation allows us to do two things:
- Limit the amount of memory used by the profiling implementation,
associating preallocated buffers instead of allocating memory
on-demand.
- Consolidate the memory initialisation and cleanup by relying on the
buffer queue's reference counting implementation.
We find a number of places which also display some problematic
behaviour, including:
- Off-by-factor bug in the allocator implementation.
- Unrolling semantics in cases of "memory exhausted" situations, when
managing the state of the function call trie.
We also add a few test cases which verify our understanding of the
behaviour of the system, with important edge-cases (especially for
memory-exhausted cases) in the segmented array and profile collector
unit tests.
Depends on D54989.
Reviewers: mboerger
Subscribers: dschuff, mgorny, dmgreen, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D55249
llvm-svn: 348568
Diffstat (limited to 'compiler-rt/lib/xray/tests/unit/profile_collector_test.cc')
-rw-r--r-- | compiler-rt/lib/xray/tests/unit/profile_collector_test.cc | 49 |
1 files changed, 37 insertions, 12 deletions
diff --git a/compiler-rt/lib/xray/tests/unit/profile_collector_test.cc b/compiler-rt/lib/xray/tests/unit/profile_collector_test.cc index f06b7027ee1..df786d46b9d 100644 --- a/compiler-rt/lib/xray/tests/unit/profile_collector_test.cc +++ b/compiler-rt/lib/xray/tests/unit/profile_collector_test.cc @@ -110,24 +110,31 @@ std::tuple<Profile, const char *> ParseProfile(const char *P) { TEST(profileCollectorServiceTest, PostSerializeCollect) { profilingFlags()->setDefaults(); - // The most basic use-case (the one we actually only care about) is the one - // where we ensure that we can post FunctionCallTrie instances, which are then - // destroyed but serialized properly. - // - // First, we initialise a set of allocators in the local scope. This ensures - // that we're able to copy the contents of the FunctionCallTrie that uses - // the local allocators. - auto Allocators = FunctionCallTrie::InitAllocators(); + bool Success = false; + BufferQueue BQ(profilingFlags()->per_thread_allocator_max, + profilingFlags()->buffers_max, Success); + ASSERT_EQ(Success, true); + FunctionCallTrie::Allocators::Buffers Buffers; + ASSERT_EQ(BQ.getBuffer(Buffers.NodeBuffer), BufferQueue::ErrorCode::Ok); + ASSERT_EQ(BQ.getBuffer(Buffers.RootsBuffer), BufferQueue::ErrorCode::Ok); + ASSERT_EQ(BQ.getBuffer(Buffers.ShadowStackBuffer), + BufferQueue::ErrorCode::Ok); + ASSERT_EQ(BQ.getBuffer(Buffers.NodeIdPairBuffer), BufferQueue::ErrorCode::Ok); + auto Allocators = FunctionCallTrie::InitAllocatorsFromBuffers(Buffers); FunctionCallTrie T(Allocators); - // Then, we populate the trie with some data. + // Populate the trie with some data. T.enterFunction(1, 1, 0); T.enterFunction(2, 2, 0); T.exitFunction(2, 3, 0); T.exitFunction(1, 4, 0); + // Reset the collector data structures. + profileCollectorService::reset(); + // Then we post the data to the global profile collector service. 
- profileCollectorService::post(T, 1); + profileCollectorService::post(&BQ, std::move(T), std::move(Allocators), + std::move(Buffers), 1); // Then we serialize the data. profileCollectorService::serialize(); @@ -174,7 +181,21 @@ TEST(profileCollectorServiceTest, PostSerializeCollect) { // profileCollectorService. This simulates what the threads being profiled would // be doing anyway, but through the XRay logging implementation. void threadProcessing() { - thread_local auto Allocators = FunctionCallTrie::InitAllocators(); + static bool Success = false; + static BufferQueue BQ(profilingFlags()->per_thread_allocator_max, + profilingFlags()->buffers_max, Success); + thread_local FunctionCallTrie::Allocators::Buffers Buffers = [] { + FunctionCallTrie::Allocators::Buffers B; + BQ.getBuffer(B.NodeBuffer); + BQ.getBuffer(B.RootsBuffer); + BQ.getBuffer(B.ShadowStackBuffer); + BQ.getBuffer(B.NodeIdPairBuffer); + return B; + }(); + + thread_local auto Allocators = + FunctionCallTrie::InitAllocatorsFromBuffers(Buffers); + FunctionCallTrie T(Allocators); T.enterFunction(1, 1, 0); @@ -182,11 +203,15 @@ void threadProcessing() { T.exitFunction(2, 3, 0); T.exitFunction(1, 4, 0); - profileCollectorService::post(T, GetTid()); + profileCollectorService::post(&BQ, std::move(T), std::move(Allocators), + std::move(Buffers), GetTid()); } TEST(profileCollectorServiceTest, PostSerializeCollectMultipleThread) { profilingFlags()->setDefaults(); + + profileCollectorService::reset(); + std::thread t1(threadProcessing); std::thread t2(threadProcessing); |