Diffstat (limited to 'compiler-rt/lib/xray')
-rw-r--r--  compiler-rt/lib/xray/CMakeLists.txt                        |   2
-rw-r--r--  compiler-rt/lib/xray/tests/unit/allocator_test.cc          |  22
-rw-r--r--  compiler-rt/lib/xray/tests/unit/profile_collector_test.cc  |  49
-rw-r--r--  compiler-rt/lib/xray/tests/unit/segmented_array_test.cc    |  38
-rw-r--r--  compiler-rt/lib/xray/xray_allocator.h                      |  22
-rw-r--r--  compiler-rt/lib/xray/xray_function_call_trie.h             |  85
-rw-r--r--  compiler-rt/lib/xray/xray_profile_collector.cc             | 225
-rw-r--r--  compiler-rt/lib/xray/xray_profile_collector.h              |  26
-rw-r--r--  compiler-rt/lib/xray/xray_profiling.cc                     | 134
-rw-r--r--  compiler-rt/lib/xray/xray_profiling_flags.inc              |   5
-rw-r--r--  compiler-rt/lib/xray/xray_segmented_array.h                |   2
11 files changed, 460 insertions(+), 150 deletions(-)
diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
index 541e181afbd..0a86c52e620 100644
--- a/compiler-rt/lib/xray/CMakeLists.txt
+++ b/compiler-rt/lib/xray/CMakeLists.txt
@@ -2,6 +2,7 @@
# XRay runtime library implementation files.
set(XRAY_SOURCES
+ xray_buffer_queue.cc
xray_init.cc
xray_flags.cc
xray_interface.cc
@@ -11,7 +12,6 @@ set(XRAY_SOURCES
# Implementation files for all XRay modes.
set(XRAY_FDR_MODE_SOURCES
xray_fdr_flags.cc
- xray_buffer_queue.cc
xray_fdr_logging.cc)
set(XRAY_BASIC_MODE_SOURCES
diff --git a/compiler-rt/lib/xray/tests/unit/allocator_test.cc b/compiler-rt/lib/xray/tests/unit/allocator_test.cc
index 0177798b069..1170741623c 100644
--- a/compiler-rt/lib/xray/tests/unit/allocator_test.cc
+++ b/compiler-rt/lib/xray/tests/unit/allocator_test.cc
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "xray_allocator.h"
+#include "xray_buffer_queue.h"
#include "gtest/gtest.h"
namespace __xray {
@@ -56,5 +57,26 @@ TEST(AllocatorTest, AllocateBoundaries) {
ASSERT_EQ(C, Expected);
}
+TEST(AllocatorTest, AllocateFromNonOwned) {
+ bool Success = false;
+ BufferQueue BQ(GetPageSizeCached(), 10, Success);
+ ASSERT_TRUE(Success);
+ BufferQueue::Buffer B;
+ ASSERT_EQ(BQ.getBuffer(B), BufferQueue::ErrorCode::Ok);
+ {
+ Allocator<sizeof(OddSizedData)> A(B.Data, B.Size);
+
+ // Keep allocating until we hit a nullptr block.
+ unsigned C = 0;
+ auto Expected =
+ GetPageSizeCached() / RoundUpTo(sizeof(OddSizedData), kCacheLineSize);
+ for (auto B = A.Allocate(); B.Data != nullptr; B = A.Allocate(), ++C)
+ ;
+
+ ASSERT_EQ(C, Expected);
+ }
+ ASSERT_EQ(BQ.releaseBuffer(B), BufferQueue::ErrorCode::Ok);
+}
+
} // namespace
} // namespace __xray
diff --git a/compiler-rt/lib/xray/tests/unit/profile_collector_test.cc b/compiler-rt/lib/xray/tests/unit/profile_collector_test.cc
index f06b7027ee1..df786d46b9d 100644
--- a/compiler-rt/lib/xray/tests/unit/profile_collector_test.cc
+++ b/compiler-rt/lib/xray/tests/unit/profile_collector_test.cc
@@ -110,24 +110,31 @@ std::tuple<Profile, const char *> ParseProfile(const char *P) {
TEST(profileCollectorServiceTest, PostSerializeCollect) {
profilingFlags()->setDefaults();
- // The most basic use-case (the one we actually only care about) is the one
- // where we ensure that we can post FunctionCallTrie instances, which are then
- // destroyed but serialized properly.
- //
- // First, we initialise a set of allocators in the local scope. This ensures
- // that we're able to copy the contents of the FunctionCallTrie that uses
- // the local allocators.
- auto Allocators = FunctionCallTrie::InitAllocators();
+ bool Success = false;
+ BufferQueue BQ(profilingFlags()->per_thread_allocator_max,
+ profilingFlags()->buffers_max, Success);
+ ASSERT_EQ(Success, true);
+ FunctionCallTrie::Allocators::Buffers Buffers;
+ ASSERT_EQ(BQ.getBuffer(Buffers.NodeBuffer), BufferQueue::ErrorCode::Ok);
+ ASSERT_EQ(BQ.getBuffer(Buffers.RootsBuffer), BufferQueue::ErrorCode::Ok);
+ ASSERT_EQ(BQ.getBuffer(Buffers.ShadowStackBuffer),
+ BufferQueue::ErrorCode::Ok);
+ ASSERT_EQ(BQ.getBuffer(Buffers.NodeIdPairBuffer), BufferQueue::ErrorCode::Ok);
+ auto Allocators = FunctionCallTrie::InitAllocatorsFromBuffers(Buffers);
FunctionCallTrie T(Allocators);
- // Then, we populate the trie with some data.
+ // Populate the trie with some data.
T.enterFunction(1, 1, 0);
T.enterFunction(2, 2, 0);
T.exitFunction(2, 3, 0);
T.exitFunction(1, 4, 0);
+ // Reset the collector data structures.
+ profileCollectorService::reset();
+
// Then we post the data to the global profile collector service.
- profileCollectorService::post(T, 1);
+ profileCollectorService::post(&BQ, std::move(T), std::move(Allocators),
+ std::move(Buffers), 1);
// Then we serialize the data.
profileCollectorService::serialize();
@@ -174,7 +181,21 @@ TEST(profileCollectorServiceTest, PostSerializeCollect) {
// profileCollectorService. This simulates what the threads being profiled would
// be doing anyway, but through the XRay logging implementation.
void threadProcessing() {
- thread_local auto Allocators = FunctionCallTrie::InitAllocators();
+ static bool Success = false;
+ static BufferQueue BQ(profilingFlags()->per_thread_allocator_max,
+ profilingFlags()->buffers_max, Success);
+ thread_local FunctionCallTrie::Allocators::Buffers Buffers = [] {
+ FunctionCallTrie::Allocators::Buffers B;
+ BQ.getBuffer(B.NodeBuffer);
+ BQ.getBuffer(B.RootsBuffer);
+ BQ.getBuffer(B.ShadowStackBuffer);
+ BQ.getBuffer(B.NodeIdPairBuffer);
+ return B;
+ }();
+
+ thread_local auto Allocators =
+ FunctionCallTrie::InitAllocatorsFromBuffers(Buffers);
+
FunctionCallTrie T(Allocators);
T.enterFunction(1, 1, 0);
@@ -182,11 +203,15 @@ void threadProcessing() {
T.exitFunction(2, 3, 0);
T.exitFunction(1, 4, 0);
- profileCollectorService::post(T, GetTid());
+ profileCollectorService::post(&BQ, std::move(T), std::move(Allocators),
+ std::move(Buffers), GetTid());
}
TEST(profileCollectorServiceTest, PostSerializeCollectMultipleThread) {
profilingFlags()->setDefaults();
+
+ profileCollectorService::reset();
+
std::thread t1(threadProcessing);
std::thread t2(threadProcessing);
diff --git a/compiler-rt/lib/xray/tests/unit/segmented_array_test.cc b/compiler-rt/lib/xray/tests/unit/segmented_array_test.cc
index 73120aafc8e..46aeb88f71b 100644
--- a/compiler-rt/lib/xray/tests/unit/segmented_array_test.cc
+++ b/compiler-rt/lib/xray/tests/unit/segmented_array_test.cc
@@ -2,6 +2,9 @@
#include "xray_segmented_array.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include <algorithm>
+#include <numeric>
+#include <vector>
namespace __xray {
namespace {
@@ -307,5 +310,40 @@ TEST(SegmentedArrayTest, PlacementNewOnAlignedStorage) {
}
}
+TEST(SegmentedArrayTest, ArrayOfPointersIteratorAccess) {
+ using PtrArray = Array<int *>;
+ PtrArray::AllocatorType Alloc(16384);
+ Array<int *> A(Alloc);
+ static constexpr size_t Count = 100;
+ std::vector<int> Integers(Count);
+ std::iota(Integers.begin(), Integers.end(), 0);
+ for (auto &I : Integers)
+ ASSERT_NE(A.Append(&I), nullptr);
+ int V = 0;
+ ASSERT_EQ(A.size(), Count);
+ for (auto P : A) {
+ ASSERT_NE(P, nullptr);
+ ASSERT_EQ(*P, V++);
+ }
+}
+
+TEST(SegmentedArrayTest, ArrayOfPointersIteratorAccessExhaustion) {
+ using PtrArray = Array<int *>;
+ PtrArray::AllocatorType Alloc(4096);
+ Array<int *> A(Alloc);
+ static constexpr size_t Count = 1000;
+ std::vector<int> Integers(Count);
+ std::iota(Integers.begin(), Integers.end(), 0);
+ for (auto &I : Integers)
+ if (A.Append(&I) == nullptr)
+ break;
+ int V = 0;
+ ASSERT_LT(A.size(), Count);
+ for (auto P : A) {
+ ASSERT_NE(P, nullptr);
+ ASSERT_EQ(*P, V++);
+ }
+}
+
} // namespace
} // namespace __xray
diff --git a/compiler-rt/lib/xray/xray_allocator.h b/compiler-rt/lib/xray/xray_allocator.h
index 2ba937b4324..907c54542a5 100644
--- a/compiler-rt/lib/xray/xray_allocator.h
+++ b/compiler-rt/lib/xray/xray_allocator.h
@@ -175,6 +175,7 @@ private:
unsigned char *BackingStore = nullptr;
unsigned char *AlignedNextBlock = nullptr;
size_t AllocatedBlocks = 0;
+ bool Owned;
SpinMutex Mutex{};
void *Alloc() XRAY_NEVER_INSTRUMENT {
@@ -209,14 +210,14 @@ private:
0);
}
- if ((AllocatedBlocks * Block::Size) >= MaxMemory)
+ if (((AllocatedBlocks + 1) * Block::Size) > MaxMemory)
return nullptr;
// Align the pointer we'd like to return to an appropriate alignment, then
// advance the pointer from where to start allocations.
void *Result = AlignedNextBlock;
- AlignedNextBlock = reinterpret_cast<unsigned char *>(
- reinterpret_cast<unsigned char *>(AlignedNextBlock) + N);
+ AlignedNextBlock =
+ reinterpret_cast<unsigned char *>(AlignedNextBlock) + Block::Size;
++AllocatedBlocks;
return Result;
}
@@ -227,6 +228,15 @@ public:
BackingStore(nullptr),
AlignedNextBlock(nullptr),
AllocatedBlocks(0),
+ Owned(true),
+ Mutex() {}
+
+ explicit Allocator(void *P, size_t M) XRAY_NEVER_INSTRUMENT
+ : MaxMemory(M),
+ BackingStore(reinterpret_cast<unsigned char *>(P)),
+ AlignedNextBlock(reinterpret_cast<unsigned char *>(P)),
+ AllocatedBlocks(0),
+ Owned(false),
Mutex() {}
Allocator(const Allocator &) = delete;
@@ -243,6 +253,8 @@ public:
O.AlignedNextBlock = nullptr;
AllocatedBlocks = O.AllocatedBlocks;
O.AllocatedBlocks = 0;
+ Owned = O.Owned;
+ O.Owned = false;
}
Allocator &operator=(Allocator &&O) XRAY_NEVER_INSTRUMENT {
@@ -258,13 +270,15 @@ public:
O.AlignedNextBlock = nullptr;
AllocatedBlocks = O.AllocatedBlocks;
O.AllocatedBlocks = 0;
+ Owned = O.Owned;
+ O.Owned = false;
return *this;
}
Block Allocate() XRAY_NEVER_INSTRUMENT { return {Alloc()}; }
~Allocator() NOEXCEPT XRAY_NEVER_INSTRUMENT {
- if (BackingStore != nullptr) {
+ if (Owned && BackingStore != nullptr) {
deallocateBuffer(BackingStore, MaxMemory);
}
}
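
Editor's note on the xray_allocator.h hunks above: the new two-argument constructor puts the Allocator into a non-owning mode (Owned == false), so its destructor no longer frees memory it did not create, and the capacity check now counts the block about to be handed out. Below is a minimal sketch of the two modes, assuming the internal headers and sanitizer_common helpers are reachable as in the unit tests above; the Payload type and the function name are illustrative only.

    // Sketch only: Payload and allocatorModesSketch are made-up names.
    #include "xray_allocator.h"
    #include "xray_buffer_queue.h"

    namespace __xray {

    struct Payload {
      int Left;
      int Right;
    };

    void allocatorModesSketch() {
      // Owned mode: the allocator manages its own backing store and frees it
      // in the destructor.
      Allocator<sizeof(Payload)> Owning(1 << 16);
      auto FirstBlock = Owning.Allocate(); // Data == nullptr once exhausted.
      (void)FirstBlock;

      // Non-owned mode: adopt a buffer handed out by a BufferQueue. The queue
      // keeps ownership; the allocator only carves blocks out of it.
      bool Success = false;
      BufferQueue BQ(GetPageSizeCached(), /*N=*/1, Success);
      BufferQueue::Buffer B;
      if (!Success || BQ.getBuffer(B) != BufferQueue::ErrorCode::Ok)
        return;
      {
        Allocator<sizeof(Payload)> Borrowing(B.Data, B.Size);
        while (Borrowing.Allocate().Data != nullptr) {
          // Keep carving cache-line-aligned blocks until the buffer is full.
        }
      } // Borrowing's destructor does not free B.Data (Owned == false).
      BQ.releaseBuffer(B);
    }

    } // namespace __xray

In both modes, Allocate() keeps returning fixed-size blocks until (AllocatedBlocks + 1) * Block::Size would exceed the configured maximum, after which it returns a Block whose Data is nullptr.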
diff --git a/compiler-rt/lib/xray/xray_function_call_trie.h b/compiler-rt/lib/xray/xray_function_call_trie.h
index d70667b5a7f..d01ad20e3d7 100644
--- a/compiler-rt/lib/xray/xray_function_call_trie.h
+++ b/compiler-rt/lib/xray/xray_function_call_trie.h
@@ -15,6 +15,7 @@
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H
+#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
@@ -161,6 +162,35 @@ public:
Allocators(const Allocators &) = delete;
Allocators &operator=(const Allocators &) = delete;
+ struct Buffers {
+ BufferQueue::Buffer NodeBuffer;
+ BufferQueue::Buffer RootsBuffer;
+ BufferQueue::Buffer ShadowStackBuffer;
+ BufferQueue::Buffer NodeIdPairBuffer;
+ };
+
+ explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
+ new (&NodeAllocatorStorage)
+ NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
+ NodeAllocator =
+ reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
+
+ new (&RootAllocatorStorage)
+ RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
+ RootAllocator =
+ reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
+
+ new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
+ B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
+ ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
+ &ShadowStackAllocatorStorage);
+
+ new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
+ B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
+ NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
+ &NodeIdPairAllocatorStorage);
+ }
+
explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
new (&NodeAllocatorStorage) NodeAllocatorType(Max);
NodeAllocator =
@@ -283,6 +313,12 @@ public:
return A;
}
+ static Allocators
+ InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
+ Allocators A(Bufs);
+ return A;
+ }
+
private:
NodeArray Nodes;
RootArray Roots;
@@ -323,16 +359,27 @@ public:
void enterFunction(const int32_t FId, uint64_t TSC,
uint16_t CPU) XRAY_NEVER_INSTRUMENT {
DCHECK_NE(FId, 0);
- // This function primarily deals with ensuring that the ShadowStack is
- // consistent and ready for when an exit event is encountered.
+
+ // If we're already overflowed the function call stack, do not bother
+ // attempting to record any more function entries.
+ if (UNLIKELY(OverflowedFunctions)) {
+ ++OverflowedFunctions;
+ return;
+ }
+
+ // If this is the first function we've encountered, we want to set up the
+ // node(s) and treat it as a root.
if (UNLIKELY(ShadowStack.empty())) {
- auto NewRoot = Nodes.AppendEmplace(
- nullptr, NodeIdPairArray{*NodeIdPairAllocator}, 0u, 0u, FId);
+ auto *NewRoot = Nodes.AppendEmplace(
+ nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
if (UNLIKELY(NewRoot == nullptr))
return;
- if (Roots.Append(NewRoot) == nullptr)
+ if (Roots.AppendEmplace(NewRoot) == nullptr) {
+ Nodes.trim(1);
return;
+ }
if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
+ Nodes.trim(1);
Roots.trim(1);
++OverflowedFunctions;
return;
@@ -340,13 +387,14 @@ public:
return;
}
- auto &Top = ShadowStack.back();
- auto TopNode = Top.NodePtr;
+ // From this point on, we require that the stack is not empty.
+ DCHECK(!ShadowStack.empty());
+ auto TopNode = ShadowStack.back().NodePtr;
DCHECK_NE(TopNode, nullptr);
- // If we've seen this callee before, then we just access that node and place
- // that on the top of the stack.
- auto Callee = TopNode->Callees.find_element(
+ // If we've seen this callee before, then we access that node and place that
+ // on the top of the stack.
+ auto* Callee = TopNode->Callees.find_element(
[FId](const NodeIdPair &NR) { return NR.FId == FId; });
if (Callee != nullptr) {
CHECK_NE(Callee->NodePtr, nullptr);
@@ -356,7 +404,7 @@ public:
}
// This means we've never seen this stack before, create a new node here.
- auto NewNode = Nodes.AppendEmplace(
+ auto* NewNode = Nodes.AppendEmplace(
TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
if (UNLIKELY(NewNode == nullptr))
return;
@@ -364,7 +412,6 @@ public:
TopNode->Callees.AppendEmplace(NewNode, FId);
if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
++OverflowedFunctions;
- DCHECK_NE(ShadowStack.back().NodePtr, nullptr);
return;
}
@@ -456,11 +503,13 @@ public:
if (UNLIKELY(NewRoot == nullptr))
return;
- O.Roots.Append(NewRoot);
+ if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
+ return;
// TODO: Figure out what to do if we fail to allocate any more stack
// space. Maybe warn or report once?
- DFSStack.AppendEmplace(Root, NewRoot);
+ if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
+ return;
while (!DFSStack.empty()) {
NodeAndParent NP = DFSStack.back();
DCHECK_NE(NP.Node, nullptr);
@@ -473,8 +522,12 @@ public:
Callee.FId);
if (UNLIKELY(NewNode == nullptr))
return;
- NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
- DFSStack.AppendEmplace(Callee.NodePtr, NewNode);
+ if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
+ nullptr))
+ return;
+ if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
+ nullptr))
+ return;
}
}
}
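
Editor's note on the xray_function_call_trie.h hunks above: Allocators::Buffers bundles the four BufferQueue buffers a trie needs, and InitAllocatorsFromBuffers() builds the per-array allocators on top of them instead of heap memory. A hedged sketch of the intended hand-off follows; the helper name tracePerThreadSketch and the enter/exit arguments are illustrative, and error handling is reduced to a simple bail-out.

    #include "xray_buffer_queue.h"
    #include "xray_function_call_trie.h"

    namespace __xray {

    // Illustrative helper: run one thread's trie entirely out of BufferQueue
    // memory, then give the buffers back to the queue.
    void tracePerThreadSketch(BufferQueue &BQ) {
      FunctionCallTrie::Allocators::Buffers Buffers;
      if (BQ.getBuffer(Buffers.NodeBuffer) != BufferQueue::ErrorCode::Ok ||
          BQ.getBuffer(Buffers.RootsBuffer) != BufferQueue::ErrorCode::Ok ||
          BQ.getBuffer(Buffers.ShadowStackBuffer) != BufferQueue::ErrorCode::Ok ||
          BQ.getBuffer(Buffers.NodeIdPairBuffer) != BufferQueue::ErrorCode::Ok)
        return; // Real code would release whatever was already acquired.

      {
        auto Allocators = FunctionCallTrie::InitAllocatorsFromBuffers(Buffers);
        FunctionCallTrie Trie(Allocators);
        Trie.enterFunction(/*FId=*/1, /*TSC=*/1, /*CPU=*/0);
        Trie.exitFunction(/*FId=*/1, /*TSC=*/2, /*CPU=*/0);
      } // Destroy the trie and allocators before releasing their memory.

      BQ.releaseBuffer(Buffers.NodeBuffer);
      BQ.releaseBuffer(Buffers.RootsBuffer);
      BQ.releaseBuffer(Buffers.ShadowStackBuffer);
      BQ.releaseBuffer(Buffers.NodeIdPairBuffer);
    }

    } // namespace __xray

The alternative ending, used by the runtime itself, is to hand the trie, allocators, and buffers to profileCollectorService::post() with std::move; that path is sketched after the xray_profile_collector.cc diff below.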
diff --git a/compiler-rt/lib/xray/xray_profile_collector.cc b/compiler-rt/lib/xray/xray_profile_collector.cc
index 2ef3ebd940c..dc3a8206984 100644
--- a/compiler-rt/lib/xray/xray_profile_collector.cc
+++ b/compiler-rt/lib/xray/xray_profile_collector.cc
@@ -57,52 +57,90 @@ struct BlockHeader {
u64 ThreadId;
};
-using ThreadTriesArray = Array<ThreadTrie>;
+struct ThreadData {
+ BufferQueue *BQ;
+ FunctionCallTrie::Allocators::Buffers Buffers;
+ FunctionCallTrie::Allocators Allocators;
+ FunctionCallTrie FCT;
+ tid_t TId;
+};
+
+using ThreadDataArray = Array<ThreadData>;
+using ThreadDataAllocator = ThreadDataArray::AllocatorType;
+
+// We use a separate buffer queue for the backing store for the allocator used
+// by the ThreadData array. This lets us host the buffers, allocators, and tries
+// associated with a thread by moving the data into the array instead of
+// attempting to copy the data to a separately backed set of tries.
+static typename std::aligned_storage<
+ sizeof(BufferQueue), alignof(BufferQueue)>::type BufferQueueStorage;
+static BufferQueue *BQ = nullptr;
+static BufferQueue::Buffer Buffer;
+static typename std::aligned_storage<sizeof(ThreadDataAllocator),
+ alignof(ThreadDataAllocator)>::type
+ ThreadDataAllocatorStorage;
+static typename std::aligned_storage<sizeof(ThreadDataArray),
+ alignof(ThreadDataArray)>::type
+ ThreadDataArrayStorage;
+
+static ThreadDataAllocator *TDAllocator = nullptr;
+static ThreadDataArray *TDArray = nullptr;
+
using ProfileBufferArray = Array<ProfileBuffer>;
-using ThreadTriesArrayAllocator = typename ThreadTriesArray::AllocatorType;
using ProfileBufferArrayAllocator = typename ProfileBufferArray::AllocatorType;
// These need to be global aligned storage to avoid dynamic initialization. We
// need these to be aligned to allow us to placement new objects into the
// storage, and have pointers to those objects be appropriately aligned.
-static typename std::aligned_storage<sizeof(FunctionCallTrie::Allocators)>::type
- AllocatorStorage;
-static typename std::aligned_storage<sizeof(ThreadTriesArray)>::type
- ThreadTriesStorage;
static typename std::aligned_storage<sizeof(ProfileBufferArray)>::type
ProfileBuffersStorage;
-static typename std::aligned_storage<sizeof(ThreadTriesArrayAllocator)>::type
- ThreadTriesArrayAllocatorStorage;
static typename std::aligned_storage<sizeof(ProfileBufferArrayAllocator)>::type
ProfileBufferArrayAllocatorStorage;
-static ThreadTriesArray *ThreadTries = nullptr;
-static ThreadTriesArrayAllocator *ThreadTriesAllocator = nullptr;
-static ProfileBufferArray *ProfileBuffers = nullptr;
static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr;
-static FunctionCallTrie::Allocators *GlobalAllocators = nullptr;
+static ProfileBufferArray *ProfileBuffers = nullptr;
+
+// Use a global flag to determine whether the collector implementation has been
+// initialized.
+static atomic_uint8_t CollectorInitialized{0};
} // namespace
-void post(const FunctionCallTrie &T, tid_t TId) XRAY_NEVER_INSTRUMENT {
- static pthread_once_t Once = PTHREAD_ONCE_INIT;
- pthread_once(
- &Once, +[]() XRAY_NEVER_INSTRUMENT { reset(); });
+void post(BufferQueue *Q, FunctionCallTrie &&T,
+ FunctionCallTrie::Allocators &&A,
+ FunctionCallTrie::Allocators::Buffers &&B,
+ tid_t TId) XRAY_NEVER_INSTRUMENT {
+ DCHECK_NE(Q, nullptr);
+
+ // Bail out early if the collector has not been initialized.
+ if (!atomic_load(&CollectorInitialized, memory_order_acquire)) {
+ T.~FunctionCallTrie();
+ A.~Allocators();
+ Q->releaseBuffer(B.NodeBuffer);
+ Q->releaseBuffer(B.RootsBuffer);
+ Q->releaseBuffer(B.ShadowStackBuffer);
+ Q->releaseBuffer(B.NodeIdPairBuffer);
+ B.~Buffers();
+ return;
+ }
- ThreadTrie *Item = nullptr;
{
SpinMutexLock Lock(&GlobalMutex);
- if (GlobalAllocators == nullptr || ThreadTries == nullptr)
- return;
-
- Item = ThreadTries->Append({});
- if (Item == nullptr)
- return;
-
- Item->TId = TId;
- auto Trie = reinterpret_cast<FunctionCallTrie *>(&Item->TrieStorage);
- new (Trie) FunctionCallTrie(*GlobalAllocators);
- T.deepCopyInto(*Trie);
+ DCHECK_NE(TDAllocator, nullptr);
+ DCHECK_NE(TDArray, nullptr);
+
+ if (TDArray->AppendEmplace(Q, std::move(B), std::move(A), std::move(T),
+ TId) == nullptr) {
+ // If we fail to add the data to the array, we should destroy the objects
+ // handed us.
+ T.~FunctionCallTrie();
+ A.~Allocators();
+ Q->releaseBuffer(B.NodeBuffer);
+ Q->releaseBuffer(B.RootsBuffer);
+ Q->releaseBuffer(B.ShadowStackBuffer);
+ Q->releaseBuffer(B.NodeIdPairBuffer);
+ B.~Buffers();
+ }
}
}
@@ -133,11 +171,13 @@ populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA,
using StackAllocator = typename StackArray::AllocatorType;
StackAllocator StackAlloc(profilingFlags()->stack_allocator_max);
StackArray DFSStack(StackAlloc);
- for (const auto R : Trie.getRoots()) {
+ for (const auto *R : Trie.getRoots()) {
DFSStack.Append(R);
while (!DFSStack.empty()) {
- auto Node = DFSStack.back();
+ auto *Node = DFSStack.back();
DFSStack.trim(1);
+ if (Node == nullptr)
+ continue;
auto Record = PRs.AppendEmplace(PathArray{PA}, Node);
if (Record == nullptr)
return;
@@ -191,40 +231,54 @@ static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header,
} // namespace
void serialize() XRAY_NEVER_INSTRUMENT {
- SpinMutexLock Lock(&GlobalMutex);
-
- if (GlobalAllocators == nullptr || ThreadTries == nullptr ||
- ProfileBuffers == nullptr)
+ if (!atomic_load(&CollectorInitialized, memory_order_acquire))
return;
+ SpinMutexLock Lock(&GlobalMutex);
+
// Clear out the global ProfileBuffers, if it's not empty.
for (auto &B : *ProfileBuffers)
deallocateBuffer(reinterpret_cast<unsigned char *>(B.Data), B.Size);
ProfileBuffers->trim(ProfileBuffers->size());
- if (ThreadTries->empty())
+ DCHECK_NE(TDArray, nullptr);
+ if (TDArray->empty())
return;
// Then repopulate the global ProfileBuffers.
u32 I = 0;
- for (const auto &ThreadTrie : *ThreadTries) {
+ auto MaxSize = profilingFlags()->global_allocator_max;
+ auto ProfileArena = allocateBuffer(MaxSize);
+ if (ProfileArena == nullptr)
+ return;
+
+ auto ProfileArenaCleanup = at_scope_exit(
+ [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(ProfileArena, MaxSize); });
+
+ auto PathArena = allocateBuffer(profilingFlags()->global_allocator_max);
+ if (PathArena == nullptr)
+ return;
+
+ auto PathArenaCleanup = at_scope_exit(
+ [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(PathArena, MaxSize); });
+
+ for (const auto &ThreadTrie : *TDArray) {
using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
- ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max);
+ ProfileRecordAllocator PRAlloc(ProfileArena,
+ profilingFlags()->global_allocator_max);
ProfileRecord::PathAllocator PathAlloc(
- profilingFlags()->global_allocator_max);
+ PathArena, profilingFlags()->global_allocator_max);
ProfileRecordArray ProfileRecords(PRAlloc);
// First, we want to compute the amount of space we're going to need. We'll
// use a local allocator and an __xray::Array<...> to store the intermediary
// data, then compute the size as we're going along. Then we'll allocate the
// contiguous space to contain the thread buffer data.
- const auto &Trie =
- *reinterpret_cast<const FunctionCallTrie *>(&(ThreadTrie.TrieStorage));
- if (Trie.getRoots().empty())
+ if (ThreadTrie.FCT.getRoots().empty())
continue;
- populateRecords(ProfileRecords, PathAlloc, Trie);
- DCHECK(!Trie.getRoots().empty());
+ populateRecords(ProfileRecords, PathAlloc, ThreadTrie.FCT);
+ DCHECK(!ThreadTrie.FCT.getRoots().empty());
DCHECK(!ProfileRecords.empty());
// Go through each record, to compute the sizes.
@@ -241,15 +295,16 @@ void serialize() XRAY_NEVER_INSTRUMENT {
CumulativeSizes += 20 + (4 * Record.Path.size());
BlockHeader Header{16 + CumulativeSizes, I++, ThreadTrie.TId};
- auto Buffer = ProfileBuffers->Append({});
- Buffer->Size = sizeof(Header) + CumulativeSizes;
- Buffer->Data = allocateBuffer(Buffer->Size);
- DCHECK_NE(Buffer->Data, nullptr);
- serializeRecords(Buffer, Header, ProfileRecords);
+ auto B = ProfileBuffers->Append({});
+ B->Size = sizeof(Header) + CumulativeSizes;
+ B->Data = allocateBuffer(B->Size);
+ DCHECK_NE(B->Data, nullptr);
+ serializeRecords(B, Header, ProfileRecords);
}
}
void reset() XRAY_NEVER_INSTRUMENT {
+ atomic_store(&CollectorInitialized, 0, memory_order_release);
SpinMutexLock Lock(&GlobalMutex);
if (ProfileBuffers != nullptr) {
@@ -257,46 +312,68 @@ void reset() XRAY_NEVER_INSTRUMENT {
for (auto &B : *ProfileBuffers)
deallocateBuffer(reinterpret_cast<uint8_t *>(B.Data), B.Size);
ProfileBuffers->trim(ProfileBuffers->size());
+ ProfileBuffers = nullptr;
}
- if (ThreadTries != nullptr) {
- // Clear out the function call tries per thread.
- for (auto &T : *ThreadTries) {
- auto Trie = reinterpret_cast<FunctionCallTrie *>(&T.TrieStorage);
- Trie->~FunctionCallTrie();
+ if (TDArray != nullptr) {
+ // Release the resources as required.
+ for (auto &TD : *TDArray) {
+ TD.BQ->releaseBuffer(TD.Buffers.NodeBuffer);
+ TD.BQ->releaseBuffer(TD.Buffers.RootsBuffer);
+ TD.BQ->releaseBuffer(TD.Buffers.ShadowStackBuffer);
+ TD.BQ->releaseBuffer(TD.Buffers.NodeIdPairBuffer);
}
- ThreadTries->trim(ThreadTries->size());
+ // We don't bother destroying the array here because we've already
+ // potentially freed the backing store for the array. Instead we're going to
+ // reset the pointer to nullptr, and re-use the storage later
+ // (placement-new'ing into the storage as-is).
+ TDArray = nullptr;
}
- // Reset the global allocators.
- if (GlobalAllocators != nullptr)
- GlobalAllocators->~Allocators();
+ if (TDAllocator != nullptr) {
+ TDAllocator->~Allocator();
+ TDAllocator = nullptr;
+ }
- GlobalAllocators =
- reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorStorage);
- new (GlobalAllocators)
- FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
+ if (Buffer.Data != nullptr) {
+ BQ->releaseBuffer(Buffer);
+ }
- if (ThreadTriesAllocator != nullptr)
- ThreadTriesAllocator->~ThreadTriesArrayAllocator();
+ if (BQ == nullptr) {
+ bool Success = false;
+ new (&BufferQueueStorage)
+ BufferQueue(profilingFlags()->global_allocator_max, 1, Success);
+ if (!Success)
+ return;
+ BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
+ } else {
+ BQ->finalize();
- ThreadTriesAllocator = reinterpret_cast<ThreadTriesArrayAllocator *>(
- &ThreadTriesArrayAllocatorStorage);
- new (ThreadTriesAllocator)
- ThreadTriesArrayAllocator(profilingFlags()->global_allocator_max);
- ThreadTries = reinterpret_cast<ThreadTriesArray *>(&ThreadTriesStorage);
- new (ThreadTries) ThreadTriesArray(*ThreadTriesAllocator);
+ if (BQ->init(profilingFlags()->global_allocator_max, 1) !=
+ BufferQueue::ErrorCode::Ok)
+ return;
+ }
- if (ProfileBuffersAllocator != nullptr)
- ProfileBuffersAllocator->~ProfileBufferArrayAllocator();
+ if (BQ->getBuffer(Buffer) != BufferQueue::ErrorCode::Ok)
+ return;
+ new (&ProfileBufferArrayAllocatorStorage)
+ ProfileBufferArrayAllocator(profilingFlags()->global_allocator_max);
ProfileBuffersAllocator = reinterpret_cast<ProfileBufferArrayAllocator *>(
&ProfileBufferArrayAllocatorStorage);
- new (ProfileBuffersAllocator)
- ProfileBufferArrayAllocator(profilingFlags()->global_allocator_max);
+
+ new (&ProfileBuffersStorage) ProfileBufferArray(*ProfileBuffersAllocator);
ProfileBuffers =
reinterpret_cast<ProfileBufferArray *>(&ProfileBuffersStorage);
- new (ProfileBuffers) ProfileBufferArray(*ProfileBuffersAllocator);
+
+ new (&ThreadDataAllocatorStorage)
+ ThreadDataAllocator(Buffer.Data, Buffer.Size);
+ TDAllocator =
+ reinterpret_cast<ThreadDataAllocator *>(&ThreadDataAllocatorStorage);
+ new (&ThreadDataArrayStorage) ThreadDataArray(*TDAllocator);
+ TDArray = reinterpret_cast<ThreadDataArray *>(&ThreadDataArrayStorage);
+
+ atomic_store(&CollectorInitialized, 1, memory_order_release);
}
XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT {
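
Editor's note on the xray_profile_collector.cc rework above: reset() now (re)initializes the collector's own BufferQueue and flips CollectorInitialized, post() takes ownership of a thread's trie, allocators, and buffers by move, and serialize() renders each ThreadData entry into a ProfileBuffer block. A hedged end-to-end sketch of that lifecycle follows; the function name is illustrative, and the nextBuffer() convention (start from an empty XRayBuffer, stop when Data is nullptr) is an assumption based on how XRay log buffers are normally drained.

    #include "xray_profile_collector.h"
    #include "xray/xray_log_interface.h" // XRayBuffer
    #include <utility>

    namespace __xray {

    // Illustrative driver: hand one thread's data to the collector and walk
    // the serialized blocks afterwards.
    void collectAndDrainSketch(BufferQueue *Q, FunctionCallTrie &&T,
                               FunctionCallTrie::Allocators &&A,
                               FunctionCallTrie::Allocators::Buffers &&B,
                               tid_t TId) {
      // reset() must have run (typically once, at profiling init) before any
      // post(); otherwise post() immediately destroys what it is given.
      profileCollectorService::reset();

      // The collector now owns T, A, and B; the caller must not reuse them.
      profileCollectorService::post(Q, std::move(T), std::move(A), std::move(B),
                                    TId);

      // Turn the accumulated per-thread tries into serialized blocks.
      profileCollectorService::serialize();

      // Assumption: iteration starts from {nullptr, 0} and ends when the
      // returned buffer's Data is nullptr.
      for (auto Buf = profileCollectorService::nextBuffer({nullptr, 0});
           Buf.Data != nullptr; Buf = profileCollectorService::nextBuffer(Buf)) {
        // Write Buf.Data / Buf.Size out, e.g. append to the profile file.
      }
    }

    } // namespace __xray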
diff --git a/compiler-rt/lib/xray/xray_profile_collector.h b/compiler-rt/lib/xray/xray_profile_collector.h
index 335043db952..86c4ce85379 100644
--- a/compiler-rt/lib/xray/xray_profile_collector.h
+++ b/compiler-rt/lib/xray/xray_profile_collector.h
@@ -33,27 +33,13 @@ namespace profileCollectorService {
/// Posts the FunctionCallTrie associated with a specific Thread ID. This
/// will:
///
-/// - Make a copy of the FunctionCallTrie and store that against the Thread
-/// ID. This will use the global allocator for the service-managed
-/// FunctionCallTrie instances.
-/// - Queue up a pointer to the FunctionCallTrie.
-/// - If the queue is long enough (longer than some arbitrary threshold) we
-/// then pre-calculate a single FunctionCallTrie for the whole process.
+/// Moves the collection of FunctionCallTrie, Allocators, and Buffers associated
+/// with a thread's data to the queue. This takes ownership of the memory
+/// associated with a thread, and manages those exclusively.
///
-///
-/// We are making a copy of the FunctionCallTrie because the intent is to have
-/// this function be called at thread exit, or soon after the profiling
-/// handler is finalized through the XRay APIs. By letting threads each
-/// process their own thread-local FunctionCallTrie instances, we're removing
-/// the need for synchronisation across threads while we're profiling.
-/// However, once we're done profiling, we can then collect copies of these
-/// FunctionCallTrie instances and pay the cost of the copy.
-///
-/// NOTE: In the future, if this turns out to be more costly than "moving" the
-/// FunctionCallTrie instances from the owning thread to the collector
-/// service, then we can change the implementation to do it this way (moving)
-/// instead.
-void post(const FunctionCallTrie &T, tid_t TId);
+void post(BufferQueue *Q, FunctionCallTrie &&T,
+ FunctionCallTrie::Allocators &&A,
+ FunctionCallTrie::Allocators::Buffers &&B, tid_t TId);
/// The serialize will process all FunctionCallTrie instances in memory, and
/// turn those into specifically formatted blocks, each describing the
diff --git a/compiler-rt/lib/xray/xray_profiling.cc b/compiler-rt/lib/xray/xray_profiling.cc
index 6db4b6ff9a0..4323170cd1b 100644
--- a/compiler-rt/lib/xray/xray_profiling.cc
+++ b/compiler-rt/lib/xray/xray_profiling.cc
@@ -19,6 +19,7 @@
#include "sanitizer_common/sanitizer_flags.h"
#include "xray/xray_interface.h"
#include "xray/xray_log_interface.h"
+#include "xray_buffer_queue.h"
#include "xray_flags.h"
#include "xray_profile_collector.h"
#include "xray_profiling_flags.h"
@@ -46,6 +47,13 @@ struct ProfilingData {
static pthread_key_t ProfilingKey;
+// We use a global buffer queue, which gets initialized once when profiling is
+// set up, and gets reset when profiling is "done".
+static std::aligned_storage<sizeof(BufferQueue), alignof(BufferQueue)>::type
+ BufferQueueStorage;
+static BufferQueue *BQ = nullptr;
+
+thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
alignof(FunctionCallTrie::Allocators)>::type
AllocatorsStorage;
@@ -81,17 +89,58 @@ static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
uptr Allocators = 0;
if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
memory_order_acq_rel)) {
- new (&AllocatorsStorage)
- FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
+ bool Success = false;
+ auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+ if (!Success)
+ atomic_store(&TLD.Allocators, 0, memory_order_release);
+ });
+
+ // Acquire a set of buffers for this thread.
+ if (BQ == nullptr)
+ return nullptr;
+
+ if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
+ return nullptr;
+ auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+ if (!Success)
+ BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
+ });
+
+ if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
+ return nullptr;
+ auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+ if (!Success)
+ BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
+ });
+
+ if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
+ BufferQueue::ErrorCode::Ok)
+ return nullptr;
+ auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+ if (!Success)
+ BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
+ });
+
+ if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
+ BufferQueue::ErrorCode::Ok)
+ return nullptr;
+
+ Success = true;
+ new (&AllocatorsStorage) FunctionCallTrie::Allocators(
+ FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
Allocators = reinterpret_cast<uptr>(
reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
atomic_store(&TLD.Allocators, Allocators, memory_order_release);
}
+ if (Allocators == 1)
+ return nullptr;
+
uptr FCT = 0;
if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
- new (&FunctionCallTrieStorage) FunctionCallTrie(
- *reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators));
+ new (&FunctionCallTrieStorage)
+ FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
+ atomic_load_relaxed(&TLD.Allocators)));
FCT = reinterpret_cast<uptr>(
reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
atomic_store(&TLD.FCT, FCT, memory_order_release);
@@ -104,10 +153,6 @@ static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
}
static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
- RecursionGuard TLDInit(TLDInitGuard);
- if (!TLDInit)
- return;
-
auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
&FunctionCallTrieStorage)))
@@ -125,7 +170,7 @@ static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
if (!TLDInit)
return;
- uptr P = atomic_load(&T.FCT, memory_order_acquire);
+ uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
if (P != reinterpret_cast<uptr>(
reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
return;
@@ -133,10 +178,21 @@ static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
DCHECK_NE(FCT, nullptr);
- if (!FCT->getRoots().empty())
- profileCollectorService::post(*FCT, GetTid());
+ uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
+ if (A !=
+ reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
+ return;
- cleanupTLD();
+ auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
+ DCHECK_NE(Allocators, nullptr);
+
+ // Always move the data into the profile collector.
+ profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
+ std::move(ThreadBuffers), GetTid());
+
+ // Re-initialize the ThreadBuffers object to a known "default" state.
+ ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
}
} // namespace
@@ -176,8 +232,6 @@ XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
}
- postCurrentThreadFCT(TLD);
-
// At this point, we'll create the file that will contain the profile, but
// only if the options say so.
if (!profilingFlags()->no_flush) {
@@ -205,14 +259,11 @@ XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
}
}
- // Clean up the current thread's TLD information as well.
- cleanupTLD();
-
profileCollectorService::reset();
atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
memory_order_release);
- atomic_store(&ProfilerLogStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
+ atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
memory_order_release);
return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
@@ -272,6 +323,12 @@ XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
return static_cast<XRayLogInitStatus>(CurrentStatus);
}
+ // Mark then finalize the current generation of buffers. This allows us to let
+ // the threads currently holding onto new buffers still use them, but let the
+ // last reference do the memory cleanup.
+ DCHECK_NE(BQ, nullptr);
+ BQ->finalize();
+
// Wait a grace period to allow threads to see that we're finalizing.
SleepForMillis(profilingFlags()->grace_period_ms);
@@ -293,8 +350,8 @@ XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
}
XRayLogInitStatus
-profilingLoggingInit(UNUSED size_t BufferSize, UNUSED size_t BufferMax,
- void *Options, size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
+profilingLoggingInit(size_t, size_t, void *Options,
+ size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
RecursionGuard G(ReentranceGuard);
if (!G)
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
@@ -302,7 +359,7 @@ profilingLoggingInit(UNUSED size_t BufferSize, UNUSED size_t BufferMax,
s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
XRayLogInitStatus::XRAY_LOG_INITIALIZING,
- memory_order_release)) {
+ memory_order_acq_rel)) {
if (Verbosity())
Report("Cannot initialize already initialised profiling "
"implementation.\n");
@@ -331,6 +388,41 @@ profilingLoggingInit(UNUSED size_t BufferSize, UNUSED size_t BufferMax,
// We need to reset the profile data collection implementation now.
profileCollectorService::reset();
+ // Then also reset the buffer queue implementation.
+ if (BQ == nullptr) {
+ bool Success = false;
+ new (&BufferQueueStorage)
+ BufferQueue(profilingFlags()->per_thread_allocator_max,
+ profilingFlags()->buffers_max, Success);
+ if (!Success) {
+ if (Verbosity())
+ Report("Failed to initialize preallocated memory buffers!");
+ atomic_store(&ProfilerLogStatus,
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+ memory_order_release);
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ }
+
+ // If we've succeeded, set the global pointer to the initialised storage.
+ BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
+ } else {
+ BQ->finalize();
+ auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
+ profilingFlags()->buffers_max);
+
+ if (InitStatus != BufferQueue::ErrorCode::Ok) {
+ if (Verbosity())
+ Report("Failed to initialize preallocated memory buffers; error: %s",
+ BufferQueue::getErrorString(InitStatus));
+ atomic_store(&ProfilerLogStatus,
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+ memory_order_release);
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ }
+
+ DCHECK(!BQ->finalizing());
+ }
+
// We need to set up the exit handlers.
static pthread_once_t Once = PTHREAD_ONCE_INIT;
pthread_once(
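
Editor's note on the getThreadLocalData() changes in the xray_profiling.cc diff above: every buffer acquisition is now paired with an at_scope_exit undo guard, so a failure part-way through releases whatever was already taken and resets the Allocators slot. Below is a standalone sketch of that undo idiom using only the standard library; ScopeExitSketch stands in for the runtime's at_scope_exit helper, and the puts() calls stand in for getBuffer()/releaseBuffer().

    #include <cstdio>
    #include <functional>

    // Stand-in for __xray's at_scope_exit: run the stored callable when the
    // guard leaves scope.
    struct ScopeExitSketch {
      std::function<void()> Fn;
      ~ScopeExitSketch() {
        if (Fn)
          Fn();
      }
    };

    // Acquire resources step by step; a failure part-way through lets the
    // earlier guards (which check Success) roll back in reverse order.
    bool acquireBuffersSketch(bool FailOnThird) {
      bool Success = false;

      std::puts("acquire NodeBuffer");
      ScopeExitSketch UndoNode{[&] {
        if (!Success)
          std::puts("release NodeBuffer");
      }};

      std::puts("acquire RootsBuffer");
      ScopeExitSketch UndoRoots{[&] {
        if (!Success)
          std::puts("release RootsBuffer");
      }};

      if (FailOnThird)
        return false; // Both undo guards fire on the way out.

      std::puts("acquire ShadowStackBuffer");
      Success = true; // From here on, the guards are no-ops.
      return true;
    }

    int main() { return acquireBuffersSketch(/*FailOnThird=*/true) ? 0 : 1; }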
diff --git a/compiler-rt/lib/xray/xray_profiling_flags.inc b/compiler-rt/lib/xray/xray_profiling_flags.inc
index e9230ae6418..ccd70860bf6 100644
--- a/compiler-rt/lib/xray/xray_profiling_flags.inc
+++ b/compiler-rt/lib/xray/xray_profiling_flags.inc
@@ -14,7 +14,7 @@
#error "Define XRAY_FLAG prior to including this file!"
#endif
-XRAY_FLAG(uptr, per_thread_allocator_max, 2 << 20,
+XRAY_FLAG(uptr, per_thread_allocator_max, 16384,
"Maximum size of any single per-thread allocator.")
XRAY_FLAG(uptr, global_allocator_max, 2 << 24,
"Maximum size of the global allocator for profile storage.")
@@ -27,3 +27,6 @@ XRAY_FLAG(int, grace_period_ms, 1,
XRAY_FLAG(bool, no_flush, false,
"Set to true if we want the profiling implementation to not write "
"out files.")
+XRAY_FLAG(int, buffers_max, 128,
+ "The number of buffers to pre-allocate used by the profiling "
+ "implementation.")
diff --git a/compiler-rt/lib/xray/xray_segmented_array.h b/compiler-rt/lib/xray/xray_segmented_array.h
index d4feace381c..bc7e9379f63 100644
--- a/compiler-rt/lib/xray/xray_segmented_array.h
+++ b/compiler-rt/lib/xray/xray_segmented_array.h
@@ -372,7 +372,7 @@ public:
auto Base = &Tail->Data;
auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
DCHECK_LE(AlignedOffset + sizeof(T),
- reinterpret_cast<unsigned char *>(Tail) + SegmentSize);
+ reinterpret_cast<unsigned char *>(Base) + SegmentSize);
// In-place construct at Position.
new (AlignedOffset) T{std::forward<Args>(args)...};