Diffstat (limited to 'compiler-rt/lib/xray')
-rw-r--r--  compiler-rt/lib/xray/xray_allocator.h     |  20
-rw-r--r--  compiler-rt/lib/xray/xray_buffer_queue.cc | 102
-rw-r--r--  compiler-rt/lib/xray/xray_buffer_queue.h  |  25
-rw-r--r--  compiler-rt/lib/xray/xray_fdr_logging.cc  |  50
4 files changed, 95 insertions(+), 102 deletions(-)
diff --git a/compiler-rt/lib/xray/xray_allocator.h b/compiler-rt/lib/xray/xray_allocator.h
index 5a782739dcd..aea12809b50 100644
--- a/compiler-rt/lib/xray/xray_allocator.h
+++ b/compiler-rt/lib/xray/xray_allocator.h
@@ -50,23 +50,33 @@ template <class T> void deallocate(T *B) XRAY_NEVER_INSTRUMENT {
internal_munmap(B, sizeof(T));
}
-inline void *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT {
- auto B = reinterpret_cast<void *>(internal_mmap(
- NULL, S, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+template <class T = uint8_t> T *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT {
+ auto B = reinterpret_cast<void *>(
+ internal_mmap(NULL, S * sizeof(T), PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
if (B == MAP_FAILED) {
if (Verbosity())
Report("XRay Profiling: Failed to allocate memory of size %d.\n", S);
return nullptr;
}
- return B;
+ return reinterpret_cast<T *>(B);
}
-inline void deallocateBuffer(void *B, size_t S) XRAY_NEVER_INSTRUMENT {
+template <class T> void deallocateBuffer(T *B, size_t S) XRAY_NEVER_INSTRUMENT {
if (B == nullptr)
return;
internal_munmap(B, S);
}
+template <class T, class... U>
+T *initArray(size_t N, U &&... Us) XRAY_NEVER_INSTRUMENT {
+ auto A = allocateBuffer<T>(N);
+ if (A != nullptr)
+ while (N > 0)
+ new (A + (--N)) T(std::forward<U>(Us)...);
+ return A;
+}
+
/// The Allocator type hands out fixed-sized chunks of memory that are
/// cache-line aligned and sized. This is useful for placement of
/// performance-sensitive data in memory that's frequently accessed. The
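
The allocator changes above replace the void*-returning allocateBuffer with a template that mmaps N elements of T and returns typed storage, and add initArray, which placement-constructs every element while forwarding a common set of constructor arguments. A minimal standalone sketch of the same pattern, using raw mmap in place of the sanitizer wrappers (all names here are illustrative, not the XRay API):

#include <cstddef>
#include <new>
#include <sys/mman.h>
#include <utility>

// Typed anonymous mmap, as in the new allocateBuffer<T>: the requested
// count is scaled by sizeof(T) and the mapping is returned as T*.
template <class T = unsigned char> T *allocBuffer(std::size_t N) {
  void *B = mmap(nullptr, N * sizeof(T), PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  return B == MAP_FAILED ? nullptr : static_cast<T *>(B);
}

template <class T> void deallocBuffer(T *B, std::size_t N) {
  if (B != nullptr)
    munmap(B, N * sizeof(T));
}

// Sketch of initArray: placement-construct N elements back to front,
// handing the same argument pack to every constructor call.
template <class T, class... U> T *makeArray(std::size_t N, U &&... Us) {
  T *A = allocBuffer<T>(N);
  if (A != nullptr)
    while (N > 0)
      new (A + (--N)) T(std::forward<U>(Us)...);
  return A;
}

Forwarding the same pack into every element is only safe when the arguments are empty or copyable, which holds for the call sites in this patch (initArray<BufferQueue::BufferRep>(N) passes none).
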
diff --git a/compiler-rt/lib/xray/xray_buffer_queue.cc b/compiler-rt/lib/xray/xray_buffer_queue.cc
index 5bdd9148d54..5a88ecd3399 100644
--- a/compiler-rt/lib/xray/xray_buffer_queue.cc
+++ b/compiler-rt/lib/xray/xray_buffer_queue.cc
@@ -16,68 +16,42 @@
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "sanitizer_common/sanitizer_posix.h"
+#include "xray_allocator.h"
+#include "xray_defs.h"
#include <memory>
#include <sys/mman.h>
using namespace __xray;
using namespace __sanitizer;
-template <class T> static T *allocRaw(size_t N) {
- // TODO: Report errors?
- void *A = reinterpret_cast<void *>(
- internal_mmap(NULL, N * sizeof(T), PROT_WRITE | PROT_READ,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
- return (A == MAP_FAILED) ? nullptr : reinterpret_cast<T *>(A);
-}
-
-template <class T> static void deallocRaw(T *ptr, size_t N) {
- // TODO: Report errors?
- if (ptr != nullptr)
- internal_munmap(ptr, N);
-}
-
-template <class T> static T *initArray(size_t N) {
- auto A = allocRaw<T>(N);
- if (A != nullptr)
- while (N > 0)
- new (A + (--N)) T();
- return A;
-}
-
-BufferQueue::BufferQueue(size_t B, size_t N, bool &Success)
- : BufferSize(B), Buffers(initArray<BufferQueue::BufferRep>(N)),
- BufferCount(N), Finalizing{0}, OwnedBuffers(initArray<void *>(N)),
- Next(Buffers), First(Buffers), LiveBuffers(0) {
- if (Buffers == nullptr) {
+BufferQueue::BufferQueue(size_t B, size_t N,
+ bool &Success) XRAY_NEVER_INSTRUMENT
+ : BufferSize(B),
+ BufferCount(N),
+ Mutex(),
+ Finalizing{0},
+      BackingStore(allocateBuffer(B * N)),
+ Buffers(initArray<BufferQueue::BufferRep>(N)),
+ Next(Buffers),
+ First(Buffers),
+ LiveBuffers(0) {
+ if (BackingStore == nullptr) {
Success = false;
return;
}
- if (OwnedBuffers == nullptr) {
- // Clean up the buffers we've already allocated.
- for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B)
- B->~BufferRep();
- deallocRaw(Buffers, N);
+ if (Buffers == nullptr) {
+ deallocateBuffer(BackingStore, BufferSize * BufferCount);
Success = false;
return;
- };
+ }
for (size_t i = 0; i < N; ++i) {
auto &T = Buffers[i];
- void *Tmp = allocRaw<char>(BufferSize);
- if (Tmp == nullptr) {
- Success = false;
- return;
- }
- auto *Extents = allocRaw<BufferExtents>(1);
- if (Extents == nullptr) {
- Success = false;
- return;
- }
auto &Buf = T.Buff;
- Buf.Data = Tmp;
+ Buf.Data = reinterpret_cast<char *>(BackingStore) + (BufferSize * i);
Buf.Size = B;
- Buf.Extents = Extents;
- OwnedBuffers[i] = Tmp;
+ atomic_store(&Buf.Extents, 0, memory_order_release);
+ T.Used = false;
}
Success = true;
}
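
The constructor now makes exactly two mappings, however many buffers are requested: one contiguous backing store of B * N bytes that is sliced into per-buffer Data pointers, and one array of BufferRep bookkeeping records; if the second allocation fails, the first is unmapped before reporting failure. A sketch of the slicing arithmetic with stand-in types:

#include <cstddef>
#include <cstdint>

struct Slice {
  char *Data;
  std::size_t Size;
};

// Carve N fixed-size slices out of one contiguous backing store, the way
// the rewritten constructor assigns each Buf.Data.
void sliceBackingStore(std::uint8_t *BackingStore, std::size_t BufferSize,
                       std::size_t N, Slice *Out) {
  for (std::size_t I = 0; I < N; ++I) {
    Out[I].Data = reinterpret_cast<char *>(BackingStore) + BufferSize * I;
    Out[I].Size = BufferSize;
  }
}
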
@@ -85,13 +59,17 @@ BufferQueue::BufferQueue(size_t B, size_t N, bool &Success)
BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
if (atomic_load(&Finalizing, memory_order_acquire))
return ErrorCode::QueueFinalizing;
+
SpinMutexLock Guard(&Mutex);
if (LiveBuffers == BufferCount)
return ErrorCode::NotEnoughMemory;
auto &T = *Next;
auto &B = T.Buff;
- Buf = B;
+ auto Extents = atomic_load(&B.Extents, memory_order_acquire);
+ atomic_store(&Buf.Extents, Extents, memory_order_release);
+ Buf.Data = B.Data;
+ Buf.Size = B.Size;
T.Used = true;
++LiveBuffers;
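
Because Extents is now an atomic_uint64_t embedded in Buffer (see the header change below), Buffer can no longer be copied wholesale with Buf = B: the sanitizer atomics are not copyable. getBuffer and releaseBuffer therefore copy Data and Size member-wise and move the extents value through an explicit acquire-load/release-store pair. The same hand-off, with std::atomic standing in for the sanitizer atomics:

#include <atomic>
#include <cstddef>
#include <cstdint>

struct Buffer {
  std::atomic<std::uint64_t> Extents{0}; // atomics are not copyable
  void *Data = nullptr;
  std::size_t Size = 0;
};

// Member-wise transfer replacing `Dst = Src`: publish the extents with a
// release store after reading them with an acquire load.
void transfer(Buffer &Dst, const Buffer &Src) {
  Dst.Extents.store(Src.Extents.load(std::memory_order_acquire),
                    std::memory_order_release);
  Dst.Data = Src.Data;
  Dst.Size = Src.Size;
}
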
@@ -102,15 +80,11 @@ BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
}
BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
- // Blitz through the buffers array to find the buffer.
- bool Found = false;
- for (auto I = OwnedBuffers, E = OwnedBuffers + BufferCount; I != E; ++I) {
- if (*I == Buf.Data) {
- Found = true;
- break;
- }
- }
- if (!Found)
+ // Check whether the buffer being referred to is within the bounds of the
+ // backing store's range.
+ if (Buf.Data < BackingStore ||
+ Buf.Data >
+ reinterpret_cast<char *>(BackingStore) + (BufferCount * BufferSize))
return ErrorCode::UnrecognizedBuffer;
SpinMutexLock Guard(&Mutex);
@@ -121,10 +95,14 @@ BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
return ErrorCode::NotEnoughMemory;
// Now that the buffer has been released, we mark it as "used".
- First->Buff = Buf;
+ auto Extents = atomic_load(&Buf.Extents, memory_order_acquire);
+ atomic_store(&First->Buff.Extents, Extents, memory_order_release);
+ First->Buff.Data = Buf.Data;
+ First->Buff.Size = Buf.Size;
First->Used = true;
Buf.Data = nullptr;
Buf.Size = 0;
+ atomic_store(&Buf.Extents, 0, memory_order_release);
--LiveBuffers;
if (++First == (Buffers + BufferCount))
First = Buffers;
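
With a single contiguous backing store, recognizing a returned buffer no longer requires the linear scan over OwnedBuffers; a range test against the store's bounds is enough. A sketch of that check (the sketch treats the end of the store as exclusive, which is slightly stricter than the > test above):

#include <cstddef>
#include <cstdint>

// Constant-time ownership test replacing the OwnedBuffers scan: a pointer
// belongs to the queue iff it lies inside the backing store's range.
bool ownsBuffer(const void *Ptr, const std::uint8_t *BackingStore,
                std::size_t BufferCount, std::size_t BufferSize) {
  const std::uint8_t *P = static_cast<const std::uint8_t *>(Ptr);
  return P >= BackingStore && P < BackingStore + BufferCount * BufferSize;
}
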
@@ -139,14 +117,8 @@ BufferQueue::ErrorCode BufferQueue::finalize() {
}
BufferQueue::~BufferQueue() {
- for (auto I = Buffers, E = Buffers + BufferCount; I != E; ++I) {
- auto &T = *I;
- auto &Buf = T.Buff;
- deallocRaw(Buf.Data, Buf.Size);
- deallocRaw(Buf.Extents, 1);
- }
for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B)
B->~BufferRep();
- deallocRaw(Buffers, BufferCount);
- deallocRaw(OwnedBuffers, BufferCount);
+ deallocateBuffer(Buffers, BufferCount);
+ deallocateBuffer(BackingStore, BufferSize * BufferCount);
}
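
Teardown is correspondingly simpler: since the BufferRep array lives in raw mapped memory and was built with placement new, the destructor runs each element's destructor by hand and then releases just two mappings. The general pattern, sketched standalone:

#include <cstddef>
#include <sys/mman.h>

// Manual teardown for a placement-new'd array in mmap'd storage: run the
// destructors the compiler never scheduled, then drop the mapping.
template <class T> void destroyArray(T *A, std::size_t N) {
  for (T *I = A, *E = A + N; I != E; ++I)
    I->~T();
  munmap(A, N * sizeof(T));
}
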
diff --git a/compiler-rt/lib/xray/xray_buffer_queue.h b/compiler-rt/lib/xray/xray_buffer_queue.h
index e76fa7983c9..bfb091e3f83 100644
--- a/compiler-rt/lib/xray/xray_buffer_queue.h
+++ b/compiler-rt/lib/xray/xray_buffer_queue.h
@@ -18,6 +18,7 @@
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_mutex.h"
+#include "xray_defs.h"
#include <cstddef>
namespace __xray {
@@ -29,14 +30,10 @@ namespace __xray {
/// trace collection.
class BufferQueue {
public:
- struct alignas(64) BufferExtents {
- atomic_uint64_t Size;
- };
-
struct Buffer {
+ atomic_uint64_t Extents{0};
void *Data = nullptr;
size_t Size = 0;
- BufferExtents *Extents;
};
struct BufferRep {
@@ -76,8 +73,10 @@ private:
T *operator->() const { return &(Buffers[Offset].Buff); }
- Iterator(BufferRep *Root, size_t O, size_t M)
- : Buffers(Root), Offset(O), Max(M) {
+ Iterator(BufferRep *Root, size_t O, size_t M) XRAY_NEVER_INSTRUMENT
+ : Buffers(Root),
+ Offset(O),
+ Max(M) {
// We want to advance to the first Offset where the 'Used' property is
// true, or to the end of the list/queue.
while (!Buffers[Offset].Used && Offset != Max) {
@@ -107,16 +106,18 @@ private:
// Size of each individual Buffer.
size_t BufferSize;
- BufferRep *Buffers;
-
// Amount of pre-allocated buffers.
size_t BufferCount;
SpinMutex Mutex;
atomic_uint8_t Finalizing;
- // Pointers to buffers managed/owned by the BufferQueue.
- void **OwnedBuffers;
+ // A pointer to a contiguous block of memory to serve as the backing store for
+ // all the individual buffers handed out.
+ void *BackingStore;
+
+ // A dynamically allocated array of BufferRep instances.
+ BufferRep *Buffers;
// Pointer to the next buffer to be handed out.
BufferRep *Next;
@@ -198,7 +199,7 @@ public:
/// Applies the provided function F to each Buffer in the queue, only if the
/// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a
/// releaseBuffer(...) operation).
- template <class F> void apply(F Fn) {
+ template <class F> void apply(F Fn) XRAY_NEVER_INSTRUMENT {
SpinMutexLock G(&Mutex);
for (auto I = begin(), E = end(); I != E; ++I)
Fn(*I);
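
apply runs an arbitrary callable over every used buffer while holding the queue's spin lock, and is marked XRAY_NEVER_INSTRUMENT so instrumented callers cannot re-enter the runtime through it. A hypothetical caller (usedBytes is not part of the patch), in the style of fdrLoggingFlush below:

#include "xray_buffer_queue.h"
#include <cstdint>

using namespace __xray;
using namespace __sanitizer;

// Total the bytes recorded across all used buffers by reading each
// buffer's atomic extents counter under the queue lock.
uint64_t usedBytes(BufferQueue &BQ) {
  uint64_t Total = 0;
  BQ.apply([&](const BufferQueue::Buffer &B) {
    Total += atomic_load(&B.Extents, memory_order_acquire);
  });
  return Total;
}
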
diff --git a/compiler-rt/lib/xray/xray_fdr_logging.cc b/compiler-rt/lib/xray/xray_fdr_logging.cc
index 4b308b27fc4..131c137b677 100644
--- a/compiler-rt/lib/xray/xray_fdr_logging.cc
+++ b/compiler-rt/lib/xray/xray_fdr_logging.cc
@@ -30,6 +30,7 @@
#include "sanitizer_common/sanitizer_common.h"
#include "xray/xray_interface.h"
#include "xray/xray_records.h"
+#include "xray_allocator.h"
#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_fdr_flags.h"
@@ -47,7 +48,7 @@ atomic_sint32_t LoggingStatus = {XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
// force the alignment to 64-bytes for x86 cache line alignment, as this
// structure is used in the hot path of implementation.
struct alignas(64) ThreadLocalData {
- BufferQueue::Buffer Buffer;
+ BufferQueue::Buffer Buffer{};
char *RecordPtr = nullptr;
// The number of FunctionEntry records immediately preceding RecordPtr.
uint8_t NumConsecutiveFnEnters = 0;
@@ -81,6 +82,7 @@ static constexpr auto FunctionRecSize = sizeof(FunctionRecord);
static pthread_key_t Key;
// Global BufferQueue.
+static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage;
static BufferQueue *BQ = nullptr;
static atomic_sint32_t LogFlushStatus = {
@@ -184,11 +186,10 @@ static void writeNewBufferPreamble(tid_t Tid, timespec TS,
TLD.NumTailCalls = 0;
if (TLD.BQ == nullptr || TLD.BQ->finalizing())
return;
- internal_memcpy(TLD.RecordPtr, Metadata, sizeof(Metadata));
- TLD.RecordPtr += sizeof(Metadata);
- // Since we write out the extents as the first metadata record of the
- // buffer, we need to write out the extents including the extents record.
- atomic_store(&TLD.Buffer.Extents->Size, sizeof(Metadata),
+ internal_memcpy(TLD.RecordPtr, Metadata,
+ sizeof(MetadataRecord) * InitRecordsCount);
+ TLD.RecordPtr += sizeof(MetadataRecord) * InitRecordsCount;
+ atomic_store(&TLD.Buffer.Extents, sizeof(MetadataRecord) * InitRecordsCount,
memory_order_release);
}
@@ -209,12 +210,12 @@ static void setupNewBuffer(int (*wall_clock_reader)(
static void incrementExtents(size_t Add) {
auto &TLD = getThreadLocalData();
- atomic_fetch_add(&TLD.Buffer.Extents->Size, Add, memory_order_acq_rel);
+ atomic_fetch_add(&TLD.Buffer.Extents, Add, memory_order_acq_rel);
}
static void decrementExtents(size_t Subtract) {
auto &TLD = getThreadLocalData();
- atomic_fetch_sub(&TLD.Buffer.Extents->Size, Subtract, memory_order_acq_rel);
+ atomic_fetch_sub(&TLD.Buffer.Extents, Subtract, memory_order_acq_rel);
}
static void writeNewCPUIdMetadata(uint16_t CPU,
@@ -583,7 +584,7 @@ static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
auto &TLD = getThreadLocalData();
- if (TLD.BQ == nullptr)
+ if (TLD.BQ == nullptr && BQ != nullptr)
TLD.BQ = BQ;
if (!isLogInitializedAndReady(TLD.BQ, TSC, CPU, wall_clock_reader))
@@ -758,6 +759,7 @@ XRayBuffer fdrIterator(const XRayBuffer B) {
static BufferQueue::const_iterator It{};
static BufferQueue::const_iterator End{};
static void *CurrentBuffer{nullptr};
+ static size_t SerializedBufferSize = 0;
if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
// From this point on, we provide raw access to the raw buffer we're getting
// from the BufferQueue. We're relying on the iterators from the current
@@ -767,7 +769,7 @@ XRayBuffer fdrIterator(const XRayBuffer B) {
}
if (CurrentBuffer != nullptr) {
- InternalFree(CurrentBuffer);
+ deallocateBuffer(CurrentBuffer, SerializedBufferSize);
CurrentBuffer = nullptr;
}
@@ -778,9 +780,9 @@ XRayBuffer fdrIterator(const XRayBuffer B) {
// out to disk. The difference here would be that we still write "empty"
// buffers, or at least go through the iterators faithfully to let the
// handlers see the empty buffers in the queue.
- auto BufferSize = atomic_load(&It->Extents->Size, memory_order_acquire);
- auto SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
- CurrentBuffer = InternalAlloc(SerializedBufferSize);
+ auto BufferSize = atomic_load(&It->Extents, memory_order_acquire);
+ SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
+ CurrentBuffer = allocateBuffer(SerializedBufferSize);
if (CurrentBuffer == nullptr)
return {nullptr, 0};
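
One wrinkle in switching fdrIterator from InternalAlloc/InternalFree to the mmap-backed helpers: munmap, unlike free, must be told the mapping's length, so the iterator keeps the size of the live serialization buffer in the new static SerializedBufferSize and uses it to release that buffer on the next call. The general shape, as a standalone sketch:

#include <cstddef>
#include <sys/mman.h>

// A mapping must travel with its size, because munmap(ptr, len) has no
// equivalent of free()'s internal bookkeeping.
struct Mapping {
  void *Ptr = nullptr;
  std::size_t Size = 0;
};

void release(Mapping &M) {
  if (M.Ptr != nullptr) {
    munmap(M.Ptr, M.Size);
    M = Mapping{};
  }
}
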
@@ -848,7 +850,6 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
if (TLD.RecordPtr != nullptr && TLD.BQ != nullptr)
releaseThreadLocalBuffer(*TLD.BQ);
BQ->~BufferQueue();
- InternalFree(BQ);
BQ = nullptr;
}
});
@@ -883,6 +884,13 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
reinterpret_cast<char *>(&Header) + sizeof(Header));
+ // Release the current thread's buffer before we attempt to write out all the
+ // buffers. This ensures that in case we had only a single thread going, that
+ // we are able to capture the data nonetheless.
+ auto &TLD = getThreadLocalData();
+ if (TLD.RecordPtr != nullptr && TLD.BQ != nullptr)
+ releaseThreadLocalBuffer(*TLD.BQ);
+
BQ->apply([&](const BufferQueue::Buffer &B) {
// Starting at version 2 of the FDR logging implementation, we only write
// the records identified by the extents of the buffer. We use the Extents
@@ -890,7 +898,7 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
// still use a Metadata record, but fill in the extents instead for the
// data.
MetadataRecord ExtentsRecord;
- auto BufferExtents = atomic_load(&B.Extents->Size, memory_order_acquire);
+ auto BufferExtents = atomic_load(&B.Extents, memory_order_acquire);
DCHECK(BufferExtents <= B.Size);
ExtentsRecord.Type = uint8_t(RecordType::Metadata);
ExtentsRecord.RecordKind =
@@ -922,7 +930,12 @@ XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
// Do special things to make the log finalize itself, and not allow any more
// operations to be performed until re-initialized.
- BQ->finalize();
+ if (BQ == nullptr) {
+ if (Verbosity())
+ Report("Attempting to finalize an uninitialized global buffer!\n");
+ } else {
+ BQ->finalize();
+ }
atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
memory_order_release);
@@ -1132,13 +1145,11 @@ XRayLogInitStatus fdrLoggingInit(UNUSED size_t BufferSize,
if (BQ != nullptr) {
BQ->~BufferQueue();
- InternalFree(BQ);
BQ = nullptr;
}
if (BQ == nullptr) {
- BQ = reinterpret_cast<BufferQueue *>(
- InternalAlloc(sizeof(BufferQueue), nullptr, 64));
+ BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
new (BQ) BufferQueue(BufferSize, BufferMax, Success);
}
@@ -1146,7 +1157,6 @@ XRayLogInitStatus fdrLoggingInit(UNUSED size_t BufferSize,
Report("BufferQueue init failed.\n");
if (BQ != nullptr) {
BQ->~BufferQueue();
- InternalFree(BQ);
BQ = nullptr;
}
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
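
The hunks above remove the last InternalAlloc/InternalFree uses: the global BufferQueue is now placement-constructed into the static BufferQueueStorage block and destroyed in place on flush and on re-initialization. A minimal sketch of that lifecycle with a stand-in type (the explicit alignment argument is an assumption; the patch relies on aligned_storage's default):

#include <new>
#include <type_traits>

struct Queue { /* stand-in for BufferQueue */ };

// Static, suitably aligned, initially raw storage for exactly one Queue.
static std::aligned_storage<sizeof(Queue), alignof(Queue)>::type Storage;
static Queue *Q = nullptr;

void init() {
  if (Q != nullptr)
    Q->~Queue();            // tear down any previous instance in place
  Q = new (&Storage) Queue; // placement-construct into static storage
}

void shutdown() {
  if (Q != nullptr) {
    Q->~Queue(); // explicit destructor call; no delete, storage is static
    Q = nullptr;
  }
}
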