path: root/compiler-rt/lib/xray/xray_profile_collector.cc
Diffstat (limited to 'compiler-rt/lib/xray/xray_profile_collector.cc')
-rw-r--r--  compiler-rt/lib/xray/xray_profile_collector.cc  285
1 file changed, 285 insertions, 0 deletions
diff --git a/compiler-rt/lib/xray/xray_profile_collector.cc b/compiler-rt/lib/xray/xray_profile_collector.cc
new file mode 100644
index 00000000000..28eb096f48e
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_profile_collector.cc
@@ -0,0 +1,285 @@
+//===-- xray_profile_collector.cc ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the interface for the profileCollectorService.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_profile_collector.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_vector.h"
+#include "xray_profiler_flags.h"
+#include <memory>
+#include <pthread.h>
+#include <utility>
+
+namespace __xray {
+namespace profileCollectorService {
+
+namespace {
+
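+// The state in this anonymous namespace is shared across threads; the entry
+// points below (post, serialize, reset, nextBuffer) take a SpinMutexLock on
+// GlobalMutex before touching ThreadTries, ProfileBuffers, or
+// GlobalAllocators.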
+SpinMutex GlobalMutex;
+struct ThreadTrie {
+ tid_t TId;
+ FunctionCallTrie *Trie;
+};
+Vector<ThreadTrie> ThreadTries;
+
+struct ProfileBuffer {
+ void *Data;
+ size_t Size;
+};
+Vector<ProfileBuffer> ProfileBuffers;
+
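+// A BlockHeader is written at the start of each ProfileBuffer and is followed
+// by the serialized ProfileRecords for a single thread (see serializeRecords()
+// below). BlockSize covers both the header and the records, while BlockNum is
+// the block's index, used by nextBuffer() to find the block that follows it.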
+struct BlockHeader {
+ u32 BlockSize;
+ u32 BlockNum;
+ u64 ThreadId;
+};
+
+FunctionCallTrie::Allocators *GlobalAllocators = nullptr;
+
+} // namespace
+
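+// Take a deep copy of the thread's FunctionCallTrie into storage owned by
+// this service. The global allocators are initialised lazily on the first
+// call; the per-thread slot is set up under GlobalMutex, while the deep copy
+// itself happens outside the lock.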
+void post(const FunctionCallTrie &T, tid_t TId) {
+ static pthread_once_t Once = PTHREAD_ONCE_INIT;
+ pthread_once(&Once, +[] {
+ SpinMutexLock Lock(&GlobalMutex);
+ GlobalAllocators = reinterpret_cast<FunctionCallTrie::Allocators *>(
+ InternalAlloc(sizeof(FunctionCallTrie::Allocators)));
+ new (GlobalAllocators) FunctionCallTrie::Allocators();
+ *GlobalAllocators = FunctionCallTrie::InitAllocators();
+ });
+ DCHECK_NE(GlobalAllocators, nullptr);
+
+ ThreadTrie *Item = nullptr;
+ {
+ SpinMutexLock Lock(&GlobalMutex);
+ if (GlobalAllocators == nullptr)
+ return;
+
+ Item = ThreadTries.PushBack();
+ Item->TId = TId;
+
+ // Here we're using the internal allocator instead of the managed allocator
+ // because:
+ //
+ // 1) We're not using the segmented array data structure to host
+ // FunctionCallTrie objects. We're using a Vector (from sanitizer_common)
+ // which works like a std::vector<...>, keeping elements contiguous in
+ // memory. The segmented array data structure assumes that elements are
+ // trivially destructible, which FunctionCallTrie isn't.
+ //
+ // 2) Using a managed allocator means we need to manage its lifetime
+ // separately, which complicates this code. To get around that, we use
+ // the internal allocator instead, which has its own global state and is
+ // decoupled from the lifetime management required by the managed
+ // allocator we have in XRay.
+ //
+ Item->Trie = reinterpret_cast<FunctionCallTrie *>(
+ InternalAlloc(sizeof(FunctionCallTrie)));
+ DCHECK_NE(Item->Trie, nullptr);
+ new (Item->Trie) FunctionCallTrie(*GlobalAllocators);
+ }
+ DCHECK_NE(Item, nullptr);
+
+ T.deepCopyInto(*Item->Trie);
+}
+
+// A PathArray holds the function ids that make up a stack trace. In this
+// context a path is almost always represented from the leaf function in a
+// call stack to a root of the call trie.
+using PathArray = Array<int32_t>;
+
+struct ProfileRecord {
+ using PathAllocator = typename PathArray::AllocatorType;
+
+ // The Path in this record is the list of function ids from the leaf to the
+ // root of the function call stack, as represented in a FunctionCallTrie.
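+ // The PathArray itself is allocated from the internal allocator in the
+ // constructor below; serialize() explicitly destroys and frees it once the
+ // record has been written out.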
+ PathArray *Path = nullptr;
+ const FunctionCallTrie::Node *Node = nullptr;
+
+ // Constructor for in-place construction.
+ ProfileRecord(PathAllocator &A, const FunctionCallTrie::Node *N)
+ : Path([&] {
+ auto P =
+ reinterpret_cast<PathArray *>(InternalAlloc(sizeof(PathArray)));
+ new (P) PathArray(A);
+ return P;
+ }()),
+ Node(N) {}
+};
+
+namespace {
+
+using ProfileRecordArray = Array<ProfileRecord>;
+
+// Perform a depth-first traversal from each root of the FunctionCallTrie to
+// generate the path(s) and the data associated with each path.
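+//
+// For example, for a trie with a single root f1 that calls f2, we generate
+// two records: one with path {f1} for f1's node, and one with path {f2, f1}
+// for f2's node, since paths are stored leaf-first.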
+static void populateRecords(ProfileRecordArray &PRs,
+ ProfileRecord::PathAllocator &PA,
+ const FunctionCallTrie &Trie) {
+ using StackArray = Array<const FunctionCallTrie::Node *>;
+ using StackAllocator = typename StackArray::AllocatorType;
+ StackAllocator StackAlloc(profilerFlags()->stack_allocator_max, 0);
+ StackArray DFSStack(StackAlloc);
+ for (const auto R : Trie.getRoots()) {
+ DFSStack.Append(R);
+ while (!DFSStack.empty()) {
+ auto Node = DFSStack.back();
+ DFSStack.trim(1);
+ auto Record = PRs.AppendEmplace(PA, Node);
+ DCHECK_NE(Record, nullptr);
+
+ // Walk up the Node's parents, appending each FId as we go, so that the
+ // path is stored leaf-first.
+ for (auto N = Node; N != nullptr; N = N->Parent)
+ Record->Path->Append(N->FId);
+ DCHECK(!Record->Path->empty());
+
+ for (const auto C : Node->Callees)
+ DFSStack.Append(C.NodePtr);
+ }
+ }
+}
+
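+// Serialize a single thread's block into Buffer: the BlockHeader first, then
+// each record laid out as
+//
+//   <path: one int32_t per function id, leaf to root> <int32_t 0 sentinel>
+//   <u64 call count> <u64 cumulative local time>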
+static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header,
+ const ProfileRecordArray &ProfileRecords) {
+ auto NextPtr = static_cast<char *>(
+ internal_memcpy(Buffer->Data, &Header, sizeof(Header))) +
+ sizeof(Header);
+ for (const auto &Record : ProfileRecords) {
+ // List of IDs follow:
+ for (const auto FId : *Record.Path)
+ NextPtr =
+ static_cast<char *>(internal_memcpy(NextPtr, &FId, sizeof(FId))) +
+ sizeof(FId);
+
+ // Add the sentinel here.
+ constexpr int32_t SentinelFId = 0;
+ NextPtr = static_cast<char *>(
+ internal_memset(NextPtr, SentinelFId, sizeof(SentinelFId))) +
+ sizeof(SentinelFId);
+
+ // Add the node data here.
+ NextPtr =
+ static_cast<char *>(internal_memcpy(NextPtr, &Record.Node->CallCount,
+ sizeof(Record.Node->CallCount))) +
+ sizeof(Record.Node->CallCount);
+ NextPtr = static_cast<char *>(
+ internal_memcpy(NextPtr, &Record.Node->CumulativeLocalTime,
+ sizeof(Record.Node->CumulativeLocalTime))) +
+ sizeof(Record.Node->CumulativeLocalTime);
+ }
+
+ DCHECK_EQ(NextPtr - static_cast<char *>(Buffer->Data), Buffer->Size);
+}
+
+} // namespace
+
+void serialize() {
+ SpinMutexLock Lock(&GlobalMutex);
+
+ // Clear out the global ProfileBuffers.
+ for (uptr I = 0; I < ProfileBuffers.Size(); ++I)
+ InternalFree(ProfileBuffers[I].Data);
+ ProfileBuffers.Reset();
+
+ if (ThreadTries.Size() == 0)
+ return;
+
+ // Then repopulate the global ProfileBuffers.
+ for (u32 I = 0; I < ThreadTries.Size(); ++I) {
+ using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
+ ProfileRecordAllocator PRAlloc(profilerFlags()->global_allocator_max, 0);
+ ProfileRecord::PathAllocator PathAlloc(
+ profilerFlags()->global_allocator_max, 0);
+ ProfileRecordArray ProfileRecords(PRAlloc);
+
+ // First, we compute the amount of space we're going to need. We use a
+ // local allocator and an __xray::Array<...> to store the intermediate
+ // data, computing the size as we go. Then we allocate a contiguous block
+ // of memory to contain the thread buffer data.
+ const auto &Trie = *ThreadTries[I].Trie;
+ if (Trie.getRoots().empty())
+ continue;
+ populateRecords(ProfileRecords, PathAlloc, Trie);
+ DCHECK(!Trie.getRoots().empty());
+ DCHECK(!ProfileRecords.empty());
+
+ // Go through each record, to compute the sizes.
+ //
+ // header size = block size (4 bytes)
+ // + block number (4 bytes)
+ // + thread id (8 bytes)
+ // record size = path ids (4 bytes * number of ids)
+ // + sentinel (4 bytes)
+ // + call count (8 bytes)
+ // + local time (8 bytes)
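+ //
+ // For example, a record whose path holds three function ids contributes
+ // 20 + (4 * 3) = 32 bytes to the block, excluding the header.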
+ u32 CumulativeSizes = 0;
+ for (const auto &Record : ProfileRecords)
+ CumulativeSizes += 20 + (4 * Record.Path->size());
+
+ BlockHeader Header{16 + CumulativeSizes, I, ThreadTries[I].TId};
+ auto Buffer = ProfileBuffers.PushBack();
+ Buffer->Size = sizeof(Header) + CumulativeSizes;
+ Buffer->Data = InternalAlloc(Buffer->Size, nullptr, 64);
+ DCHECK_NE(Buffer->Data, nullptr);
+ serializeRecords(Buffer, Header, ProfileRecords);
+
+ // Now clean up the ProfileRecords array, one at a time.
+ for (auto &Record : ProfileRecords) {
+ Record.Path->~PathArray();
+ InternalFree(Record.Path);
+ }
+ }
+}
+
+void reset() {
+ SpinMutexLock Lock(&GlobalMutex);
+ // Clear out the profile buffers that have been serialized.
+ for (uptr I = 0; I < ProfileBuffers.Size(); ++I)
+ InternalFree(ProfileBuffers[I].Data);
+ ProfileBuffers.Reset();
+
+ // Clear out the function call tries per thread.
+ for (uptr I = 0; I < ThreadTries.Size(); ++I) {
+ auto &T = ThreadTries[I];
+ T.Trie->~FunctionCallTrie();
+ InternalFree(T.Trie);
+ }
+ ThreadTries.Reset();
+
+ // Reset the global allocators.
+ if (GlobalAllocators != nullptr) {
+ GlobalAllocators->~Allocators();
+ InternalFree(GlobalAllocators);
+ GlobalAllocators = nullptr;
+ }
+ GlobalAllocators = reinterpret_cast<FunctionCallTrie::Allocators *>(
+ InternalAlloc(sizeof(FunctionCallTrie::Allocators)));
+ new (GlobalAllocators) FunctionCallTrie::Allocators();
+ *GlobalAllocators = FunctionCallTrie::InitAllocators();
+}
+
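+// Return the serialized block that follows B. Consumers start with an empty
+// XRayBuffer {nullptr, 0} to get the first block, then keep passing the
+// previously returned buffer back in until {nullptr, 0} comes back.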
+XRayBuffer nextBuffer(XRayBuffer B) {
+ SpinMutexLock Lock(&GlobalMutex);
+ if (B.Data == nullptr) {
+ if (ProfileBuffers.Size() == 0)
+ return {nullptr, 0};
+ return {ProfileBuffers[0].Data, ProfileBuffers[0].Size};
+ }
+
+ BlockHeader Header;
+ internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
+ auto NextBlock = Header.BlockNum + 1;
+ if (NextBlock < ProfileBuffers.Size())
+ return {ProfileBuffers[NextBlock].Data, ProfileBuffers[NextBlock].Size};
+ return {nullptr, 0};
+}
+
+} // namespace profileCollectorService
+} // namespace __xray