summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--compiler-rt/lib/xray/tests/unit/CMakeLists.txt2
-rw-r--r--compiler-rt/lib/xray/tests/unit/xray_fdr_log_printer_tool.cc329
-rw-r--r--compiler-rt/lib/xray/xray_fdr_log_records.h65
-rw-r--r--compiler-rt/lib/xray/xray_fdr_logging.cc349
-rw-r--r--compiler-rt/lib/xray/xray_fdr_logging.h54
-rw-r--r--compiler-rt/lib/xray/xray_fdr_logging_impl.h486
6 files changed, 891 insertions, 394 deletions
diff --git a/compiler-rt/lib/xray/tests/unit/CMakeLists.txt b/compiler-rt/lib/xray/tests/unit/CMakeLists.txt
index 62d01f23958..4ed91951894 100644
--- a/compiler-rt/lib/xray/tests/unit/CMakeLists.txt
+++ b/compiler-rt/lib/xray/tests/unit/CMakeLists.txt
@@ -2,3 +2,5 @@ add_xray_unittest(XRayBufferQueueTest SOURCES
buffer_queue_test.cc xray_unit_test_main.cc)
add_xray_unittest(XRayFDRLoggingTest SOURCES
fdr_logging_test.cc xray_unit_test_main.cc)
+
+add_executable(xray_fdr_log_printer xray_fdr_log_printer_tool.cc)
diff --git a/compiler-rt/lib/xray/tests/unit/xray_fdr_log_printer_tool.cc b/compiler-rt/lib/xray/tests/unit/xray_fdr_log_printer_tool.cc
new file mode 100644
index 00000000000..a4d05f200de
--- /dev/null
+++ b/compiler-rt/lib/xray/tests/unit/xray_fdr_log_printer_tool.cc
@@ -0,0 +1,329 @@
+//===-- xray_fdr_log_printer_tool.cc --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+
+#include "xray_fdr_logging.h"
+#include "xray_fdr_logging_impl.h"
+
+#include <algorithm>
+#include <array>
+#include <cstdlib>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <vector>
+
+#include "xray/xray_records.h"
+
+// Writes out xray fdr mode log records to stdout based on a sequence of
+// formatted data read from stdin.
+//
+// Interprets an adhoc format of Top Level Types and parameter maps of the form:
+//
+// RecordType : { Parameter1 = Value, Parameter2 = value , Parameter3 = value}
+// OtherRecordType : { ParameterOne = Value }
+//
+// Each line corresponds to a type of record written by the Xray Flight Data
+// Recorder mode to a buffer. This program synthesizes records in the FDR binary
+// format and writes them to std::cout.
+
+namespace {
+
+/// A crude lexer to read tokens and skip over whitespace.
+class TokenReader {
+public:
+ TokenReader() : LastDelimPresent(false), FoundEof(false), LastDelim(0) {}
+ std::string readToken(std::istream &Stream);
+ bool hasLastDelim() const { return LastDelimPresent; }
+ char getLastDelim() const { return LastDelim; }
+ void setLastDelim(char Delim) {
+ LastDelimPresent = true;
+ LastDelim = Delim;
+ }
+ void clearLastDelim() {
+ LastDelimPresent = false;
+ LastDelim = 0;
+ }
+ bool isEof() { return FoundEof; }
+ void setFoundEof(bool eof) { FoundEof = eof; }
+
+private:
+ bool LastDelimPresent;
+ bool FoundEof;
+ char LastDelim;
+};
+
+// Globally tracks whether we reached EOF and caches delimiters that qualify as
+// tokens.
+static TokenReader TokenReader{};
+
+bool isWhitespace(char c) {
+ // Hardcode the whitespace characters we will not return as tokens even though
+ // they are token delimiters.
+ static const std::vector<char> Whitespace{' ', '\n', '\t'};
+ return std::find(Whitespace.begin(), Whitespace.end(), c) != Whitespace.end();
+}
+
+bool isDelimiter(char c) {
+ // Hardcode a set of token delimiters.
+ static const std::vector<char> Delimiters{' ', ':', ',', '\n',
+ '\t', '{', '}', '='};
+ return std::find(Delimiters.begin(), Delimiters.end(), c) != Delimiters.end();
+}
+
+std::string TokenReader::readToken(std::istream &Stream) {
+ // If on the last call we read a trailing delimiter that also qualifies as a
+ // token, return it now.
+ if (hasLastDelim()) {
+ char Token = getLastDelim();
+ clearLastDelim();
+ return std::string{Token};
+ }
+
+ std::stringstream Builder{};
+ char c;
+ c = Stream.get();
+ while (!isDelimiter(c) && !Stream.eof()) {
+ Builder << c;
+ c = Stream.get();
+ }
+
+ setFoundEof(Stream.eof());
+
+ std::string Token = Builder.str();
+
+ if (Token.empty()) {
+ // We read a whitespace delimiter only. Skip over it.
+ if (!isEof() && isWhitespace(c)) {
+ return readToken(Stream);
+ } else if (isWhitespace(c)) {
+ // We only read a whitespace delimiter.
+ return "";
+ } else {
+ // We read a delimiter that matters as a token.
+ return std::string{c};
+ }
+ }
+
+ // If we found a delimiter that's a valid token. Store it to return as the
+ // next token.
+ if (!isWhitespace(c))
+ setLastDelim(c);
+
+ return Token;
+}
+
+// Reads an expected token or dies a gruesome death.
+void eatExpectedToken(std::istream &Stream, const std::string &Expected) {
+ std::string Token = TokenReader.readToken(Stream);
+ if (Token.compare(Expected) != 0) {
+ std::cerr << "Expecting token '" << Expected << "'. Found '" << Token
+ << "'.\n";
+ std::exit(1);
+ }
+}
+
+// Constructs a map of key value pairs from a token stream.
+// Expects to read an expression of the form:
+//
+// { a = b, c = d, e = f}
+//
+// If not, the driver will crash.
+std::map<std::string, std::string> readMap(std::istream &Stream) {
+ using StrMap = std::map<std::string, std::string>;
+ using StrVector = std::vector<std::string>;
+
+ eatExpectedToken(Stream, "{");
+ StrVector TokenList{};
+
+ while (!TokenReader.isEof()) {
+ std::string CurrentToken = TokenReader.readToken(Stream);
+ if (CurrentToken.compare("}") == 0) {
+ break;
+ }
+ TokenList.push_back(CurrentToken);
+ if (TokenReader.isEof()) {
+ std::cerr << "Got EOF while building a param map.\n";
+ std::exit(1);
+ }
+ }
+
+ if (TokenList.size() == 0) {
+ StrMap EmptyMap{};
+ return EmptyMap;
+ }
+ if (TokenList.size() % 4 != 3) {
+ std::cerr << "Error while building token map. Expected triples of tokens "
+ "in the form 'a = b' separated by commas.\n";
+ std::exit(1);
+ }
+
+ StrMap TokenMap{};
+ std::size_t ElementIndex = 0;
+ for (; ElementIndex < TokenList.size(); ElementIndex += 4) {
+ if (TokenList[ElementIndex + 1].compare("=") != 0) {
+ std::cerr << "Expected an assignment when building a param map.\n";
+ std::exit(1);
+ }
+ TokenMap[TokenList[ElementIndex]] = TokenList[ElementIndex + 2];
+ if (ElementIndex + 3 < TokenList.size()) {
+ if (TokenList[ElementIndex + 3].compare(",") != 0) {
+ std::cerr << "Expected assignment statements to be separated by commas."
+ << "\n";
+ std::exit(1);
+ }
+ }
+ }
+ return TokenMap;
+}
+
+std::string getOrDie(const std::map<std::string, std::string> &Lookup,
+ const std::string &Key) {
+ auto MapIter = Lookup.find(Key);
+ if (MapIter == Lookup.end()) {
+ std::cerr << "Expected key '" << Key << "'. Was not found.\n";
+ std::exit(1);
+ }
+ return MapIter->second;
+}
+
+// Reads a numeric type from a string token through the magic of
+// std::stringstream.
+template <typename NT> struct NumberParser {
+ static NT parse(const std::string &Input) {
+ NT Number = 0;
+ std::stringstream Stream(Input);
+ Stream >> Number;
+ return Number;
+ }
+};
+
+void writeNewBufferOrDie(std::istream &Input) {
+ auto TokenMap = readMap(Input);
+ pid_t Tid = NumberParser<pid_t>::parse(getOrDie(TokenMap, "Tid"));
+ time_t Time = NumberParser<time_t>::parse(getOrDie(TokenMap, "time"));
+ timespec TimeSpec = {Time, 0};
+ constexpr const size_t OutputSize = 32;
+ std::array<char, OutputSize> Buffer{};
+ char *MemPtr = Buffer.data();
+ __xray::__xray_fdr_internal::writeNewBufferPreamble(Tid, TimeSpec, MemPtr);
+ std::cout.write(Buffer.data(), OutputSize);
+}
+
+void writeNewCPUIdOrDie(std::istream &Input) {
+ auto TokenMap = readMap(Input);
+ uint16_t CPU = NumberParser<uint16_t>::parse(getOrDie(TokenMap, "CPU"));
+ uint64_t TSC = NumberParser<uint64_t>::parse(getOrDie(TokenMap, "TSC"));
+ constexpr const size_t OutputSize = 16;
+ std::array<char, OutputSize> Buffer{};
+ char *MemPtr = Buffer.data();
+ __xray::__xray_fdr_internal::writeNewCPUIdMetadata(CPU, TSC, MemPtr);
+ std::cout.write(Buffer.data(), OutputSize);
+}
+
+void writeEOBOrDie(std::istream &Input) {
+ auto TokenMap = readMap(Input);
+ constexpr const size_t OutputSize = 16;
+ std::array<char, OutputSize> Buffer{};
+ char *MemPtr = Buffer.data();
+ __xray::__xray_fdr_internal::writeEOBMetadata(MemPtr);
+ std::cout.write(Buffer.data(), OutputSize);
+}
+
+void writeTSCWrapOrDie(std::istream &Input) {
+ auto TokenMap = readMap(Input);
+ uint64_t TSC = NumberParser<uint64_t>::parse(getOrDie(TokenMap, "TSC"));
+ constexpr const size_t OutputSize = 16;
+ std::array<char, OutputSize> Buffer{};
+ char *MemPtr = Buffer.data();
+ __xray::__xray_fdr_internal::writeTSCWrapMetadata(TSC, MemPtr);
+ std::cout.write(Buffer.data(), OutputSize);
+}
+
+XRayEntryType decodeEntryType(const std::string &EntryTypeStr) {
+ if (EntryTypeStr.compare("Entry") == 0) {
+ return XRayEntryType::ENTRY;
+ } else if (EntryTypeStr.compare("LogArgsEntry") == 0) {
+ return XRayEntryType::LOG_ARGS_ENTRY;
+ } else if (EntryTypeStr.compare("Exit") == 0) {
+ return XRayEntryType::EXIT;
+ } else if (EntryTypeStr.compare("Tail") == 0) {
+ return XRayEntryType::TAIL;
+ }
+ std::cerr << "Illegal entry type " << EntryTypeStr << ".\n";
+ std::exit(1);
+}
+
+void writeFunctionOrDie(std::istream &Input) {
+ auto TokenMap = readMap(std::cin);
+ int FuncId = NumberParser<int>::parse(getOrDie(TokenMap, "FuncId"));
+ uint32_t TSCDelta =
+ NumberParser<uint32_t>::parse(getOrDie(TokenMap, "TSCDelta"));
+ std::string EntryType = getOrDie(TokenMap, "EntryType");
+ XRayEntryType XrayEntryType = decodeEntryType(EntryType);
+ constexpr const size_t OutputSize = 8;
+ std::array<char, OutputSize> Buffer{};
+ char *MemPtr = Buffer.data();
+ __xray::__xray_fdr_internal::writeFunctionRecord(FuncId, TSCDelta,
+ XrayEntryType, MemPtr);
+ std::cout.write(Buffer.data(), OutputSize);
+}
+
+} // namespace
+
+int main(int argc, char **argv) {
+ std::map<std::string, std::function<void(std::istream &)>> TopLevelRecordMap;
+ TopLevelRecordMap["NewBuffer"] = writeNewBufferOrDie;
+ TopLevelRecordMap["NewCPU"] = writeNewCPUIdOrDie;
+ TopLevelRecordMap["EOB"] = writeEOBOrDie;
+ TopLevelRecordMap["TSCWrap"] = writeTSCWrapOrDie;
+ TopLevelRecordMap["Function"] = writeFunctionOrDie;
+
+ // Write file header
+ //
+ // (2) uint16 : version
+ // (2) uint16 : type
+ // (4) uint32 : bitfield
+ // (8) uint64 : cycle frequency
+ // (16) - : padding
+ uint16_t HeaderVersion = 1;
+ uint16_t HeaderType = 1;
+ uint32_t Bitfield = 3;
+ uint64_t CycleFreq = 42;
+ constexpr const size_t HeaderSize = 32;
+ std::array<char, HeaderSize> Header{};
+ std::memcpy(Header.data(), &HeaderVersion, sizeof(HeaderVersion));
+ std::memcpy(Header.data() + 2, &HeaderType, sizeof(HeaderType));
+ std::memcpy(Header.data() + 4, &Bitfield, sizeof(Bitfield));
+ std::memcpy(Header.data() + 8, &CycleFreq, sizeof(CycleFreq));
+ std::cout.write(Header.data(), HeaderSize);
+
+ std::string CurrentToken;
+ while (true) {
+ CurrentToken = TokenReader.readToken(std::cin);
+ if (TokenReader.isEof())
+ break;
+ auto MapIter = TopLevelRecordMap.find(CurrentToken);
+ if (MapIter != TopLevelRecordMap.end()) {
+ eatExpectedToken(std::cin, ":");
+ if (TokenReader.isEof()) {
+ std::cerr << "Got eof when expecting to read a map.\n";
+ std::exit(1);
+ }
+ MapIter->second(std::cin);
+ } else {
+ std::cerr << "Got bad top level instruction '" << CurrentToken << "'.\n";
+ std::exit(1);
+ }
+ }
+ return 0;
+}
diff --git a/compiler-rt/lib/xray/xray_fdr_log_records.h b/compiler-rt/lib/xray/xray_fdr_log_records.h
new file mode 100644
index 00000000000..36d9410d16f
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_fdr_log_records.h
@@ -0,0 +1,65 @@
+//===-- xray_fdr_log_records.h -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_FDR_LOG_RECORDS_H
+#define XRAY_XRAY_FDR_LOG_RECORDS_H
+
+enum class RecordType : uint8_t { Function, Metadata };
+
+// A MetadataRecord encodes the kind of record in its first byte, and have 15
+// additional bytes in the end to hold free-form data.
+struct alignas(16) MetadataRecord {
+ // A MetadataRecord must always have a type of 1.
+ /* RecordType */ uint8_t Type : 1;
+
+ // Each kind of record is represented as a 7-bit value (even though we use an
+ // unsigned 8-bit enum class to do so).
+ enum class RecordKinds : uint8_t {
+ NewBuffer,
+ EndOfBuffer,
+ NewCPUId,
+ TSCWrap,
+ WalltimeMarker,
+ };
+ // Use 7 bits to identify this record type.
+ /* RecordKinds */ uint8_t RecordKind : 7;
+ char Data[15];
+} __attribute__((packed));
+
+static_assert(sizeof(MetadataRecord) == 16, "Wrong size for MetadataRecord.");
+
+struct alignas(8) FunctionRecord {
+ // A FunctionRecord must always have a type of 0.
+ /* RecordType */ uint8_t Type : 1;
+ enum class RecordKinds {
+ FunctionEnter = 0x00,
+ FunctionExit = 0x01,
+ FunctionTailExit = 0x02,
+ };
+ /* RecordKinds */ uint8_t RecordKind : 3;
+
+ // We only use 28 bits of the function ID, so that we can use as few bytes as
+ // possible. This means we only support 2^28 (268,435,456) unique function ids
+ // in a single binary.
+ int FuncId : 28;
+
+ // We use another 4 bytes to hold the delta between the previous entry's TSC.
+ // In case we've found that the distance is greater than the allowable 32 bits
+ // (either because we are running in a different CPU and the TSC might be
+ // different then), we should use a MetadataRecord before this FunctionRecord
+ // that will contain the full TSC for that CPU, and keep this to 0.
+ uint32_t TSCDelta;
+} __attribute__((packed));
+
+static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord.");
+
+#endif // XRAY_XRAY_FDR_LOG_RECORDS_H
diff --git a/compiler-rt/lib/xray/xray_fdr_logging.cc b/compiler-rt/lib/xray/xray_fdr_logging.cc
index bae7d4c4d07..0a1fb19f0e3 100644
--- a/compiler-rt/lib/xray/xray_fdr_logging.cc
+++ b/compiler-rt/lib/xray/xray_fdr_logging.cc
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file is a part of XRay, a dynamic runtime instruementation system.
+// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// Here we implement the Flight Data Recorder mode for XRay, where we use
// compact structures to store records in memory as well as when writing out the
@@ -31,6 +31,7 @@
#include "xray/xray_records.h"
#include "xray_buffer_queue.h"
#include "xray_defs.h"
+#include "xray_fdr_logging_impl.h"
#include "xray_flags.h"
#include "xray_tsc.h"
#include "xray_utils.h"
@@ -49,8 +50,8 @@ std::atomic<XRayLogFlushStatus> LogFlushStatus{
std::unique_ptr<FDRLoggingOptions> FDROptions;
XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
- void *Options,
- size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
+ void *Options,
+ size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
assert(OptionsSize == sizeof(FDRLoggingOptions));
XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
if (!LoggingStatus.compare_exchange_strong(
@@ -174,352 +175,16 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
}
-namespace {
-thread_local BufferQueue::Buffer Buffer;
-thread_local char *RecordPtr = nullptr;
-
-void setupNewBuffer(const BufferQueue::Buffer &Buffer) XRAY_NEVER_INSTRUMENT {
- RecordPtr = static_cast<char *>(Buffer.Buffer);
-
- static constexpr int InitRecordsCount = 2;
- std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount];
- {
- // Write out a MetadataRecord to signify that this is the start of a new
- // buffer, associated with a particular thread, with a new CPU. For the
- // data, we have 15 bytes to squeeze as much information as we can. At this
- // point we only write down the following bytes:
- // - Thread ID (pid_t, 4 bytes)
- auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(&Records[0]);
- NewBuffer.Type = uint8_t(RecordType::Metadata);
- NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
- pid_t Tid = syscall(SYS_gettid);
- std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
- }
-
- // Also write the WalltimeMarker record.
- {
- static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
- auto &WalltimeMarker = *reinterpret_cast<MetadataRecord *>(&Records[1]);
- WalltimeMarker.Type = uint8_t(RecordType::Metadata);
- WalltimeMarker.RecordKind =
- uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
- timespec TS{0, 0};
- clock_gettime(CLOCK_MONOTONIC, &TS);
-
- // We only really need microsecond precision here, and enforce across
- // platforms that we need 64-bit seconds and 32-bit microseconds encoded in
- // the Metadata record.
- int32_t Micros = TS.tv_nsec / 1000;
- int64_t Seconds = TS.tv_sec;
- std::memcpy(WalltimeMarker.Data, &Seconds, sizeof(Seconds));
- std::memcpy(WalltimeMarker.Data + sizeof(Seconds), &Micros, sizeof(Micros));
- }
- std::memcpy(RecordPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
- RecordPtr += sizeof(MetadataRecord) * InitRecordsCount;
-}
-
-void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC) XRAY_NEVER_INSTRUMENT {
- MetadataRecord NewCPUId;
- NewCPUId.Type = uint8_t(RecordType::Metadata);
- NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId);
-
- // The data for the New CPU will contain the following bytes:
- // - CPU ID (uint16_t, 2 bytes)
- // - Full TSC (uint64_t, 8 bytes)
- // Total = 12 bytes.
- std::memcpy(&NewCPUId.Data, &CPU, sizeof(CPU));
- std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
- std::memcpy(RecordPtr, &NewCPUId, sizeof(MetadataRecord));
- RecordPtr += sizeof(MetadataRecord);
-}
-
-void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
- MetadataRecord EOBMeta;
- EOBMeta.Type = uint8_t(RecordType::Metadata);
- EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
- // For now we don't write any bytes into the Data field.
- std::memcpy(RecordPtr, &EOBMeta, sizeof(MetadataRecord));
- RecordPtr += sizeof(MetadataRecord);
-}
-
-void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
- MetadataRecord TSCWrap;
- TSCWrap.Type = uint8_t(RecordType::Metadata);
- TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap);
-
- // The data for the TSCWrap record contains the following bytes:
- // - Full TSC (uint64_t, 8 bytes)
- // Total = 8 bytes.
- std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
- std::memcpy(RecordPtr, &TSCWrap, sizeof(MetadataRecord));
- RecordPtr += sizeof(MetadataRecord);
-}
-
-constexpr auto MetadataRecSize = sizeof(MetadataRecord);
-constexpr auto FunctionRecSize = sizeof(FunctionRecord);
-
-class ThreadExitBufferCleanup {
- std::weak_ptr<BufferQueue> Buffers;
- BufferQueue::Buffer &Buffer;
-
-public:
- explicit ThreadExitBufferCleanup(std::weak_ptr<BufferQueue> BQ,
- BufferQueue::Buffer &Buffer)
- XRAY_NEVER_INSTRUMENT : Buffers(BQ),
- Buffer(Buffer) {}
-
- ~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT {
- if (RecordPtr == nullptr)
- return;
-
- // We make sure that upon exit, a thread will write out the EOB
- // MetadataRecord in the thread-local log, and also release the buffer to
- // the queue.
- assert((RecordPtr + MetadataRecSize) - static_cast<char *>(Buffer.Buffer) >=
- static_cast<ptrdiff_t>(MetadataRecSize));
- if (auto BQ = Buffers.lock()) {
- writeEOBMetadata();
- if (auto EC = BQ->releaseBuffer(Buffer))
- Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
- EC.message().c_str());
- return;
- }
- }
-};
-
-class RecursionGuard {
- bool &Running;
- const bool Valid;
-
-public:
- explicit RecursionGuard(bool &R) : Running(R), Valid(!R) {
- if (Valid)
- Running = true;
- }
-
- RecursionGuard(const RecursionGuard &) = delete;
- RecursionGuard(RecursionGuard &&) = delete;
- RecursionGuard &operator=(const RecursionGuard &) = delete;
- RecursionGuard &operator=(RecursionGuard &&) = delete;
-
- explicit operator bool() const { return Valid; }
-
- ~RecursionGuard() noexcept {
- if (Valid)
- Running = false;
- }
-};
-
-inline bool loggingInitialized() {
- return LoggingStatus.load(std::memory_order_acquire) ==
- XRayLogInitStatus::XRAY_LOG_INITIALIZED;
-}
-
-} // namespace
-
void fdrLoggingHandleArg0(int32_t FuncId,
- XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
+ XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
// We want to get the TSC as early as possible, so that we can check whether
// we've seen this CPU before. We also do it before we load anything else, to
// allow for forward progress with the scheduling.
unsigned char CPU;
uint64_t TSC = __xray::readTSC(CPU);
- // Bail out right away if logging is not initialized yet.
- if (LoggingStatus.load(std::memory_order_acquire) !=
- XRayLogInitStatus::XRAY_LOG_INITIALIZED)
- return;
-
- // We use a thread_local variable to keep track of which CPUs we've already
- // run, and the TSC times for these CPUs. This allows us to stop repeating the
- // CPU field in the function records.
- //
- // We assume that we'll support only 65536 CPUs for x86_64.
- thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
- thread_local uint64_t LastTSC = 0;
-
- // Make sure a thread that's ever called handleArg0 has a thread-local
- // live reference to the buffer queue for this particular instance of
- // FDRLogging, and that we're going to clean it up when the thread exits.
- thread_local auto LocalBQ = BQ;
- thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer);
-
- // Prevent signal handler recursion, so in case we're already in a log writing
- // mode and the signal handler comes in (and is also instrumented) then we
- // don't want to be clobbering potentially partial writes already happening in
- // the thread. We use a simple thread_local latch to only allow one on-going
- // handleArg0 to happen at any given time.
- thread_local bool Running = false;
- RecursionGuard Guard{Running};
- if (!Guard) {
- assert(Running == true && "RecursionGuard is buggy!");
- return;
- }
-
- if (!loggingInitialized() || LocalBQ->finalizing()) {
- writeEOBMetadata();
- if (auto EC = BQ->releaseBuffer(Buffer)) {
- Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
- EC.message().c_str());
- return;
- }
- RecordPtr = nullptr;
- }
-
- if (Buffer.Buffer == nullptr) {
- if (auto EC = LocalBQ->getBuffer(Buffer)) {
- auto LS = LoggingStatus.load(std::memory_order_acquire);
- if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING &&
- LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
- Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str());
- return;
- }
-
- setupNewBuffer(Buffer);
- }
-
- if (CurrentCPU == std::numeric_limits<uint16_t>::max()) {
- // This means this is the first CPU this thread has ever run on. We set the
- // current CPU and record this as the first TSC we've seen.
- CurrentCPU = CPU;
- writeNewCPUIdMetadata(CPU, TSC);
- }
-
- // Before we go setting up writing new function entries, we need to be really
- // careful about the pointer math we're doing. This means we need to ensure
- // that the record we are about to write is going to fit into the buffer,
- // without overflowing the buffer.
- //
- // To do this properly, we use the following assumptions:
- //
- // - The least number of bytes we will ever write is 8
- // (sizeof(FunctionRecord)) only if the delta between the previous entry
- // and this entry is within 32 bits.
- // - The most number of bytes we will ever write is 8 + 16 = 24. This is
- // computed by:
- //
- // sizeof(FunctionRecord) + sizeof(MetadataRecord)
- //
- // These arise in the following cases:
- //
- // 1. When the delta between the TSC we get and the previous TSC for the
- // same CPU is outside of the uint32_t range, we end up having to
- // write a MetadataRecord to indicate a "tsc wrap" before the actual
- // FunctionRecord.
- // 2. When we learn that we've moved CPUs, we need to write a
- // MetadataRecord to indicate a "cpu change", and thus write out the
- // current TSC for that CPU before writing out the actual
- // FunctionRecord.
- // 3. When we learn about a new CPU ID, we need to write down a "new cpu
- // id" MetadataRecord before writing out the actual FunctionRecord.
- //
- // - An End-of-Buffer (EOB) MetadataRecord is 16 bytes.
- //
- // So the math we need to do is to determine whether writing 24 bytes past the
- // current pointer leaves us with enough bytes to write the EOB
- // MetadataRecord. If we don't have enough space after writing as much as 24
- // bytes in the end of the buffer, we need to write out the EOB, get a new
- // Buffer, set it up properly before doing any further writing.
- //
- char *BufferStart = static_cast<char *>(Buffer.Buffer);
- if ((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart <
- static_cast<ptrdiff_t>(MetadataRecSize)) {
- writeEOBMetadata();
- if (auto EC = LocalBQ->releaseBuffer(Buffer)) {
- Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
- EC.message().c_str());
- return;
- }
- if (auto EC = LocalBQ->getBuffer(Buffer)) {
- Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str());
- return;
- }
- setupNewBuffer(Buffer);
- }
-
- // By this point, we are now ready to write at most 24 bytes (one metadata
- // record and one function record).
- BufferStart = static_cast<char *>(Buffer.Buffer);
- assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart >=
- static_cast<ptrdiff_t>(MetadataRecSize) &&
- "Misconfigured BufferQueue provided; Buffer size not large enough.");
-
- std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type
- AlignedFuncRecordBuffer;
- auto &FuncRecord =
- *reinterpret_cast<FunctionRecord *>(&AlignedFuncRecordBuffer);
- FuncRecord.Type = uint8_t(RecordType::Function);
-
- // Only get the lower 28 bits of the function id.
- FuncRecord.FuncId = FuncId & ~(0x0F << 28);
-
- // Here we compute the TSC Delta. There are a few interesting situations we
- // need to account for:
- //
- // - The thread has migrated to a different CPU. If this is the case, then
- // we write down the following records:
- //
- // 1. A 'NewCPUId' Metadata record.
- // 2. A FunctionRecord with a 0 for the TSCDelta field.
- //
- // - The TSC delta is greater than the 32 bits we can store in a
- // FunctionRecord. In this case we write down the following records:
- //
- // 1. A 'TSCWrap' Metadata record.
- // 2. A FunctionRecord with a 0 for the TSCDelta field.
- //
- // - The TSC delta is representable within the 32 bits we can store in a
- // FunctionRecord. In this case we write down just a FunctionRecord with
- // the correct TSC delta.
- //
- FuncRecord.TSCDelta = 0;
- if (CPU != CurrentCPU) {
- // We've moved to a new CPU.
- writeNewCPUIdMetadata(CPU, TSC);
- } else {
- // If the delta is greater than the range for a uint32_t, then we write out
- // the TSC wrap metadata entry with the full TSC, and the TSC for the
- // function record be 0.
- auto Delta = LastTSC - TSC;
- if (Delta > (1ULL << 32) - 1)
- writeTSCWrapMetadata(TSC);
- else
- FuncRecord.TSCDelta = Delta;
- }
-
- // We then update our "LastTSC" and "CurrentCPU" thread-local variables to aid
- // us in future computations of this TSC delta value.
- LastTSC = TSC;
- CurrentCPU = CPU;
-
- switch (Entry) {
- case XRayEntryType::ENTRY:
- case XRayEntryType::LOG_ARGS_ENTRY:
- FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
- break;
- case XRayEntryType::EXIT:
- FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
- break;
- case XRayEntryType::TAIL:
- FuncRecord.RecordKind =
- uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
- break;
- }
-
- std::memcpy(RecordPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord));
- RecordPtr += sizeof(FunctionRecord);
-
- // If we've exhausted the buffer by this time, we then release the buffer to
- // make sure that other threads may start using this buffer.
- if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
- writeEOBMetadata();
- if (auto EC = LocalBQ->releaseBuffer(Buffer)) {
- Report("Failed releasing buffer at %p; error=%s\n", Buffer.Buffer,
- EC.message().c_str());
- return;
- }
- RecordPtr = nullptr;
- }
+ __xray_fdr_internal::processFunctionHook(FuncId, Entry, TSC, CPU,
+ clock_gettime, LoggingStatus, BQ);
}
} // namespace __xray
diff --git a/compiler-rt/lib/xray/xray_fdr_logging.h b/compiler-rt/lib/xray/xray_fdr_logging.h
index 28c829625bd..93c33b417ba 100644
--- a/compiler-rt/lib/xray/xray_fdr_logging.h
+++ b/compiler-rt/lib/xray/xray_fdr_logging.h
@@ -14,6 +14,7 @@
#define XRAY_XRAY_FDR_LOGGING_H
#include "xray/xray_log_interface.h"
+#include "xray_fdr_log_records.h"
// FDR (Flight Data Recorder) Mode
// ===============================
@@ -26,57 +27,6 @@
namespace __xray {
-enum class RecordType : uint8_t {
- Function, Metadata
-};
-
-// A MetadataRecord encodes the kind of record in its first byte, and have 15
-// additional bytes in the end to hold free-form data.
-struct alignas(16) MetadataRecord {
- // A MetadataRecord must always have a type of 1.
- /* RecordType */ uint8_t Type : 1;
-
- // Each kind of record is represented as a 7-bit value (even though we use an
- // unsigned 8-bit enum class to do so).
- enum class RecordKinds : uint8_t {
- NewBuffer,
- EndOfBuffer,
- NewCPUId,
- TSCWrap,
- WalltimeMarker,
- };
- // Use 7 bits to identify this record type.
- /* RecordKinds */ uint8_t RecordKind : 7;
- char Data[15];
-} __attribute__((packed));
-
-static_assert(sizeof(MetadataRecord) == 16, "Wrong size for MetadataRecord.");
-
-struct alignas(8) FunctionRecord {
- // A FunctionRecord must always have a type of 0.
- /* RecordType */ uint8_t Type : 1;
- enum class RecordKinds {
- FunctionEnter = 0x00,
- FunctionExit = 0x01,
- FunctionTailExit = 0x02,
- };
- /* RecordKinds */ uint8_t RecordKind : 3;
-
- // We only use 28 bits of the function ID, so that we can use as few bytes as
- // possible. This means we only support 2^28 (268,435,456) unique function ids
- // in a single binary.
- int FuncId : 28;
-
- // We use another 4 bytes to hold the delta between the previous entry's TSC.
- // In case we've found that the distance is greater than the allowable 32 bits
- // (either because we are running in a different CPU and the TSC might be
- // different then), we should use a MetadataRecord before this FunctionRecord
- // that will contain the full TSC for that CPU, and keep this to 0.
- uint32_t TSCDelta;
-} __attribute__((packed));
-
-static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord.");
-
// Options used by the FDR implementation.
struct FDRLoggingOptions {
bool ReportErrors = false;
@@ -85,7 +35,7 @@ struct FDRLoggingOptions {
// Flight Data Recorder mode implementation interfaces.
XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax,
- void *Options, size_t OptionsSize);
+ void *Options, size_t OptionsSize);
XRayLogInitStatus fdrLoggingFinalize();
void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry);
XRayLogFlushStatus fdrLoggingFlush();
diff --git a/compiler-rt/lib/xray/xray_fdr_logging_impl.h b/compiler-rt/lib/xray/xray_fdr_logging_impl.h
new file mode 100644
index 00000000000..a06b22df920
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_fdr_logging_impl.h
@@ -0,0 +1,486 @@
+//===-- xray_fdr_logging_impl.h ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Here we implement the thread local state management and record i/o for Flight
+// Data Recorder mode for XRay, where we use compact structures to store records
+// in memory as well as when writing out the data to files.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_FDR_LOGGING_IMPL_H
+#define XRAY_XRAY_FDR_LOGGING_IMPL_H
+
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray/xray_log_interface.h"
+#include "xray_buffer_queue.h"
+#include "xray_defs.h"
+#include "xray_fdr_log_records.h"
+
+namespace __xray {
+
+/// We expose some of the state transitions when FDR logging mode is operating
+/// such that we can simulate a series of log events that may occur without
+/// and test with determinism without worrying about the real CPU time.
+///
+/// Because the code uses thread_local allocation extensively as part of its
+/// design, callers that wish to test events occuring on different threads
+/// will actually have to run them on different threads.
+///
+/// This also means that it is possible to break invariants maintained by
+/// cooperation with xray_fdr_logging class, so be careful and think twice.
+namespace __xray_fdr_internal {
+
+/// Writes the new buffer record and wallclock time that begin a buffer for a
+/// thread to MemPtr and increments MemPtr. Bypasses the thread local state
+// machine and writes directly to memory without checks.
+static void writeNewBufferPreamble(pid_t Tid, timespec TS, char *&MemPtr);
+
+/// Write a metadata record to switch to a new CPU to MemPtr and increments
+/// MemPtr. Bypasses the thread local state machine and writes directly to
+/// memory without checks.
+static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC, char *&MemPtr);
+
+/// Writes an EOB metadata record to MemPtr and increments MemPtr. Bypasses the
+/// thread local state machine and writes directly to memory without checks.
+static void writeEOBMetadata(char *&MemPtr);
+
+/// Writes a TSC Wrap metadata record to MemPtr and increments MemPtr. Bypasses
+/// the thread local state machine and directly writes to memory without checks.
+static void writeTSCWrapMetadata(uint64_t TSC, char *&MemPtr);
+
+/// Writes a Function Record to MemPtr and increments MemPtr. Bypasses the
+/// thread local state machine and writes the function record directly to
+/// memory.
+static void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
+ XRayEntryType EntryType, char *&MemPtr);
+
+/// Sets up a new buffer in thread_local storage and writes a preamble. The
+/// wall_clock_reader function is used to populate the WallTimeRecord entry.
+static void setupNewBuffer(const BufferQueue::Buffer &Buffer,
+ int (*wall_clock_reader)(clockid_t,
+ struct timespec *));
+
+/// Called to record CPU time for a new CPU within the current thread.
+static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC);
+
+/// Called to close the buffer when the thread exhausts the buffer or when the
+/// thread exits (via a thread local variable destructor).
+static void writeEOBMetadata();
+
+/// TSC Wrap records are written when a TSC delta encoding scheme overflows.
+static void writeTSCWrapMetadata(uint64_t TSC);
+
+/// Here's where the meat of the processing happens. The writer captures
+/// function entry, exit and tail exit points with a time and will create
+/// TSCWrap, NewCPUId and Function records as necessary. The writer might
+/// walk backward through its buffer and erase trivial functions to avoid
+/// polluting the log and may use the buffer queue to obtain or release a
+/// buffer.
+static void
+processFunctionHook(int32_t FuncId, XRayEntryType Entry, uint64_t TSC,
+ unsigned char CPU,
+ int (*wall_clock_reader)(clockid_t, struct timespec *),
+ const std::atomic<XRayLogInitStatus> &LoggingStatus,
+ const std::shared_ptr<BufferQueue> &BQ);
+
+//-----------------------------------------------------------------------------|
+// The rest of the file is implementation. |
+//-----------------------------------------------------------------------------|
+// Functions are implemented in the header for inlining since we don't want |
+// to grow the stack when we've hijacked the binary for logging. |
+//-----------------------------------------------------------------------------|
+
+namespace {
+thread_local BufferQueue::Buffer Buffer;
+thread_local char *RecordPtr = nullptr;
+
+constexpr auto MetadataRecSize = sizeof(MetadataRecord);
+constexpr auto FunctionRecSize = sizeof(FunctionRecord);
+
+class ThreadExitBufferCleanup {
+ std::weak_ptr<BufferQueue> Buffers;
+ BufferQueue::Buffer &Buffer;
+
+public:
+ explicit ThreadExitBufferCleanup(std::weak_ptr<BufferQueue> BQ,
+ BufferQueue::Buffer &Buffer)
+ XRAY_NEVER_INSTRUMENT : Buffers(BQ),
+ Buffer(Buffer) {}
+
+ ~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT {
+ if (RecordPtr == nullptr)
+ return;
+
+ // We make sure that upon exit, a thread will write out the EOB
+ // MetadataRecord in the thread-local log, and also release the buffer to
+ // the queue.
+ assert((RecordPtr + MetadataRecSize) - static_cast<char *>(Buffer.Buffer) >=
+ static_cast<ptrdiff_t>(MetadataRecSize));
+ if (auto BQ = Buffers.lock()) {
+ writeEOBMetadata();
+ if (auto EC = BQ->releaseBuffer(Buffer))
+ Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
+ EC.message().c_str());
+ return;
+ }
+ }
+};
+
+class RecursionGuard {
+ bool &Running;
+ const bool Valid;
+
+public:
+ explicit RecursionGuard(bool &R) : Running(R), Valid(!R) {
+ if (Valid)
+ Running = true;
+ }
+
+ RecursionGuard(const RecursionGuard &) = delete;
+ RecursionGuard(RecursionGuard &&) = delete;
+ RecursionGuard &operator=(const RecursionGuard &) = delete;
+ RecursionGuard &operator=(RecursionGuard &&) = delete;
+
+ explicit operator bool() const { return Valid; }
+
+ ~RecursionGuard() noexcept {
+ if (Valid)
+ Running = false;
+ }
+};
+
+static inline bool loggingInitialized(
+ const std::atomic<XRayLogInitStatus> &LoggingStatus) XRAY_NEVER_INSTRUMENT {
+ return LoggingStatus.load(std::memory_order_acquire) ==
+ XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+}
+
+} // namespace anonymous
+
+static inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
+ char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+ static constexpr int InitRecordsCount = 2;
+ std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount];
+ {
+ // Write out a MetadataRecord to signify that this is the start of a new
+ // buffer, associated with a particular thread, with a new CPU. For the
+ // data, we have 15 bytes to squeeze as much information as we can. At this
+ // point we only write down the following bytes:
+ // - Thread ID (pid_t, 4 bytes)
+ auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(&Records[0]);
+ NewBuffer.Type = uint8_t(RecordType::Metadata);
+ NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
+ std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
+ }
+ // Also write the WalltimeMarker record.
+ {
+ static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
+ auto &WalltimeMarker = *reinterpret_cast<MetadataRecord *>(&Records[1]);
+ WalltimeMarker.Type = uint8_t(RecordType::Metadata);
+ WalltimeMarker.RecordKind =
+ uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
+
+ // We only really need microsecond precision here, and enforce across
+ // platforms that we need 64-bit seconds and 32-bit microseconds encoded in
+ // the Metadata record.
+ int32_t Micros = TS.tv_nsec / 1000;
+ int64_t Seconds = TS.tv_sec;
+ std::memcpy(WalltimeMarker.Data, &Seconds, sizeof(Seconds));
+ std::memcpy(WalltimeMarker.Data + sizeof(Seconds), &Micros, sizeof(Micros));
+ }
+ std::memcpy(MemPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
+ MemPtr += sizeof(MetadataRecord) * InitRecordsCount;
+}
+
+static inline void setupNewBuffer(const BufferQueue::Buffer &Buffer,
+ int (*wall_clock_reader)(clockid_t,
+ struct timespec *))
+ XRAY_NEVER_INSTRUMENT {
+ RecordPtr = static_cast<char *>(Buffer.Buffer);
+ pid_t Tid = syscall(SYS_gettid);
+ timespec TS{0, 0};
+ // This is typically clock_gettime, but callers have injection ability.
+ wall_clock_reader(CLOCK_MONOTONIC, &TS);
+ writeNewBufferPreamble(Tid, TS, RecordPtr);
+}
+
+static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
+ char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+ MetadataRecord NewCPUId;
+ NewCPUId.Type = uint8_t(RecordType::Metadata);
+ NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId);
+
+ // The data for the New CPU will contain the following bytes:
+ // - CPU ID (uint16_t, 2 bytes)
+ // - Full TSC (uint64_t, 8 bytes)
+ // Total = 12 bytes.
+ std::memcpy(&NewCPUId.Data, &CPU, sizeof(CPU));
+ std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
+ std::memcpy(MemPtr, &NewCPUId, sizeof(MetadataRecord));
+ MemPtr += sizeof(MetadataRecord);
+}
+
+static inline void writeNewCPUIdMetadata(uint16_t CPU,
+ uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+ writeNewCPUIdMetadata(CPU, TSC, RecordPtr);
+}
+
+static inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+ MetadataRecord EOBMeta;
+ EOBMeta.Type = uint8_t(RecordType::Metadata);
+ EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
+ // For now we don't write any bytes into the Data field.
+ std::memcpy(MemPtr, &EOBMeta, sizeof(MetadataRecord));
+ MemPtr += sizeof(MetadataRecord);
+}
+
+static inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
+ writeEOBMetadata(RecordPtr);
+}
+
+static inline void writeTSCWrapMetadata(uint64_t TSC,
+ char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+ MetadataRecord TSCWrap;
+ TSCWrap.Type = uint8_t(RecordType::Metadata);
+ TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap);
+
+ // The data for the TSCWrap record contains the following bytes:
+ // - Full TSC (uint64_t, 8 bytes)
+ // Total = 8 bytes.
+ std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
+ std::memcpy(MemPtr, &TSCWrap, sizeof(MetadataRecord));
+ MemPtr += sizeof(MetadataRecord);
+}
+
+static inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+ writeTSCWrapMetadata(TSC, RecordPtr);
+}
+
+static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
+ XRayEntryType EntryType,
+ char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+ std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type
+ AlignedFuncRecordBuffer;
+ auto &FuncRecord =
+ *reinterpret_cast<FunctionRecord *>(&AlignedFuncRecordBuffer);
+ FuncRecord.Type = uint8_t(RecordType::Function);
+ // Only take 28 bits of the function id.
+ FuncRecord.FuncId = FuncId & ~(0x0F << 28);
+ FuncRecord.TSCDelta = TSCDelta;
+
+ switch (EntryType) {
+ case XRayEntryType::ENTRY:
+ case XRayEntryType::LOG_ARGS_ENTRY:
+ FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
+ break;
+ case XRayEntryType::EXIT:
+ FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
+ break;
+ case XRayEntryType::TAIL:
+ FuncRecord.RecordKind =
+ uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
+ break;
+ }
+
+ std::memcpy(MemPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord));
+ MemPtr += sizeof(FunctionRecord);
+}
+
+static inline void processFunctionHook(
+ int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU,
+ int (*wall_clock_reader)(clockid_t, struct timespec *),
+ const std::atomic<XRayLogInitStatus> &LoggingStatus,
+ const std::shared_ptr<BufferQueue> &BQ) XRAY_NEVER_INSTRUMENT {
+ // Bail out right away if logging is not initialized yet.
+ if (LoggingStatus.load(std::memory_order_acquire) !=
+ XRayLogInitStatus::XRAY_LOG_INITIALIZED)
+ return;
+
+ // We use a thread_local variable to keep track of which CPUs we've already
+ // run, and the TSC times for these CPUs. This allows us to stop repeating the
+ // CPU field in the function records.
+ //
+ // We assume that we'll support only 65536 CPUs for x86_64.
+ thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
+ thread_local uint64_t LastTSC = 0;
+
+ // Make sure a thread that's ever called handleArg0 has a thread-local
+ // live reference to the buffer queue for this particular instance of
+ // FDRLogging, and that we're going to clean it up when the thread exits.
+ thread_local auto LocalBQ = BQ;
+ thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer);
+
+ // Prevent signal handler recursion, so in case we're already in a log writing
+ // mode and the signal handler comes in (and is also instrumented) then we
+ // don't want to be clobbering potentially partial writes already happening in
+ // the thread. We use a simple thread_local latch to only allow one on-going
+ // handleArg0 to happen at any given time.
+ thread_local bool Running = false;
+ RecursionGuard Guard{Running};
+ if (!Guard) {
+ assert(Running == true && "RecursionGuard is buggy!");
+ return;
+ }
+
+ if (!loggingInitialized(LoggingStatus) || LocalBQ->finalizing()) {
+ writeEOBMetadata();
+ if (auto EC = BQ->releaseBuffer(Buffer)) {
+ Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
+ EC.message().c_str());
+ return;
+ }
+ RecordPtr = nullptr;
+ }
+
+ if (Buffer.Buffer == nullptr) {
+ if (auto EC = LocalBQ->getBuffer(Buffer)) {
+ auto LS = LoggingStatus.load(std::memory_order_acquire);
+ if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING &&
+ LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
+ Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str());
+ return;
+ }
+
+ setupNewBuffer(Buffer, wall_clock_reader);
+ }
+
+ if (CurrentCPU == std::numeric_limits<uint16_t>::max()) {
+ // This means this is the first CPU this thread has ever run on. We set the
+ // current CPU and record this as the first TSC we've seen.
+ CurrentCPU = CPU;
+ writeNewCPUIdMetadata(CPU, TSC);
+ }
+
+ // Before we go setting up writing new function entries, we need to be really
+ // careful about the pointer math we're doing. This means we need to ensure
+ // that the record we are about to write is going to fit into the buffer,
+ // without overflowing the buffer.
+ //
+ // To do this properly, we use the following assumptions:
+ //
+ // - The least number of bytes we will ever write is 8
+ // (sizeof(FunctionRecord)) only if the delta between the previous entry
+ // and this entry is within 32 bits.
+ // - The most number of bytes we will ever write is 8 + 16 = 24. This is
+ // computed by:
+ //
+ // sizeof(FunctionRecord) + sizeof(MetadataRecord)
+ //
+ // These arise in the following cases:
+ //
+ // 1. When the delta between the TSC we get and the previous TSC for the
+ // same CPU is outside of the uint32_t range, we end up having to
+ // write a MetadataRecord to indicate a "tsc wrap" before the actual
+ // FunctionRecord.
+ // 2. When we learn that we've moved CPUs, we need to write a
+ // MetadataRecord to indicate a "cpu change", and thus write out the
+ // current TSC for that CPU before writing out the actual
+ // FunctionRecord.
+ // 3. When we learn about a new CPU ID, we need to write down a "new cpu
+ // id" MetadataRecord before writing out the actual FunctionRecord.
+ //
+ // - An End-of-Buffer (EOB) MetadataRecord is 16 bytes.
+ //
+ // So the math we need to do is to determine whether writing 24 bytes past the
+ // current pointer leaves us with enough bytes to write the EOB
+ // MetadataRecord. If we don't have enough space after writing as much as 24
+ // bytes in the end of the buffer, we need to write out the EOB, get a new
+ // Buffer, set it up properly before doing any further writing.
+ //
+ char *BufferStart = static_cast<char *>(Buffer.Buffer);
+ if ((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart <
+ static_cast<ptrdiff_t>(MetadataRecSize)) {
+ writeEOBMetadata();
+ if (auto EC = LocalBQ->releaseBuffer(Buffer)) {
+ Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
+ EC.message().c_str());
+ return;
+ }
+ if (auto EC = LocalBQ->getBuffer(Buffer)) {
+ Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str());
+ return;
+ }
+ setupNewBuffer(Buffer, wall_clock_reader);
+ }
+
+ // By this point, we are now ready to write at most 24 bytes (one metadata
+ // record and one function record).
+ BufferStart = static_cast<char *>(Buffer.Buffer);
+ assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart >=
+ static_cast<ptrdiff_t>(MetadataRecSize) &&
+ "Misconfigured BufferQueue provided; Buffer size not large enough.");
+
+ // Here we compute the TSC Delta. There are a few interesting situations we
+ // need to account for:
+ //
+ // - The thread has migrated to a different CPU. If this is the case, then
+ // we write down the following records:
+ //
+ // 1. A 'NewCPUId' Metadata record.
+ // 2. A FunctionRecord with a 0 for the TSCDelta field.
+ //
+ // - The TSC delta is greater than the 32 bits we can store in a
+ // FunctionRecord. In this case we write down the following records:
+ //
+ // 1. A 'TSCWrap' Metadata record.
+ // 2. A FunctionRecord with a 0 for the TSCDelta field.
+ //
+ // - The TSC delta is representable within the 32 bits we can store in a
+ // FunctionRecord. In this case we write down just a FunctionRecord with
+ // the correct TSC delta.
+ //
+
+ uint32_t RecordTSCDelta = 0;
+ if (CPU != CurrentCPU) {
+ // We've moved to a new CPU.
+ writeNewCPUIdMetadata(CPU, TSC);
+ } else {
+ // If the delta is greater than the range for a uint32_t, then we write out
+ // the TSC wrap metadata entry with the full TSC, and the TSC for the
+ // function record be 0.
+ auto Delta = TSC - LastTSC;
+ if (Delta > (1ULL << 32) - 1)
+ writeTSCWrapMetadata(TSC);
+ else
+ RecordTSCDelta = Delta;
+ }
+
+ // We then update our "LastTSC" and "CurrentCPU" thread-local variables to aid
+ // us in future computations of this TSC delta value.
+ LastTSC = TSC;
+ CurrentCPU = CPU;
+
+ writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr);
+
+ // If we've exhausted the buffer by this time, we then release the buffer to
+ // make sure that other threads may start using this buffer.
+ if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
+ writeEOBMetadata();
+ if (auto EC = LocalBQ->releaseBuffer(Buffer)) {
+ Report("Failed releasing buffer at %p; error=%s\n", Buffer.Buffer,
+ EC.message().c_str());
+ return;
+ }
+ RecordPtr = nullptr;
+ }
+}
+
+} // namespace __xray_fdr_internal
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_FDR_LOGGING_IMPL_H
OpenPOWER on IntegriCloud