summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Support/TimeProfiler.h6
-rw-r--r--llvm/lib/Support/TimeProfiler.cpp97
2 files changed, 85 insertions, 18 deletions
diff --git a/llvm/include/llvm/Support/TimeProfiler.h b/llvm/include/llvm/Support/TimeProfiler.h
index 678f8c13681..f43893a6d93 100644
--- a/llvm/include/llvm/Support/TimeProfiler.h
+++ b/llvm/include/llvm/Support/TimeProfiler.h
@@ -14,7 +14,8 @@
namespace llvm {
struct TimeTraceProfiler;
-extern TimeTraceProfiler *TimeTraceProfilerInstance;
+extern thread_local std::unique_ptr<TimeTraceProfiler>
+ TimeTraceProfilerInstance;
/// Initialize the time trace profiler.
/// This sets up the global \p TimeTraceProfilerInstance
@@ -25,6 +26,9 @@ void timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
/// Cleanup the time trace profiler, if it was initialized.
void timeTraceProfilerCleanup();
+/// Finish a time trace profiler running on a worker thread.
+void timeTraceProfilerFinishThread();
+
/// Is the time trace profiler enabled, i.e. initialized?
inline bool timeTraceProfilerEnabled() {
return TimeTraceProfilerInstance != nullptr;
diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp
index a7c85509064..f2ede75ccf9 100644
--- a/llvm/lib/Support/TimeProfiler.cpp
+++ b/llvm/lib/Support/TimeProfiler.cpp
@@ -15,6 +15,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Threading.h"
+#include <algorithm>
#include <cassert>
#include <chrono>
#include <string>
@@ -22,9 +24,16 @@
using namespace std::chrono;
+namespace {
+std::mutex Mu;
+std::vector<std::unique_ptr<llvm::TimeTraceProfiler>>
+ ThreadTimeTraceProfilerInstances; // guarded by Mu
+} // namespace
+
namespace llvm {
-TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
+thread_local std::unique_ptr<TimeTraceProfiler> TimeTraceProfilerInstance =
+ nullptr;
typedef duration<steady_clock::rep, steady_clock::period> DurationType;
typedef time_point<steady_clock> TimePointType;
@@ -61,7 +70,7 @@ struct Entry {
struct TimeTraceProfiler {
TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
: StartTime(steady_clock::now()), ProcName(ProcName),
- TimeTraceGranularity(TimeTraceGranularity) {}
+ Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {}
void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
@@ -103,22 +112,31 @@ struct TimeTraceProfiler {
Stack.pop_back();
}
+ // Write events from this TimeTraceProfilerInstance and
+ // ThreadTimeTraceProfilerInstances.
void Write(raw_pwrite_stream &OS) {
+ // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
+ std::lock_guard<std::mutex> Lock(Mu);
assert(Stack.empty() &&
"All profiler sections should be ended when calling Write");
+ assert(std::all_of(ThreadTimeTraceProfilerInstances.begin(),
+ ThreadTimeTraceProfilerInstances.end(),
+ [](const auto &TTP) { return TTP->Stack.empty(); }) &&
+ "All profiler sections should be ended when calling Write");
+
json::OStream J(OS);
J.objectBegin();
J.attributeBegin("traceEvents");
J.arrayBegin();
// Emit all events for the main flame graph.
- for (const auto &E : Entries) {
+ auto writeEvent = [&](auto &E, uint64_t Tid) {
auto StartUs = E.getFlameGraphStartUs(StartTime);
auto DurUs = E.getFlameGraphDurUs();
J.object([&]{
J.attribute("pid", 1);
- J.attribute("tid", 0);
+ J.attribute("tid", int64_t(Tid));
J.attribute("ph", "X");
J.attribute("ts", StartUs);
J.attribute("dur", DurUs);
@@ -127,39 +145,73 @@ struct TimeTraceProfiler {
J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
}
});
+ };
+ for (const auto &E : Entries) {
+ writeEvent(E, this->Tid);
+ }
+ for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
+ for (const auto &E : TTP->Entries) {
+ writeEvent(E, TTP->Tid);
+ }
}
// Emit totals by section name as additional "thread" events, sorted from
// longest one.
- int Tid = 1;
+ // Find highest used thread id.
+ uint64_t MaxTid = this->Tid;
+ for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
+ MaxTid = std::max(MaxTid, TTP->Tid);
+ }
+
+ // Combine all CountAndTotalPerName from threads into one.
+ StringMap<CountAndDurationType> AllCountAndTotalPerName;
+ auto combineStat = [&](auto &Stat) {
+ std::string Key = Stat.getKey();
+ auto Value = Stat.getValue();
+ auto &CountAndTotal = AllCountAndTotalPerName[Key];
+ CountAndTotal.first += Value.first;
+ CountAndTotal.second += Value.second;
+ };
+ for (const auto &Stat : CountAndTotalPerName) {
+ combineStat(Stat);
+ }
+ for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
+ for (const auto &Stat : TTP->CountAndTotalPerName) {
+ combineStat(Stat);
+ }
+ }
+
std::vector<NameAndCountAndDurationType> SortedTotals;
- SortedTotals.reserve(CountAndTotalPerName.size());
- for (const auto &E : CountAndTotalPerName)
- SortedTotals.emplace_back(E.getKey(), E.getValue());
+ SortedTotals.reserve(AllCountAndTotalPerName.size());
+ for (const auto &Total : AllCountAndTotalPerName)
+ SortedTotals.emplace_back(Total.getKey(), Total.getValue());
llvm::sort(SortedTotals.begin(), SortedTotals.end(),
[](const NameAndCountAndDurationType &A,
const NameAndCountAndDurationType &B) {
return A.second.second > B.second.second;
});
- for (const auto &E : SortedTotals) {
- auto DurUs = duration_cast<microseconds>(E.second.second).count();
- auto Count = CountAndTotalPerName[E.first].first;
+
+ // Report totals on separate threads of tracing file.
+ uint64_t TotalTid = MaxTid + 1;
+ for (const auto &Total : SortedTotals) {
+ auto DurUs = duration_cast<microseconds>(Total.second.second).count();
+ auto Count = AllCountAndTotalPerName[Total.first].first;
J.object([&]{
J.attribute("pid", 1);
- J.attribute("tid", Tid);
+ J.attribute("tid", int64_t(TotalTid));
J.attribute("ph", "X");
J.attribute("ts", 0);
J.attribute("dur", DurUs);
- J.attribute("name", "Total " + E.first);
+ J.attribute("name", "Total " + Total.first);
J.attributeObject("args", [&] {
J.attribute("count", int64_t(Count));
J.attribute("avg ms", int64_t(DurUs / Count / 1000));
});
});
- ++Tid;
+ ++TotalTid;
}
// Emit metadata event with process name.
@@ -183,6 +235,7 @@ struct TimeTraceProfiler {
StringMap<CountAndDurationType> CountAndTotalPerName;
const TimePointType StartTime;
const std::string ProcName;
+ const uint64_t Tid;
// Minimum time granularity (in microseconds)
const unsigned TimeTraceGranularity;
@@ -192,13 +245,23 @@ void timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
StringRef ProcName) {
assert(TimeTraceProfilerInstance == nullptr &&
"Profiler should not be initialized");
- TimeTraceProfilerInstance = new TimeTraceProfiler(
+ TimeTraceProfilerInstance = std::make_unique<TimeTraceProfiler>(
TimeTraceGranularity, llvm::sys::path::filename(ProcName));
}
+// Removes all TimeTraceProfilerInstances.
void timeTraceProfilerCleanup() {
- delete TimeTraceProfilerInstance;
- TimeTraceProfilerInstance = nullptr;
+ TimeTraceProfilerInstance.reset();
+ std::lock_guard<std::mutex> Lock(Mu);
+ ThreadTimeTraceProfilerInstances.clear();
+}
+
+// Finish TimeTraceProfilerInstance on a worker thread.
+// This doesn't remove the instance, just moves the pointer to global vector.
+void timeTraceProfilerFinishThread() {
+ std::lock_guard<std::mutex> Lock(Mu);
+ ThreadTimeTraceProfilerInstances.push_back(
+ std::move(TimeTraceProfilerInstance));
}
void timeTraceProfilerWrite(raw_pwrite_stream &OS) {
OpenPOWER on IntegriCloud