diff options
author | Dean Michael Berris <dberris@google.com> | 2018-11-05 05:43:22 +0000 |
---|---|---|
committer | Dean Michael Berris <dberris@google.com> | 2018-11-05 05:43:22 +0000 |
commit | 1e255e7a7b3752ef7f786506e695f0f7b58e2339 (patch) | |
tree | 4207c46d48c69f0b282489ca2253d0d9c77e9075 /compiler-rt/lib/xray/tests | |
parent | 30b627e5c9a4bec17cd1ee53533fa685913971ce (diff) | |
download | bcm5719-llvm-1e255e7a7b3752ef7f786506e695f0f7b58e2339.tar.gz bcm5719-llvm-1e255e7a7b3752ef7f786506e695f0f7b58e2339.zip |
[XRay] Update TSC math to handle wraparound
Summary:
Prior to this change, we could run into situations where the TSC we
get when exiting a function is less than the TSC we got when
entering it. This would sometimes cause the counter for cumulative call
times to overflow; that counter was also erroneously being stored as a
signed 64-bit integer.
This change addresses both these issues while adding provisions for
tracking CPU migrations. We do this because, on some architectures, the
timestamp counters are not guaranteed to be synchronised when moving
from one CPU to another. For the moment, we leave the provisions in
place until we can update the data format to include a count of the CPU
migrations we can detect.
We update the necessary tests as well, ensuring that our expectations
for the cycle accounting are met in case of counter wraparound.
Reviewers: mboerger
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D54088
llvm-svn: 346116
Diffstat (limited to 'compiler-rt/lib/xray/tests')
-rw-r--r-- | compiler-rt/lib/xray/tests/unit/function_call_trie_test.cc | 138 | ||||
-rw-r--r-- | compiler-rt/lib/xray/tests/unit/profile_collector_test.cc | 16 |
2 files changed, 91 insertions, 63 deletions
diff --git a/compiler-rt/lib/xray/tests/unit/function_call_trie_test.cc b/compiler-rt/lib/xray/tests/unit/function_call_trie_test.cc index 049ecfb07e0..9b0f21090fb 100644 --- a/compiler-rt/lib/xray/tests/unit/function_call_trie_test.cc +++ b/compiler-rt/lib/xray/tests/unit/function_call_trie_test.cc @@ -10,9 +10,9 @@ // This file is a part of XRay, a function call tracing system. // //===----------------------------------------------------------------------===// -#include "gtest/gtest.h" - #include "xray_function_call_trie.h" +#include "gtest/gtest.h" +#include <cstdint> namespace __xray { @@ -29,26 +29,54 @@ TEST(FunctionCallTrieTest, EnterAndExitFunction) { auto A = FunctionCallTrie::InitAllocators(); FunctionCallTrie Trie(A); - Trie.enterFunction(1, 1); - Trie.exitFunction(1, 2); + uint64_t TSC = 1; + uint16_t CPU = 0; + Trie.enterFunction(1, TSC++, CPU++); + Trie.exitFunction(1, TSC++, CPU++); + const auto &R = Trie.getRoots(); - // We need a way to pull the data out. At this point, until we get a data - // collection service implemented, we're going to export the data as a list of - // roots, and manually walk through the structure ourselves. 
+ ASSERT_EQ(R.size(), 1u); + ASSERT_EQ(R.front()->FId, 1); + ASSERT_EQ(R.front()->CallCount, 1u); + ASSERT_EQ(R.front()->CumulativeLocalTime, 1u); +} + +TEST(FunctionCallTrieTest, HandleTSCOverflow) { + profilingFlags()->setDefaults(); + auto A = FunctionCallTrie::InitAllocators(); + FunctionCallTrie Trie(A); + Trie.enterFunction(1, std::numeric_limits<uint64_t>::max(), 0); + Trie.exitFunction(1, 1, 0); const auto &R = Trie.getRoots(); ASSERT_EQ(R.size(), 1u); ASSERT_EQ(R.front()->FId, 1); - ASSERT_EQ(R.front()->CallCount, 1); + ASSERT_EQ(R.front()->CallCount, 1u); ASSERT_EQ(R.front()->CumulativeLocalTime, 1u); } +TEST(FunctionCallTrieTest, MaximalCumulativeTime) { + profilingFlags()->setDefaults(); + auto A = FunctionCallTrie::InitAllocators(); + FunctionCallTrie Trie(A); + + Trie.enterFunction(1, 1, 0); + Trie.exitFunction(1, 0, 0); + const auto &R = Trie.getRoots(); + + ASSERT_EQ(R.size(), 1u); + ASSERT_EQ(R.front()->FId, 1); + ASSERT_EQ(R.front()->CallCount, 1u); + ASSERT_EQ(R.front()->CumulativeLocalTime, + std::numeric_limits<uint64_t>::max() - 1); +} + TEST(FunctionCallTrieTest, MissingFunctionEntry) { profilingFlags()->setDefaults(); auto A = FunctionCallTrie::InitAllocators(); FunctionCallTrie Trie(A); - Trie.exitFunction(1, 1); + Trie.exitFunction(1, 1, 0); const auto &R = Trie.getRoots(); ASSERT_TRUE(R.empty()); @@ -58,9 +86,9 @@ TEST(FunctionCallTrieTest, NoMatchingEntersForExit) { profilingFlags()->setDefaults(); auto A = FunctionCallTrie::InitAllocators(); FunctionCallTrie Trie(A); - Trie.enterFunction(2, 1); - Trie.enterFunction(3, 3); - Trie.exitFunction(1, 5); + Trie.enterFunction(2, 1, 0); + Trie.enterFunction(3, 3, 0); + Trie.exitFunction(1, 5, 0); const auto &R = Trie.getRoots(); ASSERT_FALSE(R.empty()); @@ -71,7 +99,7 @@ TEST(FunctionCallTrieTest, MissingFunctionExit) { profilingFlags()->setDefaults(); auto A = FunctionCallTrie::InitAllocators(); FunctionCallTrie Trie(A); - Trie.enterFunction(1, 1); + Trie.enterFunction(1, 1, 0); const auto &R 
= Trie.getRoots(); ASSERT_FALSE(R.empty()); @@ -84,12 +112,12 @@ TEST(FunctionCallTrieTest, MultipleRoots) { FunctionCallTrie Trie(A); // Enter and exit FId = 1. - Trie.enterFunction(1, 1); - Trie.exitFunction(1, 2); + Trie.enterFunction(1, 1, 0); + Trie.exitFunction(1, 2, 0); // Enter and exit FId = 2. - Trie.enterFunction(2, 3); - Trie.exitFunction(2, 4); + Trie.enterFunction(2, 3, 0); + Trie.exitFunction(2, 4, 0); const auto &R = Trie.getRoots(); ASSERT_FALSE(R.empty()); @@ -126,11 +154,11 @@ TEST(FunctionCallTrieTest, MissingIntermediaryExit) { auto A = FunctionCallTrie::InitAllocators(); FunctionCallTrie Trie(A); - Trie.enterFunction(1, 0); - Trie.enterFunction(2, 100); - Trie.enterFunction(3, 200); - Trie.exitFunction(3, 300); - Trie.exitFunction(1, 400); + Trie.enterFunction(1, 0, 0); + Trie.enterFunction(2, 100, 0); + Trie.enterFunction(3, 200, 0); + Trie.exitFunction(3, 300, 0); + Trie.exitFunction(1, 400, 0); // What we should see at this point is all the functions in the trie in a // specific order (1 -> 2 -> 3) with the appropriate count(s) and local @@ -153,12 +181,12 @@ TEST(FunctionCallTrieTest, MissingIntermediaryExit) { // Now that we've established the preconditions, we check for specific aspects // of the nodes. 
- EXPECT_EQ(F3.CallCount, 1); - EXPECT_EQ(F2.CallCount, 1); - EXPECT_EQ(F1.CallCount, 1); - EXPECT_EQ(F3.CumulativeLocalTime, 100); - EXPECT_EQ(F2.CumulativeLocalTime, 300); - EXPECT_EQ(F1.CumulativeLocalTime, 100); + EXPECT_EQ(F3.CallCount, 1u); + EXPECT_EQ(F2.CallCount, 1u); + EXPECT_EQ(F1.CallCount, 1u); + EXPECT_EQ(F3.CumulativeLocalTime, 100u); + EXPECT_EQ(F2.CumulativeLocalTime, 300u); + EXPECT_EQ(F1.CumulativeLocalTime, 100u); } TEST(FunctionCallTrieTest, DeepCallStack) { @@ -168,8 +196,8 @@ TEST(FunctionCallTrieTest, DeepCallStack) { auto A = FunctionCallTrie::InitAllocators(); FunctionCallTrie Trie(A); for (int i = 0; i < 32; ++i) - Trie.enterFunction(i + 1, i); - Trie.exitFunction(1, 33); + Trie.enterFunction(i + 1, i, 0); + Trie.exitFunction(1, 33, 0); // Here, validate that we have a 32-level deep function call path from the // root (1) down to the leaf (33). @@ -178,7 +206,7 @@ TEST(FunctionCallTrieTest, DeepCallStack) { auto F = R[0]; for (int i = 0; i < 32; ++i) { EXPECT_EQ(F->FId, i + 1); - EXPECT_EQ(F->CallCount, 1); + EXPECT_EQ(F->CallCount, 1u); if (F->Callees.empty() && i != 31) FAIL() << "Empty callees for FId " << F->FId; if (i != 31) @@ -193,12 +221,12 @@ TEST(FunctionCallTrieTest, DeepCopy) { auto A = FunctionCallTrie::InitAllocators(); FunctionCallTrie Trie(A); - Trie.enterFunction(1, 0); - Trie.enterFunction(2, 1); - Trie.exitFunction(2, 2); - Trie.enterFunction(3, 3); - Trie.exitFunction(3, 4); - Trie.exitFunction(1, 5); + Trie.enterFunction(1, 0, 0); + Trie.enterFunction(2, 1, 0); + Trie.exitFunction(2, 2, 0); + Trie.enterFunction(3, 3, 0); + Trie.exitFunction(3, 4, 0); + Trie.exitFunction(1, 5, 0); // We want to make a deep copy and compare notes. 
auto B = FunctionCallTrie::InitAllocators(); @@ -236,20 +264,20 @@ TEST(FunctionCallTrieTest, MergeInto) { FunctionCallTrie T1(A); // 1 -> 2 -> 3 - T0.enterFunction(1, 0); - T0.enterFunction(2, 1); - T0.enterFunction(3, 2); - T0.exitFunction(3, 3); - T0.exitFunction(2, 4); - T0.exitFunction(1, 5); + T0.enterFunction(1, 0, 0); + T0.enterFunction(2, 1, 0); + T0.enterFunction(3, 2, 0); + T0.exitFunction(3, 3, 0); + T0.exitFunction(2, 4, 0); + T0.exitFunction(1, 5, 0); // 1 -> 2 -> 3 - T1.enterFunction(1, 0); - T1.enterFunction(2, 1); - T1.enterFunction(3, 2); - T1.exitFunction(3, 3); - T1.exitFunction(2, 4); - T1.exitFunction(1, 5); + T1.enterFunction(1, 0, 0); + T1.enterFunction(2, 1, 0); + T1.enterFunction(3, 2, 0); + T1.exitFunction(3, 3, 0); + T1.exitFunction(2, 4, 0); + T1.exitFunction(1, 5, 0); // We use a different allocator here to make sure that we're able to transfer // data into a FunctionCallTrie which uses a different allocator. This @@ -264,20 +292,20 @@ TEST(FunctionCallTrieTest, MergeInto) { ASSERT_EQ(Merged.getRoots().size(), 1u); const auto &R0 = *Merged.getRoots()[0]; EXPECT_EQ(R0.FId, 1); - EXPECT_EQ(R0.CallCount, 2); - EXPECT_EQ(R0.CumulativeLocalTime, 10); + EXPECT_EQ(R0.CallCount, 2u); + EXPECT_EQ(R0.CumulativeLocalTime, 10u); EXPECT_EQ(R0.Callees.size(), 1u); const auto &F1 = *R0.Callees[0].NodePtr; EXPECT_EQ(F1.FId, 2); - EXPECT_EQ(F1.CallCount, 2); - EXPECT_EQ(F1.CumulativeLocalTime, 6); + EXPECT_EQ(F1.CallCount, 2u); + EXPECT_EQ(F1.CumulativeLocalTime, 6u); EXPECT_EQ(F1.Callees.size(), 1u); const auto &F2 = *F1.Callees[0].NodePtr; EXPECT_EQ(F2.FId, 3); - EXPECT_EQ(F2.CallCount, 2); - EXPECT_EQ(F2.CumulativeLocalTime, 2); + EXPECT_EQ(F2.CallCount, 2u); + EXPECT_EQ(F2.CumulativeLocalTime, 2u); EXPECT_EQ(F2.Callees.size(), 0u); } diff --git a/compiler-rt/lib/xray/tests/unit/profile_collector_test.cc b/compiler-rt/lib/xray/tests/unit/profile_collector_test.cc index 67049af2cd5..f06b7027ee1 100644 --- 
a/compiler-rt/lib/xray/tests/unit/profile_collector_test.cc +++ b/compiler-rt/lib/xray/tests/unit/profile_collector_test.cc @@ -121,10 +121,10 @@ TEST(profileCollectorServiceTest, PostSerializeCollect) { FunctionCallTrie T(Allocators); // Then, we populate the trie with some data. - T.enterFunction(1, 1); - T.enterFunction(2, 2); - T.exitFunction(2, 3); - T.exitFunction(1, 4); + T.enterFunction(1, 1, 0); + T.enterFunction(2, 2, 0); + T.exitFunction(2, 3, 0); + T.exitFunction(1, 4, 0); // Then we post the data to the global profile collector service. profileCollectorService::post(T, 1); @@ -177,10 +177,10 @@ void threadProcessing() { thread_local auto Allocators = FunctionCallTrie::InitAllocators(); FunctionCallTrie T(Allocators); - T.enterFunction(1, 1); - T.enterFunction(2, 2); - T.exitFunction(2, 3); - T.exitFunction(1, 4); + T.enterFunction(1, 1, 0); + T.enterFunction(2, 2, 0); + T.exitFunction(2, 3, 0); + T.exitFunction(1, 4, 0); profileCollectorService::post(T, GetTid()); } |