diff options
Diffstat (limited to 'llvm/lib/ProfileData/SampleProfReader.cpp')
-rw-r--r-- | llvm/lib/ProfileData/SampleProfReader.cpp | 233 |
1 files changed, 168 insertions, 65 deletions
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 70964cb27e1..9b579e6319d 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -25,14 +25,22 @@ // Each section has the following format // // function1:total_samples:total_head_samples -// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ] -// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ] -// ... -// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ] +// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ] +// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ] +// ... +// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ] +// offsetA[.discriminator]: fnA:num_of_total_samples +// offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ] +// ... // -// The file may contain blank lines between sections and within a -// section. However, the spacing within a single line is fixed. Additional -// spaces will result in an error while reading the file. +// This is a nested tree in which the identation represent the nest level +// of the inline stack. There is no blank line in the file. And the spacing +// within a single line is fixed. Additional spaces will result in an error +// while reading the file. +// +// Inline stack is a stack of source locations in which the top of the stack +// represents the leaf function, and the bottom of the stack represents the +// actual symbol in which the instruction belongs. // // Function names must be mangled in order for the profile loader to // match them in the current translation unit. The two numbers in the @@ -41,6 +49,11 @@ // in the prologue of the function (second number). This head sample // count provides an indicator of how frequently the function is invoked. // +// There are two types of lines in the function body. +// +// * Sampled line represents the profile information of a source location. +// * Callsite line represents the profile inofrmation of a callsite. +// // Each sampled line may contain several items. Some are optional (marked // below): // @@ -92,6 +105,16 @@ // instruction that calls one of ``foo()``, ``bar()`` and ``baz()``, // with ``baz()`` being the relatively more frequently called target. // +// Each callsite line may contain several items. Some are optional. +// +// a. Source line offset. This number represents the line number of the +// callsite that is inlined in the profiled binary. +// +// b. [OPTIONAL] Discriminator. Same as the discriminator for sampled line. +// +// c. Number of samples. This is an integer quantity representing the +// total number of samples collected for the inlined instance at this +// callsite //===----------------------------------------------------------------------===// #include "llvm/ProfileData/SampleProfReader.h" @@ -100,7 +123,8 @@ #include "llvm/Support/LEB128.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" using namespace llvm::sampleprof; using namespace llvm; @@ -143,6 +167,96 @@ void SampleProfileReader::dump(raw_ostream &OS) { dumpFunctionProfile(I.getKey(), OS); } +/// \brief Parse \p Input as function head. +/// +/// Parse one line of \p Input, and update function name in \p FName, +/// function's total sample count in \p NumSamples, function's entry +/// count in \p NumHeadSamples. +/// +/// \returns true if parsing is successful. +static bool ParseHead(const StringRef &Input, StringRef &FName, + unsigned &NumSamples, unsigned &NumHeadSamples) { + if (Input[0] == ' ') + return false; + size_t n2 = Input.rfind(':'); + size_t n1 = Input.rfind(':', n2 - 1); + FName = Input.substr(0, n1); + if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples)) + return false; + if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples)) + return false; + return true; +} + +/// \brief Parse \p Input as line sample. +/// +/// \param Input input line. +/// \param IsCallsite true if the line represents an inlined callsite. +/// \param Depth the depth of the inline stack. +/// \param NumSamples total samples of the line/inlined callsite. +/// \param LineOffset line offset to the start of the function. +/// \param Discriminator discriminator of the line. +/// \param TargetCountMap map from indirect call target to count. +/// +/// returns true if parsing is successful. +static bool ParseLine(const StringRef &Input, bool &IsCallsite, unsigned &Depth, + unsigned &NumSamples, unsigned &LineOffset, + unsigned &Discriminator, StringRef &CalleeName, + DenseMap<StringRef, unsigned> &TargetCountMap) { + for (Depth = 0; Input[Depth] == ' '; Depth++) + ; + if (Depth == 0) + return false; + + size_t n1 = Input.find(':'); + StringRef Loc = Input.substr(Depth, n1 - Depth); + size_t n2 = Loc.find('.'); + if (n2 == StringRef::npos) { + if (Loc.getAsInteger(10, LineOffset)) + return false; + Discriminator = 0; + } else { + if (Loc.substr(0, n2).getAsInteger(10, LineOffset)) + return false; + if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator)) + return false; + } + + StringRef Rest = Input.substr(n1 + 2); + if (Rest[0] >= '0' && Rest[0] <= '9') { + IsCallsite = false; + size_t n3 = Rest.find(' '); + if (n3 == StringRef::npos) { + if (Rest.getAsInteger(10, NumSamples)) + return false; + } else { + if (Rest.substr(0, n3).getAsInteger(10, NumSamples)) + return false; + } + while (n3 != StringRef::npos) { + n3 += Rest.substr(n3).find_first_not_of(' '); + Rest = Rest.substr(n3); + n3 = Rest.find(' '); + StringRef pair = Rest; + if (n3 != StringRef::npos) { + pair = Rest.substr(0, n3); + } + int n4 = pair.find(':'); + unsigned count; + if (pair.substr(n4 + 1).getAsInteger(10, count)) + return false; + TargetCountMap[pair.substr(0, n4)] = count; + } + } else { + IsCallsite = true; + int n3 = Rest.find_last_of(':'); + CalleeName = Rest.substr(0, n3); + if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples)) + return false; + } + return true; +} + /// \brief Load samples from a text file. /// /// See the documentation at the top of the file for an explanation of @@ -152,13 +266,11 @@ void SampleProfileReader::dump(raw_ostream &OS) { std::error_code SampleProfileReaderText::read() { line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); - // Read the profile of each function. Since each function may be - // mentioned more than once, and we are collecting flat profiles, - // accumulate samples as we parse them. - Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$"); - Regex LineSampleRE("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$"); - Regex CallSampleRE(" +([^0-9 ][^ ]*):([0-9]+)"); - while (!LineIt.is_at_eof()) { + SmallVector<FunctionSamples *, 10> InlineStack; + + for (; !LineIt.is_at_eof(); ++LineIt) { + if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') + continue; // Read the header of each function. // // Note that for function identifiers we are actually expecting @@ -171,59 +283,52 @@ std::error_code SampleProfileReaderText::read() { // // The only requirement we place on the identifier, then, is that it // should not begin with a number. - SmallVector<StringRef, 4> Matches; - if (!HeadRE.match(*LineIt, &Matches)) { - reportError(LineIt.line_number(), - "Expected 'mangled_name:NUM:NUM', found " + *LineIt); - return sampleprof_error::malformed; - } - assert(Matches.size() == 4); - StringRef FName = Matches[1]; - unsigned NumSamples, NumHeadSamples; - Matches[2].getAsInteger(10, NumSamples); - Matches[3].getAsInteger(10, NumHeadSamples); - Profiles[FName] = FunctionSamples(); - FunctionSamples &FProfile = Profiles[FName]; - FProfile.addTotalSamples(NumSamples); - FProfile.addHeadSamples(NumHeadSamples); - ++LineIt; - - // Now read the body. The body of the function ends when we reach - // EOF or when we see the start of the next function. - while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) { - if (!LineSampleRE.match(*LineIt, &Matches)) { + if ((*LineIt)[0] != ' ') { + unsigned NumSamples, NumHeadSamples; + StringRef FName; + if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) { + reportError(LineIt.line_number(), + "Expected 'mangled_name:NUM:NUM', found " + *LineIt); + return sampleprof_error::malformed; + } + Profiles[FName] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[FName]; + FProfile.addTotalSamples(NumSamples); + FProfile.addHeadSamples(NumHeadSamples); + InlineStack.clear(); + InlineStack.push_back(&FProfile); + } else { + unsigned NumSamples; + StringRef FName; + DenseMap<StringRef, unsigned> TargetCountMap; + bool IsCallsite; + unsigned Depth, LineOffset, Discriminator; + if (!ParseLine(*LineIt, IsCallsite, Depth, NumSamples, LineOffset, + Discriminator, FName, TargetCountMap)) { reportError(LineIt.line_number(), "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt); return sampleprof_error::malformed; } - assert(Matches.size() == 5); - unsigned LineOffset, NumSamples, Discriminator = 0; - Matches[1].getAsInteger(10, LineOffset); - if (Matches[2] != "") - Matches[2].getAsInteger(10, Discriminator); - Matches[3].getAsInteger(10, NumSamples); - - // If there are function calls in this line, generate a call sample - // entry for each call. - std::string CallsLine(Matches[4]); - while (CallsLine != "") { - SmallVector<StringRef, 3> CallSample; - if (!CallSampleRE.match(CallsLine, &CallSample)) { - reportError(LineIt.line_number(), - "Expected 'mangled_name:NUM', found " + CallsLine); - return sampleprof_error::malformed; + if (IsCallsite) { + while (InlineStack.size() > Depth) { + InlineStack.pop_back(); + } + FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( + CallsiteLocation(LineOffset, Discriminator, FName)); + FSamples.addTotalSamples(NumSamples); + InlineStack.push_back(&FSamples); + } else { + while (InlineStack.size() > Depth) { + InlineStack.pop_back(); + } + FunctionSamples &FProfile = *InlineStack.back(); + for (const auto &name_count : TargetCountMap) { + FProfile.addCalledTargetSamples(LineOffset, Discriminator, + name_count.first, name_count.second); } - StringRef CalledFunction = CallSample[1]; - unsigned CalledFunctionSamples; - CallSample[2].getAsInteger(10, CalledFunctionSamples); - FProfile.addCalledTargetSamples(LineOffset, Discriminator, - CalledFunction, CalledFunctionSamples); - CallsLine = CallSampleRE.sub("", CallsLine); + FProfile.addBodySamples(LineOffset, Discriminator, NumSamples); } - - FProfile.addBodySamples(LineOffset, Discriminator, NumSamples); - ++LineIt; } } @@ -474,7 +579,6 @@ std::error_code SampleProfileReaderGCC::addSourceCount(StringRef Name, return sampleprof_error::success; } - std::error_code SampleProfileReaderGCC::readOneFunctionProfile(const SourceStack &Stack, bool Update) { @@ -577,7 +681,6 @@ std::error_code SampleProfileReaderGCC::readWorkingSet() { return sampleprof_error::not_implemented; } - /// \brief Read a GCC AutoFDO profile. /// /// This format is generated by the Linux Perf conversion tool at @@ -591,8 +694,8 @@ std::error_code SampleProfileReaderGCC::read() { if (std::error_code EC = readFunctionProfiles()) return EC; - // FIXME(dnovillo) - Module groups and working set support are not - // yet implemented. +// FIXME(dnovillo) - Module groups and working set support are not +// yet implemented. #if 0 // Read the module group file. if (std::error_code EC = readModuleGroup()) |