diff options
author | Diego Novillo <dnovillo@google.com> | 2014-10-30 18:00:06 +0000 |
---|---|---|
committer | Diego Novillo <dnovillo@google.com> | 2014-10-30 18:00:06 +0000 |
commit | c572e92c763f44bddfa30bc43e9027547b72dbfa (patch) | |
tree | aeabaa930d4db8badc1d18835bc8eaebc4dfb4ee /llvm/lib | |
parent | 98b6546dded3a531dcab29318ad44169215915b0 (diff) | |
download | bcm5719-llvm-c572e92c763f44bddfa30bc43e9027547b72dbfa.tar.gz bcm5719-llvm-c572e92c763f44bddfa30bc43e9027547b72dbfa.zip |
Add profile writing capabilities for sampling profiles.
Summary:
This patch finishes up support for handling sampling profiles in both
text and binary formats. The new binary format uses uleb128 encoding to
represent numeric values. This makes profile files about 25% smaller.
The profile writer class can write profiles in the existing text and the
new binary format. In subsequent patches, I will add the capability to
read (and perhaps write) profiles in the gcov format used by GCC.
Additionally, I will be adding support in llvm-profdata to manipulate
sampling profiles.
There was a bit of refactoring needed to separate some code that was in
the reader files, but is actually common to both the reader and writer.
The new test checks that reading the same profile, whether encoded as text
or as raw binary, produces the same results.
Reviewers: bogner, dexonsmith
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D6000
llvm-svn: 220915
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/ProfileData/CMakeLists.txt | 2 | ||||
-rw-r--r-- | llvm/lib/ProfileData/SampleProf.cpp | 49 | ||||
-rw-r--r-- | llvm/lib/ProfileData/SampleProfReader.cpp | 250 | ||||
-rw-r--r-- | llvm/lib/ProfileData/SampleProfWriter.cpp | 110 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/SampleProfile.cpp | 10 |
5 files changed, 381 insertions, 40 deletions
diff --git a/llvm/lib/ProfileData/CMakeLists.txt b/llvm/lib/ProfileData/CMakeLists.txt index 074e1a920b4..b9d472d99e7 100644 --- a/llvm/lib/ProfileData/CMakeLists.txt +++ b/llvm/lib/ProfileData/CMakeLists.txt @@ -5,5 +5,7 @@ add_llvm_library(LLVMProfileData CoverageMapping.cpp CoverageMappingWriter.cpp CoverageMappingReader.cpp + SampleProf.cpp SampleProfReader.cpp + SampleProfWriter.cpp ) diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp new file mode 100644 index 00000000000..8bd2249f57e --- /dev/null +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -0,0 +1,49 @@ +//=-- SampleProf.cpp - Sample profiling format support --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains common definitions used in the reading and writing of +// sample profile data. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; + +namespace { +class SampleProfErrorCategoryType : public std::error_category { + const char *name() const LLVM_NOEXCEPT override { return "llvm.sampleprof"; } + std::string message(int IE) const override { + sampleprof_error E = static_cast<sampleprof_error>(IE); + switch (E) { + case sampleprof_error::success: + return "Success"; + case sampleprof_error::bad_magic: + return "Invalid file format (bad magic)"; + case sampleprof_error::unsupported_version: + return "Unsupported format version"; + case sampleprof_error::too_large: + return "Too much profile data"; + case sampleprof_error::truncated: + return "Truncated profile data"; + case sampleprof_error::malformed: + return "Malformed profile data"; + } + llvm_unreachable("A value of sampleprof_error has no message."); + } +}; +} + +static ManagedStatic<SampleProfErrorCategoryType> ErrorCategory; + +const std::error_category &llvm::sampleprof_category() { + return *ErrorCategory; +} diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 963e05679ae..df4be83f5f8 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// // // This file implements the class that reads LLVM sample profiles. It -// supports two file formats: text and bitcode. The textual representation -// is useful for debugging and testing purposes. The bitcode representation +// supports two file formats: text and binary. The textual representation +// is useful for debugging and testing purposes. The binary representation // is more compact, resulting in smaller file sizes. However, they can // both be used interchangeably. 
// @@ -95,13 +95,15 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/ProfileData/SampleProfWriter.h" // REMOVE #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" -using namespace sampleprof; +using namespace llvm::sampleprof; using namespace llvm; /// \brief Print the samples collected for a function on stream \p OS. @@ -112,10 +114,22 @@ void FunctionSamples::print(raw_ostream &OS) { << " sampled lines\n"; for (BodySampleMap::const_iterator SI = BodySamples.begin(), SE = BodySamples.end(); - SI != SE; ++SI) - OS << "\tline offset: " << SI->first.LineOffset - << ", discriminator: " << SI->first.Discriminator - << ", number of samples: " << SI->second << "\n"; + SI != SE; ++SI) { + LineLocation Loc = SI->first; + SampleRecord Sample = SI->second; + OS << "\tline offset: " << Loc.LineOffset + << ", discriminator: " << Loc.Discriminator + << ", number of samples: " << Sample.getSamples(); + if (Sample.hasCalls()) { + OS << ", calls:"; + for (SampleRecord::CallTargetList::const_iterator + I = Sample.getCallTargets().begin(), + E = Sample.getCallTargets().end(); + I != E; ++I) + OS << " " << (*I).first << ":" << (*I).second; + } + OS << "\n"; + } OS << "\n"; } @@ -125,7 +139,7 @@ void FunctionSamples::print(raw_ostream &OS) { /// \param FName Name of the function to print. void SampleProfileReader::printFunctionProfile(raw_ostream &OS, StringRef FName) { - OS << "Function: " << FName << ":\n"; + OS << "Function: " << FName << ": "; Profiles[FName].print(OS); } @@ -150,22 +164,15 @@ void SampleProfileReader::dump() { /// the expected format. /// /// \returns true if the file was loaded successfully, false otherwise. 
-bool SampleProfileReader::loadText() { - ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = - MemoryBuffer::getFile(Filename); - if (std::error_code EC = BufferOrErr.getError()) { - std::string Msg(EC.message()); - M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg)); - return false; - } - MemoryBuffer &Buffer = *BufferOrErr.get(); - line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); +std::error_code SampleProfileReaderText::read() { + line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); // Read the profile of each function. Since each function may be // mentioned more than once, and we are collecting flat profiles, // accumulate samples as we parse them. Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$"); - Regex LineSample("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$"); + Regex LineSampleRE("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$"); + Regex CallSampleRE(" +([^0-9 ][^ ]*):([0-9]+)"); while (!LineIt.is_at_eof()) { // Read the header of each function. // @@ -179,11 +186,11 @@ bool SampleProfileReader::loadText() { // // The only requirement we place on the identifier, then, is that it // should not begin with a number. - SmallVector<StringRef, 3> Matches; + SmallVector<StringRef, 4> Matches; if (!HeadRE.match(*LineIt, &Matches)) { reportParseError(LineIt.line_number(), "Expected 'mangled_name:NUM:NUM', found " + *LineIt); - return false; + return sampleprof_error::malformed; } assert(Matches.size() == 4); StringRef FName = Matches[1]; @@ -199,11 +206,11 @@ bool SampleProfileReader::loadText() { // Now read the body. The body of the function ends when we reach // EOF or when we see the start of the next function. 
while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) { - if (!LineSample.match(*LineIt, &Matches)) { + if (!LineSampleRE.match(*LineIt, &Matches)) { reportParseError( LineIt.line_number(), "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt); - return false; + return sampleprof_error::malformed; } assert(Matches.size() == 5); unsigned LineOffset, NumSamples, Discriminator = 0; @@ -212,27 +219,194 @@ bool SampleProfileReader::loadText() { Matches[2].getAsInteger(10, Discriminator); Matches[3].getAsInteger(10, NumSamples); - // FIXME: Handle called targets (in Matches[4]). + // If there are function calls in this line, generate a call sample + // entry for each call. + std::string CallsLine(Matches[4]); + while (CallsLine != "") { + SmallVector<StringRef, 3> CallSample; + if (!CallSampleRE.match(CallsLine, &CallSample)) { + reportParseError(LineIt.line_number(), + "Expected 'mangled_name:NUM', found " + CallsLine); + return sampleprof_error::malformed; + } + StringRef CalledFunction = CallSample[1]; + unsigned CalledFunctionSamples; + CallSample[2].getAsInteger(10, CalledFunctionSamples); + FProfile.addCalledTargetSamples(LineOffset, Discriminator, + CalledFunction, CalledFunctionSamples); + CallsLine = CallSampleRE.sub("", CallsLine); + } - // When dealing with instruction weights, we use the value - // zero to indicate the absence of a sample. If we read an - // actual zero from the profile file, return it as 1 to - // avoid the confusion later on. 
- if (NumSamples == 0) - NumSamples = 1; FProfile.addBodySamples(LineOffset, Discriminator, NumSamples); ++LineIt; } } - return true; + return sampleprof_error::success; +} + +template <typename T> +ErrorOr<T> SampleProfileReaderBinary::readNumber() { + unsigned NumBytesRead = 0; + std::error_code EC; + uint64_t Val = decodeULEB128(Data, &NumBytesRead); + + if (Val > std::numeric_limits<T>::max()) + EC = sampleprof_error::malformed; + else if (Data + NumBytesRead > End) + EC = sampleprof_error::truncated; + else + EC = sampleprof_error::success; + + if (EC) { + reportParseError(0, EC.message()); + return EC; + } + + Data += NumBytesRead; + return static_cast<T>(Val); +} + +ErrorOr<StringRef> SampleProfileReaderBinary::readString() { + std::error_code EC; + StringRef Str(reinterpret_cast<const char *>(Data)); + if (Data + Str.size() + 1 > End) { + EC = sampleprof_error::truncated; + reportParseError(0, EC.message()); + return EC; + } + + Data += Str.size() + 1; + return Str; +} + +std::error_code SampleProfileReaderBinary::read() { + while (!at_eof()) { + auto FName(readString()); + if (std::error_code EC = FName.getError()) + return EC; + + Profiles[*FName] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[*FName]; + + auto Val = readNumber<unsigned>(); + if (std::error_code EC = Val.getError()) + return EC; + FProfile.addTotalSamples(*Val); + + Val = readNumber<unsigned>(); + if (std::error_code EC = Val.getError()) + return EC; + FProfile.addHeadSamples(*Val); + + // Read the samples in the body. 
+ auto NumRecords = readNumber<unsigned>(); + if (std::error_code EC = NumRecords.getError()) + return EC; + for (unsigned I = 0; I < *NumRecords; ++I) { + auto LineOffset = readNumber<uint64_t>(); + if (std::error_code EC = LineOffset.getError()) + return EC; + + auto Discriminator = readNumber<uint64_t>(); + if (std::error_code EC = Discriminator.getError()) + return EC; + + auto NumSamples = readNumber<uint64_t>(); + if (std::error_code EC = NumSamples.getError()) + return EC; + + auto NumCalls = readNumber<unsigned>(); + if (std::error_code EC = NumCalls.getError()) + return EC; + + for (unsigned J = 0; J < *NumCalls; ++J) { + auto CalledFunction(readString()); + if (std::error_code EC = CalledFunction.getError()) + return EC; + + auto CalledFunctionSamples = readNumber<uint64_t>(); + if (std::error_code EC = CalledFunctionSamples.getError()) + return EC; + + FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, + *CalledFunction, + *CalledFunctionSamples); + } + + FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); + } + } + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderBinary::readHeader() { + Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); + End = Data + Buffer->getBufferSize(); + + // Read and check the magic identifier. + auto Magic = readNumber<uint64_t>(); + if (std::error_code EC = Magic.getError()) + return EC; + else if (*Magic != SPMagic()) + return sampleprof_error::bad_magic; + + // Read the version number. + auto Version = readNumber<uint64_t>(); + if (std::error_code EC = Version.getError()) + return EC; + else if (*Version != SPVersion()) + return sampleprof_error::unsupported_version; + + return sampleprof_error::success; } -/// \brief Load execution samples from a file. 
+bool SampleProfileReaderBinary::hasFormat(const MemoryBuffer &Buffer) { + const uint8_t *Data = + reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); + uint64_t Magic = decodeULEB128(Data); + return Magic == SPMagic(); +} + +/// \brief Prepare a memory buffer for the contents of \p Filename. +/// +/// \returns an error code indicating the status of the buffer. +static std::error_code +setupMemoryBuffer(std::string Filename, std::unique_ptr<MemoryBuffer> &Buffer) { + auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename); + if (std::error_code EC = BufferOrErr.getError()) + return EC; + Buffer = std::move(BufferOrErr.get()); + + // Sanity check the file. + if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max()) + return sampleprof_error::too_large; + + return sampleprof_error::success; +} + +/// \brief Create a sample profile reader based on the format of the input file. +/// +/// \param Filename The file to open. +/// +/// \param Reader The reader to instantiate according to \p Filename's format. /// -/// This function examines the header of the given file to determine -/// whether to use the text or the bitcode loader. -bool SampleProfileReader::load() { - // TODO Actually detect the file format. - return loadText(); +/// \param C The LLVM context to use to emit diagnostics. +/// +/// \returns an error code indicating the status of the created reader. 
+std::error_code +SampleProfileReader::create(std::string Filename, + std::unique_ptr<SampleProfileReader> &Reader, + LLVMContext &C) { + std::unique_ptr<MemoryBuffer> Buffer; + if (std::error_code EC = setupMemoryBuffer(Filename, Buffer)) + return EC; + + if (SampleProfileReaderBinary::hasFormat(*Buffer)) + Reader.reset(new SampleProfileReaderBinary(std::move(Buffer), C)); + else + Reader.reset(new SampleProfileReaderText(std::move(Buffer), C)); + + return Reader->readHeader(); } diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp new file mode 100644 index 00000000000..ab2e7617762 --- /dev/null +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -0,0 +1,110 @@ +//===- SampleProfWriter.cpp - Write LLVM sample profile data --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the class that writes LLVM sample profiles. It +// supports two file formats: text and binary. The textual representation +// is useful for debugging and testing purposes. The binary representation +// is more compact, resulting in smaller file sizes. However, they can +// both be used interchangeably. +// +// See lib/ProfileData/SampleProfReader.cpp for documentation on each of the +// supported formats. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/SampleProfWriter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/Regex.h" + +using namespace llvm::sampleprof; +using namespace llvm; + +/// \brief Write samples to a text file. 
+bool SampleProfileWriterText::write(const Function &F, + const FunctionSamples &S) { + if (S.empty()) + return true; + + OS << F.getName() << ":" << S.getTotalSamples() << ":" << S.getHeadSamples() + << "\n"; + + for (BodySampleMap::const_iterator I = S.getBodySamples().begin(), + E = S.getBodySamples().end(); + I != E; ++I) { + LineLocation Loc = I->first; + SampleRecord Sample = I->second; + if (Loc.Discriminator == 0) + OS << Loc.LineOffset << ": "; + else + OS << Loc.LineOffset << "." << Loc.Discriminator << ": "; + + OS << Sample.getSamples(); + + for (SampleRecord::CallTargetList::const_iterator + I = Sample.getCallTargets().begin(), + E = Sample.getCallTargets().end(); + I != E; ++I) + OS << " " << (*I).first << ":" << (*I).second; + OS << "\n"; + } + + return true; +} + +SampleProfileWriterBinary::SampleProfileWriterBinary(StringRef F, + std::error_code &EC) + : SampleProfileWriter(F, EC, sys::fs::F_None) { + if (EC) + return; + + // Write the file header. + encodeULEB128(SPMagic(), OS); + encodeULEB128(SPVersion(), OS); +} + +/// \brief Write samples to a binary file. +/// +/// \returns true if the samples were written successfully, false otherwise. 
+bool SampleProfileWriterBinary::write(const Function &F, + const FunctionSamples &S) { + if (S.empty()) + return true; + + OS << F.getName(); + encodeULEB128(0, OS); + encodeULEB128(S.getTotalSamples(), OS); + encodeULEB128(S.getHeadSamples(), OS); + encodeULEB128(S.getBodySamples().size(), OS); + for (BodySampleMap::const_iterator I = S.getBodySamples().begin(), + E = S.getBodySamples().end(); + I != E; ++I) { + LineLocation Loc = I->first; + SampleRecord Sample = I->second; + encodeULEB128(Loc.LineOffset, OS); + encodeULEB128(Loc.Discriminator, OS); + encodeULEB128(Sample.getSamples(), OS); + encodeULEB128(Sample.getCallTargets().size(), OS); + for (SampleRecord::CallTargetList::const_iterator + I = Sample.getCallTargets().begin(), + E = Sample.getCallTargets().end(); + I != E; ++I) { + std::string Callee = (*I).first; + unsigned CalleeSamples = (*I).second; + OS << Callee; + encodeULEB128(0, OS); + encodeULEB128(CalleeSamples, OS); + } + } + + return true; +} diff --git a/llvm/lib/Transforms/Scalar/SampleProfile.cpp b/llvm/lib/Transforms/Scalar/SampleProfile.cpp index fedbcf3a7c0..8d5480c7cb5 100644 --- a/llvm/lib/Transforms/Scalar/SampleProfile.cpp +++ b/llvm/lib/Transforms/Scalar/SampleProfile.cpp @@ -737,8 +737,14 @@ INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile", "Sample Profile loader", false, false) bool SampleProfileLoader::doInitialization(Module &M) { - Reader.reset(new SampleProfileReader(M, Filename)); - ProfileIsValid = Reader->load(); + if (std::error_code EC = + SampleProfileReader::create(Filename, Reader, M.getContext())) { + std::string Msg = "Could not open profile: " + EC.message(); + DiagnosticInfoSampleProfile Diag(Filename.data(), Msg); + M.getContext().diagnose(Diag); + return false; + } + ProfileIsValid = (Reader->read() == sampleprof_error::success); return true; } |