diff options
| author | Francis Visoiu Mistrih <francisvm@yahoo.com> | 2019-04-24 00:06:24 +0000 |
|---|---|---|
| committer | Francis Visoiu Mistrih <francisvm@yahoo.com> | 2019-04-24 00:06:24 +0000 |
| commit | 7fee2b89fd6e5101bc590e0741f4d7a82b7715e1 (patch) | |
| tree | 6358f4d132215ab591592762043c3a8fffb2ee31 /llvm/lib | |
| parent | 53796d9439018b97a0e6f35af0ba83843a7270e7 (diff) | |
| download | bcm5719-llvm-7fee2b89fd6e5101bc590e0741f4d7a82b7715e1.tar.gz bcm5719-llvm-7fee2b89fd6e5101bc590e0741f4d7a82b7715e1.zip | |
[Remarks] Add string deduplication using a string table
* Add support for uniquing strings in the remark streamer and emitting the string table in the remarks section.
* Add parsing support for the string table in the RemarkParser.
From this remark:
```
--- !Missed
Pass: inline
Name: NoDefinition
DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c',
            Line: 7, Column: 3 }
Function: printArgsNoRet
Args:
  - Callee: printf
  - String: ' will not be inlined into '
  - Caller: printArgsNoRet
    DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c',
                Line: 6, Column: 0 }
  - String: ' because its definition is unavailable'
...
```
to:
```
--- !Missed
Pass: 0
Name: 1
DebugLoc: { File: 3, Line: 7, Column: 3 }
Function: 2
Args:
  - Callee: 4
  - String: 5
  - Caller: 2
    DebugLoc: { File: 3, Line: 6, Column: 0 }
  - String: 6
...
```
And the string table in the .remarks/__remarks section containing:
```
inline\0NoDefinition\0printArgsNoRet\0
test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c\0printf\0
 will not be inlined into \0 because its definition is unavailable\0
```
This is mostly supposed to be used for testing purposes, but it gives us
a 2x reduction in the remark size, and is an incremental change for the
updates to the remarks file format.
Differential Revision: https://reviews.llvm.org/D60227
llvm-svn: 359050
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 23 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt | 2 | ||||
| -rw-r--r-- | llvm/lib/IR/DiagnosticInfo.cpp | 57 | ||||
| -rw-r--r-- | llvm/lib/IR/LLVMBuild.txt | 2 | ||||
| -rw-r--r-- | llvm/lib/IR/RemarkStreamer.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Remarks/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | llvm/lib/Remarks/RemarkParser.cpp | 28 | ||||
| -rw-r--r-- | llvm/lib/Remarks/RemarkStringTable.cpp | 48 | ||||
| -rw-r--r-- | llvm/lib/Remarks/YAMLRemarkParser.cpp | 14 | ||||
| -rw-r--r-- | llvm/lib/Remarks/YAMLRemarkParser.h | 15 |
10 files changed, 172 insertions, 20 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 987d324df2c..fc5049b9067 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1362,6 +1362,29 @@ void AsmPrinter::emitRemarksSection(Module &M) { support::endian::write64le(Version.data(), remarks::Version); OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size())); + // Emit the string table in the section. + // Note: we need to use the streamer here to emit it in the section. We can't + // just use the serialize function with a raw_ostream because of the way + // MCStreamers work. + const remarks::StringTable &StrTab = RS->getStringTable(); + std::vector<StringRef> StrTabStrings = StrTab.serialize(); + uint64_t StrTabSize = StrTab.SerializedSize; + // Emit the total size of the string table (the size itself excluded): + // little-endian uint64_t. + // The total size is located after the version number. + std::array<char, 8> StrTabSizeBuf; + support::endian::write64le(StrTabSizeBuf.data(), StrTabSize); + OutStreamer->EmitBinaryData( + StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size())); + // Emit a list of null-terminated strings. + // Note: the order is important here: the ID used in the remarks corresponds + // to the position of the string in the section. + for (StringRef Str : StrTabStrings) { + OutStreamer->EmitBytes(Str); + // Explicitly emit a '\0'. + OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1); + } + // Emit the null-terminated absolute path to the remark file. // The path is located at the offset 0x4 in the section. 
StringRef FilenameRef = RS->getFilename(); diff --git a/llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt index 56449269681..44595a18f1b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt +++ b/llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt @@ -18,4 +18,4 @@ type = Library name = AsmPrinter parent = Libraries -required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Support Target +required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Remarks Support Target diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp index 14bee35dc29..7c387e95724 100644 --- a/llvm/lib/IR/DiagnosticInfo.cpp +++ b/llvm/lib/IR/DiagnosticInfo.cpp @@ -43,6 +43,8 @@ using namespace llvm; +cl::opt<bool> UseStringTable("remarks-yaml-string-table", cl::init(false)); + int llvm::getNextAvailablePluginDiagnosticKind() { static std::atomic<int> PluginKindID(DK_FirstPluginKind); return ++PluginKindID; @@ -373,6 +375,20 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const { void OptimizationRemarkAnalysisFPCommute::anchor() {} void OptimizationRemarkAnalysisAliasing::anchor() {} +template <typename T> +static void mapRemarkHeader( + yaml::IO &io, T PassName, T RemarkName, DiagnosticLocation DL, + T FunctionName, Optional<uint64_t> Hotness, + SmallVectorImpl<DiagnosticInfoOptimizationBase::Argument> &Args) { + io.mapRequired("Pass", PassName); + io.mapRequired("Name", RemarkName); + if (!io.outputting() || DL.isValid()) + io.mapOptional("DebugLoc", DL); + io.mapRequired("Function", FunctionName); + io.mapOptional("Hotness", Hotness); + io.mapOptional("Args", Args); +} + namespace llvm { namespace yaml { @@ -413,13 +429,18 @@ void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping( GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName()); StringRef PassName(OptDiag->PassName); - io.mapRequired("Pass", 
PassName); - io.mapRequired("Name", OptDiag->RemarkName); - if (!io.outputting() || DL.isValid()) - io.mapOptional("DebugLoc", DL); - io.mapRequired("Function", FN); - io.mapOptional("Hotness", OptDiag->Hotness); - io.mapOptional("Args", OptDiag->Args); + if (UseStringTable) { + remarks::StringTable &StrTab = + reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable(); + unsigned PassID = StrTab.add(PassName).first; + unsigned NameID = StrTab.add(OptDiag->RemarkName).first; + unsigned FunctionID = StrTab.add(FN).first; + mapRemarkHeader(io, PassID, NameID, DL, FunctionID, OptDiag->Hotness, + OptDiag->Args); + } else { + mapRemarkHeader(io, PassName, OptDiag->RemarkName, DL, FN, OptDiag->Hotness, + OptDiag->Args); + } } template <> struct MappingTraits<DiagnosticLocation> { @@ -430,7 +451,15 @@ template <> struct MappingTraits<DiagnosticLocation> { unsigned Line = DL.getLine(); unsigned Col = DL.getColumn(); - io.mapRequired("File", File); + if (UseStringTable) { + remarks::StringTable &StrTab = + reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable(); + unsigned FileID = StrTab.add(File).first; + io.mapRequired("File", FileID); + } else { + io.mapRequired("File", File); + } + io.mapRequired("Line", Line); io.mapRequired("Column", Col); } @@ -459,12 +488,18 @@ template <> struct BlockScalarTraits<StringBlockVal> { template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> { static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) { assert(io.outputting() && "input not yet implemented"); - // Emit a string block scalar for multiline strings, to preserve newlines. 
- if (StringRef(A.Val).count('\n') > 1) { + + if (UseStringTable) { + remarks::StringTable &StrTab = + reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable(); + auto ValueID = StrTab.add(A.Val).first; + io.mapRequired(A.Key.data(), ValueID); + } else if (StringRef(A.Val).count('\n') > 1) { StringBlockVal S(A.Val); io.mapRequired(A.Key.data(), S); - } else + } else { io.mapRequired(A.Key.data(), A.Val); + } if (A.Loc.isValid()) io.mapOptional("DebugLoc", A.Loc); } diff --git a/llvm/lib/IR/LLVMBuild.txt b/llvm/lib/IR/LLVMBuild.txt index b3dcd413f41..73d97108c40 100644 --- a/llvm/lib/IR/LLVMBuild.txt +++ b/llvm/lib/IR/LLVMBuild.txt @@ -18,4 +18,4 @@ type = Library name = Core parent = Libraries -required_libraries = BinaryFormat Support +required_libraries = BinaryFormat Remarks Support diff --git a/llvm/lib/IR/RemarkStreamer.cpp b/llvm/lib/IR/RemarkStreamer.cpp index 022c17d6722..d2a4ed4adf4 100644 --- a/llvm/lib/IR/RemarkStreamer.cpp +++ b/llvm/lib/IR/RemarkStreamer.cpp @@ -17,7 +17,7 @@ using namespace llvm; RemarkStreamer::RemarkStreamer(StringRef Filename, raw_ostream &OS) : Filename(Filename), OS(OS), - YAMLOutput(OS, reinterpret_cast<void *>(this)) { + YAMLOutput(OS, reinterpret_cast<void *>(this)), StrTab() { assert(!Filename.empty() && "This needs to be a real filename."); } diff --git a/llvm/lib/Remarks/CMakeLists.txt b/llvm/lib/Remarks/CMakeLists.txt index 2ab7e8476a1..ccbca7ea4f4 100644 --- a/llvm/lib/Remarks/CMakeLists.txt +++ b/llvm/lib/Remarks/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMRemarks Remark.cpp RemarkParser.cpp + RemarkStringTable.cpp YAMLRemarkParser.cpp ) diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp index 30de40dd54a..144f08f6feb 100644 --- a/llvm/lib/Remarks/RemarkParser.cpp +++ b/llvm/lib/Remarks/RemarkParser.cpp @@ -22,6 +22,9 @@ using namespace llvm::remarks; Parser::Parser(StringRef Buf) : Impl(llvm::make_unique<YAMLParserImpl>(Buf)) {} +Parser::Parser(StringRef Buf, 
StringRef StrTabBuf) + : Impl(llvm::make_unique<YAMLParserImpl>(Buf, StrTabBuf)) {} + Parser::~Parser() = default; static Expected<const Remark *> getNextYAML(YAMLParserImpl &Impl) { @@ -56,6 +59,31 @@ Expected<const Remark *> Parser::getNext() const { llvm_unreachable("Get next called with an unknown parsing implementation."); } +ParsedStringTable::ParsedStringTable(StringRef InBuffer) : Buffer(InBuffer) { + while (!InBuffer.empty()) { + // Strings are separated by '\0' bytes. + std::pair<StringRef, StringRef> Split = InBuffer.split('\0'); + // We only store the offset from the beginning of the buffer. + Offsets.push_back(Split.first.data() - Buffer.data()); + InBuffer = Split.second; + } +} + +Expected<StringRef> ParsedStringTable::operator[](size_t Index) { + if (Index >= Offsets.size()) + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "String with index %u is out of bounds (size = %u).", Index, + Offsets.size()); + + size_t Offset = Offsets[Index]; + // If it's the last offset, we can't use the next offset to know the size of + // the string. + size_t NextOffset = + (Index == Offsets.size() - 1) ? Buffer.size() : Offsets[Index + 1]; + return StringRef(Buffer.data() + Offset, NextOffset - Offset - 1); +} + // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(remarks::Parser, LLVMRemarkParserRef) diff --git a/llvm/lib/Remarks/RemarkStringTable.cpp b/llvm/lib/Remarks/RemarkStringTable.cpp new file mode 100644 index 00000000000..984aa5b33b4 --- /dev/null +++ b/llvm/lib/Remarks/RemarkStringTable.cpp @@ -0,0 +1,48 @@ +//===- RemarkStringTable.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the Remark string table used at remark generation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Remarks/RemarkStringTable.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/Error.h" +#include <vector> + +using namespace llvm; +using namespace llvm::remarks; + +std::pair<unsigned, StringRef> StringTable::add(StringRef Str) { + size_t NextID = StrTab.size(); + auto KV = StrTab.insert({Str, NextID}); + // If it's a new string, add it to the final size. + if (KV.second) + SerializedSize += KV.first->first().size() + 1; // +1 for the '\0' + // Can be either NextID or the previous ID if the string is already there. + return {KV.first->second, KV.first->first()}; +} + +void StringTable::serialize(raw_ostream &OS) const { + // Emit the number of strings. + uint64_t StrTabSize = SerializedSize; + support::endian::write(OS, StrTabSize, support::little); + // Emit the sequence of strings. + for (StringRef Str : serialize()) { + OS << Str; + // Explicitly emit a '\0'. 
+ OS.write('\0'); + } +} + +std::vector<StringRef> StringTable::serialize() const { + std::vector<StringRef> Strings{StrTab.size()}; + for (const auto &KV : StrTab) + Strings[KV.second] = KV.first(); + return Strings; +} diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp index db6b0b25bd1..0c265856d28 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.cpp +++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp @@ -34,7 +34,19 @@ Error YAMLRemarkParser::parseStr(T &Result, yaml::KeyValueNode &Node) { auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue()); if (!Value) return make_error<YAMLParseError>("expected a value of scalar type.", Node); - StringRef Tmp = Value->getRawValue(); + StringRef Tmp; + if (!StrTab) { + Tmp = Value->getRawValue(); + } else { + // If we have a string table, parse it as an unsigned. + unsigned StrID = 0; + if (Error E = parseUnsigned(StrID, Node)) + return E; + if (Expected<StringRef> Str = (*StrTab)[StrID]) + Tmp = *Str; + else + return Str.takeError(); + } if (Tmp.front() == '\'') Tmp = Tmp.drop_front(); diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h index 5095a4be730..5fd17865b69 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.h +++ b/llvm/lib/Remarks/YAMLRemarkParser.h @@ -17,6 +17,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkParser.h" #include "llvm/Support/Error.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/YAMLParser.h" @@ -38,7 +39,8 @@ struct YAMLRemarkParser { raw_string_ostream ErrorStream; /// Temporary parsing buffer for the arguments. SmallVector<Argument, 8> TmpArgs; - + /// The string table used for parsing strings. + Optional<ParsedStringTable> StrTab; /// The state used by the parser to parse a remark entry. Invalidated with /// every call to `parseYAMLElement`. 
struct ParseState { @@ -57,10 +59,13 @@ struct YAMLRemarkParser { /// not be containing any value. Optional<ParseState> State; - YAMLRemarkParser(StringRef Buf) + YAMLRemarkParser(StringRef Buf, Optional<StringRef> StrTabBuf = None) : SM(), Stream(Buf, SM), ErrorString(), ErrorStream(ErrorString), - TmpArgs() { + TmpArgs(), StrTab() { SM.setDiagHandler(YAMLRemarkParser::HandleDiagnostic, this); + + if (StrTabBuf) + StrTab.emplace(*StrTabBuf); } /// Parse a YAML element. @@ -122,8 +127,8 @@ struct YAMLParserImpl : public ParserImpl { /// Set to `true` if we had any errors during parsing. bool HasErrors = false; - YAMLParserImpl(StringRef Buf) - : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf), + YAMLParserImpl(StringRef Buf, Optional<StringRef> StrTabBuf = None) + : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf, StrTabBuf), YAMLIt(YAMLParser.Stream.begin()), HasErrors(false) {} static bool classof(const ParserImpl *PI) { |

