summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorFrancis Visoiu Mistrih <francisvm@yahoo.com>2019-04-24 00:06:24 +0000
committerFrancis Visoiu Mistrih <francisvm@yahoo.com>2019-04-24 00:06:24 +0000
commit7fee2b89fd6e5101bc590e0741f4d7a82b7715e1 (patch)
tree6358f4d132215ab591592762043c3a8fffb2ee31 /llvm/lib
parent53796d9439018b97a0e6f35af0ba83843a7270e7 (diff)
downloadbcm5719-llvm-7fee2b89fd6e5101bc590e0741f4d7a82b7715e1.tar.gz
bcm5719-llvm-7fee2b89fd6e5101bc590e0741f4d7a82b7715e1.zip
[Remarks] Add string deduplication using a string table
* Add support for uniquing strings in the remark streamer and emitting the string table in the remarks section.
* Add parsing support for the string table in the RemarkParser.

From this remark:

```
--- !Missed
Pass: inline
Name: NoDefinition
DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c', Line: 7, Column: 3 }
Function: printArgsNoRet
Args:
  - Callee: printf
  - String: ' will not be inlined into '
  - Caller: printArgsNoRet
    DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c', Line: 6, Column: 0 }
  - String: ' because its definition is unavailable'
...
```

to:

```
--- !Missed
Pass: 0
Name: 1
DebugLoc: { File: 3, Line: 7, Column: 3 }
Function: 2
Args:
  - Callee: 4
  - String: 5
  - Caller: 2
    DebugLoc: { File: 3, Line: 6, Column: 0 }
  - String: 6
...
```

And the string table in the .remarks/__remarks section containing:

```
inline\0NoDefinition\0printArgsNoRet\0
test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c\0printf\0 will not be inlined into \0 because its definition is unavailable\0
```

This is mostly supposed to be used for testing purposes, but it gives us a 2x reduction in the remark size, and is an incremental change for the updates to the remarks file format.

Differential Revision: https://reviews.llvm.org/D60227
llvm-svn: 359050
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp23
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt2
-rw-r--r--llvm/lib/IR/DiagnosticInfo.cpp57
-rw-r--r--llvm/lib/IR/LLVMBuild.txt2
-rw-r--r--llvm/lib/IR/RemarkStreamer.cpp2
-rw-r--r--llvm/lib/Remarks/CMakeLists.txt1
-rw-r--r--llvm/lib/Remarks/RemarkParser.cpp28
-rw-r--r--llvm/lib/Remarks/RemarkStringTable.cpp48
-rw-r--r--llvm/lib/Remarks/YAMLRemarkParser.cpp14
-rw-r--r--llvm/lib/Remarks/YAMLRemarkParser.h15
10 files changed, 172 insertions, 20 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 987d324df2c..fc5049b9067 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1362,6 +1362,29 @@ void AsmPrinter::emitRemarksSection(Module &M) {
support::endian::write64le(Version.data(), remarks::Version);
OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size()));
+ // Emit the string table in the section.
+ // Note: we need to use the streamer here to emit it in the section. We can't
+ // just use the serialize function with a raw_ostream because of the way
+ // MCStreamers work.
+ const remarks::StringTable &StrTab = RS->getStringTable();
+ std::vector<StringRef> StrTabStrings = StrTab.serialize();
+ uint64_t StrTabSize = StrTab.SerializedSize;
+ // Emit the total size of the string table (the size itself excluded):
+ // little-endian uint64_t.
+ // The total size is located after the version number.
+ std::array<char, 8> StrTabSizeBuf;
+ support::endian::write64le(StrTabSizeBuf.data(), StrTabSize);
+ OutStreamer->EmitBinaryData(
+ StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size()));
+ // Emit a list of null-terminated strings.
+ // Note: the order is important here: the ID used in the remarks corresponds
+ // to the position of the string in the section.
+ for (StringRef Str : StrTabStrings) {
+ OutStreamer->EmitBytes(Str);
+ // Explicitly emit a '\0'.
+ OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+ }
+
// Emit the null-terminated absolute path to the remark file.
// The path is located at the offset 0x4 in the section.
StringRef FilenameRef = RS->getFilename();
diff --git a/llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt
index 56449269681..44595a18f1b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt
+++ b/llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt
@@ -18,4 +18,4 @@
type = Library
name = AsmPrinter
parent = Libraries
-required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Support Target
+required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Remarks Support Target
diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp
index 14bee35dc29..7c387e95724 100644
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@@ -43,6 +43,8 @@
using namespace llvm;
+// When set, the YAML remark mappings in this file emit string-table IDs in
+// place of the strings themselves. Mostly intended for testing, hence hidden
+// from the default -help listing.
+cl::opt<bool> UseStringTable(
+    "remarks-yaml-string-table", cl::init(false), cl::Hidden,
+    cl::desc("Emit string table IDs instead of strings in YAML remarks"));
+
int llvm::getNextAvailablePluginDiagnosticKind() {
static std::atomic<int> PluginKindID(DK_FirstPluginKind);
return ++PluginKindID;
@@ -373,6 +375,20 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const {
void OptimizationRemarkAnalysisFPCommute::anchor() {}
void OptimizationRemarkAnalysisAliasing::anchor() {}
+/// Map the fields common to every optimization remark, in this order: Pass,
+/// Name, DebugLoc, Function, Hotness, Args.
+/// \p T is the type used for the Pass, Name and Function values; the callers
+/// in this file pass either the strings themselves or their unsigned
+/// string-table IDs.
+template <typename T>
+static void mapRemarkHeader(
+ yaml::IO &io, T PassName, T RemarkName, DiagnosticLocation DL,
+ T FunctionName, Optional<uint64_t> Hotness,
+ SmallVectorImpl<DiagnosticInfoOptimizationBase::Argument> &Args) {
+ io.mapRequired("Pass", PassName);
+ io.mapRequired("Name", RemarkName);
+ // When writing, skip the DebugLoc key entirely if there is no valid
+ // location; when reading, always give the key a chance to be mapped.
+ if (!io.outputting() || DL.isValid())
+ io.mapOptional("DebugLoc", DL);
+ io.mapRequired("Function", FunctionName);
+ io.mapOptional("Hotness", Hotness);
+ io.mapOptional("Args", Args);
+}
+
namespace llvm {
namespace yaml {
@@ -413,13 +429,18 @@ void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping(
GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName());
StringRef PassName(OptDiag->PassName);
- io.mapRequired("Pass", PassName);
- io.mapRequired("Name", OptDiag->RemarkName);
- if (!io.outputting() || DL.isValid())
- io.mapOptional("DebugLoc", DL);
- io.mapRequired("Function", FN);
- io.mapOptional("Hotness", OptDiag->Hotness);
- io.mapOptional("Args", OptDiag->Args);
+ if (UseStringTable) {
+ remarks::StringTable &StrTab =
+ reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+ unsigned PassID = StrTab.add(PassName).first;
+ unsigned NameID = StrTab.add(OptDiag->RemarkName).first;
+ unsigned FunctionID = StrTab.add(FN).first;
+ mapRemarkHeader(io, PassID, NameID, DL, FunctionID, OptDiag->Hotness,
+ OptDiag->Args);
+ } else {
+ mapRemarkHeader(io, PassName, OptDiag->RemarkName, DL, FN, OptDiag->Hotness,
+ OptDiag->Args);
+ }
}
template <> struct MappingTraits<DiagnosticLocation> {
@@ -430,7 +451,15 @@ template <> struct MappingTraits<DiagnosticLocation> {
unsigned Line = DL.getLine();
unsigned Col = DL.getColumn();
- io.mapRequired("File", File);
+ if (UseStringTable) {
+ remarks::StringTable &StrTab =
+ reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+ unsigned FileID = StrTab.add(File).first;
+ io.mapRequired("File", FileID);
+ } else {
+ io.mapRequired("File", File);
+ }
+
io.mapRequired("Line", Line);
io.mapRequired("Column", Col);
}
@@ -459,12 +488,18 @@ template <> struct BlockScalarTraits<StringBlockVal> {
template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
assert(io.outputting() && "input not yet implemented");
- // Emit a string block scalar for multiline strings, to preserve newlines.
- if (StringRef(A.Val).count('\n') > 1) {
+
+ if (UseStringTable) {
+ remarks::StringTable &StrTab =
+ reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+ auto ValueID = StrTab.add(A.Val).first;
+ io.mapRequired(A.Key.data(), ValueID);
+ } else if (StringRef(A.Val).count('\n') > 1) {
StringBlockVal S(A.Val);
io.mapRequired(A.Key.data(), S);
- } else
+ } else {
io.mapRequired(A.Key.data(), A.Val);
+ }
if (A.Loc.isValid())
io.mapOptional("DebugLoc", A.Loc);
}
diff --git a/llvm/lib/IR/LLVMBuild.txt b/llvm/lib/IR/LLVMBuild.txt
index b3dcd413f41..73d97108c40 100644
--- a/llvm/lib/IR/LLVMBuild.txt
+++ b/llvm/lib/IR/LLVMBuild.txt
@@ -18,4 +18,4 @@
type = Library
name = Core
parent = Libraries
-required_libraries = BinaryFormat Support
+required_libraries = BinaryFormat Remarks Support
diff --git a/llvm/lib/IR/RemarkStreamer.cpp b/llvm/lib/IR/RemarkStreamer.cpp
index 022c17d6722..d2a4ed4adf4 100644
--- a/llvm/lib/IR/RemarkStreamer.cpp
+++ b/llvm/lib/IR/RemarkStreamer.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
RemarkStreamer::RemarkStreamer(StringRef Filename, raw_ostream &OS)
: Filename(Filename), OS(OS),
- YAMLOutput(OS, reinterpret_cast<void *>(this)) {
+ YAMLOutput(OS, reinterpret_cast<void *>(this)), StrTab() {
assert(!Filename.empty() && "This needs to be a real filename.");
}
diff --git a/llvm/lib/Remarks/CMakeLists.txt b/llvm/lib/Remarks/CMakeLists.txt
index 2ab7e8476a1..ccbca7ea4f4 100644
--- a/llvm/lib/Remarks/CMakeLists.txt
+++ b/llvm/lib/Remarks/CMakeLists.txt
@@ -1,5 +1,6 @@
add_llvm_library(LLVMRemarks
Remark.cpp
RemarkParser.cpp
+ RemarkStringTable.cpp
YAMLRemarkParser.cpp
)
diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp
index 30de40dd54a..144f08f6feb 100644
--- a/llvm/lib/Remarks/RemarkParser.cpp
+++ b/llvm/lib/Remarks/RemarkParser.cpp
@@ -22,6 +22,9 @@ using namespace llvm::remarks;
Parser::Parser(StringRef Buf) : Impl(llvm::make_unique<YAMLParserImpl>(Buf)) {}
+/// Create a parser over \p Buf whose YAML implementation is also handed
+/// \p StrTabBuf, the buffer containing the string table.
+Parser::Parser(StringRef Buf, StringRef StrTabBuf)
+ : Impl(llvm::make_unique<YAMLParserImpl>(Buf, StrTabBuf)) {}
+
Parser::~Parser() = default;
static Expected<const Remark *> getNextYAML(YAMLParserImpl &Impl) {
@@ -56,6 +59,31 @@ Expected<const Remark *> Parser::getNext() const {
llvm_unreachable("Get next called with an unknown parsing implementation.");
}
+/// Index \p InBuffer, a sequence of strings separated by '\0' bytes, by
+/// recording in Offsets where each string starts.
+/// NOTE(review): operator[] subtracts one byte for the terminator when it
+/// computes a string's length, so the last string is expected to end with a
+/// '\0' as well; an unterminated final string would lose its last character.
+ParsedStringTable::ParsedStringTable(StringRef InBuffer) : Buffer(InBuffer) {
+ // InBuffer is consumed one string at a time; Buffer keeps the full range so
+ // offsets can be computed relative to its start.
+ while (!InBuffer.empty()) {
+ // Strings are separated by '\0' bytes.
+ std::pair<StringRef, StringRef> Split = InBuffer.split('\0');
+ // We only store the offset from the beginning of the buffer.
+ Offsets.push_back(Split.first.data() - Buffer.data());
+ // Continue with whatever follows the separator.
+ InBuffer = Split.second;
+ }
+}
+
+/// Return the string stored at position \p Index of the table, or an
+/// invalid_argument error when \p Index is past the last recorded string.
+Expected<StringRef> ParsedStringTable::operator[](size_t Index) {
+  if (Index >= Offsets.size())
+    // Both values are size_t, so they need the 'z' length modifier; a plain
+    // '%u' would read them as unsigned int.
+    return createStringError(
+        std::make_error_code(std::errc::invalid_argument),
+        "String with index %zu is out of bounds (size = %zu).", Index,
+        Offsets.size());
+
+  size_t Offset = Offsets[Index];
+  // If it's the last offset, we can't use the next offset to know the size of
+  // the string.
+  size_t NextOffset =
+      (Index == Offsets.size() - 1) ? Buffer.size() : Offsets[Index + 1];
+  // The range [Offset, NextOffset) includes the '\0' separator; leave it out
+  // of the returned string.
+  return StringRef(Buffer.data() + Offset, NextOffset - Offset - 1);
+}
+
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(remarks::Parser, LLVMRemarkParserRef)
diff --git a/llvm/lib/Remarks/RemarkStringTable.cpp b/llvm/lib/Remarks/RemarkStringTable.cpp
new file mode 100644
index 00000000000..984aa5b33b4
--- /dev/null
+++ b/llvm/lib/Remarks/RemarkStringTable.cpp
@@ -0,0 +1,48 @@
+//===- RemarkStringTable.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the Remark string table used at remark generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkStringTable.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+/// Add \p Str to the table. Returns the ID associated with the string together
+/// with the key stored in the table; adding a string that is already present
+/// returns the ID it received the first time.
+std::pair<unsigned, StringRef> StringTable::add(StringRef Str) {
+  // ID a new entry would get: the number of strings already in the table.
+  size_t CandidateID = StrTab.size();
+  auto Inserted = StrTab.insert({Str, CandidateID});
+  auto &Entry = *Inserted.first;
+  const bool IsNewString = Inserted.second;
+  // Only a newly inserted string grows the serialized table: its characters
+  // plus the terminating '\0'.
+  if (IsNewString)
+    SerializedSize += Entry.first().size() + 1;
+  // Entry holds CandidateID for a new string, or the earlier ID otherwise.
+  return {Entry.second, Entry.first()};
+}
+
+/// Write the table to \p OS: first SerializedSize as a little-endian uint64_t,
+/// then each string followed by a '\0'.
+void StringTable::serialize(raw_ostream &OS) const {
+ // Emit the total size in bytes of the strings that follow (terminators
+ // included), not the number of strings.
+ uint64_t StrTabSize = SerializedSize;
+ support::endian::write(OS, StrTabSize, support::little);
+ // Emit the sequence of strings.
+ for (StringRef Str : serialize()) {
+ OS << Str;
+ // Explicitly emit a '\0'.
+ OS.write('\0');
+ }
+}
+
+/// Return the strings of the table ordered by ID: element N of the result is
+/// the string whose ID is N. The returned StringRefs refer to the keys stored
+/// in the table.
+std::vector<StringRef> StringTable::serialize() const {
+  // Parentheses request StrTab.size() empty elements explicitly. The braced
+  // form only reached this constructor because the size is not convertible to
+  // a StringRef element.
+  std::vector<StringRef> Strings(StrTab.size());
+  // The map iterates in an unspecified order; the stored ID puts each string
+  // in its slot.
+  for (const auto &KV : StrTab)
+    Strings[KV.second] = KV.first();
+  return Strings;
+}
diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp
index db6b0b25bd1..0c265856d28 100644
--- a/llvm/lib/Remarks/YAMLRemarkParser.cpp
+++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp
@@ -34,7 +34,19 @@ Error YAMLRemarkParser::parseStr(T &Result, yaml::KeyValueNode &Node) {
auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
if (!Value)
return make_error<YAMLParseError>("expected a value of scalar type.", Node);
- StringRef Tmp = Value->getRawValue();
+ StringRef Tmp;
+ if (!StrTab) {
+ Tmp = Value->getRawValue();
+ } else {
+ // If we have a string table, parse it as an unsigned.
+ unsigned StrID = 0;
+ if (Error E = parseUnsigned(StrID, Node))
+ return E;
+ if (Expected<StringRef> Str = (*StrTab)[StrID])
+ Tmp = *Str;
+ else
+ return Str.takeError();
+ }
if (Tmp.front() == '\'')
Tmp = Tmp.drop_front();
diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h
index 5095a4be730..5fd17865b69 100644
--- a/llvm/lib/Remarks/YAMLRemarkParser.h
+++ b/llvm/lib/Remarks/YAMLRemarkParser.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkParser.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
@@ -38,7 +39,8 @@ struct YAMLRemarkParser {
raw_string_ostream ErrorStream;
/// Temporary parsing buffer for the arguments.
SmallVector<Argument, 8> TmpArgs;
-
+ /// The string table used for parsing strings.
+ Optional<ParsedStringTable> StrTab;
/// The state used by the parser to parse a remark entry. Invalidated with
/// every call to `parseYAMLElement`.
struct ParseState {
@@ -57,10 +59,13 @@ struct YAMLRemarkParser {
/// not be containing any value.
Optional<ParseState> State;
- YAMLRemarkParser(StringRef Buf)
+ YAMLRemarkParser(StringRef Buf, Optional<StringRef> StrTabBuf = None)
: SM(), Stream(Buf, SM), ErrorString(), ErrorStream(ErrorString),
- TmpArgs() {
+ TmpArgs(), StrTab() {
SM.setDiagHandler(YAMLRemarkParser::HandleDiagnostic, this);
+
+ if (StrTabBuf)
+ StrTab.emplace(*StrTabBuf);
}
/// Parse a YAML element.
@@ -122,8 +127,8 @@ struct YAMLParserImpl : public ParserImpl {
/// Set to `true` if we had any errors during parsing.
bool HasErrors = false;
- YAMLParserImpl(StringRef Buf)
- : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf),
+ YAMLParserImpl(StringRef Buf, Optional<StringRef> StrTabBuf = None)
+ : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf, StrTabBuf),
YAMLIt(YAMLParser.Stream.begin()), HasErrors(false) {}
static bool classof(const ParserImpl *PI) {
OpenPOWER on IntegriCloud