1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
//===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a simple interactive tool which can be used to manually
// evaluate symbol search quality of Clangd index.
//
//===----------------------------------------------------------------------===//
#include "../../../index/SymbolYAML.h"
#include "../Dex.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/LineEditor/LineEditor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Signals.h"
using clang::clangd::FuzzyFindRequest;
using clang::clangd::loadIndex;
using clang::clangd::Symbol;
using clang::clangd::SymbolIndex;
using llvm::StringRef;
namespace {
// Command-line option holding the path to the symbol collection file. It is
// declared positional and required; main() passes its value to loadIndex().
llvm::cl::opt<std::string>
SymbolCollection("symbol-collection-file",
llvm::cl::desc("Path to the file with symbol collection"),
llvm::cl::Positional, llvm::cl::Required);
// Tool description handed to llvm::cl::ParseCommandLineOptions in main().
// The text is user-facing, so keep it grammatical and in sync with the
// commands the tool actually accepts.
static const std::string Overview = R"(
This is an **experimental** interactive tool to process user-provided search
queries over given symbol collection obtained via global-symbol-builder. The
tool can be used to evaluate search quality of existing index implementations
and manually construct non-trivial test cases.
Use "help" request to get information about the details.
)";
/// Runs \p F once and prints how long the call took to llvm::outs().
///
/// \param Name Label placed at the start of the report line
///             ("<Name> took <duration>.").
/// \param F    Callable to execute and time; it is invoked exactly once.
///
/// The interval is taken with std::chrono::steady_clock, which is monotonic.
/// std::chrono::high_resolution_clock may be an alias of the system clock, in
/// which case a wall-clock adjustment while \p F runs would skew the result.
void reportTime(StringRef Name, llvm::function_ref<void()> F) {
  const auto TimerStart = std::chrono::steady_clock::now();
  F();
  const auto TimerStop = std::chrono::steady_clock::now();
  // Report whole milliseconds only.
  const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>(
      TimerStop - TimerStart);
  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
}
/// Issues a fuzzy-find request for \p UnqualifiedName against \p Index and
/// prints the results to llvm::outs() as a table.
///
/// The table has a header row followed by one row per symbol reported by the
/// index: a zero-based rank (in callback order), the symbol ID and the symbol
/// name. The request limit is fixed at 10.
void fuzzyFind(llvm::StringRef UnqualifiedName, const SymbolIndex &Index) {
  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
  static const auto RowFormat = "{0,-4} | {1,-40} | {2,-25}\n";

  FuzzyFindRequest Req;
  Req.Query = UnqualifiedName;
  Req.Limit = 10;

  auto &OS = llvm::outs();
  OS << llvm::formatv(RowFormat, "Rank", "Symbol ID", "Symbol Name");

  // Rows are numbered in the order the index hands symbols to the callback.
  size_t NextRank = 0;
  auto PrintRow = [&](const Symbol &Match) {
    OS << llvm::formatv(RowFormat, NextRank, Match.ID.str(), Match.Name);
    ++NextRank;
  };
  Index.fuzzyFind(Req, PrintRow);
}
// Text printed by the "help" command. Every command accepted by dispatch()
// should be listed here, including "help" itself.
static const std::string HelpMessage = R"(dexp commands:
> find Name
Constructs fuzzy find request given unqualified symbol name and returns top 10
symbols retrieved from index.
> lookup SymbolID
Retrieves symbol names given USR.
> help
Prints this list of commands.
)";
/// Writes the list of supported commands (HelpMessage) to llvm::outs().
void help() {
  llvm::outs() << HelpMessage;
}
/// Looks up a single symbol in \p Index and prints what the index returns.
///
/// A SymbolID is constructed from \p USR and wrapped in a LookupRequest. Each
/// symbol passed to the callback is written to llvm::outs() through
/// SymbolToYAML(); if the callback never fires, "not found" is printed.
///
/// NOTE(review): fuzzyFind() prints Sym.ID.str(), whereas this function builds
/// the ID from a USR string -- confirm that the printed IDs can actually be fed
/// back into this command.
void lookup(StringRef USR, const SymbolIndex &Index) {
  using clang::clangd::SymbolID;

  llvm::DenseSet<SymbolID> Wanted{SymbolID{USR}};
  clang::clangd::LookupRequest Req{Wanted};

  bool SawAny = false;
  Index.lookup(Req, [&SawAny](const Symbol &Match) {
    SawAny = true;
    llvm::outs() << SymbolToYAML(Match);
  });

  if (SawAny)
    return;
  llvm::outs() << "not found\n";
}
// FIXME(kbobyrev): Make this an actual REPL: probably use LLVM Command Line
// library for parsing flags and arguments.
// FIXME(kbobyrev): Ideas for commands:
// * symbol lookup: print out symbol in YAML format given SymbolID
// * find symbol references: print set of reference locations
// * load/swap/reload index: this would make it possible to get rid of llvm::cl
// usages in the tool driver and actually use llvm::cl library in the REPL.
// * show posting list density histogram (or dump data somewhere so that user
// could build one)
// * show number of tokens of each kind
// * print out tokens with the most dense posting lists
// * print out tokens with least dense posting lists
/// Parses one REPL line and runs the matching command against \p Index.
///
/// \param Request Raw line typed by the user: a command name optionally
///                followed by space-separated arguments.
/// \param Index   Index that "find" and "lookup" are executed against.
///
/// Supported commands:
///   find <name>   - timed fuzzyFind() for the given unqualified name.
///   lookup <id>   - timed lookup() for the given identifier.
///   help          - prints the command list.
/// Anything else prints a hint to use 'help'. All diagnostics go to
/// llvm::outs().
void dispatch(StringRef Request, const SymbolIndex &Index) {
  llvm::SmallVector<StringRef, 2> Arguments;
  // StringRef::split keeps empty tokens by default, which would turn an empty
  // line into a single "" argument (making the emptiness check below
  // unreachable) and would make repeated or trailing spaces look like extra
  // arguments. Ask it to drop empty tokens instead.
  Request.split(Arguments, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
  if (Arguments.empty()) {
    llvm::outs() << "Request can not be empty.\n";
    help();
    return;
  }

  const StringRef Command = Arguments.front();
  if (Command == "find") {
    // Exactly one argument (the symbol name) is expected after the command.
    if (Arguments.size() != 2) {
      llvm::outs() << "find request must specify unqualified symbol name.\n";
      return;
    }
    reportTime("fuzzy find request",
               [&]() { fuzzyFind(Arguments.back(), Index); });
  } else if (Command == "lookup") {
    // Exactly one argument (the symbol identifier) is expected.
    if (Arguments.size() != 2) {
      llvm::outs() << "lookup request must specify symbol ID .\n";
      return;
    }
    reportTime("lookup request", [&]() { lookup(Arguments.back(), Index); });
  } else if (Command == "help") {
    help();
  } else {
    llvm::outs() << "Unknown command. Try 'help'\n";
  }
}
} // namespace
int main(int argc, const char *argv[]) {
llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
std::unique_ptr<SymbolIndex> Index;
reportTime("Dex build", [&]() {
Index = loadIndex(SymbolCollection, /*URISchemes=*/{},
/*UseDex=*/true);
});
if (!Index) {
llvm::outs()
<< "ERROR: Please provide a valid path to symbol collection file.\n";
return -1;
}
llvm::LineEditor LE("dexp");
while (llvm::Optional<std::string> Request = LE.readLine())
dispatch(Request.getValue(), *Index);
return 0;
}
|