diff options
| author | Benjamin Kramer <benny.kra@googlemail.com> | 2018-01-23 23:05:04 +0000 |
|---|---|---|
| committer | Benjamin Kramer <benny.kra@googlemail.com> | 2018-01-23 23:05:04 +0000 |
| commit | cbce2f02e9df04a1f7f292c4a2b8b50321f14fae (patch) | |
| tree | 31c75b234822c626138472de0a4c41185576f407 /llvm/utils | |
| parent | 0627a33ed4bd72f77c56ed9ffc34d190d51b4628 (diff) | |
| download | bcm5719-llvm-cbce2f02e9df04a1f7f292c4a2b8b50321f14fae.tar.gz bcm5719-llvm-cbce2f02e9df04a1f7f292c4a2b8b50321f14fae.zip | |
[TableGen] Optimize the regex search.
llvm::Regex is still the slowest regex engine on earth, running it over
all instructions on X86 takes a while. Extract a prefix and use a binary
search to reduce the search space before we resort to regex matching.
There are a couple of caveats here:
- The generic opcodes are outside of the sorted enum. They're handled in an extra loop.
- If there's a top-level bar we can't use the prefix trick.
- We bail on top-level ?. This could be handled, but it's rare.
This brings the time to generate X86GenInstrInfo.inc from 21s to 4.7s on
my machine.
llvm-svn: 323277
Diffstat (limited to 'llvm/utils')
| -rw-r--r-- | llvm/utils/TableGen/CodeGenSchedule.cpp | 96 |
1 files changed, 76 insertions, 20 deletions
diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp index b753e19a544..93409104313 100644 --- a/llvm/utils/TableGen/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/CodeGenSchedule.cpp @@ -12,17 +12,18 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenInstruction.h" #include "CodeGenSchedule.h" +#include "CodeGenInstruction.h" #include "CodeGenTarget.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" #include <algorithm> #include <iterator> @@ -50,36 +51,91 @@ struct InstrsOp : public SetTheory::Operator { }; // (instregex "OpcPat",...) Find all instructions matching an opcode pattern. -// -// TODO: Since this is a prefix match, perform a binary search over the -// instruction names using lower_bound. Note that the predefined instrs must be -// scanned linearly first. However, this is only safe if the regex pattern has -// no top-level bars. The DAG already has a list of patterns, so there's no -// reason to use top-level bars, but we need a way to verify they don't exist -// before implementing the optimization. struct InstRegexOp : public SetTheory::Operator { const CodeGenTarget &Target; InstRegexOp(const CodeGenTarget &t): Target(t) {} + /// Remove any text inside of parentheses from S. + static std::string removeParens(llvm::StringRef S) { + std::string Result; + unsigned Paren = 0; + // NB: We don't care about escaped parens here. + for (char C : S) { + switch (C) { + case '(': + ++Paren; + break; + case ')': + --Paren; + break; + default: + if (Paren == 0) + Result += C; + } + } + return Result; + } + void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, ArrayRef<SMLoc> Loc) override { - SmallVector<Regex, 4> RegexList; + SmallVector<std::pair<StringRef, Optional<Regex>>, 4> RegexList; for (Init *Arg : make_range(Expr->arg_begin(), Expr->arg_end())) { StringInit *SI = dyn_cast<StringInit>(Arg); if (!SI) - PrintFatalError(Loc, "instregex requires pattern string: " - + Expr->getAsString()); - std::string pat = SI->getValue(); - // Implement a python-style prefix match. + PrintFatalError(Loc, "instregex requires pattern string: " + + Expr->getAsString()); + // Extract a prefix that we can binary search on. + static const char RegexMetachars[] = "()^$|*+?.[]\\{}"; + auto FirstMeta = SI->getValue().find_first_of(RegexMetachars); + // Look for top-level | or ?. We cannot optimize them to binary search. + if (removeParens(SI->getValue()).find_first_of("|?") != std::string::npos) + FirstMeta = 0; + StringRef Prefix = SI->getValue().substr(0, FirstMeta); + std::string pat = SI->getValue().substr(FirstMeta); + if (pat.empty()) { + RegexList.push_back(std::make_pair(Prefix, None)); + continue; + } + // For the rest use a python-style prefix match. if (pat[0] != '^') { pat.insert(0, "^("); pat.insert(pat.end(), ')'); } - RegexList.push_back(Regex(pat)); - } - for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) { - for (auto &R : RegexList) { - if (R.match(Inst->TheDef->getName())) + RegexList.push_back(std::make_pair(Prefix, Regex(pat))); + } + for (auto &R : RegexList) { + // The generic opcodes are unsorted, handle them manually. + for (auto *Inst : Target.getInstructionsByEnumValue().slice( + 0, TargetOpcode::GENERIC_OP_END + 1)) { + if (Inst->TheDef->getName().startswith(R.first) && + (!R.second || + R.second->match(Inst->TheDef->getName().substr(R.first.size())))) + Elts.insert(Inst->TheDef); + } + + ArrayRef<const CodeGenInstruction *> Instructions = + Target.getInstructionsByEnumValue().slice( + TargetOpcode::GENERIC_OP_END + 1); + + // Target instructions are sorted. Find the range that starts with our + // prefix. + struct Comp { + bool operator()(const CodeGenInstruction *LHS, StringRef RHS) { + return LHS->TheDef->getName() < RHS; + } + bool operator()(StringRef LHS, const CodeGenInstruction *RHS) { + return LHS < RHS->TheDef->getName() && + !RHS->TheDef->getName().startswith(LHS); + } + }; + auto Range = std::equal_range(Instructions.begin(), Instructions.end(), + R.first, Comp()); + + // For this range we know that it starts with the prefix. Check if there's + // a regex that needs to be checked. + for (auto *Inst : make_range(Range)) { + if (!R.second || + R.second->match(Inst->TheDef->getName().substr(R.first.size()))) Elts.insert(Inst->TheDef); } } |

