diff options
| author | Rui Ueyama <ruiu@google.com> | 2016-11-03 17:57:38 +0000 |
|---|---|---|
| committer | Rui Ueyama <ruiu@google.com> | 2016-11-03 17:57:38 +0000 |
| commit | f91282e1cf6871f933bc7186332f47f3d4b38b7d (patch) | |
| tree | bdaec6784f7bccdf7ca40ea5d0f5a4bbcd3f9ceb | |
| parent | cc34983181a281761ae4bb7a577d6284dae23d53 (diff) | |
| download | bcm5719-llvm-f91282e1cf6871f933bc7186332f47f3d4b38b7d.tar.gz bcm5719-llvm-f91282e1cf6871f933bc7186332f47f3d4b38b7d.zip | |
Add [<chars>] to the glob matcher to eliminate use of llvm::Regex.
Previously, it didn't support the character class, so we couldn't
eliminate the use fo llvm::Regex. Now that it is supported, we
can remove compileGlobPattern, which converts a glob pattern to
a regex.
This patch contains optimization for exact/prefix/suffix matches.
Differential Revision: https://reviews.llvm.org/D26284
llvm-svn: 285949
| -rw-r--r-- | lld/ELF/LinkerScript.cpp | 18 | ||||
| -rw-r--r-- | lld/ELF/LinkerScript.h | 22 | ||||
| -rw-r--r-- | lld/ELF/Strings.cpp | 172 | ||||
| -rw-r--r-- | lld/ELF/Strings.h | 39 | ||||
| -rw-r--r-- | lld/ELF/SymbolTable.cpp | 16 | ||||
| -rw-r--r-- | lld/ELF/SymbolTable.h | 4 |
6 files changed, 179 insertions, 92 deletions
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 1169f838e19..6b8e39dc832 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -111,11 +111,11 @@ template <class ELFT> bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { for (InputSectionDescription *ID : Opt.KeptSections) { StringRef Filename = S->getFile()->getName(); - if (!ID->FileRe.match(sys::path::filename(Filename))) + if (!ID->FilePat.match(sys::path::filename(Filename))) continue; for (SectionPattern &P : ID->SectionPatterns) - if (P.SectionRe.match(S->Name)) + if (P.SectionPat.match(S->Name)) return true; } return false; @@ -178,13 +178,13 @@ void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { size_t SizeBefore = I->Sections.size(); for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) { StringRef Filename = sys::path::filename(F->getName()); - if (!I->FileRe.match(Filename) || Pat.ExcludedFileRe.match(Filename)) + if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename)) continue; for (InputSectionBase<ELFT> *S : F->getSections()) - if (!isDiscarded(S) && !S->OutSec && Pat.SectionRe.match(S->Name)) + if (!isDiscarded(S) && !S->OutSec && Pat.SectionPat.match(S->Name)) I->Sections.push_back(S); - if (Pat.SectionRe.match("COMMON")) + if (Pat.SectionPat.match("COMMON")) I->Sections.push_back(InputSection<ELFT>::CommonInputSection); } @@ -1211,7 +1211,7 @@ StringMatcher ScriptParser::readFilePatterns() { std::vector<StringRef> V; while (!Error && !consume(")")) V.push_back(next()); - return StringMatcher(std::move(V)); + return StringMatcher(V); } SortSectionPolicy ScriptParser::readSortKind() { @@ -1236,10 +1236,10 @@ SortSectionPolicy ScriptParser::readSortKind() { std::vector<SectionPattern> ScriptParser::readInputSectionsList() { std::vector<SectionPattern> Ret; while (!Error && peek() != ")") { - StringMatcher ExcludeFileRe; + StringMatcher ExcludeFilePat; if (consume("EXCLUDE_FILE")) { expect("("); - ExcludeFileRe = readFilePatterns(); + ExcludeFilePat = readFilePatterns(); } std::vector<StringRef> V; @@ -1247,7 +1247,7 @@ std::vector<SectionPattern> ScriptParser::readInputSectionsList() { V.push_back(next()); if (!V.empty()) - Ret.push_back({std::move(ExcludeFileRe), StringMatcher(std::move(V))}); + Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); else setError("section pattern is expected"); } diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index 0503a6c73cc..7d693f84ed4 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -114,28 +114,20 @@ struct OutputSectionCommand : BaseCommand { // It can optionally have negative match pattern for EXCLUDED_FILE command. // Also it may be surrounded with SORT() command, so contains sorting rules. struct SectionPattern { - SectionPattern(StringMatcher &&Re1, StringMatcher &&Re2) - : ExcludedFileRe(std::forward<StringMatcher>(Re1)), - SectionRe(std::forward<StringMatcher>(Re2)) {} - - SectionPattern(SectionPattern &&Other) { - std::swap(ExcludedFileRe, Other.ExcludedFileRe); - std::swap(SectionRe, Other.SectionRe); - std::swap(SortOuter, Other.SortOuter); - std::swap(SortInner, Other.SortInner); - } - - StringMatcher ExcludedFileRe; - StringMatcher SectionRe; + SectionPattern(StringMatcher &&Pat1, StringMatcher &&Pat2) + : ExcludedFilePat(Pat1), SectionPat(Pat2) {} + + StringMatcher ExcludedFilePat; + StringMatcher SectionPat; SortSectionPolicy SortOuter; SortSectionPolicy SortInner; }; struct InputSectionDescription : BaseCommand { InputSectionDescription(StringRef FilePattern) - : BaseCommand(InputSectionKind), FileRe(FilePattern) {} + : BaseCommand(InputSectionKind), FilePat({FilePattern}) {} static bool classof(const BaseCommand *C); - StringMatcher FileRe; + StringMatcher FilePat; // Input sections that matches at least one of SectionPatterns // will be associated with this InputSectionDescription. diff --git a/lld/ELF/Strings.cpp b/lld/ELF/Strings.cpp index 09459ab0e04..b3f6a3dbe1d 100644 --- a/lld/ELF/Strings.cpp +++ b/lld/ELF/Strings.cpp @@ -21,36 +21,142 @@ using namespace llvm; using namespace lld; using namespace lld::elf; -// Returns true if S matches T. S can contain glob meta-characters. -// The asterisk ('*') matches zero or more characters, and the question -// mark ('?') matches one character. -static bool globMatch(StringRef S, StringRef T) { +// This is a scanner for the glob pattern. +// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]" +// (which is a negative form of "[<chars>]"), or a non-meta character. +// This function returns the first token in S. +BitVector GlobPattern::scan(StringRef &S) { + switch (S[0]) { + case '*': + S = S.substr(1); + // '*' is represented by an empty bitvector. + // All other bitvectors are 256-bit long. + return BitVector(); + case '?': + S = S.substr(1); + return BitVector(256, true); + case '[': { + size_t End = S.find(']', 1); + if (End == StringRef::npos) { + error("invalid glob pattern: " + Original); + return BitVector(256, false); + } + StringRef Chars = S.substr(1, End - 1); + S = S.substr(End + 1); + if (Chars.startswith("^")) + return expand(Chars.substr(1)).flip(); + return expand(Chars); + } + default: + BitVector BV(256, false); + BV[S[0]] = true; + S = S.substr(1); + return BV; + } +} + +// Expands character ranges and returns a bitmap. +// For example, "a-cf-hz" is expanded to "abcfghz". +BitVector GlobPattern::expand(StringRef S) { + BitVector BV(256, false); + + // Expand "x-y". for (;;) { - if (S.empty()) - return T.empty(); - if (S[0] == '*') { + if (S.size() < 3) + break; + + // If it doesn't start with something like "x-y", + // consume the first character and proceed. + if (S[1] != '-') { + BV[S[0]] = true; S = S.substr(1); - if (S.empty()) + continue; + } + + // It must be in the form of "x-y". + // Validate it and then interpret the range. + if (S[0] > S[2]) { + error("invalid glob pattern: " + Original); + return BV; + } + for (int C = S[0]; C <= S[2]; ++C) + BV[C] = true; + S = S.substr(3); + } + + for (char C : S) + BV[C] = true; + return BV; +} + +GlobPattern::GlobPattern(StringRef S) : Original(S) { + if (!hasWildcard(S)) { + // S doesn't contain any metacharacter, + // so the regular string comparison should work. + Exact = S; + } else if (S.endswith("*") && !hasWildcard(S.drop_back())) { + // S is something like "foo*". We can use startswith(). + Prefix = S.drop_back(); + } else if (S.startswith("*") && !hasWildcard(S.drop_front())) { + // S is something like "*foo". We can use endswith(). + Suffix = S.drop_front(); + } else { + // Otherwise, we need to do real glob pattern matching. + // Parse the pattern now. + while (!S.empty()) + Tokens.push_back(scan(S)); + } +} + +bool GlobPattern::match(StringRef S) const { + if (Exact) + return S == *Exact; + if (Prefix) + return S.startswith(*Prefix); + if (Suffix) + return S.endswith(*Suffix); + return matchOne(Tokens, S); +} + +// Runs glob pattern Pats against string S. +bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const { + for (;;) { + if (Pats.empty()) + return S.empty(); + + // If Pats[0] is '*', try to match Pats[1..] against all possible + // substrings of S to see at least one pattern succeeds. + if (Pats[0].size() == 0) { + Pats = Pats.slice(1); + if (Pats.empty()) // Fast path. If a pattern is '*', it matches anything. return true; - for (size_t I = 0, E = T.size(); I < E; ++I) - if (globMatch(S, T.substr(I))) + for (size_t I = 0, E = S.size(); I < E; ++I) + if (matchOne(Pats, S.substr(I))) return true; return false; } - if (T.empty() || (S[0] != T[0] && S[0] != '?')) + + // If Pats[0] is not '*', it must consume one character. + if (S.empty() || !Pats[0][S[0]]) return false; + Pats = Pats.slice(1); S = S.substr(1); - T = T.substr(1); } } -bool StringMatcher::match(StringRef S) { - for (StringRef P : Patterns) - if (globMatch(P, S)) +StringMatcher::StringMatcher(const std::vector<StringRef> &Pat) { + for (StringRef S : Pat) + Patterns.push_back(GlobPattern(S)); +} + +bool StringMatcher::match(StringRef S) const { + for (const GlobPattern &Pat : Patterns) + if (Pat.match(S)) return true; return false; } + // If an input string is in the form of "foo.N" where N is a number, // return N. Otherwise, returns 65536, which is one greater than the // lowest priority. @@ -74,42 +180,6 @@ StringRef elf::unquote(StringRef S) { return S.substr(1, S.size() - 2); } -// Converts a glob pattern to a regular expression. -static std::string toRegex(StringRef S) { - std::string T; - bool InBracket = false; - while (!S.empty()) { - char C = S.front(); - if (InBracket) { - InBracket = C != ']'; - T += C; - S = S.drop_front(); - continue; - } - - if (C == '*') - T += ".*"; - else if (C == '?') - T += '.'; - else if (StringRef(".+^${}()|/\\").find_first_of(C) != StringRef::npos) - T += std::string("\\") + C; - else - T += C; - - InBracket = C == '['; - S = S.substr(1); - } - return T; -} - -// Converts multiple glob patterns to a regular expression. -Regex elf::compileGlobPatterns(ArrayRef<StringRef> V) { - std::string T = "^(" + toRegex(V[0]); - for (StringRef S : V.slice(1)) - T += "|" + toRegex(S); - return Regex(T + ")$"); -} - // Converts a hex string (e.g. "deadbeef") to a vector. std::vector<uint8_t> elf::parseHex(StringRef S) { std::vector<uint8_t> Hex; diff --git a/lld/ELF/Strings.h b/lld/ELF/Strings.h index 3194b996011..8ffcb0ed056 100644 --- a/lld/ELF/Strings.h +++ b/lld/ELF/Strings.h @@ -12,29 +12,54 @@ #include "lld/Core/LLVM.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Regex.h" #include <vector> namespace lld { namespace elf { -llvm::Regex compileGlobPatterns(ArrayRef<StringRef> V); + int getPriority(StringRef S); bool hasWildcard(StringRef S); std::vector<uint8_t> parseHex(StringRef S); bool isValidCIdentifier(StringRef S); StringRef unquote(StringRef S); +// This class represents a glob pattern. Supported metacharacters +// are "*", "?", "[<chars>]" and "[^<chars>]". +class GlobPattern { +public: + explicit GlobPattern(StringRef Pat); + bool match(StringRef S) const; + +private: + bool matchOne(ArrayRef<llvm::BitVector> Pat, StringRef S) const; + llvm::BitVector scan(StringRef &S); + llvm::BitVector expand(StringRef S); + + // Parsed glob pattern. + std::vector<llvm::BitVector> Tokens; + + // A glob pattern given to this class. This is for error reporting. + StringRef Original; + + // The following members are for optimization. + llvm::Optional<StringRef> Exact; + llvm::Optional<StringRef> Prefix; + llvm::Optional<StringRef> Suffix; +}; + +// This class represents multiple glob patterns. class StringMatcher { public: StringMatcher() = default; - explicit StringMatcher(StringRef P) : Patterns({P}) {} - explicit StringMatcher(std::vector<StringRef> &&Pat) - : Patterns(std::move(Pat)) {} + explicit StringMatcher(const std::vector<StringRef> &Pat); + + bool match(StringRef S) const; - bool match(StringRef S); private: - std::vector<StringRef> Patterns; + std::vector<GlobPattern> Patterns; }; // Returns a demangled C++ symbol name. If Name is not a mangled diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 0d8c62dd475..5c6c8e63897 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -470,12 +470,12 @@ template <class ELFT> SymbolBody *SymbolTable<ELFT>::find(StringRef Name) { // Returns a list of defined symbols that match with a given regex. template <class ELFT> -std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(const Regex &Re) { +std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(const StringMatcher &M) { std::vector<SymbolBody *> Res; for (Symbol *Sym : SymVector) { SymbolBody *B = Sym->body(); StringRef Name = B->getName(); - if (!B->isUndefined() && const_cast<Regex &>(Re).match(Name)) + if (!B->isUndefined() && M.match(Name)) Res.push_back(B); } return Res; @@ -611,10 +611,10 @@ findDemangled(std::map<std::string, std::vector<SymbolBody *>> &D, static std::vector<SymbolBody *> findAllDemangled(const std::map<std::string, std::vector<SymbolBody *>> &D, - const Regex &Re) { + StringMatcher &M) { std::vector<SymbolBody *> Res; for (auto &P : D) { - if (const_cast<Regex &>(Re).match(P.first)) + if (M.match(P.first)) for (SymbolBody *Body : P.second) if (!Body->isUndefined()) Res.push_back(Body); @@ -639,8 +639,8 @@ template <class ELFT> void SymbolTable<ELFT>::handleAnonymousVersion() { } if (Patterns.empty()) return; - Regex Re = compileGlobPatterns(Patterns); - std::vector<SymbolBody *> Syms = findAll(Re); + StringMatcher M(Patterns); + std::vector<SymbolBody *> Syms = findAll(M); for (SymbolBody *B : Syms) B->symbol()->VersionId = VER_NDX_GLOBAL; } @@ -696,9 +696,9 @@ template <class ELFT> void SymbolTable<ELFT>::scanVersionScript() { for (SymbolVersion &Sym : V.Globals) { if (!Sym.HasWildcards) continue; - Regex Re = compileGlobPatterns({Sym.Name}); + StringMatcher M({Sym.Name}); std::vector<SymbolBody *> Syms = - Sym.IsExternCpp ? findAllDemangled(Demangled, Re) : findAll(Re); + Sym.IsExternCpp ? findAllDemangled(Demangled, M) : findAll(M); // Exact matching takes precendence over fuzzy matching, // so we set a version to a symbol only if no version has been assigned diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index 08cbaefecc7..2f8f4edced5 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -12,9 +12,9 @@ #include "InputFiles.h" #include "LTO.h" +#include "Strings.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/Support/Regex.h" namespace lld { namespace elf { @@ -92,7 +92,7 @@ public: void wrap(StringRef Name); private: - std::vector<SymbolBody *> findAll(const llvm::Regex &Re); + std::vector<SymbolBody *> findAll(const StringMatcher &M); std::pair<Symbol *, bool> insert(StringRef &Name); std::pair<Symbol *, bool> insert(StringRef &Name, uint8_t Type, uint8_t Visibility, bool CanOmitFromDynSym, |

