diff options
Diffstat (limited to 'lld')
| -rw-r--r-- | lld/ELF/LinkerScript.cpp | 18 | ||||
| -rw-r--r-- | lld/ELF/LinkerScript.h | 22 | ||||
| -rw-r--r-- | lld/ELF/Strings.cpp | 172 | ||||
| -rw-r--r-- | lld/ELF/Strings.h | 39 | ||||
| -rw-r--r-- | lld/ELF/SymbolTable.cpp | 16 | ||||
| -rw-r--r-- | lld/ELF/SymbolTable.h | 4 | 
6 files changed, 179 insertions, 92 deletions
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 1169f838e19..6b8e39dc832 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -111,11 +111,11 @@ template <class ELFT>  bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {    for (InputSectionDescription *ID : Opt.KeptSections) {      StringRef Filename = S->getFile()->getName(); -    if (!ID->FileRe.match(sys::path::filename(Filename))) +    if (!ID->FilePat.match(sys::path::filename(Filename)))        continue;      for (SectionPattern &P : ID->SectionPatterns) -      if (P.SectionRe.match(S->Name)) +      if (P.SectionPat.match(S->Name))          return true;    }    return false; @@ -178,13 +178,13 @@ void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) {      size_t SizeBefore = I->Sections.size();      for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) {        StringRef Filename = sys::path::filename(F->getName()); -      if (!I->FileRe.match(Filename) || Pat.ExcludedFileRe.match(Filename)) +      if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename))          continue;        for (InputSectionBase<ELFT> *S : F->getSections()) -        if (!isDiscarded(S) && !S->OutSec && Pat.SectionRe.match(S->Name)) +        if (!isDiscarded(S) && !S->OutSec && Pat.SectionPat.match(S->Name))            I->Sections.push_back(S); -      if (Pat.SectionRe.match("COMMON")) +      if (Pat.SectionPat.match("COMMON"))          I->Sections.push_back(InputSection<ELFT>::CommonInputSection);      } @@ -1211,7 +1211,7 @@ StringMatcher ScriptParser::readFilePatterns() {    std::vector<StringRef> V;    while (!Error && !consume(")"))      V.push_back(next()); -  return StringMatcher(std::move(V)); +  return StringMatcher(V);  }  SortSectionPolicy ScriptParser::readSortKind() { @@ -1236,10 +1236,10 @@ SortSectionPolicy ScriptParser::readSortKind() {  std::vector<SectionPattern> ScriptParser::readInputSectionsList() {    std::vector<SectionPattern> Ret;    while (!Error && peek() != ")") { -    StringMatcher ExcludeFileRe; +    StringMatcher ExcludeFilePat;      if (consume("EXCLUDE_FILE")) {        expect("("); -      ExcludeFileRe = readFilePatterns(); +      ExcludeFilePat = readFilePatterns();      }      std::vector<StringRef> V; @@ -1247,7 +1247,7 @@ std::vector<SectionPattern> ScriptParser::readInputSectionsList() {        V.push_back(next());      if (!V.empty()) -      Ret.push_back({std::move(ExcludeFileRe), StringMatcher(std::move(V))}); +      Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)});      else        setError("section pattern is expected");    } diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index 0503a6c73cc..7d693f84ed4 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -114,28 +114,20 @@ struct OutputSectionCommand : BaseCommand {  // It can optionally have negative match pattern for EXCLUDED_FILE command.  // Also it may be surrounded with SORT() command, so contains sorting rules.  struct SectionPattern { -  SectionPattern(StringMatcher &&Re1, StringMatcher &&Re2) -      : ExcludedFileRe(std::forward<StringMatcher>(Re1)), -        SectionRe(std::forward<StringMatcher>(Re2)) {} - -  SectionPattern(SectionPattern &&Other) { -    std::swap(ExcludedFileRe, Other.ExcludedFileRe); -    std::swap(SectionRe, Other.SectionRe); -    std::swap(SortOuter, Other.SortOuter); -    std::swap(SortInner, Other.SortInner); -  } - -  StringMatcher ExcludedFileRe; -  StringMatcher SectionRe; +  SectionPattern(StringMatcher &&Pat1, StringMatcher &&Pat2) +      : ExcludedFilePat(Pat1), SectionPat(Pat2) {} + +  StringMatcher ExcludedFilePat; +  StringMatcher SectionPat;    SortSectionPolicy SortOuter;    SortSectionPolicy SortInner;  };  struct InputSectionDescription : BaseCommand {    InputSectionDescription(StringRef FilePattern) -      : BaseCommand(InputSectionKind), FileRe(FilePattern) {} +      : BaseCommand(InputSectionKind), FilePat({FilePattern}) {}    static bool classof(const BaseCommand *C); -  StringMatcher FileRe; +  StringMatcher FilePat;    // Input sections that matches at least one of SectionPatterns    // will be associated with this InputSectionDescription. diff --git a/lld/ELF/Strings.cpp b/lld/ELF/Strings.cpp index 09459ab0e04..b3f6a3dbe1d 100644 --- a/lld/ELF/Strings.cpp +++ b/lld/ELF/Strings.cpp @@ -21,36 +21,142 @@ using namespace llvm;  using namespace lld;  using namespace lld::elf; -// Returns true if S matches T. S can contain glob meta-characters. -// The asterisk ('*') matches zero or more characters, and the question -// mark ('?') matches one character. -static bool globMatch(StringRef S, StringRef T) { +// This is a scanner for the glob pattern. +// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]" +// (which is a negative form of "[<chars>]"), or a non-meta character. +// This function returns the first token in S. +BitVector GlobPattern::scan(StringRef &S) { +  switch (S[0]) { +  case '*': +    S = S.substr(1); +    // '*' is represented by an empty bitvector. +    // All other bitvectors are 256-bit long. +    return BitVector(); +  case '?': +    S = S.substr(1); +    return BitVector(256, true); +  case '[': { +    size_t End = S.find(']', 1); +    if (End == StringRef::npos) { +      error("invalid glob pattern: " + Original); +      return BitVector(256, false); +    } +    StringRef Chars = S.substr(1, End - 1); +    S = S.substr(End + 1); +    if (Chars.startswith("^")) +      return expand(Chars.substr(1)).flip(); +    return expand(Chars); +  } +  default: +    BitVector BV(256, false); +    BV[S[0]] = true; +    S = S.substr(1); +    return BV; +  } +} + +// Expands character ranges and returns a bitmap. +// For example, "a-cf-hz" is expanded to "abcfghz". +BitVector GlobPattern::expand(StringRef S) { +  BitVector BV(256, false); + +  // Expand "x-y".    for (;;) { -    if (S.empty()) -      return T.empty(); -    if (S[0] == '*') { +    if (S.size() < 3) +      break; + +    // If it doesn't start with something like "x-y", +    // consume the first character and proceed. +    if (S[1] != '-') { +      BV[S[0]] = true;        S = S.substr(1); -      if (S.empty()) +      continue; +    } + +    // It must be in the form of "x-y". +    // Validate it and then interpret the range. +    if (S[0] > S[2]) { +      error("invalid glob pattern: " + Original); +      return BV; +    } +    for (int C = S[0]; C <= S[2]; ++C) +      BV[C] = true; +    S = S.substr(3); +  } + +  for (char C : S) +    BV[C] = true; +  return BV; +} + +GlobPattern::GlobPattern(StringRef S) : Original(S) { +  if (!hasWildcard(S)) { +    // S doesn't contain any metacharacter, +    // so the regular string comparison should work. +    Exact = S; +  } else if (S.endswith("*") && !hasWildcard(S.drop_back())) { +    // S is something like "foo*". We can use startswith(). +    Prefix = S.drop_back(); +  } else if (S.startswith("*") && !hasWildcard(S.drop_front())) { +    // S is something like "*foo". We can use endswith(). +    Suffix = S.drop_front(); +  } else { +    // Otherwise, we need to do real glob pattern matching. +    // Parse the pattern now. +    while (!S.empty()) +      Tokens.push_back(scan(S)); +  } +} + +bool GlobPattern::match(StringRef S) const { +  if (Exact) +    return S == *Exact; +  if (Prefix) +    return S.startswith(*Prefix); +  if (Suffix) +    return S.endswith(*Suffix); +  return matchOne(Tokens, S); +} + +// Runs glob pattern Pats against string S. +bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const { +  for (;;) { +    if (Pats.empty()) +      return S.empty(); + +    // If Pats[0] is '*', try to match Pats[1..] against all possible +    // substrings of S to see at least one pattern succeeds. +    if (Pats[0].size() == 0) { +      Pats = Pats.slice(1); +      if (Pats.empty())          // Fast path. If a pattern is '*', it matches anything.          return true; -      for (size_t I = 0, E = T.size(); I < E; ++I) -        if (globMatch(S, T.substr(I))) +      for (size_t I = 0, E = S.size(); I < E; ++I) +        if (matchOne(Pats, S.substr(I)))            return true;        return false;      } -    if (T.empty() || (S[0] != T[0] && S[0] != '?')) + +    // If Pats[0] is not '*', it must consume one character. +    if (S.empty() || !Pats[0][S[0]])        return false; +    Pats = Pats.slice(1);      S = S.substr(1); -    T = T.substr(1);    }  } -bool StringMatcher::match(StringRef S) { -  for (StringRef P : Patterns) -    if (globMatch(P, S)) +StringMatcher::StringMatcher(const std::vector<StringRef> &Pat) { +  for (StringRef S : Pat) +    Patterns.push_back(GlobPattern(S)); +} + +bool StringMatcher::match(StringRef S) const { +  for (const GlobPattern &Pat : Patterns) +    if (Pat.match(S))        return true;    return false;  } +  // If an input string is in the form of "foo.N" where N is a number,  // return N. Otherwise, returns 65536, which is one greater than the  // lowest priority. @@ -74,42 +180,6 @@ StringRef elf::unquote(StringRef S) {    return S.substr(1, S.size() - 2);  } -// Converts a glob pattern to a regular expression. -static std::string toRegex(StringRef S) { -  std::string T; -  bool InBracket = false; -  while (!S.empty()) { -    char C = S.front(); -    if (InBracket) { -      InBracket = C != ']'; -      T += C; -      S = S.drop_front(); -      continue; -    } - -    if (C == '*') -      T += ".*"; -    else if (C == '?') -      T += '.'; -    else if (StringRef(".+^${}()|/\\").find_first_of(C) != StringRef::npos) -      T += std::string("\\") + C; -    else -      T += C; - -    InBracket = C == '['; -    S = S.substr(1); -  } -  return T; -} - -// Converts multiple glob patterns to a regular expression. -Regex elf::compileGlobPatterns(ArrayRef<StringRef> V) { -  std::string T = "^(" + toRegex(V[0]); -  for (StringRef S : V.slice(1)) -    T += "|" + toRegex(S); -  return Regex(T + ")$"); -} -  // Converts a hex string (e.g. "deadbeef") to a vector.  std::vector<uint8_t> elf::parseHex(StringRef S) {    std::vector<uint8_t> Hex; diff --git a/lld/ELF/Strings.h b/lld/ELF/Strings.h index 3194b996011..8ffcb0ed056 100644 --- a/lld/ELF/Strings.h +++ b/lld/ELF/Strings.h @@ -12,29 +12,54 @@  #include "lld/Core/LLVM.h"  #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h"  #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Regex.h"  #include <vector>  namespace lld {  namespace elf { -llvm::Regex compileGlobPatterns(ArrayRef<StringRef> V); +  int getPriority(StringRef S);  bool hasWildcard(StringRef S);  std::vector<uint8_t> parseHex(StringRef S);  bool isValidCIdentifier(StringRef S);  StringRef unquote(StringRef S); +// This class represents a glob pattern. Supported metacharacters +// are "*", "?", "[<chars>]" and "[^<chars>]". +class GlobPattern { +public: +  explicit GlobPattern(StringRef Pat); +  bool match(StringRef S) const; + +private: +  bool matchOne(ArrayRef<llvm::BitVector> Pat, StringRef S) const; +  llvm::BitVector scan(StringRef &S); +  llvm::BitVector expand(StringRef S); + +  // Parsed glob pattern. +  std::vector<llvm::BitVector> Tokens; + +  // A glob pattern given to this class. This is for error reporting. +  StringRef Original; + +  // The following members are for optimization. +  llvm::Optional<StringRef> Exact; +  llvm::Optional<StringRef> Prefix; +  llvm::Optional<StringRef> Suffix; +}; + +// This class represents multiple glob patterns.  class StringMatcher {  public:    StringMatcher() = default; -  explicit StringMatcher(StringRef P) : Patterns({P}) {} -  explicit StringMatcher(std::vector<StringRef> &&Pat) -      : Patterns(std::move(Pat)) {} +  explicit StringMatcher(const std::vector<StringRef> &Pat); + +  bool match(StringRef S) const; -  bool match(StringRef S);  private: -  std::vector<StringRef> Patterns; +  std::vector<GlobPattern> Patterns;  };  // Returns a demangled C++ symbol name. If Name is not a mangled diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 0d8c62dd475..5c6c8e63897 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -470,12 +470,12 @@ template <class ELFT> SymbolBody *SymbolTable<ELFT>::find(StringRef Name) {  // Returns a list of defined symbols that match with a given regex.  template <class ELFT> -std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(const Regex &Re) { +std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(const StringMatcher &M) {    std::vector<SymbolBody *> Res;    for (Symbol *Sym : SymVector) {      SymbolBody *B = Sym->body();      StringRef Name = B->getName(); -    if (!B->isUndefined() && const_cast<Regex &>(Re).match(Name)) +    if (!B->isUndefined() && M.match(Name))        Res.push_back(B);    }    return Res; @@ -611,10 +611,10 @@ findDemangled(std::map<std::string, std::vector<SymbolBody *>> &D,  static std::vector<SymbolBody *>  findAllDemangled(const std::map<std::string, std::vector<SymbolBody *>> &D, -                 const Regex &Re) { +                 StringMatcher &M) {    std::vector<SymbolBody *> Res;    for (auto &P : D) { -    if (const_cast<Regex &>(Re).match(P.first)) +    if (M.match(P.first))        for (SymbolBody *Body : P.second)          if (!Body->isUndefined())            Res.push_back(Body); @@ -639,8 +639,8 @@ template <class ELFT> void SymbolTable<ELFT>::handleAnonymousVersion() {    }    if (Patterns.empty())      return; -  Regex Re = compileGlobPatterns(Patterns); -  std::vector<SymbolBody *> Syms = findAll(Re); +  StringMatcher M(Patterns); +  std::vector<SymbolBody *> Syms = findAll(M);    for (SymbolBody *B : Syms)      B->symbol()->VersionId = VER_NDX_GLOBAL;  } @@ -696,9 +696,9 @@ template <class ELFT> void SymbolTable<ELFT>::scanVersionScript() {      for (SymbolVersion &Sym : V.Globals) {        if (!Sym.HasWildcards)          continue; -      Regex Re = compileGlobPatterns({Sym.Name}); +      StringMatcher M({Sym.Name});        std::vector<SymbolBody *> Syms = -          Sym.IsExternCpp ? findAllDemangled(Demangled, Re) : findAll(Re); +          Sym.IsExternCpp ? findAllDemangled(Demangled, M) : findAll(M);        // Exact matching takes precendence over fuzzy matching,        // so we set a version to a symbol only if no version has been assigned diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index 08cbaefecc7..2f8f4edced5 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -12,9 +12,9 @@  #include "InputFiles.h"  #include "LTO.h" +#include "Strings.h"  #include "llvm/ADT/CachedHashString.h"  #include "llvm/ADT/DenseMap.h" -#include "llvm/Support/Regex.h"  namespace lld {  namespace elf { @@ -92,7 +92,7 @@ public:    void wrap(StringRef Name);  private: -  std::vector<SymbolBody *> findAll(const llvm::Regex &Re); +  std::vector<SymbolBody *> findAll(const StringMatcher &M);    std::pair<Symbol *, bool> insert(StringRef &Name);    std::pair<Symbol *, bool> insert(StringRef &Name, uint8_t Type,                                     uint8_t Visibility, bool CanOmitFromDynSym,  | 

