summaryrefslogtreecommitdiffstats
path: root/lld/ELF/Strings.cpp
blob: 349325cb71ddaa6641af3f1cacf614245de9bea7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
//===- Strings.cpp -------------------------------------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "Strings.h"
#include "Config.h"
#include "Error.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Demangle/Demangle.h"
#include <algorithm>
#include <cstring>

using namespace llvm;
using namespace lld;
using namespace lld::elf;

// This is a scanner for the glob pattern.
// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
// (which is a negative form of "[<chars>]"), or a non-meta character.
// This function returns the first token in S.
BitVector GlobPattern::scan(StringRef &S) {
  switch (S[0]) {
  case '*':
    S = S.substr(1);
    // '*' is represented by an empty bitvector.
    // All other bitvectors are 256-bit long.
    return BitVector();
  case '?':
    S = S.substr(1);
    return BitVector(256, true);
  case '[': {
    size_t End = S.find(']', 1);
    if (End == StringRef::npos) {
      error("invalid glob pattern: " + Original);
      return BitVector(256, false);
    }
    StringRef Chars = S.substr(1, End - 1);
    S = S.substr(End + 1);
    if (Chars.startswith("^"))
      return expand(Chars.substr(1)).flip();
    return expand(Chars);
  }
  default:
    BitVector BV(256, false);
    BV[S[0]] = true;
    S = S.substr(1);
    return BV;
  }
}

// Expands character ranges and returns a bitmap.
// For example, "a-cf-hz" is expanded to "abcfghz".
BitVector GlobPattern::expand(StringRef S) {
  BitVector BV(256, false);

  // Expand "x-y".
  for (;;) {
    if (S.size() < 3)
      break;

    // If it doesn't start with something like "x-y",
    // consume the first character and proceed.
    if (S[1] != '-') {
      BV[S[0]] = true;
      S = S.substr(1);
      continue;
    }

    // It must be in the form of "x-y".
    // Validate it and then interpret the range.
    if (S[0] > S[2]) {
      error("invalid glob pattern: " + Original);
      return BV;
    }
    for (int C = S[0]; C <= S[2]; ++C)
      BV[C] = true;
    S = S.substr(3);
  }

  for (char C : S)
    BV[C] = true;
  return BV;
}

GlobPattern::GlobPattern(StringRef S) : Original(S) {
  if (!hasWildcard(S)) {
    // S doesn't contain any metacharacter,
    // so the regular string comparison should work.
    Exact = S;
  } else if (S.endswith("*") && !hasWildcard(S.drop_back())) {
    // S is something like "foo*". We can use startswith().
    Prefix = S.drop_back();
  } else if (S.startswith("*") && !hasWildcard(S.drop_front())) {
    // S is something like "*foo". We can use endswith().
    Suffix = S.drop_front();
  } else {
    // Otherwise, we need to do real glob pattern matching.
    // Parse the pattern now.
    while (!S.empty())
      Tokens.push_back(scan(S));
  }
}

bool GlobPattern::match(StringRef S) const {
  if (Exact)
    return S == *Exact;
  if (Prefix)
    return S.startswith(*Prefix);
  if (Suffix)
    return S.endswith(*Suffix);
  return matchOne(Tokens, S);
}

// Runs glob pattern Pats against string S.
bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
  for (;;) {
    if (Pats.empty())
      return S.empty();

    // If Pats[0] is '*', try to match Pats[1..] against all possible
    // substrings of S to see at least one pattern succeeds.
    if (Pats[0].size() == 0) {
      Pats = Pats.slice(1);
      if (Pats.empty())
        // Fast path. If a pattern is '*', it matches anything.
        return true;
      for (size_t I = 0, E = S.size(); I < E; ++I)
        if (matchOne(Pats, S.substr(I)))
          return true;
      return false;
    }

    // If Pats[0] is not '*', it must consume one character.
    if (S.empty() || !Pats[0][S[0]])
      return false;
    Pats = Pats.slice(1);
    S = S.substr(1);
  }
}

StringMatcher::StringMatcher(const std::vector<StringRef> &Pat) {
  for (StringRef S : Pat)
    Patterns.push_back(GlobPattern(S));
}

bool StringMatcher::match(StringRef S) const {
  for (const GlobPattern &Pat : Patterns)
    if (Pat.match(S))
      return true;
  return false;
}

// If an input string is in the form of "foo.N" where N is a number,
// return N. Otherwise, returns 65536, which is one greater than the
// lowest priority.
int elf::getPriority(StringRef S) {
  size_t Pos = S.rfind('.');
  if (Pos == StringRef::npos)
    return 65536;
  int V;
  if (S.substr(Pos + 1).getAsInteger(10, V))
    return 65536;
  return V;
}

bool elf::hasWildcard(StringRef S) {
  return S.find_first_of("?*[") != StringRef::npos;
}

StringRef elf::unquote(StringRef S) {
  if (!S.startswith("\""))
    return S;
  return S.substr(1, S.size() - 2);
}

// Converts a hex string (e.g. "deadbeef") to a vector.
std::vector<uint8_t> elf::parseHex(StringRef S) {
  std::vector<uint8_t> Hex;
  while (!S.empty()) {
    StringRef B = S.substr(0, 2);
    S = S.substr(2);
    uint8_t H;
    if (B.getAsInteger(16, H)) {
      error("not a hexadecimal value: " + B);
      return {};
    }
    Hex.push_back(H);
  }
  return Hex;
}

static bool isAlpha(char C) {
  return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z') || C == '_';
}

static bool isAlnum(char C) { return isAlpha(C) || ('0' <= C && C <= '9'); }

// Returns true if S is valid as a C language identifier.
bool elf::isValidCIdentifier(StringRef S) {
  return !S.empty() && isAlpha(S[0]) &&
         std::all_of(S.begin() + 1, S.end(), isAlnum);
}

// Returns the demangled C++ symbol name for Name.
Optional<std::string> elf::demangle(StringRef Name) {
  // __cxa_demangle can be used to demangle strings other than symbol
  // names which do not necessarily start with "_Z". Name can be
  // either a C or C++ symbol. Don't call __cxa_demangle if the name
  // does not look like a C++ symbol name to avoid getting unexpected
  // result for a C symbol that happens to match a mangled type name.
  if (!Name.startswith("_Z"))
    return None;

  char *Buf = itaniumDemangle(Name.str().c_str(), nullptr, nullptr, nullptr);
  if (!Buf)
    return None;
  std::string S(Buf);
  free(Buf);
  return S;
}
OpenPOWER on IntegriCloud