diff options
| -rw-r--r-- | llvm/include/llvm/ADT/StringExtras.h | 15 | ||||
| -rw-r--r-- | llvm/lib/MC/MCParser/AsmLexer.cpp | 25 | ||||
| -rw-r--r-- | llvm/test/MC/AsmParser/non-english-characters.s | 14 |
3 files changed, 42 insertions, 12 deletions
diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h index cc32bf43f29..a9a8c87d0d7 100644 --- a/llvm/include/llvm/ADT/StringExtras.h +++ b/llvm/include/llvm/ADT/StringExtras.h @@ -59,6 +59,21 @@ static inline unsigned hexDigitValue(char C) { return -1U; } +/// Checks if character \p C is one of the 10 decimal digits. +static inline bool isDigit(char C) { return C >= '0' && C <= '9'; } + +/// Checks if character \p C is a hexadecimal numeric character. +static inline bool isHexDigit(char C) { return hexDigitValue(C) != -1U; } + +/// Checks if character \p C is a valid letter as classified by "C" locale. +static inline bool isAlpha(char C) { + return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z'); +} + +/// Checks whether character \p C is either a decimal digit or an uppercase or +/// lowercase letter as classified by "C" locale. +static inline bool isAlnum(char C) { return isAlpha(C) || isDigit(C); } + static inline std::string utohexstr(uint64_t X, bool LowerCase = false) { char Buffer[17]; char *BufPtr = std::end(Buffer); diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp index 2b963607b83..e9123b9d714 100644 --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -14,6 +14,7 @@ #include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" @@ -68,7 +69,7 @@ int AsmLexer::getNextChar() { /// consumed. AsmToken AsmLexer::LexFloatLiteral() { // Skip the fractional digit sequence. - while (isdigit(*CurPtr)) + while (isDigit(*CurPtr)) ++CurPtr; // Check for exponent; we intentionally accept a slighlty wider set of @@ -78,7 +79,7 @@ AsmToken AsmLexer::LexFloatLiteral() { ++CurPtr; if (*CurPtr == '-' || *CurPtr == '+') ++CurPtr; - while (isdigit(*CurPtr)) + while (isDigit(*CurPtr)) ++CurPtr; } @@ -102,7 +103,7 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { ++CurPtr; const char *FracStart = CurPtr; - while (isxdigit(*CurPtr)) + while (isHexDigit(*CurPtr)) ++CurPtr; NoFracDigits = CurPtr == FracStart; @@ -123,7 +124,7 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { // N.b. exponent digits are *not* hex const char *ExpStart = CurPtr; - while (isdigit(*CurPtr)) + while (isDigit(*CurPtr)) ++CurPtr; if (CurPtr == ExpStart) @@ -135,15 +136,15 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* static bool IsIdentifierChar(char c, bool AllowAt) { - return isalnum(c) || c == '_' || c == '$' || c == '.' || + return isAlnum(c) || c == '_' || c == '$' || c == '.' || (c == '@' && AllowAt) || c == '?'; } AsmToken AsmLexer::LexIdentifier() { // Check for floating point literals. - if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { + if (CurPtr[-1] == '.' && isDigit(*CurPtr)) { // Disambiguate a .1243foo identifier from a floating literal. - while (isdigit(*CurPtr)) + while (isDigit(*CurPtr)) ++CurPtr; if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) @@ -244,9 +245,9 @@ static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { const char *FirstHex = nullptr; const char *LookAhead = CurPtr; while (true) { - if (isdigit(*LookAhead)) { + if (isDigit(*LookAhead)) { ++LookAhead; - } else if (isxdigit(*LookAhead)) { + } else if (isHexDigit(*LookAhead)) { if (!FirstHex) FirstHex = LookAhead; ++LookAhead; @@ -282,7 +283,7 @@ AsmToken AsmLexer::LexDigit() { const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr; const char *OldCurPtr = CurPtr; - while (isxdigit(*CurPtr)) { + while (isHexDigit(*CurPtr)) { if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary) FirstNonBinary = CurPtr; ++CurPtr; @@ -346,7 +347,7 @@ AsmToken AsmLexer::LexDigit() { if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) { ++CurPtr; // See if we actually have "0b" as part of something like "jmp 0b\n" - if (!isdigit(CurPtr[0])) { + if (!isDigit(CurPtr[0])) { --CurPtr; StringRef Result(TokStart, CurPtr - TokStart); return AsmToken(AsmToken::Integer, Result, 0); @@ -375,7 +376,7 @@ AsmToken AsmLexer::LexDigit() { if ((*CurPtr == 'x') || (*CurPtr == 'X')) { ++CurPtr; const char *NumStart = CurPtr; - while (isxdigit(CurPtr[0])) + while (isHexDigit(CurPtr[0])) ++CurPtr; // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be diff --git a/llvm/test/MC/AsmParser/non-english-characters.s b/llvm/test/MC/AsmParser/non-english-characters.s new file mode 100644 index 00000000000..12d78ee83be --- /dev/null +++ b/llvm/test/MC/AsmParser/non-english-characters.s @@ -0,0 +1,14 @@ +# RUN: llvm-mc -triple i386-linux-gnu -filetype=obj -o %t %s +# RUN: llvm-readobj %t | FileCheck %s +# CHECK: Format: ELF32-i386 + +# 0bÑ +# 0xÑ +# .Ñ4 +# .XÑ +# .1Ñ +# .1eÑ +# 0x.Ñ +# 0x0pÑ +.intel_syntax +# 1Ñ |

