diff options
Diffstat (limited to 'llvm/lib/MC/MCParser/AsmLexer.cpp')
-rw-r--r-- | llvm/lib/MC/MCParser/AsmLexer.cpp | 137 |
1 files changed, 77 insertions, 60 deletions
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp index f653304186b..d56071aea4d 100644 --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -23,7 +23,8 @@ using namespace llvm; AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { CurPtr = nullptr; - isAtStartOfLine = true; + IsAtStartOfLine = true; + IsAtStartOfStatement = true; AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); } @@ -50,20 +51,9 @@ AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { } int AsmLexer::getNextChar() { - char CurChar = *CurPtr++; - switch (CurChar) { - default: - return (unsigned char)CurChar; - case 0: - // A nul character in the stream is either the end of the current buffer or - // a random nul in the file. Disambiguate that here. - if (CurPtr - 1 != CurBuf.end()) - return 0; // Just whitespace. - - // Otherwise, return end of file. - --CurPtr; // Another call to lex will return EOF again. + if (CurPtr == CurBuf.end()) return EOF; - } + return (unsigned char)*CurPtr++; } /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? @@ -169,43 +159,52 @@ AsmToken AsmLexer::LexIdentifier() { AsmToken AsmLexer::LexSlash() { switch (*CurPtr) { case '*': + IsAtStartOfStatement = false; break; // C style comment. case '/': ++CurPtr; return LexLineComment(); default: - return AsmToken(AsmToken::Slash, StringRef(CurPtr - 1, 1)); + IsAtStartOfStatement = false; + return AsmToken(AsmToken::Slash, StringRef(TokStart, 1)); } // C Style comment. ++CurPtr; // skip the star. - while (1) { - int CurChar = getNextChar(); - switch (CurChar) { - case EOF: - return ReturnError(TokStart, "unterminated comment"); + while (CurPtr != CurBuf.end()) { + switch (*CurPtr++) { case '*': // End of the comment? - if (CurPtr[0] != '/') break; - + if (*CurPtr != '/') + break; ++CurPtr; // End the */. - return LexToken(); + return AsmToken(AsmToken::Comment, + StringRef(TokStart, CurPtr - TokStart)); } } + return ReturnError(TokStart, "unterminated comment"); } /// LexLineComment: Comment: #[^\n]* /// : //[^\n]* AsmToken AsmLexer::LexLineComment() { - // FIXME: This is broken if we happen to a comment at the end of a file, which - // was .included, and which doesn't end with a newline. + // Mark This as an end of statement with a body of the + // comment. While it would be nicer to leave this two tokens, + // backwards compatability with TargetParsers makes keeping this in this form + // better. int CurChar = getNextChar(); while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) CurChar = getNextChar(); - if (CurChar == EOF) - return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); - return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); + IsAtStartOfLine = true; + // Whis is a whole line comment. leave newline + if (IsAtStartOfStatement) + return AsmToken(AsmToken::EndOfStatement, + StringRef(TokStart, CurPtr - TokStart)); + IsAtStartOfStatement = true; + + return AsmToken(AsmToken::EndOfStatement, + StringRef(TokStart, CurPtr - 1 - TokStart)); } static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { @@ -423,8 +422,7 @@ StringRef AsmLexer::LexUntilEndOfStatement() { while (!isAtStartOfComment(CurPtr) && // Start of line comment. !isAtStatementSeparator(CurPtr) && // End of statement marker. - *CurPtr != '\n' && *CurPtr != '\r' && - (*CurPtr != 0 || CurPtr != CurBuf.end())) { + *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { ++CurPtr; } return StringRef(TokStart, CurPtr-TokStart); @@ -433,8 +431,7 @@ StringRef AsmLexer::LexUntilEndOfStatement() { StringRef AsmLexer::LexUntilEndOfLine() { TokStart = CurPtr; - while (*CurPtr != '\n' && *CurPtr != '\r' && - (*CurPtr != 0 || CurPtr != CurBuf.end())) { + while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { ++CurPtr; } return StringRef(TokStart, CurPtr-TokStart); @@ -444,7 +441,8 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, bool ShouldSkipSpace) { const char *SavedTokStart = TokStart; const char *SavedCurPtr = CurPtr; - bool SavedAtStartOfLine = isAtStartOfLine; + bool SavedAtStartOfLine = IsAtStartOfLine; + bool SavedAtStartOfStatement = IsAtStartOfStatement; bool SavedSkipSpace = SkipSpace; std::string SavedErr = getErr(); @@ -465,7 +463,8 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, SetError(SavedErrLoc, SavedErr); SkipSpace = SavedSkipSpace; - isAtStartOfLine = SavedAtStartOfLine; + IsAtStartOfLine = SavedAtStartOfLine; + IsAtStartOfStatement = SavedAtStartOfStatement; CurPtr = SavedCurPtr; TokStart = SavedTokStart; @@ -495,29 +494,45 @@ AsmToken AsmLexer::LexToken() { // This always consumes at least one character. int CurChar = getNextChar(); - if (isAtStartOfComment(TokStart)) { - // If this comment starts with a '#', then return the Hash token and let - // the assembler parser see if it can be parsed as a cpp line filename - // comment. We do this only if we are at the start of a line. - if (CurChar == '#' && isAtStartOfLine) - return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); - isAtStartOfLine = true; + if (CurChar == '#' && IsAtStartOfStatement) { + // If this starts with a '#', this may be a cpp + // hash directive and otherwise a line comment. + AsmToken TokenBuf[2]; + MutableArrayRef<AsmToken> Buf(TokenBuf, 2); + size_t num = peekTokens(Buf, true); + // There cannot be a space preceeding this + if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) && + TokenBuf[1].is(AsmToken::String)) { + CurPtr = TokStart; // reset curPtr; + StringRef s = LexUntilEndOfLine(); + UnLex(TokenBuf[1]); + UnLex(TokenBuf[0]); + return AsmToken(AsmToken::HashDirective, s); + } return LexLineComment(); } + + if (isAtStartOfComment(TokStart)) + return LexLineComment(); + if (isAtStatementSeparator(TokStart)) { CurPtr += strlen(MAI.getSeparatorString()) - 1; + IsAtStartOfLine = true; + IsAtStartOfStatement = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, strlen(MAI.getSeparatorString()))); } // If we're missing a newline at EOF, make sure we still get an // EndOfStatement token before the Eof token. - if (CurChar == EOF && !isAtStartOfLine) { - isAtStartOfLine = true; + if (CurChar == EOF && !IsAtStartOfStatement) { + IsAtStartOfLine = true; + IsAtStartOfStatement = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); } - - isAtStartOfLine = false; + IsAtStartOfLine = false; + bool OldIsAtStartOfStatement = IsAtStartOfStatement; + IsAtStartOfStatement = false; switch (CurChar) { default: // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* @@ -526,24 +541,24 @@ AsmToken AsmLexer::LexToken() { // Unknown character, emit an error. return ReturnError(TokStart, "invalid character in input"); - case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); + case EOF: + IsAtStartOfLine = true; + IsAtStartOfStatement = true; + return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); case 0: case ' ': case '\t': - if (SkipSpace) { - // Ignore whitespace. - return LexToken(); - } else { - int len = 1; - while (*CurPtr==' ' || *CurPtr=='\t') { - CurPtr++; - len++; - } - return AsmToken(AsmToken::Space, StringRef(TokStart, len)); - } - case '\n': // FALL THROUGH. + IsAtStartOfStatement = OldIsAtStartOfStatement; + while (*CurPtr == ' ' || *CurPtr == '\t') + CurPtr++; + if (SkipSpace) + return LexToken(); // Ignore whitespace. + else + return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart)); + case '\n': case '\r': - isAtStartOfLine = true; + IsAtStartOfLine = true; + IsAtStartOfStatement = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); @@ -586,7 +601,9 @@ AsmToken AsmLexer::LexToken() { } return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); - case '/': return LexSlash(); + case '/': + IsAtStartOfStatement = OldIsAtStartOfStatement; + return LexSlash(); case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); case '\'': return LexSingleQuote(); case '"': return LexQuote(); |