diff options
Diffstat (limited to 'llvm/lib/MC/MCParser')
| -rw-r--r-- | llvm/lib/MC/MCParser/AsmLexer.cpp | 137 | ||||
| -rw-r--r-- | llvm/lib/MC/MCParser/AsmParser.cpp | 88 |
2 files changed, 114 insertions, 111 deletions
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp index f653304186b..d56071aea4d 100644 --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -23,7 +23,8 @@ using namespace llvm; AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { CurPtr = nullptr; - isAtStartOfLine = true; + IsAtStartOfLine = true; + IsAtStartOfStatement = true; AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); } @@ -50,20 +51,9 @@ AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { } int AsmLexer::getNextChar() { - char CurChar = *CurPtr++; - switch (CurChar) { - default: - return (unsigned char)CurChar; - case 0: - // A nul character in the stream is either the end of the current buffer or - // a random nul in the file. Disambiguate that here. - if (CurPtr - 1 != CurBuf.end()) - return 0; // Just whitespace. - - // Otherwise, return end of file. - --CurPtr; // Another call to lex will return EOF again. + if (CurPtr == CurBuf.end()) return EOF; - } + return (unsigned char)*CurPtr++; } /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? @@ -169,43 +159,52 @@ AsmToken AsmLexer::LexIdentifier() { AsmToken AsmLexer::LexSlash() { switch (*CurPtr) { case '*': + IsAtStartOfStatement = false; break; // C style comment. case '/': ++CurPtr; return LexLineComment(); default: - return AsmToken(AsmToken::Slash, StringRef(CurPtr - 1, 1)); + IsAtStartOfStatement = false; + return AsmToken(AsmToken::Slash, StringRef(TokStart, 1)); } // C Style comment. ++CurPtr; // skip the star. - while (1) { - int CurChar = getNextChar(); - switch (CurChar) { - case EOF: - return ReturnError(TokStart, "unterminated comment"); + while (CurPtr != CurBuf.end()) { + switch (*CurPtr++) { case '*': // End of the comment? - if (CurPtr[0] != '/') break; - + if (*CurPtr != '/') + break; ++CurPtr; // End the */. - return LexToken(); + return AsmToken(AsmToken::Comment, + StringRef(TokStart, CurPtr - TokStart)); } } + return ReturnError(TokStart, "unterminated comment"); } /// LexLineComment: Comment: #[^\n]* /// : //[^\n]* AsmToken AsmLexer::LexLineComment() { - // FIXME: This is broken if we happen to a comment at the end of a file, which - // was .included, and which doesn't end with a newline. + // Mark This as an end of statement with a body of the + // comment. While it would be nicer to leave this two tokens, + // backwards compatability with TargetParsers makes keeping this in this form + // better. int CurChar = getNextChar(); while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) CurChar = getNextChar(); - if (CurChar == EOF) - return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); - return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); + IsAtStartOfLine = true; + // Whis is a whole line comment. leave newline + if (IsAtStartOfStatement) + return AsmToken(AsmToken::EndOfStatement, + StringRef(TokStart, CurPtr - TokStart)); + IsAtStartOfStatement = true; + + return AsmToken(AsmToken::EndOfStatement, + StringRef(TokStart, CurPtr - 1 - TokStart)); } static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { @@ -423,8 +422,7 @@ StringRef AsmLexer::LexUntilEndOfStatement() { while (!isAtStartOfComment(CurPtr) && // Start of line comment. !isAtStatementSeparator(CurPtr) && // End of statement marker. - *CurPtr != '\n' && *CurPtr != '\r' && - (*CurPtr != 0 || CurPtr != CurBuf.end())) { + *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { ++CurPtr; } return StringRef(TokStart, CurPtr-TokStart); @@ -433,8 +431,7 @@ StringRef AsmLexer::LexUntilEndOfStatement() { StringRef AsmLexer::LexUntilEndOfLine() { TokStart = CurPtr; - while (*CurPtr != '\n' && *CurPtr != '\r' && - (*CurPtr != 0 || CurPtr != CurBuf.end())) { + while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { ++CurPtr; } return StringRef(TokStart, CurPtr-TokStart); @@ -444,7 +441,8 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, bool ShouldSkipSpace) { const char *SavedTokStart = TokStart; const char *SavedCurPtr = CurPtr; - bool SavedAtStartOfLine = isAtStartOfLine; + bool SavedAtStartOfLine = IsAtStartOfLine; + bool SavedAtStartOfStatement = IsAtStartOfStatement; bool SavedSkipSpace = SkipSpace; std::string SavedErr = getErr(); @@ -465,7 +463,8 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, SetError(SavedErrLoc, SavedErr); SkipSpace = SavedSkipSpace; - isAtStartOfLine = SavedAtStartOfLine; + IsAtStartOfLine = SavedAtStartOfLine; + IsAtStartOfStatement = SavedAtStartOfStatement; CurPtr = SavedCurPtr; TokStart = SavedTokStart; @@ -495,29 +494,45 @@ AsmToken AsmLexer::LexToken() { // This always consumes at least one character. int CurChar = getNextChar(); - if (isAtStartOfComment(TokStart)) { - // If this comment starts with a '#', then return the Hash token and let - // the assembler parser see if it can be parsed as a cpp line filename - // comment. We do this only if we are at the start of a line. - if (CurChar == '#' && isAtStartOfLine) - return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); - isAtStartOfLine = true; + if (CurChar == '#' && IsAtStartOfStatement) { + // If this starts with a '#', this may be a cpp + // hash directive and otherwise a line comment. + AsmToken TokenBuf[2]; + MutableArrayRef<AsmToken> Buf(TokenBuf, 2); + size_t num = peekTokens(Buf, true); + // There cannot be a space preceeding this + if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) && + TokenBuf[1].is(AsmToken::String)) { + CurPtr = TokStart; // reset curPtr; + StringRef s = LexUntilEndOfLine(); + UnLex(TokenBuf[1]); + UnLex(TokenBuf[0]); + return AsmToken(AsmToken::HashDirective, s); + } return LexLineComment(); } + + if (isAtStartOfComment(TokStart)) + return LexLineComment(); + if (isAtStatementSeparator(TokStart)) { CurPtr += strlen(MAI.getSeparatorString()) - 1; + IsAtStartOfLine = true; + IsAtStartOfStatement = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, strlen(MAI.getSeparatorString()))); } // If we're missing a newline at EOF, make sure we still get an // EndOfStatement token before the Eof token. - if (CurChar == EOF && !isAtStartOfLine) { - isAtStartOfLine = true; + if (CurChar == EOF && !IsAtStartOfStatement) { + IsAtStartOfLine = true; + IsAtStartOfStatement = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); } - - isAtStartOfLine = false; + IsAtStartOfLine = false; + bool OldIsAtStartOfStatement = IsAtStartOfStatement; + IsAtStartOfStatement = false; switch (CurChar) { default: // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* @@ -526,24 +541,24 @@ AsmToken AsmLexer::LexToken() { // Unknown character, emit an error. return ReturnError(TokStart, "invalid character in input"); - case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); + case EOF: + IsAtStartOfLine = true; + IsAtStartOfStatement = true; + return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); case 0: case ' ': case '\t': - if (SkipSpace) { - // Ignore whitespace. - return LexToken(); - } else { - int len = 1; - while (*CurPtr==' ' || *CurPtr=='\t') { - CurPtr++; - len++; - } - return AsmToken(AsmToken::Space, StringRef(TokStart, len)); - } - case '\n': // FALL THROUGH. + IsAtStartOfStatement = OldIsAtStartOfStatement; + while (*CurPtr == ' ' || *CurPtr == '\t') + CurPtr++; + if (SkipSpace) + return LexToken(); // Ignore whitespace. + else + return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart)); + case '\n': case '\r': - isAtStartOfLine = true; + IsAtStartOfLine = true; + IsAtStartOfStatement = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); @@ -586,7 +601,9 @@ AsmToken AsmLexer::LexToken() { } return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); - case '/': return LexSlash(); + case '/': + IsAtStartOfStatement = OldIsAtStartOfStatement; + return LexSlash(); case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); case '\'': return LexSingleQuote(); case '"': return LexQuote(); diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 5b8c4ee5df1..9a331ec4458 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -257,7 +257,6 @@ private: bool parseStatement(ParseStatementInfo &Info, MCAsmParserSemaCallback *SI); bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites); - void eatToEndOfLine(); bool parseCppHashLineFilenameComment(SMLoc L); void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body, @@ -628,6 +627,10 @@ const AsmToken &AsmParser::Lex() { Error(Lexer.getErrLoc(), Lexer.getErr()); const AsmToken *tok = &Lexer.Lex(); + // Drop comments here. + while (tok->is(AsmToken::Comment)) { + tok = &Lexer.Lex(); + } if (tok->is(AsmToken::Eof)) { // If this is the end of an included file, pop the parent file off the @@ -635,7 +638,7 @@ const AsmToken &AsmParser::Lex() { SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); if (ParentIncludeLoc != SMLoc()) { jumpToLoc(ParentIncludeLoc); - tok = &Lexer.Lex(); + return Lex(); } } @@ -720,8 +723,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // first referenced for a source location. We need to add something // to track that. Currently, we just point to the end of the file. HadError |= - Error(getLexer().getLoc(), "assembler local symbol '" + - Sym->getName() + "' not defined"); + Error(getTok().getLoc(), "assembler local symbol '" + + Sym->getName() + "' not defined"); } } @@ -766,7 +769,7 @@ StringRef AsmParser::parseStringToEndOfStatement() { const char *Start = getTok().getLoc().getPointer(); while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof)) - Lex(); + Lexer.Lex(); const char *End = getTok().getLoc().getPointer(); return StringRef(Start, End - Start); @@ -777,7 +780,7 @@ StringRef AsmParser::parseStringToComma() { while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Comma) && Lexer.isNot(AsmToken::Eof)) - Lex(); + Lexer.Lex(); const char *End = getTok().getLoc().getPointer(); return StringRef(Start, End - Start); @@ -859,7 +862,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { if (!MAI.useParensForSymbolVariant()) { if (FirstTokenKind == AsmToken::String) { if (Lexer.is(AsmToken::At)) { - Lexer.Lex(); // eat @ + Lex(); // eat @ SMLoc AtLoc = getLexer().getLoc(); StringRef VName; if (parseIdentifier(VName)) @@ -871,14 +874,14 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { Split = Identifier.split('@'); } } else if (Lexer.is(AsmToken::LParen)) { - Lexer.Lex(); // eat ( + Lex(); // eat '('. StringRef VName; parseIdentifier(VName); if (Lexer.isNot(AsmToken::RParen)) { return Error(Lexer.getTok().getLoc(), "unexpected token in variant, expected ')'"); } - Lexer.Lex(); // eat ) + Lex(); // eat ')'. Split = std::make_pair(Identifier, VName); } @@ -1343,21 +1346,24 @@ bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, /// ::= Label* Identifier OperandList* EndOfStatement bool AsmParser::parseStatement(ParseStatementInfo &Info, MCAsmParserSemaCallback *SI) { + // Eat initial spaces and comments + while (Lexer.is(AsmToken::Space)) + Lex(); if (Lexer.is(AsmToken::EndOfStatement)) { - Out.AddBlankLine(); + // if this is a line comment we can drop it safely + if (getTok().getString().front() == '\r' || + getTok().getString().front() == '\n') + Out.AddBlankLine(); Lex(); return false; } - - // Statements always start with an identifier or are a full line comment. + // Statements always start with an identifier. AsmToken ID = getTok(); SMLoc IDLoc = ID.getLoc(); StringRef IDVal; int64_t LocalLabelVal = -1; - // A full line comment is a '#' as the first token. - if (Lexer.is(AsmToken::Hash)) + if (Lexer.is(AsmToken::HashDirective)) return parseCppHashLineFilenameComment(IDLoc); - // Allow an integer followed by a ':' as a directional local label. if (Lexer.is(AsmToken::Integer)) { LocalLabelVal = getTok().getIntVal(); @@ -1648,7 +1654,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveIncbin(); case DK_CODE16: case DK_CODE16GCC: - return TokError(Twine(IDVal) + " not supported yet"); + return TokError(Twine(IDVal) + + " not currently supported for this target"); case DK_REPT: return parseDirectiveRept(IDLoc, IDVal); case DK_IRP: @@ -1868,37 +1875,20 @@ AsmParser::parseCurlyBlockScope(SmallVectorImpl<AsmRewrite> &AsmStrRewrites) { return true; } -/// eatToEndOfLine uses the Lexer to eat the characters to the end of the line -/// since they may not be able to be tokenized to get to the end of line token. -void AsmParser::eatToEndOfLine() { - if (!Lexer.is(AsmToken::EndOfStatement)) - Lexer.LexUntilEndOfLine(); - // Eat EOL. - Lex(); -} - /// parseCppHashLineFilenameComment as this: /// ::= # number "filename" -/// or just as a full line comment if it doesn't have a number and a string. bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) { Lex(); // Eat the hash token. - - if (getLexer().isNot(AsmToken::Integer)) { - // Consume the line since in cases it is not a well-formed line directive, - // as if were simply a full line comment. - eatToEndOfLine(); - return false; - } - + // Lexer only ever emits HashDirective if it fully formed if it's + // done the checking already so this is an internal error. + assert(getTok().is(AsmToken::Integer) && + "Lexing Cpp line comment: Expected Integer"); int64_t LineNumber = getTok().getIntVal(); Lex(); - - if (getLexer().isNot(AsmToken::String)) { - eatToEndOfLine(); - return false; - } - + assert(getTok().is(AsmToken::String) && + "Lexing Cpp line comment: Expected String"); StringRef Filename = getTok().getString(); + Lex(); // Get rid of the enclosing quotes. Filename = Filename.substr(1, Filename.size() - 2); @@ -1907,9 +1897,6 @@ bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) { CppHashInfo.Filename = Filename; CppHashInfo.LineNumber = LineNumber; CppHashInfo.Buf = CurBuffer; - - // Ignore any trailing characters, they're just comment. - eatToEndOfLine(); return false; } @@ -2268,7 +2255,7 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M, break; if (FAI >= NParameters) { - assert(M && "expected macro to be defined"); + assert(M && "expected macro to be defined"); Error(IDLoc, "parameter named '" + FA.Name + "' does not exist for macro '" + M->Name + "'"); @@ -2426,7 +2413,7 @@ bool AsmParser::parseIdentifier(StringRef &Res) { // Construct the joined identifier and consume the token. Res = StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1); - Lexer.Lex(); // Lexer's Lex guarantees consecutive token + Lex(); // Parser Lex to maintain invariants. return false; } @@ -2568,16 +2555,16 @@ bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) { if (Lexer.isNot(AsmToken::Comma)) return TokError("expected comma"); - Lexer.Lex(); + Lex(); if (Lexer.isNot(AsmToken::Identifier)) return TokError("expected relocation name"); SMLoc NameLoc = Lexer.getTok().getLoc(); StringRef Name = Lexer.getTok().getIdentifier(); - Lexer.Lex(); + Lex(); if (Lexer.is(AsmToken::Comma)) { - Lexer.Lex(); + Lex(); SMLoc ExprLoc = Lexer.getLoc(); if (parseExpression(Expr)) return true; @@ -5250,10 +5237,9 @@ static bool isSymbolUsedInExpression(const MCSymbol *Sym, const MCExpr *Value) { bool parseAssignmentExpression(StringRef Name, bool allow_redef, MCAsmParser &Parser, MCSymbol *&Sym, const MCExpr *&Value) { - MCAsmLexer &Lexer = Parser.getLexer(); // FIXME: Use better location, we should use proper tokens. - SMLoc EqualLoc = Lexer.getLoc(); + SMLoc EqualLoc = Parser.getTok().getLoc(); if (Parser.parseExpression(Value)) { Parser.TokError("missing expression"); @@ -5265,7 +5251,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef, // a = b // b = c - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (Parser.getTok().isNot(AsmToken::EndOfStatement)) return Parser.TokError("unexpected token in assignment"); // Eat the end of statement marker. |

