Diffstat (limited to 'llvm/lib/TableGen/TGLexer.cpp')
-rw-r--r-- | llvm/lib/TableGen/TGLexer.cpp | 557
1 file changed, 22 insertions, 535 deletions
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index fcabce5329c..652be6e8dbb 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -19,7 +19,6 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/TableGen/Error.h"
-#include <algorithm>
 #include <cctype>
 #include <cerrno>
 #include <cstdint>
@@ -29,35 +28,11 @@
 
 using namespace llvm;
 
-namespace {
-// A list of supported preprocessing directives with their
-// internal token kinds and names.
-struct {
-  tgtok::TokKind Kind;
-  const char *Word;
-} PreprocessorDirs[] = {
-  { tgtok::Ifdef, "ifdef" },
-  { tgtok::Else, "else" },
-  { tgtok::Endif, "endif" },
-  { tgtok::Define, "define" }
-};
-} // end anonymous namespace
-
-TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
+TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
   CurBuffer = SrcMgr.getMainFileID();
   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
   CurPtr = CurBuf.begin();
   TokStart = nullptr;
-
-  // Pretend that we enter the "top-level" include file.
-  PrepIncludeStack.push_back(
-      make_unique<std::vector<PreprocessorControlDesc>>());
-
-  // Put all macros defined in the command line into the DefinedMacros set.
-  std::for_each(Macros.begin(), Macros.end(),
-                [this](const std::string &MacroName) {
-                  DefinedMacros.insert(MacroName);
-                });
 }
 
 SMLoc TGLexer::getLoc() const {
@@ -66,42 +41,11 @@ SMLoc TGLexer::getLoc() const {
 
 /// ReturnError - Set the error to the specified string at the specified
 /// location.  This is defined to always return tgtok::Error.
-tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) {
+tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
   PrintError(Loc, Msg);
   return tgtok::Error;
 }
 
-tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
-  return ReturnError(SMLoc::getFromPointer(Loc), Msg);
-}
-
-bool TGLexer::processEOF() {
-  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
-  if (ParentIncludeLoc != SMLoc()) {
-    // If prepExitInclude() detects a problem with the preprocessing
-    // control stack, it will return false.  Pretend that we reached
-    // the final EOF and stop lexing more tokens by returning false
-    // to LexToken().
-    if (!prepExitInclude(false))
-      return false;
-
-    CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
-    CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
-    CurPtr = ParentIncludeLoc.getPointer();
-    // Make sure TokStart points into the parent file's buffer.
-    // LexToken() assigns to it before calling getNextChar(),
-    // so it is pointing into the included file now.
-    TokStart = CurPtr;
-    return true;
-  }
-
-  // Pretend that we exit the "top-level" include file.
-  // Note that in case of an error (e.g. control stack imbalance)
-  // the routine will issue a fatal error.
-  prepExitInclude(true);
-  return false;
-}
-
 int TGLexer::getNextChar() {
   char CurChar = *CurPtr++;
   switch (CurChar) {
@@ -113,6 +57,16 @@ int TGLexer::getNextChar() {
     if (CurPtr-1 != CurBuf.end())
      return 0;  // Just whitespace.
 
+    // If this is the end of an included file, pop the parent file off the
+    // include stack.
+    SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+    if (ParentIncludeLoc != SMLoc()) {
+      CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
+      CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
+      CurPtr = ParentIncludeLoc.getPointer();
+      return getNextChar();
+    }
+
     // Otherwise, return end of file.
     --CurPtr;  // Another call to lex will return EOF again.
     return EOF;
@@ -129,11 +83,11 @@ int TGLexer::getNextChar() {
   }
 }
 
-int TGLexer::peekNextChar(int Index) const {
+int TGLexer::peekNextChar(int Index) {
   return *(CurPtr + Index);
 }
 
-tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
+tgtok::TokKind TGLexer::LexToken() {
   TokStart = CurPtr;
   // This always consumes at least one character.
   int CurChar = getNextChar();
@@ -146,18 +100,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
     // Unknown character, emit an error.
     return ReturnError(TokStart, "Unexpected character");
 
-  case EOF:
-    // Lex next token, if we just left an include file.
-    // Note that leaving an include file means that the next
-    // symbol is located at the end of 'include "..."'
-    // construct, so LexToken() is called with default
-    // false parameter.
-    if (processEOF())
-      return LexToken();
-
-    // Return EOF denoting the end of lexing.
-    return tgtok::Eof;
-
+  case EOF: return tgtok::Eof;
   case ':': return tgtok::colon;
   case ';': return tgtok::semi;
   case '.': return tgtok::period;
@@ -171,27 +114,15 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
   case ')': return tgtok::r_paren;
   case '=': return tgtok::equal;
   case '?': return tgtok::question;
-  case '#':
-    if (FileOrLineStart) {
-      tgtok::TokKind Kind = prepIsDirective();
-      if (Kind != tgtok::Error)
-        return lexPreprocessor(Kind);
-    }
-
-    return tgtok::paste;
-
-  case '\r':
-    PrintFatalError("getNextChar() must never return '\r'");
-    return tgtok::Error;
+  case '#': return tgtok::paste;
 
   case 0:
   case ' ':
   case '\t':
-    // Ignore whitespace.
-    return LexToken(FileOrLineStart);
   case '\n':
-    // Ignore whitespace, and identify the new line.
-    return LexToken(true);
+  case '\r':
+    // Ignore whitespace.
+    return LexToken();
   case '/':
     // If this is the start of a // comment, skip until the end of the line or
     // the end of the buffer.
@@ -202,7 +133,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
         return tgtok::Error;
     } else // Otherwise, this is an error.
      return ReturnError(TokStart, "Unexpected character");
-    return LexToken(FileOrLineStart);
+    return LexToken();
   case '-': case '+':
   case '0': case '1': case '2': case '3': case '4': case '5': case '6':
   case '7': case '8': case '9': {
@@ -318,10 +249,10 @@ tgtok::TokKind TGLexer::LexVarName() {
 }
 
 tgtok::TokKind TGLexer::LexIdentifier() {
-  // The first letter is [a-zA-Z_].
+  // The first letter is [a-zA-Z_#].
   const char *IdentStart = TokStart;
 
-  // Match the rest of the identifier regex: [0-9a-zA-Z_]*
+  // Match the rest of the identifier regex: [0-9a-zA-Z_#]*
   while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
     ++CurPtr;
 
@@ -391,9 +322,6 @@ bool TGLexer::LexInclude() {
   // Save the line number and lex buffer of the includer.
   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
   CurPtr = CurBuf.begin();
-
-  PrepIncludeStack.push_back(
-      make_unique<std::vector<PreprocessorControlDesc>>());
   return false;
 }
 
@@ -568,444 +496,3 @@ tgtok::TokKind TGLexer::LexExclaim() {
 
   return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
 }
-
-bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
-  // Report an error, if preprocessor control stack for the current
-  // file is not empty.
-  if (!PrepIncludeStack.back()->empty()) {
-    prepReportPreprocessorStackError();
-
-    return false;
-  }
-
-  // Pop the preprocessing controls from the include stack.
-  if (PrepIncludeStack.empty()) {
-    PrintFatalError("Preprocessor include stack is empty");
-  }
-
-  PrepIncludeStack.pop_back();
-
-  if (IncludeStackMustBeEmpty) {
-    if (!PrepIncludeStack.empty())
-      PrintFatalError("Preprocessor include stack is not empty");
-  } else {
-    if (PrepIncludeStack.empty())
-      PrintFatalError("Preprocessor include stack is empty");
-  }
-
-  return true;
-}
-
-tgtok::TokKind TGLexer::prepIsDirective() const {
-  for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID) {
-    int NextChar = *CurPtr;
-    bool Match = true;
-    unsigned I = 0;
-    for (; I < strlen(PreprocessorDirs[ID].Word); ++I) {
-      if (NextChar != PreprocessorDirs[ID].Word[I]) {
-        Match = false;
-        break;
-      }
-
-      NextChar = peekNextChar(I + 1);
-    }
-
-    // Check for whitespace after the directive.  If there is no whitespace,
-    // then we do not recognize it as a preprocessing directive.
-    if (Match) {
-      tgtok::TokKind Kind = PreprocessorDirs[ID].Kind;
-
-      // New line and EOF may follow only #else/#endif.  It will be reported
-      // as an error for #ifdef/#define after the call to prepLexMacroName().
-      if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF ||
-          NextChar == '\n' ||
-          // It looks like TableGen does not support '\r' as the actual
-          // carriage return, e.g. getNextChar() treats a single '\r'
-          // as '\n'.  So we do the same here.
-          NextChar == '\r')
-        return Kind;
-
-      // Allow comments after some directives, e.g.:
-      //     #else// OR #else/**/
-      //     #endif// OR #endif/**/
-      //
-      // Note that we do allow comments after #ifdef/#define here, e.g.
-      //     #ifdef/**/ AND #ifdef//
-      //     #define/**/ AND #define//
-      //
-      // These cases will be reported as incorrect after calling
-      // prepLexMacroName().  We could have supported C-style comments
-      // after #ifdef/#define, but this would complicate the code
-      // for little benefit.
-      if (NextChar == '/') {
-        NextChar = peekNextChar(I + 1);
-
-        if (NextChar == '*' || NextChar == '/')
-          return Kind;
-
-        // Pretend that we do not recognize the directive.
-      }
-    }
-  }
-
-  return tgtok::Error;
-}
-
-bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
-  TokStart = CurPtr;
-
-  for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID)
-    if (PreprocessorDirs[ID].Kind == Kind) {
-      // Advance CurPtr to the end of the preprocessing word.
-      CurPtr += strlen(PreprocessorDirs[ID].Word);
-      return true;
-    }
-
-  PrintFatalError("Unsupported preprocessing token in "
-                  "prepEatPreprocessorDirective()");
-  return false;
-}
-
-tgtok::TokKind TGLexer::lexPreprocessor(
-    tgtok::TokKind Kind, bool ReturnNextLiveToken) {
-
-  // We must be looking at a preprocessing directive.  Eat it!
-  if (!prepEatPreprocessorDirective(Kind))
-    PrintFatalError("lexPreprocessor() called for unknown "
-                    "preprocessor directive");
-
-  if (Kind == tgtok::Ifdef) {
-    StringRef MacroName = prepLexMacroName();
-    if (MacroName.empty())
-      return ReturnError(TokStart, "Expected macro name after #ifdef");
-
-    bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;
-
-    // Regardless of whether we are processing tokens or not,
-    // we put the #ifdef control on stack.
-    PrepIncludeStack.back()->push_back(
-        {Kind, MacroIsDefined, SMLoc::getFromPointer(TokStart)});
-
-    if (!prepSkipDirectiveEnd())
-      return ReturnError(CurPtr,
-                         "Only comments are supported after #ifdef NAME");
-
-    // If we were not processing tokens before this #ifdef,
-    // then just return back to the lines skipping code.
-    if (!ReturnNextLiveToken)
-      return Kind;
-
-    // If we were processing tokens before this #ifdef,
-    // and the macro is defined, then just return the next token.
-    if (MacroIsDefined)
-      return LexToken();
-
-    // We were processing tokens before this #ifdef, and the macro
-    // is not defined, so we have to start skipping the lines.
-    // If the skipping is successful, it will return the token following
-    // either #else or #endif corresponding to this #ifdef.
-    if (prepSkipRegion(ReturnNextLiveToken))
-      return LexToken();
-
-    return tgtok::Error;
-  } else if (Kind == tgtok::Else) {
-    // Check if this #else is correct before calling prepSkipDirectiveEnd(),
-    // which will move CurPtr away from the beginning of #else.
-    if (PrepIncludeStack.back()->empty())
-      return ReturnError(TokStart, "#else without #ifdef");
-
-    auto &IfdefEntry = PrepIncludeStack.back()->back();
-
-    if (IfdefEntry.Kind != tgtok::Ifdef) {
-      PrintError(TokStart, "double #else");
-      return ReturnError(IfdefEntry.SrcPos, "Previous #else is here");
-    }
-
-    // Replace the corresponding #ifdef's control with its negation
-    // on the control stack.
-    PrepIncludeStack.back()->pop_back();
-    PrepIncludeStack.back()->push_back(
-        {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)});
-
-    if (!prepSkipDirectiveEnd())
-      return ReturnError(CurPtr, "Only comments are supported after #else");
-
-    // If we were processing tokens before this #else,
-    // we have to start skipping lines until the matching #endif.
-    if (ReturnNextLiveToken) {
-      if (prepSkipRegion(ReturnNextLiveToken))
-        return LexToken();
-
-      return tgtok::Error;
-    }
-
-    // Return to the lines skipping code.
-    return Kind;
-  } else if (Kind == tgtok::Endif) {
-    // Check if this #endif is correct before calling prepSkipDirectiveEnd(),
-    // which will move CurPtr away from the beginning of #endif.
-    if (PrepIncludeStack.back()->empty())
-      return ReturnError(TokStart, "#endif without #ifdef");
-
-    auto &IfdefOrElseEntry = PrepIncludeStack.back()->back();
-
-    if (IfdefOrElseEntry.Kind != tgtok::Ifdef &&
-        IfdefOrElseEntry.Kind != tgtok::Else) {
-      PrintFatalError("Invalid preprocessor control on the stack");
-      return tgtok::Error;
-    }
-
-    if (!prepSkipDirectiveEnd())
-      return ReturnError(CurPtr, "Only comments are supported after #endif");
-
-    PrepIncludeStack.back()->pop_back();
-
-    // If we were processing tokens before this #endif, then
-    // we should continue it.
-    if (ReturnNextLiveToken) {
-      return LexToken();
-    }
-
-    // Return to the lines skipping code.
-    return Kind;
-  } else if (Kind == tgtok::Define) {
-    StringRef MacroName = prepLexMacroName();
-    if (MacroName.empty())
-      return ReturnError(TokStart, "Expected macro name after #define");
-
-    if (!DefinedMacros.insert(MacroName).second)
-      PrintWarning(getLoc(),
-                   "Duplicate definition of macro: " + Twine(MacroName));
-
-    if (!prepSkipDirectiveEnd())
-      return ReturnError(CurPtr,
-                         "Only comments are supported after #define NAME");
-
-    if (!ReturnNextLiveToken) {
-      PrintFatalError("#define must be ignored during the lines skipping");
-      return tgtok::Error;
-    }
-
-    return LexToken();
-  }
-
-  PrintFatalError("Preprocessing directive is not supported");
-  return tgtok::Error;
-}
-
-bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
-  if (!MustNeverBeFalse)
-    PrintFatalError("Invalid recursion.");
-
-  do {
-    // Skip all symbols to the line end.
-    prepSkipToLineEnd();
-
-    // Find the first non-whitespace symbol in the next line(s).
-    if (!prepSkipLineBegin())
-      return false;
-
-    // If the first non-blank/comment symbol on the line is '#',
-    // it may be a start of preprocessing directive.
-    //
-    // If it is not '#' just go to the next line.
-    if (*CurPtr == '#')
-      ++CurPtr;
-    else
-      continue;
-
-    tgtok::TokKind Kind = prepIsDirective();
-
-    // If we did not find a preprocessing directive or it is #define,
-    // then just skip to the next line.  We do not have to do anything
-    // for #define in the line-skipping mode.
-    if (Kind == tgtok::Error || Kind == tgtok::Define)
-      continue;
-
-    tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);
-
-    // If lexPreprocessor() encountered an error during lexing this
-    // preprocessor idiom, then return false to the calling lexPreprocessor().
-    // This will force tgtok::Error to be returned to the tokens processing.
-    if (ProcessedKind == tgtok::Error)
-      return false;
-
-    if (Kind != ProcessedKind)
-      PrintFatalError("prepIsDirective() and lexPreprocessor() "
-                      "returned different token kinds");
-
-    // If this preprocessing directive enables tokens processing,
-    // then return to the lexPreprocessor() and get to the next token.
-    // We can move from line-skipping mode to processing tokens only
-    // due to #else or #endif.
-    if (prepIsProcessingEnabled()) {
-      if (Kind != tgtok::Else && Kind != tgtok::Endif) {
-        PrintFatalError("Tokens processing was enabled by an unexpected "
-                        "preprocessing directive");
-        return false;
-      }
-
-      return true;
-    }
-  } while (CurPtr != CurBuf.end());
-
-  // We have reached the end of the file, but never left the lines-skipping
-  // mode.  This means there is no matching #endif.
-  prepReportPreprocessorStackError();
-  return false;
-}
-
-StringRef TGLexer::prepLexMacroName() {
-  // Skip whitespaces between the preprocessing directive and the macro name.
-  while (*CurPtr == ' ' || *CurPtr == '\t')
-    ++CurPtr;
-
-  TokStart = CurPtr;
-  // Macro names start with [a-zA-Z_].
-  if (*CurPtr != '_' && !isalpha(*CurPtr))
-    return "";
-
-  // Match the rest of the identifier regex: [0-9a-zA-Z_]*
-  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
-    ++CurPtr;
-
-  return StringRef(TokStart, CurPtr - TokStart);
-}
-
-bool TGLexer::prepSkipLineBegin() {
-  while (CurPtr != CurBuf.end()) {
-    switch (*CurPtr) {
-    case ' ':
-    case '\t':
-    case '\n':
-    case '\r':
-      break;
-
-    case '/': {
-      int NextChar = peekNextChar(1);
-      if (NextChar == '*') {
-        // Skip C-style comment.
-        // Note that we do not care about skipping the C++-style comments.
-        // If the line contains "//", it may not contain any processable
-        // preprocessing directive.  Just return CurPtr pointing to
-        // the first '/' in this case.  We also do not care about
-        // incorrect symbols after the first '/' - we are in lines-skipping
-        // mode, so incorrect code is allowed to some extent.
-
-        // Set TokStart to the beginning of the comment to enable proper
-        // diagnostic printing in case of error in SkipCComment().
-        TokStart = CurPtr;
-
-        // CurPtr must point to '*' before call to SkipCComment().
-        ++CurPtr;
-        if (SkipCComment())
-          return false;
-      } else {
-        // CurPtr points to the non-whitespace '/'.
-        return true;
-      }
-
-      // We must not increment CurPtr after the comment was lexed.
-      continue;
-    }
-
-    default:
-      return true;
-    }
-
-    ++CurPtr;
-  }
-
-  // We have reached the end of the file.  Return to the lines skipping
-  // code, and allow it to handle the EOF as needed.
-  return true;
-}
-
-bool TGLexer::prepSkipDirectiveEnd() {
-  while (CurPtr != CurBuf.end()) {
-    switch (*CurPtr) {
-    case ' ':
-    case '\t':
-      break;
-
-    case '\n':
-    case '\r':
-      return true;
-
-    case '/': {
-      int NextChar = peekNextChar(1);
-      if (NextChar == '/') {
-        // Skip C++-style comment.
-        // We may just return true now, but let's skip to the line/buffer end
-        // to simplify the method specification.
-        ++CurPtr;
-        SkipBCPLComment();
-      } else if (NextChar == '*') {
-        // When we are skipping C-style comment at the end of a preprocessing
-        // directive, we can skip several lines.  If any meaningful TD token
-        // follows the end of the C-style comment on the same line, it will
-        // be considered as an invalid usage of TD token.
-        // For example, we want to forbid usages like this one:
-        //     #define MACRO class Class {}
-        // But with C-style comments we also disallow the following:
-        //     #define MACRO /* This macro is used
-        //                      to ... */ class Class {}
-        // One can argue that this should be allowed, but it does not seem
-        // to be worth of the complication.  Moreover, this matches
-        // the C preprocessor behavior.
-
-        // Set TokStart to the beginning of the comment to enable proper
-        // diagnostic printer in case of error in SkipCComment().
-        TokStart = CurPtr;
-        ++CurPtr;
-        if (SkipCComment())
-          return false;
-      } else {
-        TokStart = CurPtr;
-        PrintError(CurPtr, "Unexpected character");
-        return false;
-      }
-
-      // We must not increment CurPtr after the comment was lexed.
-      continue;
-    }
-
-    default:
-      // Do not allow any non-whitespaces after the directive.
-      TokStart = CurPtr;
-      return false;
-    }
-
-    ++CurPtr;
-  }
-
-  return true;
-}
-
-void TGLexer::prepSkipToLineEnd() {
-  while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end())
-    ++CurPtr;
-}
-
-bool TGLexer::prepIsProcessingEnabled() {
-  for (auto I = PrepIncludeStack.back()->rbegin(),
-            E = PrepIncludeStack.back()->rend();
-       I != E; ++I) {
-    if (!I->IsDefined)
-      return false;
-  }
-
-  return true;
-}
-
-void TGLexer::prepReportPreprocessorStackError() {
-  if (PrepIncludeStack.back()->empty())
-    PrintFatalError("prepReportPreprocessorStackError() called with "
-                    "empty control stack");
-
-  auto &PrepControl = PrepIncludeStack.back()->back();
-  PrintError(CurBuf.end(), "Reached EOF without matching #endif");
-  PrintError(PrepControl.SrcPos, "The latest preprocessor control is here");
-
-  TokStart = CurPtr;
-}
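
The functional heart of this change is in the getNextChar() hunk above: when the lexer reaches the end of an included buffer, it now pops straight back to the parent buffer recorded by SourceMgr and keeps reading, instead of routing through the removed processEOF()/preprocessor-control machinery. Below is a minimal standalone sketch of that pattern, assuming a simplified explicit buffer stack; MiniLexer and all names in it are illustrative stand-ins, not the SourceMgr/TGLexer API.

#include <cstddef>
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

// Sketch of "pop the include stack at end of buffer": each included buffer is
// pushed on a stack, and hitting its end resumes lexing in the parent buffer.
class MiniLexer {
  struct Buffer {
    std::string Text;
    std::size_t Pos;
  };
  std::vector<Buffer> IncludeStack; // back() is the buffer being lexed.

public:
  void pushBuffer(std::string Text) {
    IncludeStack.push_back({std::move(Text), 0});
  }

  // Return the next character, or EOF once every buffer is exhausted.
  int getNextChar() {
    while (!IncludeStack.empty()) {
      Buffer &B = IncludeStack.back();
      if (B.Pos < B.Text.size())
        return static_cast<unsigned char>(B.Text[B.Pos++]);
      // End of an included buffer: drop it and continue in the parent.
      IncludeStack.pop_back();
    }
    return EOF; // End of the outermost buffer.
  }
};

int main() {
  MiniLexer Lexer;
  Lexer.pushBuffer("outer ");  // the including file
  Lexer.pushBuffer("inner ");  // an include that was just entered
  for (int C = Lexer.getNextChar(); C != EOF; C = Lexer.getNextChar())
    std::cout << static_cast<char>(C);
  std::cout << '\n';           // prints "inner outer "
}

The real lexer keeps only the current buffer (CurBuf/CurPtr) and asks SrcMgr.getParentIncludeLoc() where to resume, but the control flow is the same: end of buffer is not EOF until the outermost file is exhausted.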