diff options
author | Vyacheslav Zakharin <vyacheslav.p.zakharin@intel.com> | 2018-11-27 18:57:43 +0000 |
---|---|---|
committer | Vyacheslav Zakharin <vyacheslav.p.zakharin@intel.com> | 2018-11-27 18:57:43 +0000 |
commit | f7d079e93e0ea6c97d03c917c4af9dd812bef2de (patch) | |
tree | 42daef0737045e23789eb5c621fbc08788d48905 /llvm/lib/TableGen/TGLexer.h | |
parent | 4a3d758ae4aaf3389349a5b76686f3145c76cd24 (diff) | |
download | bcm5719-llvm-f7d079e93e0ea6c97d03c917c4af9dd812bef2de.tar.gz bcm5719-llvm-f7d079e93e0ea6c97d03c917c4af9dd812bef2de.zip |
[TableGen] Preprocessing support
Differential Revision: https://reviews.llvm.org/D54926
llvm-svn: 347686
Diffstat (limited to 'llvm/lib/TableGen/TGLexer.h')
-rw-r--r-- | llvm/lib/TableGen/TGLexer.h | 243 |
1 files changed, 238 insertions, 5 deletions
diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h index 2c80743e3a6..e9980b36b97 100644 --- a/llvm/lib/TableGen/TGLexer.h +++ b/llvm/lib/TableGen/TGLexer.h @@ -14,11 +14,14 @@ #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H #define LLVM_LIB_TABLEGEN_TGLEXER_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/SMLoc.h" #include <cassert> #include <map> +#include <memory> #include <string> namespace llvm { @@ -59,7 +62,11 @@ namespace tgtok { BinaryIntVal, // String valued tokens. - Id, StrVal, VarName, CodeFragment + Id, StrVal, VarName, CodeFragment, + + // Preprocessing tokens for internal usage by the lexer. + // They are never returned as a result of Lex(). + Ifdef, Else, Endif, Define }; } @@ -87,10 +94,10 @@ private: DependenciesMapTy Dependencies; public: - TGLexer(SourceMgr &SrcMgr); + TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros); tgtok::TokKind Lex() { - return CurCode = LexToken(); + return CurCode = LexToken(CurPtr == CurBuf.begin()); } const DependenciesMapTy &getDependencies() const { @@ -119,12 +126,13 @@ public: private: /// LexToken - Read the next token and return its code. - tgtok::TokKind LexToken(); + tgtok::TokKind LexToken(bool FileOrLineStart = false); + tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg); tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg); int getNextChar(); - int peekNextChar(int Index); + int peekNextChar(int Index) const; void SkipBCPLComment(); bool SkipCComment(); tgtok::TokKind LexIdentifier(); @@ -134,6 +142,231 @@ private: tgtok::TokKind LexNumber(); tgtok::TokKind LexBracket(); tgtok::TokKind LexExclaim(); + + // Process EOF encountered in LexToken(). + // If EOF is met in an include file, then the method will update + // CurPtr, CurBuf and preprocessing include stack, and return true. 
+ // If EOF is met in the top-level file, then the method will + // update and check the preprocessing include stack, and return false. + bool processEOF(); + + // *** Structures and methods for preprocessing support *** + + // A set of macro names that are defined either via command line or + // by using: + // #define NAME + StringSet<> DefinedMacros; + + // Each of #ifdef and #else directives has a descriptor associated + // with it. + // + // An ordered list of preprocessing controls defined by #ifdef/#else + // directives that are in effect currently is called preprocessing + // control stack. It is represented as a vector of PreprocessorControlDesc's. + // + // The control stack is updated according to the following rules: + // + // For each #ifdef we add an element to the control stack. + // For each #else we replace the top element with a descriptor + // with an inverted IsDefined value. + // For each #endif we pop the top element from the control stack. + // + // When CurPtr reaches the current buffer's end, the control stack + // must be empty, i.e. #ifdef and the corresponding #endif + // must be located in the same file. + struct PreprocessorControlDesc { + // Either tgtok::Ifdef or tgtok::Else. + tgtok::TokKind Kind; + + // True, if the condition for this directive is true, false - otherwise. + // Examples: + // #ifdef NAME : true, if NAME is defined, false - otherwise. + // ... + // #else : false, if NAME is defined, true - otherwise. + bool IsDefined; + + // Pointer into CurBuf to the beginning of the preprocessing directive + // word, e.g.: + // #ifdef NAME + // ^ - SrcPos + SMLoc SrcPos; + }; + + // We want to disallow code like this: + // file1.td: + // #define NAME + // #ifdef NAME + // include "file2.td" + // EOF + // file2.td: + // #endif + // EOF + // + // To do this, we clear the preprocessing control stack on entry + // to each of the included files. 
PrepIncludeStack is used to store + // preprocessing control stacks for the current file and all its + // parent files. The back() element is the preprocessing control + // stack for the current file. + std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>> + PrepIncludeStack; + + // Validate that the current preprocessing control stack is empty, + // since we are about to exit a file, and pop the include stack. + // + // If IncludeStackMustBeEmpty is true, the include stack must be empty + // after the popping, otherwise, the include stack must not be empty + // after the popping. Basically, the include stack must be empty + // only if we exit the "top-level" file (i.e. finish lexing). + // + // The method returns false, if the current preprocessing control stack + // is not empty (e.g. there is an unterminated #ifdef/#else), + // true - otherwise. + bool prepExitInclude(bool IncludeStackMustBeEmpty); + + // Look ahead for a preprocessing directive starting from CurPtr. The caller + // must only call this method, if *(CurPtr - 1) is '#'. If the method matches + // a preprocessing directive word followed by a whitespace, then it returns + // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define. + // + // CurPtr is not adjusted by this method. + tgtok::TokKind prepIsDirective() const; + + // Given a preprocessing token kind, adjusts CurPtr to the end + // of the preprocessing directive word. Returns true, unless + // an unsupported token kind is passed in. + // + // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective() + // to avoid adjusting CurPtr before we are sure that '#' is followed + // by a preprocessing directive. If it is not, then we fall back to + // tgtok::paste interpretation of '#'. + bool prepEatPreprocessorDirective(tgtok::TokKind Kind); + + // The main "exit" point from the token parsing to preprocessor. + // + // The method is called for CurPtr, when prepIsDirective() returns + // true. 
The first parameter matches the result of prepIsDirective(), + // denoting the actual preprocessor directive to be processed. + // + // If the preprocessing directive disables the tokens processing, e.g.: + // #ifdef NAME // NAME is undefined + // then lexPreprocessor() enters the lines-skipping mode. + // In this mode, it does not parse any tokens, because the code under + // the #ifdef may not even be a correct tablegen code. The preprocessor + // looks for lines containing other preprocessing directives, which + // may be prepended with whitespaces and C-style comments. If the line + // does not contain a preprocessing directive, it is skipped completely. + // Otherwise, the preprocessing directive is processed by recursively + // calling lexPreprocessor(). The processing of the encountered + // preprocessing directives includes updating preprocessing control stack + // and adding new macros into DefinedMacros set. + // + // The second parameter controls whether lexPreprocessor() is called from + // LexToken() (true) or recursively from lexPreprocessor() (false). + // + // If ReturnNextLiveToken is true, the method returns the next + // LEX token following the current directive or following the end + // of the disabled preprocessing region corresponding to this directive. + // If ReturnNextLiveToken is false, the method returns the first parameter, + // unless there were errors encountered in the disabled preprocessing + // region - in this case, it returns tgtok::Error. + tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind, + bool ReturnNextLiveToken = true); + + // Worker method for lexPreprocessor() to skip lines after some + // preprocessing directive up to the buffer end or to the directive + // that re-enables token processing. The method returns true + // upon processing the next directive that re-enables tokens + // processing. False is returned if an error was encountered. 
+ // + // Note that prepSkipRegion() calls lexPreprocessor() to process + // encountered preprocessing directives. In this case, the second + // parameter to lexPreprocessor() is set to false. Being passed + // false ReturnNextLiveToken, lexPreprocessor() must never call + // prepSkipRegion(). We assert this by passing ReturnNextLiveToken + // to prepSkipRegion() and checking that it is never set to false. + bool prepSkipRegion(bool MustNeverBeFalse); + + // Lex name of the macro after either #ifdef or #define. We could have used + // LexIdentifier(), but it has special handling of "include" word, which + // could result in awkward diagnostic errors. Consider: + // ---- + // #ifdef include + // class ... + // ---- + // LexIdentifier() will engage LexInclude(), which will complain about + // missing file with name "class". Instead, prepLexMacroName() will treat + // "include" as a normal macro name. + // + // On entry, CurPtr points to the end of a preprocessing directive word. + // The method allows for whitespaces between the preprocessing directive + // and the macro name. The allowed whitespaces are ' ' and '\t'. + // + // If the first non-whitespace symbol after the preprocessing directive + // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then + // the method updates TokStart to the position of the first non-whitespace + // symbol, sets CurPtr to the position of the macro name's last symbol, + // and returns a string reference to the macro name. Otherwise, + // TokStart is set to the first non-whitespace symbol after the preprocessing + // directive, and the method returns an empty string reference. + // + // In all cases, TokStart may be used to point to the word following + // the preprocessing directive. + StringRef prepLexMacroName(); + + // Skip any whitespaces starting from CurPtr. The method is used + // only in the lines-skipping mode to find the first non-whitespace + // symbol after or at CurPtr. 
Allowed whitespaces are ' ', '\t', '\n' + // and '\r'. The method skips C-style comments as well, because + // it is used to find the beginning of the preprocessing directive. + // If we do not handle C-style comments the following code would + // result in incorrect detection of a preprocessing directive: + // /* + // #ifdef NAME + // */ + // As long as we skip C-style comments, the following code is correctly + // recognized as a preprocessing directive: + // /* first line comment + // second line comment */ #ifdef NAME + // + // The method returns true upon reaching the first non-whitespace symbol + // or EOF, CurPtr is set to point to this symbol. The method returns false, + // if an error occurred during skipping of a C-style comment. + bool prepSkipLineBegin(); + + // Skip any whitespaces or comments after a preprocessing directive. + // The method returns true upon reaching either end of the line + // or end of the file. If there is a multiline C-style comment + // after the preprocessing directive, the method skips + // the comment, so the final CurPtr may point to one of the next lines. + // The method returns false, if an error occurred during skipping + // C- or C++-style comment, or a non-whitespace symbol appears + // after the preprocessing directive. + // + // The method may be called both during lines-skipping and tokens + // processing. It actually verifies that only whitespaces or/and + // comments follow a preprocessing directive. + // + // After the execution of this method, CurPtr points either to new line + // symbol, buffer end or non-whitespace symbol following the preprocessing + // directive. + bool prepSkipDirectiveEnd(); + + // Skip all symbols to the end of the line/file. + // The method adjusts CurPtr, so that it points to either new line + // symbol in the current line or the buffer end. 
+ void prepSkipToLineEnd(); + + // Return true, if the current preprocessor control stack is such that + // we should allow lexer to process the next token, false - otherwise. + // + // In particular, the method returns true, if all the #ifdef/#else + // controls on the stack have their IsDefined member set to true. + bool prepIsProcessingEnabled(); + + // Report an error, if we reach EOF with non-empty preprocessing control + // stack. This means there is no matching #endif for the previous + // #ifdef/#else. + void prepReportPreprocessorStackError(); }; } // end namespace llvm |