summaryrefslogtreecommitdiffstats
path: root/llvm/lib/TableGen/TGLexer.h
diff options
context:
space:
mode:
authorVyacheslav Zakharin <vyacheslav.p.zakharin@intel.com>2018-11-27 18:57:43 +0000
committerVyacheslav Zakharin <vyacheslav.p.zakharin@intel.com>2018-11-27 18:57:43 +0000
commitf7d079e93e0ea6c97d03c917c4af9dd812bef2de (patch)
tree42daef0737045e23789eb5c621fbc08788d48905 /llvm/lib/TableGen/TGLexer.h
parent4a3d758ae4aaf3389349a5b76686f3145c76cd24 (diff)
downloadbcm5719-llvm-f7d079e93e0ea6c97d03c917c4af9dd812bef2de.tar.gz
bcm5719-llvm-f7d079e93e0ea6c97d03c917c4af9dd812bef2de.zip
[TableGen] Preprocessing support
Differential Revision: https://reviews.llvm.org/D54926 llvm-svn: 347686
Diffstat (limited to 'llvm/lib/TableGen/TGLexer.h')
-rw-r--r--llvm/lib/TableGen/TGLexer.h243
1 files changed, 238 insertions, 5 deletions
diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h
index 2c80743e3a6..e9980b36b97 100644
--- a/llvm/lib/TableGen/TGLexer.h
+++ b/llvm/lib/TableGen/TGLexer.h
@@ -14,11 +14,14 @@
#ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
#define LLVM_LIB_TABLEGEN_TGLEXER_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <map>
+#include <memory>
#include <string>
namespace llvm {
@@ -59,7 +62,11 @@ namespace tgtok {
BinaryIntVal,
// String valued tokens.
- Id, StrVal, VarName, CodeFragment
+ Id, StrVal, VarName, CodeFragment,
+
+ // Preprocessing tokens for internal usage by the lexer.
+ // They are never returned as a result of Lex().
+ Ifdef, Else, Endif, Define
};
}
@@ -87,10 +94,10 @@ private:
DependenciesMapTy Dependencies;
public:
- TGLexer(SourceMgr &SrcMgr);
+ TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
tgtok::TokKind Lex() {
- return CurCode = LexToken();
+ return CurCode = LexToken(CurPtr == CurBuf.begin());
}
const DependenciesMapTy &getDependencies() const {
@@ -119,12 +126,13 @@ public:
private:
/// LexToken - Read the next token and return its code.
- tgtok::TokKind LexToken();
+ tgtok::TokKind LexToken(bool FileOrLineStart = false);
+ tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
int getNextChar();
- int peekNextChar(int Index);
+ int peekNextChar(int Index) const;
void SkipBCPLComment();
bool SkipCComment();
tgtok::TokKind LexIdentifier();
@@ -134,6 +142,231 @@ private:
tgtok::TokKind LexNumber();
tgtok::TokKind LexBracket();
tgtok::TokKind LexExclaim();
+
+ // Process EOF encountered in LexToken().
+ // If EOF is met in an include file, then the method will update
+ // CurPtr, CurBuf and preprocessing include stack, and return true.
+ // If EOF is met in the top-level file, then the method will
+ // update and check the preprocessing include stack, and return false.
+ bool processEOF();
+
+ // *** Structures and methods for preprocessing support ***
+
+ // A set of macro names that are defined either via command line or
+ // by using:
+ // #define NAME
+ StringSet<> DefinedMacros;
+
+ // Each of #ifdef and #else directives has a descriptor associated
+ // with it.
+ //
+ // An ordered list of preprocessing controls defined by #ifdef/#else
+ // directives that are in effect currently is called preprocessing
+ // control stack. It is represented as a vector of PreprocessorControlDesc's.
+ //
+ // The control stack is updated according to the following rules:
+ //
+ // For each #ifdef we add an element to the control stack.
+ // For each #else we replace the top element with a descriptor
+ // with an inverted IsDefined value.
+ // For each #endif we pop the top element from the control stack.
+ //
+ // When CurPtr reaches the current buffer's end, the control stack
+ // must be empty, i.e. #ifdef and the corresponding #endif
+ // must be located in the same file.
+ struct PreprocessorControlDesc {
+ // The directive this descriptor describes: tgtok::Ifdef or tgtok::Else.
+ tgtok::TokKind Kind;
+
+ // True, if the condition for this directive holds, false otherwise.
+ // Examples:
+ // #ifdef NAME : true, if NAME is defined, false otherwise.
+ // ...
+ // #else : false, if NAME is defined, true otherwise.
+ bool IsDefined;
+
+ // Location in CurBuf of the beginning of the preprocessing directive
+ // word, e.g.:
+ // #ifdef NAME
+ // ^ - SrcPos
+ SMLoc SrcPos;
+ };
+
+ // We want to disallow code like this:
+ // file1.td:
+ // #define NAME
+ // #ifdef NAME
+ // include "file2.td"
+ // EOF
+ // file2.td:
+ // #endif
+ // EOF
+ //
+ // To do this, we clear the preprocessing control stack on entry
+ // to each of the included files. PrepIncludeStack is used to store
+ // preprocessing control stacks for the current file and all its
+ // parent files. The back() element is the preprocessing control
+ // stack for the current file.
+ std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
+ PrepIncludeStack;
+
+ // Validate that the current preprocessing control stack is empty,
+ // since we are about to exit a file, and pop the include stack.
+ //
+ // If IncludeStackMustBeEmpty is true, the include stack must be empty
+ // after the popping, otherwise, the include stack must not be empty
+ // after the popping. Basically, the include stack must be empty
+ // only if we exit the "top-level" file (i.e. finish lexing).
+ //
+ // The method returns false, if the current preprocessing control stack
+ // is not empty (e.g. there is an unterminated #ifdef/#else),
+ // true - otherwise.
+ bool prepExitInclude(bool IncludeStackMustBeEmpty);
+
+ // Look ahead for a preprocessing directive starting from CurPtr. The caller
+ // must only call this method, if *(CurPtr - 1) is '#'. If the method matches
+ // a preprocessing directive word followed by a whitespace, then it returns
+ // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
+ //
+ // CurPtr is not adjusted by this method.
+ tgtok::TokKind prepIsDirective() const;
+
+ // Given a preprocessing token kind, adjusts CurPtr to the end
+ // of the preprocessing directive word. Returns true, unless
+ // an unsupported token kind is passed in.
+ //
+ // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
+ // to avoid adjusting CurPtr before we are sure that '#' is followed
+ // by a preprocessing directive. If it is not, then we fall back to
+ // tgtok::paste interpretation of '#'.
+ bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
+
+ // The main "exit" point from the token parsing to preprocessor.
+ //
+ // The method is called for CurPtr, when prepIsDirective() matches a
+ // directive. The first parameter is the result of prepIsDirective(),
+ // denoting the actual preprocessor directive to be processed.
+ //
+ // If the preprocessing directive disables the tokens processing, e.g.:
+ // #ifdef NAME // NAME is undefined
+ // then lexPreprocessor() enters the lines-skipping mode.
+ // In this mode, it does not parse any tokens, because the code under
+ // the #ifdef may not even be a correct tablegen code. The preprocessor
+ // looks for lines containing other preprocessing directives, which
+ // may be prepended with whitespaces and C-style comments. If the line
+ // does not contain a preprocessing directive, it is skipped completely.
+ // Otherwise, the preprocessing directive is processed by recursively
+ // calling lexPreprocessor(). The processing of the encountered
+ // preprocessing directives includes updating preprocessing control stack
+ // and adding new macros into DefinedMacros set.
+ //
+ // The second parameter controls whether lexPreprocessor() is called from
+ // LexToken() (true) or recursively from lexPreprocessor() (false).
+ //
+ // If ReturnNextLiveToken is true, the method returns the next
+ // LEX token following the current directive or following the end
+ // of the disabled preprocessing region corresponding to this directive.
+ // If ReturnNextLiveToken is false, the method returns the first parameter,
+ // unless there were errors encountered in the disabled preprocessing
+ // region - in this case, it returns tgtok::Error.
+ tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
+ bool ReturnNextLiveToken = true);
+
+ // Worker method for lexPreprocessor() to skip lines after some
+ // preprocessing directive up to the buffer end or to the directive
+ // that re-enables token processing. The method returns true
+ // upon processing the next directive that re-enables tokens
+ // processing. False is returned if an error was encountered.
+ //
+ // Note that prepSkipRegion() calls lexPreprocessor() to process
+ // encountered preprocessing directives. In this case, the second
+ // parameter to lexPreprocessor() is set to false. Being passed
+ // false ReturnNextLiveToken, lexPreprocessor() must never call
+ // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
+ // to prepSkipRegion() and checking that it is never set to false.
+ bool prepSkipRegion(bool MustNeverBeFalse);
+
+ // Lex name of the macro after either #ifdef or #define. We could have used
+ // LexIdentifier(), but it has special handling of "include" word, which
+ // could result in awkward diagnostic errors. Consider:
+ // ----
+ // #ifdef include
+ // class ...
+ // ----
+ // LexIdentifier() will engage LexInclude(), which will complain about
+ // missing file with name "class". Instead, prepLexMacroName() will treat
+ // "include" as a normal macro name.
+ //
+ // On entry, CurPtr points to the end of a preprocessing directive word.
+ // The method allows for whitespaces between the preprocessing directive
+ // and the macro name. The allowed whitespaces are ' ' and '\t'.
+ //
+ // If the first non-whitespace symbol after the preprocessing directive
+ // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
+ // the method updates TokStart to the position of the first non-whitespace
+ // symbol, sets CurPtr to the position of the macro name's last symbol,
+ // and returns a string reference to the macro name. Otherwise,
+ // TokStart is set to the first non-whitespace symbol after the preprocessing
+ // directive, and the method returns an empty string reference.
+ //
+ // In all cases, TokStart may be used to point to the word following
+ // the preprocessing directive.
+ StringRef prepLexMacroName();
+
+ // Skip any whitespaces starting from CurPtr. The method is used
+ // only in the lines-skipping mode to find the first non-whitespace
+ // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
+ // and '\r'. The method skips C-style comments as well, because
+ // it is used to find the beginning of the preprocessing directive.
+ // If we do not handle C-style comments the following code would
+ // result in incorrect detection of a preprocessing directive:
+ // /*
+ // #ifdef NAME
+ // */
+ // As long as we skip C-style comments, the following code is correctly
+ // recognized as a preprocessing directive:
+ // /* first line comment
+ // second line comment */ #ifdef NAME
+ //
+ // The method returns true upon reaching the first non-whitespace symbol
+ // or EOF, CurPtr is set to point to this symbol. The method returns false,
+ // if an error occurred during skipping of a C-style comment.
+ bool prepSkipLineBegin();
+
+ // Skip any whitespaces or comments after a preprocessing directive.
+ // The method returns true upon reaching either end of the line
+ // or end of the file. If there is a multiline C-style comment
+ // after the preprocessing directive, the method skips
+ // the comment, so the final CurPtr may point to one of the next lines.
+ // The method returns false, if an error occurred during skipping
+ // C- or C++-style comment, or a non-whitespace symbol appears
+ // after the preprocessing directive.
+ //
+ // The method may be called both during lines-skipping and tokens
+ // processing. It actually verifies that only whitespaces or/and
+ // comments follow a preprocessing directive.
+ //
+ // After the execution of this method, CurPtr points either to new line
+ // symbol, buffer end or non-whitespace symbol following the preprocessing
+ // directive.
+ bool prepSkipDirectiveEnd();
+
+ // Skip all symbols to the end of the line/file.
+ // The method adjusts CurPtr, so that it points to either new line
+ // symbol in the current line or the buffer end.
+ void prepSkipToLineEnd();
+
+ // Return true, if the current preprocessor control stack is such that
+ // we should allow lexer to process the next token, false - otherwise.
+ //
+ // In particular, the method returns true, if all the #ifdef/#else
+ // controls on the stack have their IsDefined member set to true.
+ bool prepIsProcessingEnabled();
+
+ // Report an error, if we reach EOF with non-empty preprocessing control
+ // stack. This means there is no matching #endif for the previous
+ // #ifdef/#else.
+ void prepReportPreprocessorStackError();
};
} // end namespace llvm
OpenPOWER on IntegriCloud