summaryrefslogtreecommitdiffstats
path: root/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/Frontend/Rewrite/InclusionRewriter.cpp')
-rw-r--r--clang/lib/Frontend/Rewrite/InclusionRewriter.cpp547
1 files changed, 547 insertions, 0 deletions
diff --git a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
new file mode 100644
index 00000000000..aa7017baee2
--- /dev/null
+++ b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -0,0 +1,547 @@
+//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This code rewrites include invocations into their expansions. This gives you
+// a file with all included files merged into it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Rewrite/Frontend/Rewriters.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/PreprocessorOutputOptions.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/Pragma.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+using namespace llvm;
+
+namespace {
+
+class InclusionRewriter : public PPCallbacks {
+ /// Information about which #includes were actually performed,
+ /// created by preprocessor callbacks.
+ struct FileChange {
+ const Module *Mod;
+ SourceLocation From;
+ FileID Id;
+ SrcMgr::CharacteristicKind FileType;
+ FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
+ }
+ };
+ Preprocessor &PP; ///< Used to find inclusion directives.
+ SourceManager &SM; ///< Used to read and manage source files.
+ raw_ostream &OS; ///< The destination stream for rewritten contents.
+ const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines.
+ bool ShowLineMarkers; ///< Show #line markers.
+ bool UseLineDirective; ///< Use of line directives or line markers.
+ typedef std::map<unsigned, FileChange> FileChangeMap;
+ FileChangeMap FileChanges; ///< Tracks which files were included where.
+ /// Used transitively for building up the FileChanges mapping over the
+ /// various \c PPCallbacks callbacks.
+ FileChangeMap::iterator LastInsertedFileChange;
+public:
+ InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
+ bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
+ void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) {
+ PredefinesBuffer = Buf;
+ }
+private:
+ void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+ SrcMgr::CharacteristicKind FileType,
+ FileID PrevFID) override;
+ void FileSkipped(const FileEntry &ParentFile, const Token &FilenameTok,
+ SrcMgr::CharacteristicKind FileType) override;
+ void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
+ StringRef FileName, bool IsAngled,
+ CharSourceRange FilenameRange, const FileEntry *File,
+ StringRef SearchPath, StringRef RelativePath,
+ const Module *Imported) override;
+ void WriteLineInfo(const char *Filename, int Line,
+ SrcMgr::CharacteristicKind FileType,
+ StringRef EOL, StringRef Extra = StringRef());
+ void WriteImplicitModuleImport(const Module *Mod, StringRef EOL);
+ void OutputContentUpTo(const MemoryBuffer &FromFile,
+ unsigned &WriteFrom, unsigned WriteTo,
+ StringRef EOL, int &lines,
+ bool EnsureNewline);
+ void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
+ const MemoryBuffer &FromFile, StringRef EOL,
+ unsigned &NextToWrite, int &Lines);
+ bool HandleHasInclude(FileID FileId, Lexer &RawLex,
+ const DirectoryLookup *Lookup, Token &Tok,
+ bool &FileExists);
+ const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
+ StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
+};
+
+} // end anonymous namespace
+
+/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
+InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
+ bool ShowLineMarkers)
+ : PP(PP), SM(PP.getSourceManager()), OS(OS), PredefinesBuffer(nullptr),
+ ShowLineMarkers(ShowLineMarkers),
+ LastInsertedFileChange(FileChanges.end()) {
+ // If we're in microsoft mode, use normal #line instead of line markers.
+ UseLineDirective = PP.getLangOpts().MicrosoftExt;
+}
+
+/// Write appropriate line information as either #line directives or GNU line
+/// markers depending on what mode we're in, including the \p Filename and
+/// \p Line we are located at, using the specified \p EOL line separator, and
+/// any \p Extra context specifiers in GNU line directives.
+void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
+ SrcMgr::CharacteristicKind FileType,
+ StringRef EOL, StringRef Extra) {
+ if (!ShowLineMarkers)
+ return;
+ if (UseLineDirective) {
+ OS << "#line" << ' ' << Line << ' ' << '"';
+ OS.write_escaped(Filename);
+ OS << '"';
+ } else {
+ // Use GNU linemarkers as described here:
+ // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
+ OS << '#' << ' ' << Line << ' ' << '"';
+ OS.write_escaped(Filename);
+ OS << '"';
+ if (!Extra.empty())
+ OS << Extra;
+ if (FileType == SrcMgr::C_System)
+ // "`3' This indicates that the following text comes from a system header
+ // file, so certain warnings should be suppressed."
+ OS << " 3";
+ else if (FileType == SrcMgr::C_ExternCSystem)
+ // as above for `3', plus "`4' This indicates that the following text
+ // should be treated as being wrapped in an implicit extern "C" block."
+ OS << " 3 4";
+ }
+ OS << EOL;
+}
+
+void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod,
+ StringRef EOL) {
+ OS << "@import " << Mod->getFullModuleName() << ";"
+ << " /* clang -frewrite-includes: implicit import */" << EOL;
+}
+
+/// FileChanged - Whenever the preprocessor enters or exits a #include file
+/// it invokes this handler.
+void InclusionRewriter::FileChanged(SourceLocation Loc,
+ FileChangeReason Reason,
+ SrcMgr::CharacteristicKind NewFileType,
+ FileID) {
+ if (Reason != EnterFile)
+ return;
+ if (LastInsertedFileChange == FileChanges.end())
+ // we didn't reach this file (eg: the main file) via an inclusion directive
+ return;
+ LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
+ LastInsertedFileChange->second.FileType = NewFileType;
+ LastInsertedFileChange = FileChanges.end();
+}
+
+/// Called whenever an inclusion is skipped due to canonical header protection
+/// macros.
+void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
+ const Token &/*FilenameTok*/,
+ SrcMgr::CharacteristicKind /*FileType*/) {
+ assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
+ "found via an inclusion directive, was skipped");
+ FileChanges.erase(LastInsertedFileChange);
+ LastInsertedFileChange = FileChanges.end();
+}
+
+/// This should be called whenever the preprocessor encounters include
+/// directives. It does not say whether the file has been included, but it
+/// provides more information about the directive (hash location instead
+/// of location inside the included file). It is assumed that the matching
+/// FileChanged() or FileSkipped() is called after this.
+void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
+ const Token &/*IncludeTok*/,
+ StringRef /*FileName*/,
+ bool /*IsAngled*/,
+ CharSourceRange /*FilenameRange*/,
+ const FileEntry * /*File*/,
+ StringRef /*SearchPath*/,
+ StringRef /*RelativePath*/,
+ const Module *Imported) {
+ assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
+ "directive was found before the previous one was processed");
+ std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
+ std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
+ assert(p.second && "Unexpected revisitation of the same include directive");
+ if (!Imported)
+ LastInsertedFileChange = p.first;
+}
+
+/// Simple lookup for a SourceLocation (specifically one denoting the hash in
+/// an inclusion directive) in the map of inclusion information, FileChanges.
+const InclusionRewriter::FileChange *
+InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
+ FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
+ if (I != FileChanges.end())
+ return &I->second;
+ return nullptr;
+}
+
+/// Detect the likely line ending style of \p FromFile by examining the first
+/// newline found within it.
+static StringRef DetectEOL(const MemoryBuffer &FromFile) {
+ // detect what line endings the file uses, so that added content does not mix
+ // the style
+ const char *Pos = strchr(FromFile.getBufferStart(), '\n');
+ if (!Pos)
+ return "\n";
+ if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
+ return "\n\r";
+ if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
+ return "\r\n";
+ return "\n";
+}
+
+/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
+/// \p WriteTo - 1.
+void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
+ unsigned &WriteFrom, unsigned WriteTo,
+ StringRef EOL, int &Line,
+ bool EnsureNewline) {
+ if (WriteTo <= WriteFrom)
+ return;
+ if (&FromFile == PredefinesBuffer) {
+ // Ignore the #defines of the predefines buffer.
+ WriteFrom = WriteTo;
+ return;
+ }
+ OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
+ // count lines manually, it's faster than getPresumedLoc()
+ Line += std::count(FromFile.getBufferStart() + WriteFrom,
+ FromFile.getBufferStart() + WriteTo, '\n');
+ if (EnsureNewline) {
+ char LastChar = FromFile.getBufferStart()[WriteTo - 1];
+ if (LastChar != '\n' && LastChar != '\r')
+ OS << EOL;
+ }
+ WriteFrom = WriteTo;
+}
+
+/// Print characters from \p FromFile starting at \p NextToWrite up until the
+/// inclusion directive at \p StartToken, then print out the inclusion
+/// inclusion directive disabled by a #if directive, updating \p NextToWrite
+/// and \p Line to track the number of source lines visited and the progress
+/// through the \p FromFile buffer.
+void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
+ const Token &StartToken,
+ const MemoryBuffer &FromFile,
+ StringRef EOL,
+ unsigned &NextToWrite, int &Line) {
+ OutputContentUpTo(FromFile, NextToWrite,
+ SM.getFileOffset(StartToken.getLocation()), EOL, Line, false);
+ Token DirectiveToken;
+ do {
+ DirectiveLex.LexFromRawLexer(DirectiveToken);
+ } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
+ if (&FromFile == PredefinesBuffer) {
+ // OutputContentUpTo() would not output anything anyway.
+ return;
+ }
+ OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
+ OutputContentUpTo(FromFile, NextToWrite,
+ SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
+ EOL, Line, true);
+ OS << "#endif /* expanded by -frewrite-includes */" << EOL;
+}
+
+/// Find the next identifier in the pragma directive specified by \p RawToken.
+StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
+ Token &RawToken) {
+ RawLex.LexFromRawLexer(RawToken);
+ if (RawToken.is(tok::raw_identifier))
+ PP.LookUpIdentifierInfo(RawToken);
+ if (RawToken.is(tok::identifier))
+ return RawToken.getIdentifierInfo()->getName();
+ return StringRef();
+}
+
+// Expand __has_include and __has_include_next if possible. If there's no
+// definitive answer return false.
+bool InclusionRewriter::HandleHasInclude(
+ FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok,
+ bool &FileExists) {
+ // Lex the opening paren.
+ RawLex.LexFromRawLexer(Tok);
+ if (Tok.isNot(tok::l_paren))
+ return false;
+
+ RawLex.LexFromRawLexer(Tok);
+
+ SmallString<128> FilenameBuffer;
+ StringRef Filename;
+ // Since the raw lexer doesn't give us angle_literals we have to parse them
+ // ourselves.
+ // FIXME: What to do if the file name is a macro?
+ if (Tok.is(tok::less)) {
+ RawLex.LexFromRawLexer(Tok);
+
+ FilenameBuffer += '<';
+ do {
+ if (Tok.is(tok::eod)) // Sanity check.
+ return false;
+
+ if (Tok.is(tok::raw_identifier))
+ PP.LookUpIdentifierInfo(Tok);
+
+ // Get the string piece.
+ SmallVector<char, 128> TmpBuffer;
+ bool Invalid = false;
+ StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid);
+ if (Invalid)
+ return false;
+
+ FilenameBuffer += TmpName;
+
+ RawLex.LexFromRawLexer(Tok);
+ } while (Tok.isNot(tok::greater));
+
+ FilenameBuffer += '>';
+ Filename = FilenameBuffer;
+ } else {
+ if (Tok.isNot(tok::string_literal))
+ return false;
+
+ bool Invalid = false;
+ Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
+ if (Invalid)
+ return false;
+ }
+
+ // Lex the closing paren.
+ RawLex.LexFromRawLexer(Tok);
+ if (Tok.isNot(tok::r_paren))
+ return false;
+
+ // Now ask HeaderInfo if it knows about the header.
+ // FIXME: Subframeworks aren't handled here. Do we care?
+ bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
+ const DirectoryLookup *CurDir;
+ const FileEntry *File = PP.getHeaderSearchInfo().LookupFile(
+ Filename, SourceLocation(), isAngled, nullptr, CurDir,
+ PP.getSourceManager().getFileEntryForID(FileId), nullptr, nullptr,
+ nullptr, false);
+
+ FileExists = File != nullptr;
+ return true;
+}
+
+/// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
+/// and including content of included files recursively.
+bool InclusionRewriter::Process(FileID FileId,
+ SrcMgr::CharacteristicKind FileType)
+{
+ bool Invalid;
+ const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
+ if (Invalid) // invalid inclusion
+ return false;
+ const char *FileName = FromFile.getBufferIdentifier();
+ Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
+ RawLex.SetCommentRetentionState(false);
+
+ StringRef EOL = DetectEOL(FromFile);
+
+ // Per the GNU docs: "1" indicates entering a new file.
+ if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())
+ WriteLineInfo(FileName, 1, FileType, EOL, "");
+ else
+ WriteLineInfo(FileName, 1, FileType, EOL, " 1");
+
+ if (SM.getFileIDSize(FileId) == 0)
+ return false;
+
+ // The next byte to be copied from the source file, which may be non-zero if
+ // the lexer handled a BOM.
+ unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation());
+ assert(SM.getLineNumber(FileId, NextToWrite) == 1);
+ int Line = 1; // The current input file line number.
+
+ Token RawToken;
+ RawLex.LexFromRawLexer(RawToken);
+
+ // TODO: Consider adding a switch that strips possibly unimportant content,
+ // such as comments, to reduce the size of repro files.
+ while (RawToken.isNot(tok::eof)) {
+ if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
+ RawLex.setParsingPreprocessorDirective(true);
+ Token HashToken = RawToken;
+ RawLex.LexFromRawLexer(RawToken);
+ if (RawToken.is(tok::raw_identifier))
+ PP.LookUpIdentifierInfo(RawToken);
+ if (RawToken.getIdentifierInfo() != nullptr) {
+ switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
+ case tok::pp_include:
+ case tok::pp_include_next:
+ case tok::pp_import: {
+ CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
+ Line);
+ if (FileId != PP.getPredefinesFileID())
+ WriteLineInfo(FileName, Line - 1, FileType, EOL, "");
+ StringRef LineInfoExtra;
+ if (const FileChange *Change = FindFileChangeLocation(
+ HashToken.getLocation())) {
+ if (Change->Mod) {
+ WriteImplicitModuleImport(Change->Mod, EOL);
+
+ // else now include and recursively process the file
+ } else if (Process(Change->Id, Change->FileType)) {
+ // and set lineinfo back to this file, if the nested one was
+ // actually included
+ // `2' indicates returning to a file (after having included
+ // another file.
+ LineInfoExtra = " 2";
+ }
+ }
+ // fix up lineinfo (since commented out directive changed line
+ // numbers) for inclusions that were skipped due to header guards
+ WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra);
+ break;
+ }
+ case tok::pp_pragma: {
+ StringRef Identifier = NextIdentifierName(RawLex, RawToken);
+ if (Identifier == "clang" || Identifier == "GCC") {
+ if (NextIdentifierName(RawLex, RawToken) == "system_header") {
+ // keep the directive in, commented out
+ CommentOutDirective(RawLex, HashToken, FromFile, EOL,
+ NextToWrite, Line);
+ // update our own type
+ FileType = SM.getFileCharacteristic(RawToken.getLocation());
+ WriteLineInfo(FileName, Line, FileType, EOL);
+ }
+ } else if (Identifier == "once") {
+ // keep the directive in, commented out
+ CommentOutDirective(RawLex, HashToken, FromFile, EOL,
+ NextToWrite, Line);
+ WriteLineInfo(FileName, Line, FileType, EOL);
+ }
+ break;
+ }
+ case tok::pp_if:
+ case tok::pp_elif: {
+ bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
+ tok::pp_elif);
+ // Rewrite special builtin macros to avoid pulling in host details.
+ do {
+ // Walk over the directive.
+ RawLex.LexFromRawLexer(RawToken);
+ if (RawToken.is(tok::raw_identifier))
+ PP.LookUpIdentifierInfo(RawToken);
+
+ if (RawToken.is(tok::identifier)) {
+ bool HasFile;
+ SourceLocation Loc = RawToken.getLocation();
+
+ // Rewrite __has_include(x)
+ if (RawToken.getIdentifierInfo()->isStr("__has_include")) {
+ if (!HandleHasInclude(FileId, RawLex, nullptr, RawToken,
+ HasFile))
+ continue;
+ // Rewrite __has_include_next(x)
+ } else if (RawToken.getIdentifierInfo()->isStr(
+ "__has_include_next")) {
+ const DirectoryLookup *Lookup = PP.GetCurDirLookup();
+ if (Lookup)
+ ++Lookup;
+
+ if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken,
+ HasFile))
+ continue;
+ } else {
+ continue;
+ }
+ // Replace the macro with (0) or (1), followed by the commented
+ // out macro for reference.
+ OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc),
+ EOL, Line, false);
+ OS << '(' << (int) HasFile << ")/*";
+ OutputContentUpTo(FromFile, NextToWrite,
+ SM.getFileOffset(RawToken.getLocation()) +
+ RawToken.getLength(),
+ EOL, Line, false);
+ OS << "*/";
+ }
+ } while (RawToken.isNot(tok::eod));
+ if (elif) {
+ OutputContentUpTo(FromFile, NextToWrite,
+ SM.getFileOffset(RawToken.getLocation()) +
+ RawToken.getLength(),
+ EOL, Line, /*EnsureNewLine*/ true);
+ WriteLineInfo(FileName, Line, FileType, EOL);
+ }
+ break;
+ }
+ case tok::pp_endif:
+ case tok::pp_else: {
+ // We surround every #include by #if 0 to comment it out, but that
+ // changes line numbers. These are fixed up right after that, but
+ // the whole #include could be inside a preprocessor conditional
+ // that is not processed. So it is necessary to fix the line
+ // numbers one the next line after each #else/#endif as well.
+ RawLex.SetKeepWhitespaceMode(true);
+ do {
+ RawLex.LexFromRawLexer(RawToken);
+ } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
+ OutputContentUpTo(
+ FromFile, NextToWrite,
+ SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(),
+ EOL, Line, /*EnsureNewLine*/ true);
+ WriteLineInfo(FileName, Line, FileType, EOL);
+ RawLex.SetKeepWhitespaceMode(false);
+ }
+ default:
+ break;
+ }
+ }
+ RawLex.setParsingPreprocessorDirective(false);
+ }
+ RawLex.LexFromRawLexer(RawToken);
+ }
+ OutputContentUpTo(FromFile, NextToWrite,
+ SM.getFileOffset(SM.getLocForEndOfFile(FileId)), EOL, Line,
+ /*EnsureNewline*/true);
+ return true;
+}
+
+/// InclusionRewriterInInput - Implement -frewrite-includes mode.
+void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
+ const PreprocessorOutputOptions &Opts) {
+ SourceManager &SM = PP.getSourceManager();
+ InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
+ Opts.ShowLineMarkers);
+ PP.addPPCallbacks(Rewrite);
+ PP.IgnorePragmas();
+
+ // First let the preprocessor process the entire file and call callbacks.
+ // Callbacks will record which #include's were actually performed.
+ PP.EnterMainSourceFile();
+ Token Tok;
+ // Only preprocessor directives matter here, so disable macro expansion
+ // everywhere else as an optimization.
+ // TODO: It would be even faster if the preprocessor could be switched
+ // to a mode where it would parse only preprocessor directives and comments,
+ // nothing else matters for parsing or processing.
+ PP.SetMacroExpansionOnlyInDirectives();
+ do {
+ PP.Lex(Tok);
+ } while (Tok.isNot(tok::eof));
+ Rewrite->setPredefinesBuffer(SM.getBuffer(PP.getPredefinesFileID()));
+ Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User);
+ Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
+ OS->flush();
+}
OpenPOWER on IntegriCloud