1 files changed, 547 insertions, 0 deletions
diff --git a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
new file mode 100644
index 00000000000..aa7017baee2
--- /dev/null
+++ b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -0,0 +1,547 @@
+//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This code rewrites include invocations into their expansions.  This gives you
+// a file with all included files merged into it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Rewrite/Frontend/Rewriters.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/PreprocessorOutputOptions.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/Pragma.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+using namespace llvm;
+
+namespace {
+
+class InclusionRewriter : public PPCallbacks {
+  /// Information about which #includes were actually performed,
+  /// created by preprocessor callbacks.
+  struct FileChange {
+    const Module *Mod;
+    SourceLocation From;
+    FileID Id;
+    SrcMgr::CharacteristicKind FileType;
+    FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
+    }
+  };
+  Preprocessor &PP; ///< Used to find inclusion directives.
+  SourceManager &SM; ///< Used to read and manage source files.
+  raw_ostream &OS; ///< The destination stream for rewritten contents.
+  const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines.
+  bool ShowLineMarkers; ///< Show #line markers.
+  bool UseLineDirective; ///< Use of line directives or line markers.
+  typedef std::map<unsigned, FileChange> FileChangeMap;
+  FileChangeMap FileChanges; ///< Tracks which files were included where.
+  /// Used transitively for building up the FileChanges mapping over the
+  /// various \c PPCallbacks callbacks.
+  FileChangeMap::iterator LastInsertedFileChange;
+public:
+  InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
+  bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
+  void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) {
+    PredefinesBuffer = Buf;
+  }
+private:
+  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+                   SrcMgr::CharacteristicKind FileType,
+                   FileID PrevFID) override;
+  void FileSkipped(const FileEntry &ParentFile, const Token &FilenameTok,
+                   SrcMgr::CharacteristicKind FileType) override;
+  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
+                          StringRef FileName, bool IsAngled,
+                          CharSourceRange FilenameRange, const FileEntry *File,
+                          StringRef SearchPath, StringRef RelativePath,
+                          const Module *Imported) override;
+  void WriteLineInfo(const char *Filename, int Line,
+                     SrcMgr::CharacteristicKind FileType,
+                     StringRef EOL, StringRef Extra = StringRef());
+  void WriteImplicitModuleImport(const Module *Mod, StringRef EOL);
+  void OutputContentUpTo(const MemoryBuffer &FromFile,
+                         unsigned &WriteFrom, unsigned WriteTo,
+                         StringRef EOL, int &lines,
+                         bool EnsureNewline);
+  void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
+                           const MemoryBuffer &FromFile, StringRef EOL,
+                           unsigned &NextToWrite, int &Lines);
+  bool HandleHasInclude(FileID FileId, Lexer &RawLex,
+                        const DirectoryLookup *Lookup, Token &Tok,
+                        bool &FileExists);
+  const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
+  StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
+};
+
+}  // end anonymous namespace
+
+/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
+InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
+                                     bool ShowLineMarkers)
+  : PP(PP), SM(PP.getSourceManager()), OS(OS), PredefinesBuffer(nullptr),
+    ShowLineMarkers(ShowLineMarkers),
+    LastInsertedFileChange(FileChanges.end()) {
+  // If we're in microsoft mode, use normal #line instead of line markers.
+  UseLineDirective = PP.getLangOpts().MicrosoftExt;
+}
+
+/// Write appropriate line information as either #line directives or GNU line
+/// markers depending on what mode we're in, including the \p Filename and
+/// \p Line we are located at, using the specified \p EOL line separator, and
+/// any \p Extra context specifiers in GNU line directives.
+void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
+                                      SrcMgr::CharacteristicKind FileType,
+                                      StringRef EOL, StringRef Extra) {
+  if (!ShowLineMarkers)
+    return;
+  if (UseLineDirective) {
+    OS << "#line" << ' ' << Line << ' ' << '"';
+    OS.write_escaped(Filename);
+    OS << '"';
+  } else {
+    // Use GNU linemarkers as described here:
+    // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
+    OS << '#' << ' ' << Line << ' ' << '"';
+    OS.write_escaped(Filename);
+    OS << '"';
+    if (!Extra.empty())
+      OS << Extra;
+    if (FileType == SrcMgr::C_System)
+      // "`3' This indicates that the following text comes from a system header
+      // file, so certain warnings should be suppressed."
+      OS << " 3";
+    else if (FileType == SrcMgr::C_ExternCSystem)
+      // as above for `3', plus "`4' This indicates that the following text
+      // should be treated as being wrapped in an implicit extern "C" block."
+      OS << " 3 4";
+  }
+  OS << EOL;
+}
+
+void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod,
+                                                  StringRef EOL) {
+  OS << "@import " << Mod->getFullModuleName() << ";"
+     << " /* clang -frewrite-includes: implicit import */" << EOL;
+}
+
+/// FileChanged - Whenever the preprocessor enters or exits a #include file
+/// it invokes this handler.
+void InclusionRewriter::FileChanged(SourceLocation Loc,
+                                    FileChangeReason Reason,
+                                    SrcMgr::CharacteristicKind NewFileType,
+                                    FileID) {
+  if (Reason != EnterFile)
+    return;
+  if (LastInsertedFileChange == FileChanges.end())
+    // we didn't reach this file (eg: the main file) via an inclusion directive
+    return;
+  LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
+  LastInsertedFileChange->second.FileType = NewFileType;
+  LastInsertedFileChange = FileChanges.end();
+}
+
+/// Called whenever an inclusion is skipped due to canonical header protection
+/// macros.
+void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
+                                    const Token &/*FilenameTok*/,
+                                    SrcMgr::CharacteristicKind /*FileType*/) {
+  assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
+    "found via an inclusion directive, was skipped");
+  FileChanges.erase(LastInsertedFileChange);
+  LastInsertedFileChange = FileChanges.end();
+}
+
+/// This should be called whenever the preprocessor encounters include
+/// directives. It does not say whether the file has been included, but it
+/// provides more information about the directive (hash location instead
+/// of location inside the included file). It is assumed that the matching
+/// FileChanged() or FileSkipped() is called after this.
+void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
+                                           const Token &/*IncludeTok*/,
+                                           StringRef /*FileName*/,
+                                           bool /*IsAngled*/,
+                                           CharSourceRange /*FilenameRange*/,
+                                           const FileEntry * /*File*/,
+                                           StringRef /*SearchPath*/,
+                                           StringRef /*RelativePath*/,
+                                           const Module *Imported) {
+  assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
+    "directive was found before the previous one was processed");
+  std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
+    std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
+  assert(p.second && "Unexpected revisitation of the same include directive");
+  if (!Imported)
+    LastInsertedFileChange = p.first;
+}
+
+/// Simple lookup for a SourceLocation (specifically one denoting the hash in
+/// an inclusion directive) in the map of inclusion information, FileChanges.
+const InclusionRewriter::FileChange *
+InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
+  FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
+  if (I != FileChanges.end())
+    return &I->second;
+  return nullptr;
+}
+
+/// Detect the likely line ending style of \p FromFile by examining the first
+/// newline found within it.
+static StringRef DetectEOL(const MemoryBuffer &FromFile) {
+  // detect what line endings the file uses, so that added content does not mix
+  // the style
+  const char *Pos = strchr(FromFile.getBufferStart(), '\n');
+  if (!Pos)
+    return "\n";
+  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
+    return "\n\r";
+  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
+    return "\r\n";
+  return "\n";
+}
+
+/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
+/// \p WriteTo - 1.
+void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
+                                          unsigned &WriteFrom, unsigned WriteTo,
+                                          StringRef EOL, int &Line,
+                                          bool EnsureNewline) {
+  if (WriteTo <= WriteFrom)
+    return;
+  if (&FromFile == PredefinesBuffer) {
+    // Ignore the #defines of the predefines buffer.
+    WriteFrom = WriteTo;
+    return;
+  }
+  OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
+  // count lines manually, it's faster than getPresumedLoc()
+  Line += std::count(FromFile.getBufferStart() + WriteFrom,
+                     FromFile.getBufferStart() + WriteTo, '\n');
+  if (EnsureNewline) {
+    char LastChar = FromFile.getBufferStart()[WriteTo - 1];
+    if (LastChar != '\n' && LastChar != '\r')
+      OS << EOL;
+  }
+  WriteFrom = WriteTo;
+}
+
+/// Print characters from \p FromFile starting at \p NextToWrite up until the
+/// inclusion directive at \p StartToken, then print out the inclusion
+/// inclusion directive disabled by a #if directive, updating \p NextToWrite
+/// and \p Line to track the number of source lines visited and the progress
+/// through the \p FromFile buffer.
+void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
+                                            const Token &StartToken,
+                                            const MemoryBuffer &FromFile,
+                                            StringRef EOL,
+                                            unsigned &NextToWrite, int &Line) {
+  OutputContentUpTo(FromFile, NextToWrite,
+    SM.getFileOffset(StartToken.getLocation()), EOL, Line, false);
+  Token DirectiveToken;
+  do {
+    DirectiveLex.LexFromRawLexer(DirectiveToken);
+  } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
+  if (&FromFile == PredefinesBuffer) {
+    // OutputContentUpTo() would not output anything anyway.
+    return;
+  }
+  OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
+  OutputContentUpTo(FromFile, NextToWrite,
+    SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
+    EOL, Line, true);
+  OS << "#endif /* expanded by -frewrite-includes */" << EOL;
+}
+
+/// Find the next identifier in the pragma directive specified by \p RawToken.
+StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
+                                                Token &RawToken) {
+  RawLex.LexFromRawLexer(RawToken);
+  if (RawToken.is(tok::raw_identifier))
+    PP.LookUpIdentifierInfo(RawToken);
+  if (RawToken.is(tok::identifier))
+    return RawToken.getIdentifierInfo()->getName();
+  return StringRef();
+}
+
+// Expand __has_include and __has_include_next if possible. If there's no
+// definitive answer return false.
+bool InclusionRewriter::HandleHasInclude(
+    FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok,
+    bool &FileExists) {
+  // Lex the opening paren.
+  RawLex.LexFromRawLexer(Tok);
+  if (Tok.isNot(tok::l_paren))
+    return false;
+
+  RawLex.LexFromRawLexer(Tok);
+
+  SmallString<128> FilenameBuffer;
+  StringRef Filename;
+  // Since the raw lexer doesn't give us angle_literals we have to parse them
+  // ourselves.
+  // FIXME: What to do if the file name is a macro?
+  if (Tok.is(tok::less)) {
+    RawLex.LexFromRawLexer(Tok);
+
+    FilenameBuffer += '<';
+    do {
+      if (Tok.is(tok::eod)) // Sanity check.
+        return false;
+
+      if (Tok.is(tok::raw_identifier))
+        PP.LookUpIdentifierInfo(Tok);
+
+      // Get the string piece.
+      SmallVector<char, 128> TmpBuffer;
+      bool Invalid = false;
+      StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid);
+      if (Invalid)
+        return false;
+
+      FilenameBuffer += TmpName;
+
+      RawLex.LexFromRawLexer(Tok);
+    } while (Tok.isNot(tok::greater));
+
+    FilenameBuffer += '>';
+    Filename = FilenameBuffer;
+  } else {
+    if (Tok.isNot(tok::string_literal))
+      return false;
+
+    bool Invalid = false;
+    Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
+    if (Invalid)
+      return false;
+  }
+
+  // Lex the closing paren.
+  RawLex.LexFromRawLexer(Tok);
+  if (Tok.isNot(tok::r_paren))
+    return false;
+
+  // Now ask HeaderInfo if it knows about the header.
+  // FIXME: Subframeworks aren't handled here. Do we care?
+  bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
+  const DirectoryLookup *CurDir;
+  const FileEntry *File = PP.getHeaderSearchInfo().LookupFile(
+      Filename, SourceLocation(), isAngled, nullptr, CurDir,
+      PP.getSourceManager().getFileEntryForID(FileId), nullptr, nullptr,
+      nullptr, false);
+
+  FileExists = File != nullptr;
+  return true;
+}
+
+/// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
+/// and including content of included files recursively.
+bool InclusionRewriter::Process(FileID FileId,
+                                SrcMgr::CharacteristicKind FileType)
+{
+  bool Invalid;
+  const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
+  if (Invalid) // invalid inclusion
+    return false;
+  const char *FileName = FromFile.getBufferIdentifier();
+  Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
+  RawLex.SetCommentRetentionState(false);
+
+  StringRef EOL = DetectEOL(FromFile);
+
+  // Per the GNU docs: "1" indicates entering a new file.
+  if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())
+    WriteLineInfo(FileName, 1, FileType, EOL, "");
+  else
+    WriteLineInfo(FileName, 1, FileType, EOL, " 1");
+
+  if (SM.getFileIDSize(FileId) == 0)
+    return false;
+
+  // The next byte to be copied from the source file, which may be non-zero if
+  // the lexer handled a BOM.
+  unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation());
+  assert(SM.getLineNumber(FileId, NextToWrite) == 1);
+  int Line = 1; // The current input file line number.
+
+  Token RawToken;
+  RawLex.LexFromRawLexer(RawToken);
+
+  // TODO: Consider adding a switch that strips possibly unimportant content,
+  // such as comments, to reduce the size of repro files.
+  while (RawToken.isNot(tok::eof)) {
+    if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
+      RawLex.setParsingPreprocessorDirective(true);
+      Token HashToken = RawToken;
+      RawLex.LexFromRawLexer(RawToken);
+      if (RawToken.is(tok::raw_identifier))
+        PP.LookUpIdentifierInfo(RawToken);
+      if (RawToken.getIdentifierInfo() != nullptr) {
+        switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
+          case tok::pp_include:
+          case tok::pp_include_next:
+          case tok::pp_import: {
+            CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
+              Line);
+            if (FileId != PP.getPredefinesFileID())
+              WriteLineInfo(FileName, Line - 1, FileType, EOL, "");
+            StringRef LineInfoExtra;
+            if (const FileChange *Change = FindFileChangeLocation(
+                HashToken.getLocation())) {
+              if (Change->Mod) {
+                WriteImplicitModuleImport(Change->Mod, EOL);
+
+              // else now include and recursively process the file
+              } else if (Process(Change->Id, Change->FileType)) {
+                // and set lineinfo back to this file, if the nested one was
+                // actually included
+                // `2' indicates returning to a file (after having included
+                // another file.
+                LineInfoExtra = " 2";
+              }
+            }
+            // fix up lineinfo (since commented out directive changed line
+            // numbers) for inclusions that were skipped due to header guards
+            WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra);
+            break;
+          }
+          case tok::pp_pragma: {
+            StringRef Identifier = NextIdentifierName(RawLex, RawToken);
+            if (Identifier == "clang" || Identifier == "GCC") {
+              if (NextIdentifierName(RawLex, RawToken) == "system_header") {
+                // keep the directive in, commented out
+                CommentOutDirective(RawLex, HashToken, FromFile, EOL,
+                  NextToWrite, Line);
+                // update our own type
+                FileType = SM.getFileCharacteristic(RawToken.getLocation());
+                WriteLineInfo(FileName, Line, FileType, EOL);
+              }
+            } else if (Identifier == "once") {
+              // keep the directive in, commented out
+              CommentOutDirective(RawLex, HashToken, FromFile, EOL,
+                NextToWrite, Line);
+              WriteLineInfo(FileName, Line, FileType, EOL);
+            }
+            break;
+          }
+          case tok::pp_if:
+          case tok::pp_elif: {
+            bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
+                         tok::pp_elif);
+            // Rewrite special builtin macros to avoid pulling in host details.
+            do {
+              // Walk over the directive.
+              RawLex.LexFromRawLexer(RawToken);
+              if (RawToken.is(tok::raw_identifier))
+                PP.LookUpIdentifierInfo(RawToken);
+
+              if (RawToken.is(tok::identifier)) {
+                bool HasFile;
+                SourceLocation Loc = RawToken.getLocation();
+
+                // Rewrite __has_include(x)
+                if (RawToken.getIdentifierInfo()->isStr("__has_include")) {
+                  if (!HandleHasInclude(FileId, RawLex, nullptr, RawToken,
+                                        HasFile))
+                    continue;
+                  // Rewrite __has_include_next(x)
+                } else if (RawToken.getIdentifierInfo()->isStr(
+                               "__has_include_next")) {
+                  const DirectoryLookup *Lookup = PP.GetCurDirLookup();
+                  if (Lookup)
+                    ++Lookup;
+
+                  if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken,
+                                        HasFile))
+                    continue;
+                } else {
+                  continue;
+                }
+                // Replace the macro with (0) or (1), followed by the commented
+                // out macro for reference.
+                OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc),
+                                  EOL, Line, false);
+                OS << '(' << (int) HasFile << ")/*";
+                OutputContentUpTo(FromFile, NextToWrite,
+                                  SM.getFileOffset(RawToken.getLocation()) +
+                                  RawToken.getLength(),
+                                  EOL, Line, false);
+                OS << "*/";
+              }
+            } while (RawToken.isNot(tok::eod));
+            if (elif) {
+              OutputContentUpTo(FromFile, NextToWrite,
+                                SM.getFileOffset(RawToken.getLocation()) +
+                                    RawToken.getLength(),
+                                EOL, Line, /*EnsureNewLine*/ true);
+              WriteLineInfo(FileName, Line, FileType, EOL);
+            }
+            break;
+          }
+          case tok::pp_endif:
+          case tok::pp_else: {
+            // We surround every #include by #if 0 to comment it out, but that
+            // changes line numbers. These are fixed up right after that, but
+            // the whole #include could be inside a preprocessor conditional
+            // that is not processed. So it is necessary to fix the line
+            // numbers one the next line after each #else/#endif as well.
+            RawLex.SetKeepWhitespaceMode(true);
+            do {
+              RawLex.LexFromRawLexer(RawToken);
+            } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
+            OutputContentUpTo(
+                FromFile, NextToWrite,
+                SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(),
+                EOL, Line, /*EnsureNewLine*/ true);
+            WriteLineInfo(FileName, Line, FileType, EOL);
+            RawLex.SetKeepWhitespaceMode(false);
+          }
+          default:
+            break;
+        }
+      }
+      RawLex.setParsingPreprocessorDirective(false);
+    }
+    RawLex.LexFromRawLexer(RawToken);
+  }
+  OutputContentUpTo(FromFile, NextToWrite,
+    SM.getFileOffset(SM.getLocForEndOfFile(FileId)), EOL, Line,
+    /*EnsureNewline*/true);
+  return true;
+}
+
+/// InclusionRewriterInInput - Implement -frewrite-includes mode.
+void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
+                                   const PreprocessorOutputOptions &Opts) {
+  SourceManager &SM = PP.getSourceManager();
+  InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
+                                                     Opts.ShowLineMarkers);
+  PP.addPPCallbacks(Rewrite);
+  PP.IgnorePragmas();
+
+  // First let the preprocessor process the entire file and call callbacks.
+  // Callbacks will record which #include's were actually performed.
+  PP.EnterMainSourceFile();
+  Token Tok;
+  // Only preprocessor directives matter here, so disable macro expansion
+  // everywhere else as an optimization.
+  // TODO: It would be even faster if the preprocessor could be switched
+  // to a mode where it would parse only preprocessor directives and comments,
+  // nothing else matters for parsing or processing.
+  PP.SetMacroExpansionOnlyInDirectives();
+  do {
+    PP.Lex(Tok);
+  } while (Tok.isNot(tok::eof));
+  Rewrite->setPredefinesBuffer(SM.getBuffer(PP.getPredefinesFileID()));
+  Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User);
+  Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
+  OS->flush();
+}