6 files changed, 125 insertions, 122 deletions
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 6f7ab0b01fd..6ab8c5d899d 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -323,6 +323,14 @@ class Preprocessor {
   /// to avoid hitting the same error over and over again.
   bool HasReachedMaxIncludeDepth = false;
 
+  /// The number of currently-active calls to Lex.
+  ///
+  /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
+  /// require asking for multiple additional tokens. This counter makes it
+  /// possible for Lex to detect whether it's producing a token for the end
+  /// of phase 4 of translation or for some other situation.
+  unsigned LexLevel = 0;
+
 public:
   struct PreambleSkipInfo {
     SourceLocation HashTokenLoc;
@@ -1244,24 +1252,6 @@ public:
   /// Disable the last EnableBacktrackAtThisPos call.
   void CommitBacktrackedTokens();
 
-  struct CachedTokensRange {
-    CachedTokensTy::size_type Begin, End;
-  };
-
-private:
-  /// A range of cached tokens that should be erased after lexing
-  /// when backtracking requires the erasure of such cached tokens.
-  Optional<CachedTokensRange> CachedTokenRangeToErase;
-
-public:
-  /// Returns the range of cached tokens that were lexed since
-  /// EnableBacktrackAtThisPos() was previously called.
-  CachedTokensRange LastCachedTokenRange();
-
-  /// Erase the range of cached tokens that were lexed since
-  /// EnableBacktrackAtThisPos() was previously called.
-  void EraseCachedTokens(CachedTokensRange TokenRange);
-
   /// Make Preprocessor re-lex the tokens that were lexed since
   /// EnableBacktrackAtThisPos() was previously called.
   void Backtrack();
@@ -1353,6 +1343,7 @@ public:
   /// tokens after phase 5.  As such, it is equivalent to using
   /// 'Lex', not 'LexUnexpandedToken'.
   const Token &LookAhead(unsigned N) {
+    assert(LexLevel == 0 && "cannot use lookahead while lexing");
     if (CachedLexPos + N < CachedTokens.size())
       return CachedTokens[CachedLexPos+N];
     else
@@ -1379,8 +1370,16 @@ public:
   /// If BackTrack() is called afterwards, the token will remain at the
   /// insertion point.
   void EnterToken(const Token &Tok) {
-    EnterCachingLexMode();
-    CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
+    if (LexLevel) {
+      // It's not correct in general to enter caching lex mode while in the
+      // middle of a nested lexing action.
+      auto TokCopy = llvm::make_unique<Token[]>(1);
+      TokCopy[0] = Tok;
+      EnterTokenStream(std::move(TokCopy), 1, true);
+    } else {
+      EnterCachingLexMode();
+      CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
+    }
   }
 
   /// We notify the Preprocessor that if it is caching tokens (because
@@ -2062,6 +2061,7 @@ private:
   }
 
   void EnterCachingLexMode();
+  void EnterCachingLexModeUnchecked();
 
   void ExitCachingLexMode() {
     if (InCachingLexMode())
diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp
index 929d35ad51b..90238de8a48 100644
--- a/clang/lib/Lex/PPCaching.cpp
+++ b/clang/lib/Lex/PPCaching.cpp
@@ -23,6 +23,7 @@ using namespace clang;
 // be called multiple times and CommitBacktrackedTokens/Backtrack calls will
 // be combined with the EnableBacktrackAtThisPos calls in reverse order.
 void Preprocessor::EnableBacktrackAtThisPos() {
+  assert(LexLevel == 0 && "cannot use lookahead while lexing");
   BacktrackPositions.push_back(CachedLexPos);
   EnterCachingLexMode();
 }
@@ -34,29 +35,6 @@ void Preprocessor::CommitBacktrackedTokens() {
   BacktrackPositions.pop_back();
 }
 
-Preprocessor::CachedTokensRange Preprocessor::LastCachedTokenRange() {
-  assert(isBacktrackEnabled());
-  auto PrevCachedLexPos = BacktrackPositions.back();
-  return CachedTokensRange{PrevCachedLexPos, CachedLexPos};
-}
-
-void Preprocessor::EraseCachedTokens(CachedTokensRange TokenRange) {
-  assert(TokenRange.Begin <= TokenRange.End);
-  if (CachedLexPos == TokenRange.Begin && TokenRange.Begin != TokenRange.End) {
-    // We have backtracked to the start of the token range as we want to consume
-    // them again. Erase the tokens only after consuming then.
-    assert(!CachedTokenRangeToErase);
-    CachedTokenRangeToErase = TokenRange;
-    return;
-  }
-  // The cached tokens were committed, so they should be erased now.
-  assert(TokenRange.End == CachedLexPos);
-  CachedTokens.erase(CachedTokens.begin() + TokenRange.Begin,
-                     CachedTokens.begin() + TokenRange.End);
-  CachedLexPos = TokenRange.Begin;
-  ExitCachingLexMode();
-}
-
 // Make Preprocessor re-lex the tokens that were lexed since
 // EnableBacktrackAtThisPos() was previously called.
 void Preprocessor::Backtrack() {
@@ -71,15 +49,12 @@ void Preprocessor::CachingLex(Token &Result) {
   if (!InCachingLexMode())
     return;
 
+  // The assert in EnterCachingLexMode should prevent this from happening.
+  assert(LexLevel == 1 &&
+         "should not use token caching within the preprocessor");
+
   if (CachedLexPos < CachedTokens.size()) {
     Result = CachedTokens[CachedLexPos++];
-    // Erase the some of the cached tokens after they are consumed when
-    // asked to do so.
-    if (CachedTokenRangeToErase &&
-        CachedTokenRangeToErase->End == CachedLexPos) {
-      EraseCachedTokens(*CachedTokenRangeToErase);
-      CachedTokenRangeToErase = None;
-    }
     return;
   }
 
@@ -88,14 +63,14 @@ void Preprocessor::CachingLex(Token &Result) {
 
   if (isBacktrackEnabled()) {
     // Cache the lexed token.
-    EnterCachingLexMode();
+    EnterCachingLexModeUnchecked();
     CachedTokens.push_back(Result);
     ++CachedLexPos;
     return;
   }
 
   if (CachedLexPos < CachedTokens.size()) {
-    EnterCachingLexMode();
+    EnterCachingLexModeUnchecked();
   } else {
     // All cached tokens were consumed.
     CachedTokens.clear();
@@ -104,11 +79,23 @@ void Preprocessor::CachingLex(Token &Result) {
 }
 
 void Preprocessor::EnterCachingLexMode() {
+  // The caching layer sits on top of all the other lexers, so it's incorrect
+  // to cache tokens while inside a nested lex action. The cached tokens would
+  // be retained after returning to the enclosing lex action and, at best,
+  // would appear at the wrong position in the token stream.
+  assert(LexLevel == 0 &&
+         "entered caching lex mode while lexing something else");
+
   if (InCachingLexMode()) {
     assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind");
     return;
   }
 
+  EnterCachingLexModeUnchecked();
+}
+
+void Preprocessor::EnterCachingLexModeUnchecked() {
+  assert(CurLexerKind != CLK_CachingLexer && "already in caching lex mode");
   PushIncludeMacroStack();
   CurLexerKind = CLK_CachingLexer;
 }
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 3aae6ae245f..216ae39632d 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -829,10 +829,10 @@ void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
       return HandleIncludeDirective(HashLoc, Result);
     }
     if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
-      Token P = LookAhead(0);
-      auto *II = P.getIdentifierInfo();
+      Lex(Result);
+      auto *II = Result.getIdentifierInfo();
       if (II && II->getName() == "hdrstop")
-        return HandlePragmaDirective(HashLoc, PIK_HashPragma);
+        return HandlePragmaHdrstop(Result);
     }
   }
   DiscardUntilEndOfDirective();
diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
index 286b863b354..1c87f3ecc09 100644
--- a/clang/lib/Lex/Pragma.cpp
+++ b/clang/lib/Lex/Pragma.cpp
@@ -144,84 +144,72 @@ void Preprocessor::HandlePragmaDirective(SourceLocation IntroducerLoc,
     DiscardUntilEndOfDirective();
 }
 
-namespace {
-
-/// Helper class for \see Preprocessor::Handle_Pragma.
-class LexingFor_PragmaRAII {
-  Preprocessor &PP;
-  bool InMacroArgPreExpansion;
-  bool Failed = false;
-  Token &OutTok;
-  Token PragmaTok;
-
-public:
-  LexingFor_PragmaRAII(Preprocessor &PP, bool InMacroArgPreExpansion,
-                       Token &Tok)
-      : PP(PP), InMacroArgPreExpansion(InMacroArgPreExpansion), OutTok(Tok) {
-    if (InMacroArgPreExpansion) {
-      PragmaTok = OutTok;
-      PP.EnableBacktrackAtThisPos();
-    }
-  }
-
-  ~LexingFor_PragmaRAII() {
-    if (InMacroArgPreExpansion) {
-      // When committing/backtracking the cached pragma tokens in a macro
-      // argument pre-expansion we want to ensure that either the tokens which
-      // have been committed will be removed from the cache or that the tokens
-      // over which we just backtracked won't remain in the cache after they're
-      // consumed and that the caching will stop after consuming them.
-      // Otherwise the caching will interfere with the way macro expansion
-      // works, because we will continue to cache tokens after consuming the
-      // backtracked tokens, which shouldn't happen when we're dealing with
-      // macro argument pre-expansion.
-      auto CachedTokenRange = PP.LastCachedTokenRange();
-      if (Failed) {
-        PP.CommitBacktrackedTokens();
-      } else {
-        PP.Backtrack();
-        OutTok = PragmaTok;
-      }
-      PP.EraseCachedTokens(CachedTokenRange);
-    }
-  }
-
-  void failed() {
-    Failed = true;
-  }
-};
-
-} // namespace
-
 /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
 /// return the first token after the directive.  The _Pragma token has just
 /// been read into 'Tok'.
 void Preprocessor::Handle_Pragma(Token &Tok) {
-  // This works differently if we are pre-expanding a macro argument.
-  // In that case we don't actually "activate" the pragma now, we only lex it
-  // until we are sure it is lexically correct and then we backtrack so that
-  // we activate the pragma whenever we encounter the tokens again in the token
-  // stream. This ensures that we will activate it in the correct location
-  // or that we will ignore it if it never enters the token stream, e.g:
+  // C11 6.10.3.4/3:
+  //   all pragma unary operator expressions within [a completely
+  //   macro-replaced preprocessing token sequence] are [...] processed [after
+  //   rescanning is complete]
   //
-  //     #define EMPTY(x)
-  //     #define INACTIVE(x) EMPTY(x)
-  //     INACTIVE(_Pragma("clang diagnostic ignored \"-Wconversion\""))
+  // This means that we execute _Pragma operators in two cases:
+  //
+  //  1) on token sequences that would otherwise be produced as the output of
+  //     phase 4 of preprocessing, and
+  //  2) on token sequences formed as the macro-replaced token sequence of a
+  //     macro argument
+  //
+  // Case #2 appears to be a wording bug: only _Pragmas that would survive to
+  // the end of phase 4 should actually be executed. Discussion on the WG14
+  // mailing list suggests that a _Pragma operator is notionally checked early,
+  // but only pragmas that survive to the end of phase 4 should be executed.
+  //
+  // In Case #2, we check the syntax now, but then put the tokens back into the
+  // token stream for later consumption.
+
+  struct TokenCollector {
+    Preprocessor &Self;
+    bool Collect;
+    SmallVector<Token, 3> Tokens;
+    Token &Tok;
+
+    void lex() {
+      if (Collect)
+        Tokens.push_back(Tok);
+      Self.Lex(Tok);
+    }
+
+    void revert() {
+      assert(Collect && "did not collect tokens");
+      assert(!Tokens.empty() && "collected unexpected number of tokens");
+
+      // Push the ( "string" ) tokens into the token stream.
+      auto Toks = llvm::make_unique<Token[]>(Tokens.size());
+      std::copy(Tokens.begin() + 1, Tokens.end(), Toks.get());
+      Toks[Tokens.size() - 1] = Tok;
+      Self.EnterTokenStream(std::move(Toks), Tokens.size(),
+                            /*DisableMacroExpansion*/ true);
+
+      // ... and return the _Pragma token unchanged.
+      Tok = *Tokens.begin();
+    }
+  };
 
-  LexingFor_PragmaRAII _PragmaLexing(*this, InMacroArgPreExpansion, Tok);
+  TokenCollector Toks = {*this, InMacroArgPreExpansion, {}, Tok};
 
   // Remember the pragma token location.
   SourceLocation PragmaLoc = Tok.getLocation();
 
   // Read the '('.
-  Lex(Tok);
+  Toks.lex();
   if (Tok.isNot(tok::l_paren)) {
     Diag(PragmaLoc, diag::err__Pragma_malformed);
-    return _PragmaLexing.failed();
+    return;
   }
 
   // Read the '"..."'.
-  Lex(Tok);
+  Toks.lex();
   if (!tok::isStringLiteral(Tok.getKind())) {
     Diag(PragmaLoc, diag::err__Pragma_malformed);
     // Skip bad tokens, and the ')', if present.
@@ -233,7 +221,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
       Lex(Tok);
     if (Tok.is(tok::r_paren))
       Lex(Tok);
-    return _PragmaLexing.failed();
+    return;
   }
 
   if (Tok.hasUDSuffix()) {
@@ -242,21 +230,24 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
     Lex(Tok);
     if (Tok.is(tok::r_paren))
       Lex(Tok);
-    return _PragmaLexing.failed();
+    return;
   }
 
   // Remember the string.
   Token StrTok = Tok;
 
   // Read the ')'.
-  Lex(Tok);
+  Toks.lex();
   if (Tok.isNot(tok::r_paren)) {
     Diag(PragmaLoc, diag::err__Pragma_malformed);
-    return _PragmaLexing.failed();
+    return;
   }
 
-  if (InMacroArgPreExpansion)
+  // If we're expanding a macro argument, put the tokens back.
+  if (InMacroArgPreExpansion) {
+    Toks.revert();
     return;
+  }
 
   SourceLocation RParenLoc = Tok.getLocation();
   std::string StrVal = getSpelling(StrTok);
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index ff040a28d4b..d1c02c942da 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -862,6 +862,8 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
 }
 
 void Preprocessor::Lex(Token &Result) {
+  ++LexLevel;
+
   // We loop here until a lex function returns a token; this avoids recursion.
   bool ReturnedToken;
   do {
@@ -893,6 +895,7 @@ void Preprocessor::Lex(Token &Result) {
   }
 
   LastTokenWasAt = Result.is(tok::at);
+  --LexLevel;
 }
 
 /// Lex a header-name token (including one formed from header-name-tokens if
diff --git a/clang/test/Preprocessor/_Pragma-in-macro-arg.cpp b/clang/test/Preprocessor/_Pragma-in-macro-arg.cpp
new file mode 100644
index 00000000000..0d2dcd05124
--- /dev/null
+++ b/clang/test/Preprocessor/_Pragma-in-macro-arg.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 %s -verify -Wconversion
+
+#define P(X) _Pragma(#X)
+#define V(X) X
+
+#define X \
+  P(clang diagnostic push) \
+  P(clang diagnostic ignored "-Wconversion") \
+  ) = 1.2; \
+  P(clang diagnostic pop)
+
+void f() {
+  int a = 1.2; // expected-warning {{changes value}}
+
+  // Note, we intentionally enter a tentatively-parsed context here to trigger
+  // regular use of lookahead. This would go wrong if _Pragma checking in macro
+  // argument pre-expansion also tries to use token lookahead.
+  int (b
+  V(X)
+
+  int c = 1.2; // expected-warning {{changes value}}
+}