7 files changed, 150 insertions, 54 deletions
diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index 5e3cfd2b3c1..9657ee4f1a1 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -417,7 +417,11 @@ public:
                                         unsigned Offset = 0);
 
   /// \brief Retrieve the memory buffer associated with the given file.
-  const llvm::MemoryBuffer *getMemoryBufferForFile(const FileEntry *File);
+  ///
+  /// \param Invalid If non-NULL, will be set \c true if an error
+  /// occurs while retrieving the memory buffer.
+  const llvm::MemoryBuffer *getMemoryBufferForFile(const FileEntry *File,
+                                                   bool *Invalid = 0);
 
   /// \brief Override the contents of the given source file by providing an
   /// already-allocated buffer.
@@ -438,8 +442,9 @@ public:
   /// getBuffer - Return the buffer for the specified FileID. If there is an
   /// error opening this buffer the first time, this manufactures a temporary
   /// buffer and returns a non-empty error string.
-  const llvm::MemoryBuffer *getBuffer(FileID FID) const {
-    return getSLocEntry(FID).getFile().getContentCache()->getBuffer(Diag);
+  const llvm::MemoryBuffer *getBuffer(FileID FID, bool *Invalid = 0) const {
+    return getSLocEntry(FID).getFile().getContentCache()->getBuffer(Diag,
+                                                                    Invalid);
   }
 
   /// getFileEntryForID - Returns the FileEntry record for the provided FileID.
@@ -571,31 +576,37 @@ public:
 
   /// getCharacterData - Return a pointer to the start of the specified location
   /// in the appropriate spelling MemoryBuffer.
-  const char *getCharacterData(SourceLocation SL) const;
+  ///
+  /// \param Invalid If non-NULL, will be set \c true if an error occurs.
+  const char *getCharacterData(SourceLocation SL, bool *Invalid = 0) const;
 
   /// getColumnNumber - Return the column # for the specified file position.
   /// This is significantly cheaper to compute than the line number.  This
   /// returns zero if the column number isn't known.  This may only be called on
   /// a file sloc, so you must choose a spelling or instantiation location
   /// before calling this method.
-  unsigned getColumnNumber(FileID FID, unsigned FilePos) const;
-  unsigned getSpellingColumnNumber(SourceLocation Loc) const;
-  unsigned getInstantiationColumnNumber(SourceLocation Loc) const;
+  unsigned getColumnNumber(FileID FID, unsigned FilePos, 
+                           bool *Invalid = 0) const;
+  unsigned getSpellingColumnNumber(SourceLocation Loc,
+                                   bool *Invalid = 0) const;
+  unsigned getInstantiationColumnNumber(SourceLocation Loc,
+                                        bool *Invalid = 0) const;
 
 
   /// getLineNumber - Given a SourceLocation, return the spelling line number
   /// for the position indicated.  This requires building and caching a table of
   /// line offsets for the MemoryBuffer, so this is not cheap: use only when
   /// about to emit a diagnostic.
-  unsigned getLineNumber(FileID FID, unsigned FilePos) const;
+  unsigned getLineNumber(FileID FID, unsigned FilePos, bool *Invalid = 0) const;
 
-  unsigned getInstantiationLineNumber(SourceLocation Loc) const;
-  unsigned getSpellingLineNumber(SourceLocation Loc) const;
+  unsigned getInstantiationLineNumber(SourceLocation Loc, 
+                                      bool *Invalid = 0) const;
+  unsigned getSpellingLineNumber(SourceLocation Loc, bool *Invalid = 0) const;
 
   /// Return the filename or buffer identifier of the buffer the location is in.
   /// Note that this name does not respect #line directives.  Use getPresumedLoc
   /// for normal clients.
-  const char *getBufferName(SourceLocation Loc) const;
+  const char *getBufferName(SourceLocation Loc, bool *Invalid = 0) const;
 
   /// getFileCharacteristic - return the file characteristic of the specified
   /// source location, indicating whether this is a normal file, a system
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index e29ece1da64..2b27a06070f 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -547,7 +547,9 @@ public:
   /// after trigraph expansion and escaped-newline folding.  In particular, this
   /// wants to get the true, uncanonicalized, spelling of things like digraphs
   /// UCNs, etc.
-  std::string getSpelling(const Token &Tok) const;
+  ///
+  /// \param Invalid If non-NULL, will be set \c true if an error occurs.
+  std::string getSpelling(const Token &Tok, bool *Invalid = 0) const;
 
   /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
   /// token is the characters used to represent the token in the source file
@@ -556,7 +558,8 @@ public:
   /// UCNs, etc.
   static std::string getSpelling(const Token &Tok,
                                  const SourceManager &SourceMgr,
-                                 const LangOptions &Features);
+                                 const LangOptions &Features, 
+                                 bool *Invalid = 0);
 
   /// getSpelling - This method is used to get the spelling of a token into a
   /// preallocated buffer, instead of as an std::string.  The caller is required
@@ -568,17 +571,20 @@ public:
   /// to point to a constant buffer with the data already in it (avoiding a
   /// copy).  The caller is not allowed to modify the returned buffer pointer
   /// if an internal buffer is returned.
-  unsigned getSpelling(const Token &Tok, const char *&Buffer) const;
+  unsigned getSpelling(const Token &Tok, const char *&Buffer, 
+                       bool *Invalid = 0) const;
 
   /// getSpelling - This method is used to get the spelling of a token into a
   /// SmallVector. Note that the returned StringRef may not point to the
   /// supplied buffer if a copy can be avoided.
   llvm::StringRef getSpelling(const Token &Tok,
-                              llvm::SmallVectorImpl<char> &Buffer) const;
+                              llvm::SmallVectorImpl<char> &Buffer, 
+                              bool *Invalid = 0) const;
 
   /// getSpellingOfSingleCharacterNumericConstant - Tok is a numeric constant
   /// with length 1, return the character.
-  char getSpellingOfSingleCharacterNumericConstant(const Token &Tok) const {
+  char getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 
+                                                   bool *Invalid = 0) const {
     assert(Tok.is(tok::numeric_constant) &&
            Tok.getLength() == 1 && "Called on unsupported token");
     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index f0a0d4c1bb2..6335504a7dd 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -457,10 +457,11 @@ SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
 }
 
 const llvm::MemoryBuffer *
-SourceManager::getMemoryBufferForFile(const FileEntry *File) {
+SourceManager::getMemoryBufferForFile(const FileEntry *File,
+                                      bool *Invalid) {
   const SrcMgr::ContentCache *IR = getOrCreateContentCache(File);
   assert(IR && "getOrCreateContentCache() cannot return NULL");
-  return IR->getBuffer(Diag);
+  return IR->getBuffer(Diag, Invalid);
 }
 
 bool SourceManager::overrideFileContents(const FileEntry *SourceFile,
@@ -701,21 +702,34 @@ SourceManager::getInstantiationRange(SourceLocation Loc) const {
 
 /// getCharacterData - Return a pointer to the start of the specified location
 /// in the appropriate MemoryBuffer.
-const char *SourceManager::getCharacterData(SourceLocation SL) const {
+const char *SourceManager::getCharacterData(SourceLocation SL,
+                                            bool *Invalid) const {
   // Note that this is a hot function in the getSpelling() path, which is
   // heavily used by -E mode.
   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
 
   // Note that calling 'getBuffer()' may lazily page in a source file.
-  return getSLocEntry(LocInfo.first).getFile().getContentCache()
-              ->getBuffer(Diag)->getBufferStart() + LocInfo.second;
+  bool CharDataInvalid = false;
+  const llvm::MemoryBuffer *Buffer
+    = getSLocEntry(LocInfo.first).getFile().getContentCache()->getBuffer(Diag, 
+                                                              &CharDataInvalid);
+  if (Invalid)
+    *Invalid = CharDataInvalid;
+  return Buffer->getBufferStart() + (CharDataInvalid? 0 : LocInfo.second);
 }
 
 
 /// getColumnNumber - Return the column # for the specified file position.
 /// this is significantly cheaper to compute than the line number.
-unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
-  const char *Buf = getBuffer(FID)->getBufferStart();
+unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos,
+                                        bool *Invalid) const {
+  bool MyInvalid = false;
+  const char *Buf = getBuffer(FID, &MyInvalid)->getBufferStart();
+  if (Invalid)
+    *Invalid = MyInvalid;
+
+  if (MyInvalid)
+    return 1;
 
   unsigned LineStart = FilePos;
   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
@@ -723,27 +737,30 @@ unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
   return FilePos-LineStart+1;
 }
 
-unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
+unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc,
+                                                bool *Invalid) const {
   if (Loc.isInvalid()) return 0;
   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
-  return getColumnNumber(LocInfo.first, LocInfo.second);
+  return getColumnNumber(LocInfo.first, LocInfo.second, Invalid);
 }
 
-unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
+unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc,
+                                                     bool *Invalid) const {
   if (Loc.isInvalid()) return 0;
   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
-  return getColumnNumber(LocInfo.first, LocInfo.second);
+  return getColumnNumber(LocInfo.first, LocInfo.second, Invalid);
 }
 
-
-
 static DISABLE_INLINE void ComputeLineNumbers(Diagnostic &Diag,
                                               ContentCache* FI,
-                                              llvm::BumpPtrAllocator &Alloc);
+                                              llvm::BumpPtrAllocator &Alloc,
+                                              bool &Invalid);
 static void ComputeLineNumbers(Diagnostic &Diag, ContentCache* FI, 
-                               llvm::BumpPtrAllocator &Alloc){
+                               llvm::BumpPtrAllocator &Alloc, bool &Invalid) {
   // Note that calling 'getBuffer()' may lazily page in the file.
-  const MemoryBuffer *Buffer = FI->getBuffer(Diag);
+  const MemoryBuffer *Buffer = FI->getBuffer(Diag, &Invalid);
+  if (Invalid)
+    return;
 
   // Find the file offsets of all of the *physical* source lines.  This does
   // not look at trigraphs, escaped newlines, or anything else tricky.
@@ -789,7 +806,8 @@ static void ComputeLineNumbers(Diagnostic &Diag, ContentCache* FI,
 /// for the position indicated.  This requires building and caching a table of
 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
 /// about to emit a diagnostic.
-unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
+unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos, 
+                                      bool *Invalid) const {
   ContentCache *Content;
   if (LastLineNoFileIDQuery == FID)
     Content = LastLineNoContentCache;
@@ -799,8 +817,15 @@ unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
 
   // If this is the first use of line information for this buffer, compute the
   /// SourceLineCache for it on demand.
-  if (Content->SourceLineCache == 0)
-    ComputeLineNumbers(Diag, Content, ContentCacheAlloc);
+  if (Content->SourceLineCache == 0) {
+    bool MyInvalid = false;
+    ComputeLineNumbers(Diag, Content, ContentCacheAlloc, MyInvalid);
+    if (Invalid)
+      *Invalid = MyInvalid;
+    if (MyInvalid)
+      return 1;
+  } else if (Invalid)
+    *Invalid = false;
 
   // Okay, we know we have a line number table.  Do a binary search to find the
   // line number that this character position lands on.
@@ -886,12 +911,14 @@ unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
   return LineNo;
 }
 
-unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
+unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc, 
+                                                   bool *Invalid) const {
   if (Loc.isInvalid()) return 0;
   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
   return getLineNumber(LocInfo.first, LocInfo.second);
 }
-unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
+unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc, 
+                                              bool *Invalid) const {
   if (Loc.isInvalid()) return 0;
   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
   return getLineNumber(LocInfo.first, LocInfo.second);
@@ -931,10 +958,11 @@ SourceManager::getFileCharacteristic(SourceLocation Loc) const {
 /// Return the filename or buffer identifier of the buffer the location is in.
 /// Note that this name does not respect #line directives.  Use getPresumedLoc
 /// for normal clients.
-const char *SourceManager::getBufferName(SourceLocation Loc) const {
+const char *SourceManager::getBufferName(SourceLocation Loc, 
+                                         bool *Invalid) const {
   if (Loc.isInvalid()) return "<invalid loc>";
 
-  return getBuffer(getFileID(Loc))->getBufferIdentifier();
+  return getBuffer(getFileID(Loc), Invalid)->getBufferIdentifier();
 }
 
 
@@ -1014,8 +1042,12 @@ SourceLocation SourceManager::getLocation(const FileEntry *SourceFile,
 
   // If this is the first use of line information for this buffer, compute the
   /// SourceLineCache for it on demand.
-  if (Content->SourceLineCache == 0)
-    ComputeLineNumbers(Diag, Content, ContentCacheAlloc);
+  if (Content->SourceLineCache == 0) {
+    bool MyInvalid = false;
+    ComputeLineNumbers(Diag, Content, ContentCacheAlloc, MyInvalid);
+    if (MyInvalid)
+      return SourceLocation();
+  }
 
   // Find the first file ID that corresponds to the given file.
   FileID FirstFID;
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 004e6755e5f..1cfa0e37450 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -806,7 +806,14 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
     // Get the spelling of the token, which eliminates trigraphs, etc.  We know
     // that ThisTokBuf points to a buffer that is big enough for the whole token
     // and 'spelled' tokens can only shrink.
-    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+    bool StringInvalid = false;
+    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf, 
+                                         &StringInvalid);
+    if (StringInvalid) {
+      hadError = 1;
+      continue;
+    }
+
     const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
 
     // TODO: Input character set mapping support.
@@ -904,8 +911,12 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
   llvm::SmallString<16> SpellingBuffer;
   SpellingBuffer.resize(Tok.getLength());
 
+  bool StringInvalid = false;
   const char *SpellingPtr = &SpellingBuffer[0];
-  unsigned TokLen = PP.getSpelling(Tok, SpellingPtr);
+  unsigned TokLen = PP.getSpelling(Tok, SpellingPtr, &StringInvalid);
+  if (StringInvalid) {
+    return 0;
+  }
 
   assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");
 
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index ede129edcb6..756ce27a93d 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -170,7 +170,12 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
     return true;
   case tok::numeric_constant: {
     llvm::SmallString<64> IntegerBuffer;
-    llvm::StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer);
+    bool NumberInvalid = false;
+    llvm::StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer, 
+                                              &NumberInvalid);
+    if (NumberInvalid)
+      return true; // a diagnostic was already reported
+
     NumericLiteralParser Literal(Spelling.begin(), Spelling.end(),
                                  PeekTok.getLocation(), PP);
     if (Literal.hadError)
@@ -216,7 +221,10 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
   }
   case tok::char_constant: {   // 'x'
     llvm::SmallString<32> CharBuffer;
-    llvm::StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer);
+    bool CharInvalid = false;
+    llvm::StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
+    if (CharInvalid)
+      return true;
 
     CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(),
                               PeekTok.getLocation(), PP);
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index a6efe7f5bcc..5584b18da15 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -282,11 +282,19 @@ bool Preprocessor::isCodeCompletionFile(SourceLocation FileLoc) const {
 /// UCNs, etc.
 std::string Preprocessor::getSpelling(const Token &Tok,
                                       const SourceManager &SourceMgr,
-                                      const LangOptions &Features) {
+                                      const LangOptions &Features, 
+                                      bool *Invalid) {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
 
   // If this token contains nothing interesting, return it directly.
-  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  bool CharDataInvalid = false;
+  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(), 
+                                                    &CharDataInvalid);
+  if (Invalid)
+    *Invalid = CharDataInvalid;
+  if (CharDataInvalid)
+    return std::string();
+
   if (!Tok.needsCleaning())
     return std::string(TokStart, TokStart+Tok.getLength());
 
@@ -310,8 +318,8 @@ std::string Preprocessor::getSpelling(const Token &Tok,
 /// after trigraph expansion and escaped-newline folding.  In particular, this
 /// wants to get the true, uncanonicalized, spelling of things like digraphs
 /// UCNs, etc.
-std::string Preprocessor::getSpelling(const Token &Tok) const {
-  return getSpelling(Tok, SourceMgr, Features);
+std::string Preprocessor::getSpelling(const Token &Tok, bool *Invalid) const {
+  return getSpelling(Tok, SourceMgr, Features, Invalid);
 }
 
 /// getSpelling - This method is used to get the spelling of a token into a
@@ -325,7 +333,7 @@ std::string Preprocessor::getSpelling(const Token &Tok) const {
 /// copy).  The caller is not allowed to modify the returned buffer pointer
 /// if an internal buffer is returned.
 unsigned Preprocessor::getSpelling(const Token &Tok,
-                                   const char *&Buffer) const {
+                                   const char *&Buffer, bool *Invalid) const {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
 
   // If this token is an identifier, just return the string from the identifier
@@ -341,8 +349,16 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
   if (Tok.isLiteral())
     TokStart = Tok.getLiteralData();
 
-  if (TokStart == 0)
-    TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  if (TokStart == 0) {
+    bool CharDataInvalid = false;
+    TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
+    if (Invalid)
+      *Invalid = CharDataInvalid;
+    if (CharDataInvalid) {
+      Buffer = "";
+      return 0;
+    }
+  }
 
   // If this token contains nothing interesting, return it directly.
   if (!Tok.needsCleaning()) {
@@ -368,7 +384,8 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
 /// SmallVector. Note that the returned StringRef may not point to the
 /// supplied buffer if a copy can be avoided.
 llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
-                                    llvm::SmallVectorImpl<char> &Buffer) const {
+                                          llvm::SmallVectorImpl<char> &Buffer,
+                                          bool *Invalid) const {
   // Try the fast path.
   if (const IdentifierInfo *II = Tok.getIdentifierInfo())
     return II->getName();
@@ -378,7 +395,7 @@ llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
     Buffer.resize(Tok.getLength());
 
   const char *Ptr = Buffer.data();
-  unsigned Len = getSpelling(Tok, Ptr);
+  unsigned Len = getSpelling(Tok, Ptr, Invalid);
   return llvm::StringRef(Ptr, Len);
 }
 
diff --git a/clang/test/PCH/changed-files.c b/clang/test/PCH/changed-files.c
new file mode 100644
index 00000000000..36453c48e71
--- /dev/null
+++ b/clang/test/PCH/changed-files.c
@@ -0,0 +1,11 @@
+const char *s0 = m0;
+
+// RUN: echo '#define m0 ""' > %t.h
+// RUN: %clang_cc1 -emit-pch -o %t.h.pch %t.h
+// RUN: echo '' > %t.h
+// RUN: not %clang_cc1 -include-pch %t.h.pch %s 2>&1 | grep "size of file"
+
+// RUN: echo '#define m0 000' > %t.h
+// RUN: %clang_cc1 -emit-pch -o %t.h.pch %t.h
+// RUN: echo '' > %t.h
+// RUN: not %clang_cc1 -include-pch %t.h.pch %s 2>&1 | grep "size of file"