diff options
Diffstat (limited to 'clang')
| -rw-r--r-- | clang/lib/Basic/SourceManager.cpp | 11 | ||||
| -rw-r--r-- | clang/lib/Lex/Lexer.cpp | 15 | 
2 files changed, 19 insertions, 7 deletions
| diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index b6939ec7d55..8262feba7f7 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -126,13 +126,12 @@ const llvm::MemoryBuffer *ContentCache::getBuffer(Diagnostic &Diag,      if (Invalid) *Invalid = true;      return Buffer.getPointer();    } -   +    // If the buffer is valid, check to see if it has a UTF Byte Order Mark -  // (BOM).  We only support UTF-8 without a BOM right now.  See +  // (BOM).  We only support UTF-8 with and without a BOM right now.  See    // http://en.wikipedia.org/wiki/Byte_order_mark for more information.    llvm::StringRef BufStr = Buffer.getPointer()->getBuffer(); -  const char *BOM = llvm::StringSwitch<const char *>(BufStr) -    .StartsWith("\xEF\xBB\xBF", "UTF-8") +  const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr)      .StartsWith("\xFE\xFF", "UTF-16 (BE)")      .StartsWith("\xFF\xFE", "UTF-16 (LE)")      .StartsWith("\x00\x00\xFE\xFF", "UTF-32 (BE)") @@ -145,9 +144,9 @@ const llvm::MemoryBuffer *ContentCache::getBuffer(Diagnostic &Diag,      .StartsWith("\x84\x31\x95\x33", "GB-18030")      .Default(0); -  if (BOM) { +  if (InvalidBOM) {      Diag.Report(Loc, diag::err_unsupported_bom) -      << BOM << ContentsEntry->getName(); +      << InvalidBOM << ContentsEntry->getName();      Buffer.setInt(Buffer.getInt() | InvalidFlag);    } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 34b16c74777..ea2a2deb0f5 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -71,9 +71,22 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,           "We assume that the input buffer has a null character at the end"           " to simplify lexing!"); +  // Check whether we have a BOM in the beginning of the buffer. If yes - act +  // accordingly. Right now we support only UTF-8 with and without BOM, so, just +  // skip the UTF-8 BOM if it's present. +  if (BufferStart == BufferPtr) { +    // Determine the size of the BOM. +    size_t BOMLength = llvm::StringSwitch<size_t>(BufferStart) +      .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM +      .Default(0); + +    // Skip the BOM. +    BufferPtr += BOMLength; +  } +    Is_PragmaLexer = false;    IsInConflictMarker = false; -   +    // Start of the file is a start of line.    IsAtStartOfLine = true; | 

