[c++1z] Support for u8 character literals.

llvm-svn: 221576
author: Richard Smith <richard-llvm@metafoo.co.uk> 2014-11-08 06:08:42 +0000
committer: Richard Smith <richard-llvm@metafoo.co.uk> 2014-11-08 06:08:42 +0000
commit: 3e3a705062fe343dc397c0bf623aa383f14ce07c (patch)
tree: c22a037420f5961f40672567865afc4591809a78 /clang/lib/Lex
parent: cf29675d9501cfff2a37acb8af3eefa6cbba19c1 (diff)
download: bcm5719-llvm-3e3a705062fe343dc397c0bf623aa383f14ce07c.tar.gz
bcm5719-llvm-3e3a705062fe343dc397c0bf623aa383f14ce07c.zip
5 files changed, 29 insertions, 9 deletions
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 0aaad9bafba..c2e9716123c 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1889,17 +1889,20 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
 
 
 /// LexCharConstant - Lex the remainder of a character constant, after having
-/// lexed either ' or L' or u' or U'.
+/// lexed either ' or L' or u8' or u' or U'.
 bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
                             tok::TokenKind Kind) {
   // Does this character contain the \0 character?
   const char *NulCharacter = nullptr;
 
-  if (!isLexingRawMode() &&
-      (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant))
-    Diag(BufferPtr, getLangOpts().CPlusPlus
-           ? diag::warn_cxx98_compat_unicode_literal
-           : diag::warn_c99_compat_unicode_literal);
+  if (!isLexingRawMode()) {
+    if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
+      Diag(BufferPtr, getLangOpts().CPlusPlus
+                          ? diag::warn_cxx98_compat_unicode_literal
+                          : diag::warn_c99_compat_unicode_literal);
+    else if (Kind == tok::utf8_char_constant)
+      Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
+  }
 
   char C = getAndAdvanceChar(CurPtr, Result);
   if (C == '\'') {
@@ -3068,6 +3071,11 @@ LexNextToken:
                                ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                                            SizeTmp2, Result),
                                tok::utf8_string_literal);
+        if (Char2 == '\'' && LangOpts.CPlusPlus1z)
+          return LexCharConstant(
+              Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                                  SizeTmp2, Result),
+              tok::utf8_char_constant);
 
         if (Char2 == 'R' && LangOpts.CPlusPlus11) {
           unsigned SizeTmp3;
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 096805c3cf5..03331fb33eb 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -28,6 +28,7 @@ static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
   default: llvm_unreachable("Unknown token type!");
   case tok::char_constant:
   case tok::string_literal:
+  case tok::utf8_char_constant:
   case tok::utf8_string_literal:
     return Target.getCharWidth();
   case tok::wide_char_constant:
@@ -1031,9 +1032,10 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
   const char *TokBegin = begin;
 
   // Skip over wide character determinant.
-  if (Kind != tok::char_constant) {
+  if (Kind != tok::char_constant)
+    ++begin;
+  if (Kind == tok::utf8_char_constant)
     ++begin;
-  }
 
   // Skip over the entry quote.
   assert(begin[0] == '\'' && "Invalid token lexed");
@@ -1077,6 +1079,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
   if (tok::wide_char_constant == Kind) {
     largest_character_for_kind =
         0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
+  } else if (tok::utf8_char_constant == Kind) {
+    largest_character_for_kind = 0x7F;
   } else if (tok::utf16_char_constant == Kind) {
     largest_character_for_kind = 0xFFFF;
   } else if (tok::utf32_char_constant == Kind) {
diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp
index 0fa32399bc9..9967f3f0e49 100644
--- a/clang/lib/Lex/MacroArgs.cpp
+++ b/clang/lib/Lex/MacroArgs.cpp
@@ -218,6 +218,7 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
     if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
         Tok.is(tok::char_constant) ||          // 'x'
         Tok.is(tok::wide_char_constant) ||     // L'x'.
+        Tok.is(tok::utf8_char_constant) ||     // u8'x'.
         Tok.is(tok::utf16_char_constant) ||    // u'x'.
         Tok.is(tok::utf32_char_constant)) {    // U'x'.
       bool Invalid = false;
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index a3f5d938ce0..9cf72cf8f8f 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -273,6 +273,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
   }
   case tok::char_constant:          // 'x'
   case tok::wide_char_constant:     // L'x'
+  case tok::utf8_char_constant:     // u8'x'
   case tok::utf16_char_constant:    // u'x'
   case tok::utf32_char_constant: {  // U'x'
     // Complain about, and drop, any ud-suffix.
diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp
index 866cbb142c8..08327496ab8 100644
--- a/clang/lib/Lex/TokenConcatenation.cpp
+++ b/clang/lib/Lex/TokenConcatenation.cpp
@@ -99,6 +99,10 @@ TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
     TokenInfo[tok::utf32_char_constant ] |= aci_custom;
   }
 
+  // These tokens have custom code in C++1z mode.
+  if (PP.getLangOpts().CPlusPlus1z)
+    TokenInfo[tok::utf8_char_constant] |= aci_custom;
+
   // These tokens change behavior if followed by an '='.
   TokenInfo[tok::amp         ] |= aci_avoid_equal;           // &=
   TokenInfo[tok::plus        ] |= aci_avoid_equal;           // +=
@@ -213,6 +217,7 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
   case tok::utf32_string_literal:
   case tok::char_constant:
   case tok::wide_char_constant:
+  case tok::utf8_char_constant:
   case tok::utf16_char_constant:
   case tok::utf32_char_constant:
     if (!PP.getLangOpts().CPlusPlus11)
@@ -236,7 +241,8 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
     if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) ||
         Tok.is(tok::utf8_string_literal) || Tok.is(tok::utf16_string_literal) ||
         Tok.is(tok::utf32_string_literal) || Tok.is(tok::wide_char_constant) ||
-        Tok.is(tok::utf16_char_constant) || Tok.is(tok::utf32_char_constant))
+        Tok.is(tok::utf8_char_constant) || Tok.is(tok::utf16_char_constant) ||
+        Tok.is(tok::utf32_char_constant))
       return true;
 
     // If this isn't identifier + string, we're done.
author	Richard Smith <richard-llvm@metafoo.co.uk>	2014-11-08 06:08:42 +0000
committer	Richard Smith <richard-llvm@metafoo.co.uk>	2014-11-08 06:08:42 +0000
commit	3e3a705062fe343dc397c0bf623aa383f14ce07c (patch)
tree	c22a037420f5961f40672567865afc4591809a78 /clang/lib/Lex
parent	cf29675d9501cfff2a37acb8af3eefa6cbba19c1 (diff)
download	bcm5719-llvm-3e3a705062fe343dc397c0bf623aa383f14ce07c.tar.gz bcm5719-llvm-3e3a705062fe343dc397c0bf623aa383f14ce07c.zip