summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clang/include/clang/AST/Expr.h52
-rw-r--r--clang/include/clang/AST/Type.h2
-rw-r--r--clang/include/clang/Basic/DiagnosticLexKinds.td6
-rw-r--r--clang/include/clang/Basic/IdentifierTable.h7
-rw-r--r--clang/include/clang/Basic/TokenKinds.def12
-rw-r--r--clang/include/clang/Lex/Lexer.h6
-rw-r--r--clang/include/clang/Lex/LiteralSupport.h31
-rw-r--r--clang/include/clang/Lex/Token.h5
-rw-r--r--clang/include/clang/Lex/TokenConcatenation.h9
-rw-r--r--clang/include/clang/Parse/Parser.h5
-rw-r--r--clang/lib/AST/ASTImporter.cpp4
-rw-r--r--clang/lib/AST/Expr.cpp9
-rw-r--r--clang/lib/AST/StmtDumper.cpp9
-rw-r--r--clang/lib/AST/StmtPrinter.cpp18
-rw-r--r--clang/lib/AST/StmtProfile.cpp4
-rw-r--r--clang/lib/AST/Type.cpp12
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp16
-rw-r--r--clang/lib/Lex/Lexer.cpp77
-rw-r--r--clang/lib/Lex/LiteralSupport.cpp157
-rw-r--r--clang/lib/Lex/MacroArgs.cpp8
-rw-r--r--clang/lib/Lex/PPDirectives.cpp4
-rw-r--r--clang/lib/Lex/PPExpressions.cpp16
-rw-r--r--clang/lib/Lex/Pragma.cpp6
-rw-r--r--clang/lib/Lex/TokenConcatenation.cpp64
-rw-r--r--clang/lib/Parse/ParseCXXInlineMethods.cpp3
-rw-r--r--clang/lib/Parse/ParseExpr.cpp6
-rw-r--r--clang/lib/Parse/ParseTentative.cpp6
-rw-r--r--clang/lib/Parse/Parser.cpp3
-rw-r--r--clang/lib/Rewrite/HTMLRewrite.cpp9
-rw-r--r--clang/lib/Rewrite/RewriteObjC.cpp19
-rw-r--r--clang/lib/Sema/SemaChecking.cpp4
-rw-r--r--clang/lib/Sema/SemaDeclAttr.cpp12
-rw-r--r--clang/lib/Sema/SemaExpr.cpp39
-rw-r--r--clang/lib/Sema/SemaExprCXX.cpp20
-rw-r--r--clang/lib/Sema/SemaExprObjC.cpp6
-rw-r--r--clang/lib/Sema/SemaInit.cpp32
-rw-r--r--clang/lib/Sema/SemaStmt.cpp8
-rw-r--r--clang/lib/Sema/SemaTemplate.cpp18
-rw-r--r--clang/lib/Serialization/ASTReaderStmt.cpp4
-rw-r--r--clang/lib/Serialization/ASTWriterStmt.cpp4
-rw-r--r--clang/test/CXX/lex/lex.literal/lex.ccon/p1.cpp7
-rw-r--r--clang/test/CodeGen/char-literal.c44
-rw-r--r--clang/test/CodeGen/string-literal.c23
-rw-r--r--clang/test/Lexer/wchar.c4
-rw-r--r--clang/test/Parser/char-literal-printing.c37
-rw-r--r--clang/test/SemaCXX/type-convert-construct.cpp7
46 files changed, 608 insertions, 246 deletions
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index f623fd1d525..9e4c0f09153 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -1112,29 +1112,39 @@ public:
};
class CharacterLiteral : public Expr {
+public:
+ enum CharacterKind {
+ Ascii, // 'a' (no prefix)
+ Wide, // L'a'
+ UTF16, // u'a'
+ UTF32 // U'a'
+ };
+
+private:
unsigned Value;
SourceLocation Loc;
- bool IsWide;
+ unsigned Kind : 2;
public:
// type should be IntTy
- CharacterLiteral(unsigned value, bool iswide, QualType type, SourceLocation l)
+ CharacterLiteral(unsigned value, CharacterKind kind, QualType type,
+ SourceLocation l)
: Expr(CharacterLiteralClass, type, VK_RValue, OK_Ordinary, false, false,
false, false),
- Value(value), Loc(l), IsWide(iswide) {
+ Value(value), Loc(l), Kind(kind) {
}
/// \brief Construct an empty character literal.
CharacterLiteral(EmptyShell Empty) : Expr(CharacterLiteralClass, Empty) { }
SourceLocation getLocation() const { return Loc; }
- bool isWide() const { return IsWide; }
+ CharacterKind getKind() const { return static_cast<CharacterKind>(Kind); }
SourceRange getSourceRange() const { return SourceRange(Loc); }
unsigned getValue() const { return Value; }
void setLocation(SourceLocation Location) { Loc = Location; }
- void setWide(bool W) { IsWide = W; }
+ void setKind(CharacterKind kind) { Kind = kind; }
void setValue(unsigned Val) { Value = Val; }
static bool classof(const Stmt *T) {
@@ -1243,13 +1253,23 @@ public:
/// In this case, getByteLength() will return 6, but the string literal will
/// have type "char[2]".
class StringLiteral : public Expr {
+public:
+ enum StringKind {
+ Ascii, // "foo" (no prefix)
+ Wide, // L"foo"
+ UTF8, // u8"foo"
+ UTF16, // u"foo"
+ UTF32 // U"foo"
+ };
+
+private:
friend class ASTStmtReader;
const char *StrData;
unsigned ByteLength;
- bool IsWide;
- bool IsPascal;
unsigned NumConcatenated;
+ unsigned Kind : 3;
+ bool IsPascal : 1;
SourceLocation TokLocs[1];
StringLiteral(QualType Ty) :
@@ -1259,14 +1279,15 @@ class StringLiteral : public Expr {
public:
/// This is the "fully general" constructor that allows representation of
/// strings formed from multiple concatenated tokens.
- static StringLiteral *Create(ASTContext &C, StringRef Str, bool Wide,
+ static StringLiteral *Create(ASTContext &C, StringRef Str, StringKind Kind,
bool Pascal, QualType Ty,
const SourceLocation *Loc, unsigned NumStrs);
/// Simple constructor for string literals made from one token.
- static StringLiteral *Create(ASTContext &C, StringRef Str, bool Wide,
- bool Pascal, QualType Ty, SourceLocation Loc) {
- return Create(C, Str, Wide, Pascal, Ty, &Loc, 1);
+ static StringLiteral *Create(ASTContext &C, StringRef Str, StringKind Kind,
+ bool Pascal, QualType Ty,
+ SourceLocation Loc) {
+ return Create(C, Str, Kind, Pascal, Ty, &Loc, 1);
}
/// \brief Construct an empty string literal.
@@ -1281,9 +1302,14 @@ public:
/// \brief Sets the string data to the given string data.
void setString(ASTContext &C, StringRef Str);
- bool isWide() const { return IsWide; }
+ StringKind getKind() const { return static_cast<StringKind>(Kind); }
+ bool isAscii() const { return Kind == Ascii; }
+ bool isWide() const { return Kind == Wide; }
+ bool isUTF8() const { return Kind == UTF8; }
+ bool isUTF16() const { return Kind == UTF16; }
+ bool isUTF32() const { return Kind == UTF32; }
bool isPascal() const { return IsPascal; }
-
+
bool containsNonAsciiOrNull() const {
StringRef Str = getString();
for (unsigned i = 0, e = Str.size(); i != e; ++i)
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 8a842da4404..2b726102267 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -1368,6 +1368,8 @@ public:
bool isBooleanType() const;
bool isCharType() const;
bool isWideCharType() const;
+ bool isChar16Type() const;
+ bool isChar32Type() const;
bool isAnyCharacterType() const;
bool isIntegralType(ASTContext &Ctx) const;
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 9e431a2d21c..e23921be0bf 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -77,8 +77,8 @@ def err_invalid_suffix_integer_constant : Error<
"invalid suffix '%0' on integer constant">;
def err_invalid_suffix_float_constant : Error<
"invalid suffix '%0' on floating constant">;
-def warn_extraneous_wide_char_constant : Warning<
- "extraneous characters in wide character constant ignored">;
+def warn_extraneous_char_constant : Warning<
+ "extraneous characters in character constant ignored">;
def warn_char_constant_too_large : Warning<
"character constant too long for its type">;
def err_exponent_has_no_digits : Error<"exponent has no digits">;
@@ -102,6 +102,8 @@ def warn_ucn_escape_too_large : ExtWarn<
"character unicode escape sequence too long for its type">;
def warn_ucn_not_valid_in_c89 : ExtWarn<
"unicode escape sequences are only valid in C99 or C++">;
+def err_unsupported_string_concat : Error<
+ "unsupported non-standard concatenation of string literals">;
//===----------------------------------------------------------------------===//
// PTH Diagnostics
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index be1fa196c0d..3390f7809d0 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -50,8 +50,8 @@ namespace clang {
/// set, and all tok::identifier tokens have a pointer to one of these.
class IdentifierInfo {
// Note: DON'T make TokenID a 'tok::TokenKind'; MSVC will treat it as a
- // signed char and TokenKinds > 127 won't be handled correctly.
- unsigned TokenID : 8; // Front-end token ID or tok::identifier.
+ // signed char and TokenKinds > 255 won't be handled correctly.
+ unsigned TokenID : 9; // Front-end token ID or tok::identifier.
// Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
// First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values
// are for builtins.
@@ -65,7 +65,7 @@ class IdentifierInfo {
// file and wasn't modified since.
bool RevertedTokenID : 1; // True if RevertTokenIDToIdentifier was
// called.
- // 6 bits left in 32-bit word.
+ // 5 bits left in 32-bit word.
void *FETokenInfo; // Managed by the language front-end.
llvm::StringMapEntry<IdentifierInfo*> *Entry;
@@ -409,6 +409,7 @@ public:
IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
IdentifierInfo &II = get(Name);
II.TokenID = TokenCode;
+ assert(II.TokenID == TokenCode && "TokenCode too large");
return II;
}
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 86172b83ff4..d057559889a 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -114,13 +114,23 @@ TOK(raw_identifier) // Used only in raw lexing mode.
TOK(numeric_constant) // 0x123
// C99 6.4.4: Character Constants
-TOK(char_constant) // 'a' L'b'
+TOK(char_constant) // 'a'
+TOK(wide_char_constant) // L'b'
+
+// C++0x Character Constants
+TOK(utf16_char_constant) // u'a'
+TOK(utf32_char_constant) // U'a'
// C99 6.4.5: String Literals.
TOK(string_literal) // "foo"
TOK(wide_string_literal) // L"foo"
TOK(angle_string_literal)// <foo>
+// C++0x String Literals.
+TOK(utf8_string_literal) // u8"foo"
+TOK(utf16_string_literal)// u"foo"
+TOK(utf32_string_literal)// U"foo"
+
// C99 6.4.6: Punctuators.
PUNCTUATOR(l_square, "[")
PUNCTUATOR(r_square, "]")
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 990c1eedbb2..2c25597433e 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -471,9 +471,11 @@ private:
// Helper functions to lex the remainder of a token of the specific type.
void LexIdentifier (Token &Result, const char *CurPtr);
void LexNumericConstant (Token &Result, const char *CurPtr);
- void LexStringLiteral (Token &Result, const char *CurPtr,bool Wide);
+ void LexStringLiteral (Token &Result, const char *CurPtr,
+ tok::TokenKind Kind);
void LexAngledStringLiteral(Token &Result, const char *CurPtr);
- void LexCharConstant (Token &Result, const char *CurPtr);
+ void LexCharConstant (Token &Result, const char *CurPtr,
+ tok::TokenKind Kind);
bool LexEndOfFile (Token &Result, const char *CurPtr);
bool SkipWhitespace (Token &Result, const char *CurPtr);
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index 6486c38a406..15057299b2a 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/DataTypes.h"
+#include "clang/Basic/TokenKinds.h"
#include <cctype>
namespace clang {
@@ -124,15 +125,19 @@ private:
/// character literal.
class CharLiteralParser {
uint64_t Value;
- bool IsWide;
+ tok::TokenKind Kind;
bool IsMultiChar;
bool HadError;
public:
CharLiteralParser(const char *begin, const char *end,
- SourceLocation Loc, Preprocessor &PP);
+ SourceLocation Loc, Preprocessor &PP,
+ tok::TokenKind kind);
bool hadError() const { return HadError; }
- bool isWide() const { return IsWide; }
+ bool isAscii() const { return Kind == tok::char_constant; }
+ bool isWide() const { return Kind == tok::wide_char_constant; }
+ bool isUTF16() const { return Kind == tok::utf16_char_constant; }
+ bool isUTF32() const { return Kind == tok::utf32_char_constant; }
bool isMultiChar() const { return IsMultiChar; }
uint64_t getValue() const { return Value; }
};
@@ -148,7 +153,8 @@ class StringLiteralParser {
unsigned MaxTokenLength;
unsigned SizeBound;
- unsigned wchar_tByteWidth;
+ unsigned CharByteWidth;
+ tok::TokenKind Kind;
llvm::SmallString<512> ResultBuf;
char *ResultPtr; // cursor
public:
@@ -158,14 +164,13 @@ public:
const SourceManager &sm, const LangOptions &features,
const TargetInfo &target, Diagnostic *diags = 0)
: SM(sm), Features(features), Target(target), Diags(diags),
- MaxTokenLength(0), SizeBound(0), wchar_tByteWidth(0),
- ResultPtr(ResultBuf.data()), hadError(false), AnyWide(false), Pascal(false) {
+ MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
+ ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
init(StringToks, NumStringToks);
}
bool hadError;
- bool AnyWide;
bool Pascal;
StringRef GetString() const {
@@ -174,9 +179,7 @@ public:
unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
unsigned GetNumStringChars() const {
- if (AnyWide)
- return GetStringLength() / wchar_tByteWidth;
- return GetStringLength();
+ return GetStringLength() / CharByteWidth;
}
/// getOffsetOfStringByte - This function returns the offset of the
/// specified byte of the string data represented by Token. This handles
@@ -185,7 +188,13 @@ public:
/// If the Diagnostics pointer is non-null, then this will do semantic
/// checking of the string literal and emit errors and warnings.
unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
-
+
+ bool isAscii() const { return Kind == tok::string_literal; } // "foo"
+ bool isWide() const { return Kind == tok::wide_string_literal; } // L"foo"
+ bool isUTF8() const { return Kind == tok::utf8_string_literal; } // u8"foo"
+ bool isUTF16() const { return Kind == tok::utf16_string_literal; } // u"foo"
+ bool isUTF32() const { return Kind == tok::utf32_string_literal; } // U"foo"
+
private:
void init(const Token *StringToks, unsigned NumStringToks);
};
diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h
index 9cf11d9a64c..e6dd1607e88 100644
--- a/clang/include/clang/Lex/Token.h
+++ b/clang/include/clang/Lex/Token.h
@@ -96,7 +96,10 @@ public:
/// constant, string, etc.
bool isLiteral() const {
return is(tok::numeric_constant) || is(tok::char_constant) ||
- is(tok::string_literal) || is(tok::wide_string_literal) ||
+ is(tok::wide_char_constant) || is(tok::utf16_char_constant) ||
+ is(tok::utf32_char_constant) || is(tok::string_literal) ||
+ is(tok::wide_string_literal) || is(tok::utf8_string_literal) ||
+ is(tok::utf16_string_literal) || is(tok::utf32_string_literal) ||
is(tok::angle_string_literal);
}
diff --git a/clang/include/clang/Lex/TokenConcatenation.h b/clang/include/clang/Lex/TokenConcatenation.h
index 094990a6e31..551300f402c 100644
--- a/clang/include/clang/Lex/TokenConcatenation.h
+++ b/clang/include/clang/Lex/TokenConcatenation.h
@@ -63,12 +63,9 @@ namespace clang {
const Token &Tok) const;
private:
- /// StartsWithL - Return true if the spelling of this token starts with 'L'.
- bool StartsWithL(const Token &Tok) const;
-
- /// IsIdentifierL - Return true if the spelling of this token is literally
- /// 'L'.
- bool IsIdentifierL(const Token &Tok) const;
+ /// IsIdentifierStringPrefix - Return true if the spelling of the token
+ /// is literally 'L', 'u', 'U', or 'u8'.
+ bool IsIdentifierStringPrefix(const Token &Tok) const;
};
} // end clang namespace
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 5d9376c1f7e..83b0cd455e9 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -265,7 +265,10 @@ private:
///
bool isTokenStringLiteral() const {
return Tok.getKind() == tok::string_literal ||
- Tok.getKind() == tok::wide_string_literal;
+ Tok.getKind() == tok::wide_string_literal ||
+ Tok.getKind() == tok::utf8_string_literal ||
+ Tok.getKind() == tok::utf16_string_literal ||
+ Tok.getKind() == tok::utf32_string_literal;
}
/// \brief Returns true if the current token is a '=' or '==' and
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 2ea79912d11..d6e7d77d0fe 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -3814,8 +3814,8 @@ Expr *ASTNodeImporter::VisitCharacterLiteral(CharacterLiteral *E) {
if (T.isNull())
return 0;
- return new (Importer.getToContext()) CharacterLiteral(E->getValue(),
- E->isWide(), T,
+ return new (Importer.getToContext()) CharacterLiteral(E->getValue(),
+ E->getKind(), T,
Importer.Import(E->getLocation()));
}
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 58fb32d278b..5e795be56d1 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -533,8 +533,7 @@ double FloatingLiteral::getValueAsApproximateDouble() const {
}
StringLiteral *StringLiteral::Create(ASTContext &C, StringRef Str,
- bool Wide,
- bool Pascal, QualType Ty,
+ StringKind Kind, bool Pascal, QualType Ty,
const SourceLocation *Loc,
unsigned NumStrs) {
// Allocate enough space for the StringLiteral plus an array of locations for
@@ -549,7 +548,7 @@ StringLiteral *StringLiteral::Create(ASTContext &C, StringRef Str,
memcpy(AStrData, Str.data(), Str.size());
SL->StrData = AStrData;
SL->ByteLength = Str.size();
- SL->IsWide = Wide;
+ SL->Kind = Kind;
SL->IsPascal = Pascal;
SL->TokLocs[0] = Loc[0];
SL->NumConcatenated = NumStrs;
@@ -587,8 +586,8 @@ void StringLiteral::setString(ASTContext &C, StringRef Str) {
SourceLocation StringLiteral::
getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
const LangOptions &Features, const TargetInfo &Target) const {
- assert(!isWide() && "This doesn't work for wide strings yet");
-
+ assert(Kind == StringLiteral::Ascii && "This only works for ASCII strings");
+
// Loop over all of the tokens in this string until we find the one that
// contains the byte we're looking for.
unsigned TokNo = 0;
diff --git a/clang/lib/AST/StmtDumper.cpp b/clang/lib/AST/StmtDumper.cpp
index 7218af570f9..ce4ae8e7732 100644
--- a/clang/lib/AST/StmtDumper.cpp
+++ b/clang/lib/AST/StmtDumper.cpp
@@ -443,8 +443,13 @@ void StmtDumper::VisitStringLiteral(StringLiteral *Str) {
DumpExpr(Str);
// FIXME: this doesn't print wstrings right.
OS << " ";
- if (Str->isWide())
- OS << "L";
+ switch (Str->getKind()) {
+ case StringLiteral::Ascii: break; // No prefix
+ case StringLiteral::Wide: OS << 'L'; break;
+ case StringLiteral::UTF8: OS << "u8"; break;
+ case StringLiteral::UTF16: OS << 'u'; break;
+ case StringLiteral::UTF32: OS << 'U'; break;
+ }
OS << '"';
OS.write_escaped(Str->getString());
OS << '"';
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 8fcad14ec23..79f14bc6581 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -599,8 +599,14 @@ void StmtPrinter::VisitPredefinedExpr(PredefinedExpr *Node) {
void StmtPrinter::VisitCharacterLiteral(CharacterLiteral *Node) {
unsigned value = Node->getValue();
- if (Node->isWide())
- OS << "L";
+
+ switch (Node->getKind()) {
+ case CharacterLiteral::Ascii: break; // no prefix.
+ case CharacterLiteral::Wide: OS << 'L'; break;
+ case CharacterLiteral::UTF16: OS << 'u'; break;
+ case CharacterLiteral::UTF32: OS << 'U'; break;
+ }
+
switch (value) {
case '\\':
OS << "'\\\\'";
@@ -672,7 +678,13 @@ void StmtPrinter::VisitImaginaryLiteral(ImaginaryLiteral *Node) {
}
void StmtPrinter::VisitStringLiteral(StringLiteral *Str) {
- if (Str->isWide()) OS << 'L';
+ switch (Str->getKind()) {
+ case StringLiteral::Ascii: break; // no prefix.
+ case StringLiteral::Wide: OS << 'L'; break;
+ case StringLiteral::UTF8: OS << "u8"; break;
+ case StringLiteral::UTF16: OS << 'u'; break;
+ case StringLiteral::UTF32: OS << 'U'; break;
+ }
OS << '"';
// FIXME: this doesn't print wstrings right.
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 120c9e50a92..12321ef0d6f 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -252,7 +252,7 @@ void StmtProfiler::VisitIntegerLiteral(const IntegerLiteral *S) {
void StmtProfiler::VisitCharacterLiteral(const CharacterLiteral *S) {
VisitExpr(S);
- ID.AddBoolean(S->isWide());
+ ID.AddInteger(S->getKind());
ID.AddInteger(S->getValue());
}
@@ -269,7 +269,7 @@ void StmtProfiler::VisitImaginaryLiteral(const ImaginaryLiteral *S) {
void StmtProfiler::VisitStringLiteral(const StringLiteral *S) {
VisitExpr(S);
ID.AddString(S->getString());
- ID.AddBoolean(S->isWide());
+ ID.AddInteger(S->getKind());
}
void StmtProfiler::VisitParenExpr(const ParenExpr *S) {
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 7cd3be2fb48..2555ab31fb2 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -635,6 +635,18 @@ bool Type::isWideCharType() const {
return false;
}
+bool Type::isChar16Type() const {
+ if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
+ return BT->getKind() == BuiltinType::Char16;
+ return false;
+}
+
+bool Type::isChar32Type() const {
+ if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
+ return BT->getKind() == BuiltinType::Char32;
+ return false;
+}
+
/// \brief Determine whether this type is any of the built-in character
/// types.
bool Type::isAnyCharacterType() const {
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 290fe242c91..ce32325acaa 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1877,8 +1877,20 @@ std::string CodeGenModule::GetStringForStringLiteral(const StringLiteral *E) {
// Resize the string to the right size.
uint64_t RealLen = CAT->getSize().getZExtValue();
- if (E->isWide())
+ switch (E->getKind()) {
+ case StringLiteral::Ascii:
+ case StringLiteral::UTF8:
+ break;
+ case StringLiteral::Wide:
RealLen *= Context.Target.getWCharWidth() / Context.getCharWidth();
+ break;
+ case StringLiteral::UTF16:
+ RealLen *= Context.Target.getChar16Width() / Context.getCharWidth();
+ break;
+ case StringLiteral::UTF32:
+ RealLen *= Context.Target.getChar32Width() / Context.getCharWidth();
+ break;
+ }
std::string Str = E->getString().str();
Str.resize(RealLen, '\0');
@@ -1893,7 +1905,7 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S) {
// FIXME: This can be more efficient.
// FIXME: We shouldn't need to bitcast the constant in the wide string case.
llvm::Constant *C = GetAddrOfConstantString(GetStringForStringLiteral(S));
- if (S->isWide()) {
+ if (S->isWide() || S->isUTF16() || S->isUTF32()) {
llvm::Type *DestTy =
llvm::PointerType::getUnqual(getTypes().ConvertType(S->getType()));
C = llvm::ConstantExpr::getBitCast(C, DestTy);
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 6c7169f89bd..44674a93d74 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1267,8 +1267,9 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
}
/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
-/// either " or L".
-void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
+/// either " or L" or u8" or u" or U".
+void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
+ tok::TokenKind Kind) {
const char *NulCharacter = 0; // Does this string contain the \0 character?
char C = getAndAdvanceChar(CurPtr, Result);
@@ -1299,8 +1300,7 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
// Update the location of the token as well as the BufferPtr instance var.
const char *TokStart = BufferPtr;
- FormTokenWithChars(Result, CurPtr,
- Wide ? tok::wide_string_literal : tok::string_literal);
+ FormTokenWithChars(Result, CurPtr, Kind);
Result.setLiteralData(TokStart);
}
@@ -1339,8 +1339,9 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
/// LexCharConstant - Lex the remainder of a character constant, after having
-/// lexed either ' or L'.
-void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
+/// lexed either ' or L' or u' or U'.
+void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
+ tok::TokenKind Kind) {
const char *NulCharacter = 0; // Does this character contain the \0 character?
char C = getAndAdvanceChar(CurPtr, Result);
@@ -1377,7 +1378,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
// Update the location of token as well as BufferPtr.
const char *TokStart = BufferPtr;
- FormTokenWithChars(Result, CurPtr, tok::char_constant);
+ FormTokenWithChars(Result, CurPtr, Kind);
Result.setLiteralData(TokStart);
}
@@ -2185,6 +2186,55 @@ LexNextToken:
MIOpt.ReadToken();
return LexNumericConstant(Result, CurPtr);
+ case 'u': // Identifier (uber) or C++0x u8"..", u"..", or u'.' literal
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+
+ if (Features.CPlusPlus0x) {
+ Char = getCharAndSize(CurPtr, SizeTmp);
+
+ // UTF-16 string literal
+ if (Char == '"')
+ return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::utf16_string_literal);
+
+ // UTF-16 character constant
+ if (Char == '\'')
+ return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::utf16_char_constant);
+
+ // UTF-8 string literal
+ if (Char == '8' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+ return LexStringLiteral(Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::utf8_string_literal);
+ }
+
+ // treat u like the start of an identifier.
+ return LexIdentifier(Result, CurPtr);
+
+ case 'U': // Identifier (Uber) or C++0x UTF-32 string or char literal
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+
+ if (Features.CPlusPlus0x) {
+ Char = getCharAndSize(CurPtr, SizeTmp);
+
+ // UTF-32 string literal
+ if (Char == '"')
+ return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::utf32_string_literal);
+
+ // UTF-32 character constant
+ if (Char == '\'')
+ return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::utf32_char_constant);
+ }
+
+ // treat U like the start of an identifier.
+ return LexIdentifier(Result, CurPtr);
+
case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
@@ -2193,21 +2243,22 @@ LexNextToken:
// Wide string literal.
if (Char == '"')
return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
- true);
+ tok::wide_string_literal);
// Wide character constant.
if (Char == '\'')
- return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
+ return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::wide_char_constant);
// FALL THROUGH, treating L like the start of an identifier.
// C99 6.4.2: Identifiers.
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N':
- case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+ case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': /*'U'*/
case 'V': case 'W': case 'X': case 'Y': case 'Z':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
- case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+ case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/
case 'v': case 'w': case 'x': case 'y': case 'z':
case '_':
// Notify MIOpt that we read a non-whitespace/non-comment token.
@@ -2230,13 +2281,13 @@ LexNextToken:
case '\'':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- return LexCharConstant(Result, CurPtr);
+ return LexCharConstant(Result, CurPtr, tok::char_constant);
// C99 6.4.5: String Literals.
case '"':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- return LexStringLiteral(Result, CurPtr, false);
+ return LexStringLiteral(Result, CurPtr, tok::string_literal);
// C99 6.4.6: Punctuators.
case '?':
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index f8a2a55117c..82493408e61 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -28,12 +28,31 @@ static int HexDigitValue(char C) {
return -1;
}
+static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
+ switch (kind) { // map a literal token kind to the target's character width
+ default: assert(0 && "Unknown token type!"); // falls through if NDEBUG
+ case tok::char_constant:
+ case tok::string_literal:
+ case tok::utf8_string_literal:
+ return Target.getCharWidth(); // narrow and UTF-8 literals
+ case tok::wide_char_constant:
+ case tok::wide_string_literal:
+ return Target.getWCharWidth(); // L'x' / L"x"
+ case tok::utf16_char_constant:
+ case tok::utf16_string_literal:
+ return Target.getChar16Width(); // u'x' / u"x"
+ case tok::utf32_char_constant:
+ case tok::utf32_string_literal:
+ return Target.getChar32Width(); // U'x' / U"x"
+ }
+}
+
/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
/// either a character or a string literal.
static unsigned ProcessCharEscape(const char *&ThisTokBuf,
const char *ThisTokEnd, bool &HadError,
- FullSourceLoc Loc, bool IsWide,
- Diagnostic *Diags, const TargetInfo &Target) {
+ FullSourceLoc Loc, unsigned CharWidth,
+ Diagnostic *Diags) {
// Skip the '\' char.
++ThisTokBuf;
@@ -98,9 +117,6 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,
}
// See if any bits will be truncated when evaluated as a character.
- unsigned CharWidth =
- IsWide ? Target.getWCharWidth() : Target.getCharWidth();
-
if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
Overflow = true;
ResultChar &= ~0U >> (32-CharWidth);
@@ -128,9 +144,6 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,
ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
// Check for overflow. Reject '\777', but not L'\777'.
- unsigned CharWidth =
- IsWide ? Target.getWCharWidth() : Target.getCharWidth();
-
if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
if (Diags)
Diags->Report(Loc, diag::warn_octal_escape_too_large);
@@ -219,8 +232,8 @@ static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
/// we will likely rework our support for UCN's.
static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
char *&ResultBuf, bool &HadError,
- FullSourceLoc Loc, bool wide, Diagnostic *Diags,
- const LangOptions &Features) {
+ FullSourceLoc Loc, unsigned CharByteWidth,
+ Diagnostic *Diags, const LangOptions &Features) {
typedef uint32_t UTF32;
UTF32 UcnVal = 0;
unsigned short UcnLen = 0;
@@ -230,19 +243,22 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
return;
}
- if (wide) {
- (void)UcnLen;
- assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
+ assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth == 4) &&
+ "only character widths of 1, 2, or 4 bytes supported");
- if (!Features.ShortWChar) {
- // Note: our internal rep of wide char tokens is always little-endian.
- *ResultBuf++ = (UcnVal & 0x000000FF);
- *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
- *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
- *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
- return;
- }
+ (void)UcnLen;
+ assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
+
+ if (CharByteWidth == 4) {
+ // Note: our internal rep of wide char tokens is always little-endian.
+ *ResultBuf++ = (UcnVal & 0x000000FF);
+ *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
+ *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
+ *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
+ return;
+ }
+ if (CharByteWidth == 2) {
// Convert to UTF16.
if (UcnVal < (UTF32)0xFFFF) {
*ResultBuf++ = (UcnVal & 0x000000FF);
@@ -261,6 +277,9 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
*ResultBuf++ = (surrogate2 & 0x0000FF00) >> 8;
return;
}
+
+ assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
+
// Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
// The conversion below was inspired by:
// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
@@ -695,13 +714,18 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
- SourceLocation Loc, Preprocessor &PP) {
+ SourceLocation Loc, Preprocessor &PP,
+ tok::TokenKind kind) {
// At this point we know that the character matches the regex "L?'.*'".
HadError = false;
- // Determine if this is a wide character.
- IsWide = begin[0] == 'L';
- if (IsWide) ++begin;
+ Kind = kind;
+
+ // Determine if this is a wide or UTF character.
+ if (Kind == tok::wide_char_constant || Kind == tok::utf16_char_constant ||
+ Kind == tok::utf32_char_constant) {
+ ++begin;
+ }
// Skip over the entry quote.
assert(begin[0] == '\'' && "Invalid token lexed");
@@ -742,17 +766,17 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
ResultChar = utf32;
} else {
// Otherwise, this is a non-UCN escape character. Process it.
+ unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
ResultChar = ProcessCharEscape(begin, end, HadError,
FullSourceLoc(Loc,PP.getSourceManager()),
- IsWide,
- &PP.getDiagnostics(), PP.getTargetInfo());
+ CharWidth, &PP.getDiagnostics());
}
}
// If this is a multi-character constant (e.g. 'abc'), handle it. These are
// implementation defined (C99 6.4.4.4p10).
if (NumCharsSoFar) {
- if (IsWide) {
+ if (!isAscii()) {
// Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
LitVal = 0;
} else {
@@ -774,8 +798,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
if (NumCharsSoFar > 1) {
// Warn about discarding the top bits for multi-char wide-character
// constants (L'abcd').
- if (IsWide)
- PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);
+ if (!isAscii())
+ PP.Diag(Loc, diag::warn_extraneous_char_constant);
else if (NumCharsSoFar != 4)
PP.Diag(Loc, diag::ext_multichar_character_literal);
else
@@ -787,14 +811,15 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
// Transfer the value from APInt to uint64_t
Value = LitVal.getZExtValue();
- if (IsWide && PP.getLangOptions().ShortWChar && Value > 0xFFFF)
+ if (((isWide() && PP.getLangOptions().ShortWChar) || isUTF16()) &&
+ Value > 0xFFFF)
PP.Diag(Loc, diag::warn_ucn_escape_too_large);
// If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
// if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
// character constants are not sign extended in the this implementation:
// '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
- if (!IsWide && NumCharsSoFar == 1 && (Value & 128) &&
+ if (isAscii() && NumCharsSoFar == 1 && (Value & 128) &&
PP.getLangOptions().CharIsSigned)
Value = (signed char)Value;
}
@@ -839,8 +864,8 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
Preprocessor &PP, bool Complain)
: SM(PP.getSourceManager()), Features(PP.getLangOptions()),
Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0),
- MaxTokenLength(0), SizeBound(0), wchar_tByteWidth(0),
- ResultPtr(ResultBuf.data()), hadError(false), AnyWide(false), Pascal(false) {
+ MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
+ ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
init(StringToks, NumStringToks);
}
@@ -860,7 +885,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
MaxTokenLength = StringToks[0].getLength();
assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
SizeBound = StringToks[0].getLength()-2; // -2 for "".
- AnyWide = StringToks[0].is(tok::wide_string_literal);
+ Kind = StringToks[0].getKind();
hadError = false;
@@ -881,8 +906,18 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
if (StringToks[i].getLength() > MaxTokenLength)
MaxTokenLength = StringToks[i].getLength();
- // Remember if we see any wide strings.
- AnyWide |= StringToks[i].is(tok::wide_string_literal);
+ // Remember if we see any wide or utf-8/16/32 strings.
+ // Also check for illegal concatenations.
+ if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
+ if (isAscii()) {
+ Kind = StringToks[i].getKind();
+ } else {
+ if (Diags)
+ Diags->Report(FullSourceLoc(StringToks[i].getLocation(), SM),
+ diag::err_unsupported_string_concat);
+ hadError = true;
+ }
+ }
}
// Include space for the null terminator.
@@ -890,19 +925,14 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
// TODO: K&R warning: "traditional C rejects string constant concatenation"
- // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not
- // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true.
- wchar_tByteWidth = ~0U;
- if (AnyWide) {
- wchar_tByteWidth = Target.getWCharWidth();
- assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
- wchar_tByteWidth /= 8;
- }
+ // Get the width of char/wchar_t/char16_t/char32_t in bits, then convert to bytes.
+ CharByteWidth = getCharWidth(Kind, Target);
+ assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
+ CharByteWidth /= 8;
// The output buffer size needs to be large enough to hold wide characters.
// This is a worst-case assumption which basically corresponds to L"" "long".
- if (AnyWide)
- SizeBound *= wchar_tByteWidth;
+ SizeBound *= CharByteWidth;
// Size the temporary buffer to hold the result string data.
ResultBuf.resize(SizeBound);
@@ -927,18 +957,19 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
&StringInvalid);
if (StringInvalid) {
- hadError = 1;
+ hadError = true;
continue;
}
const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
- bool wide = false;
// TODO: Input character set mapping support.
// Skip L marker for wide strings.
- if (ThisTokBuf[0] == 'L') {
- wide = true;
+ if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
++ThisTokBuf;
+ // Skip 8 of u8 marker for utf8 strings.
+ if (ThisTokBuf[0] == '8')
+ ++ThisTokBuf;
}
assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
@@ -967,7 +998,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
// Copy the character span over.
unsigned Len = ThisTokBuf-InStart;
- if (!AnyWide) {
+ if (CharByteWidth == 1) {
memcpy(ResultPtr, InStart, Len);
ResultPtr += Len;
} else {
@@ -975,7 +1006,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
for (; Len; --Len, ++InStart) {
*ResultPtr++ = InStart[0];
// Add zeros at the end.
- for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+ for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
*ResultPtr++ = 0;
}
}
@@ -985,29 +1016,26 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
- wide, Diags, Features);
+ CharByteWidth, Diags, Features);
continue;
}
// Otherwise, this is a non-UCN escape character. Process it.
unsigned ResultChar =
ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
FullSourceLoc(StringToks[i].getLocation(), SM),
- AnyWide, Diags, Target);
+ CharByteWidth*8, Diags);
// Note: our internal rep of wide char tokens is always little-endian.
*ResultPtr++ = ResultChar & 0xFF;
- if (AnyWide) {
- for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
- *ResultPtr++ = ResultChar >> i*8;
- }
+ for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
+ *ResultPtr++ = ResultChar >> i*8;
}
}
if (Pascal) {
ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
- if (AnyWide)
- ResultBuf[0] /= wchar_tByteWidth;
+ ResultBuf[0] /= CharByteWidth;
// Verify that pascal strings aren't too large.
if (GetStringLength() > 256) {
@@ -1016,7 +1044,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
diag::err_pascal_string_too_long)
<< SourceRange(StringToks[0].getLocation(),
StringToks[NumStringToks-1].getLocation());
- hadError = 1;
+ hadError = true;
return;
}
} else if (Diags) {
@@ -1050,7 +1078,8 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
if (StringInvalid)
return 0;
- assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");
+ assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
+ SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
const char *SpellingStart = SpellingPtr;
@@ -1075,7 +1104,7 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
bool HadError = false;
ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
FullSourceLoc(Tok.getLocation(), SM),
- false, Diags, Target);
+ CharByteWidth*8, Diags);
assert(!HadError && "This method isn't valid on erroneous strings");
--ByteNo;
}
diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp
index 968c15e3c27..ccd0b705c8b 100644
--- a/clang/lib/Lex/MacroArgs.cpp
+++ b/clang/lib/Lex/MacroArgs.cpp
@@ -208,7 +208,13 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
// by 6.10.3.2p2.
if (Tok.is(tok::string_literal) || // "foo"
Tok.is(tok::wide_string_literal) || // L"foo"
- Tok.is(tok::char_constant)) { // 'x' and L'x'.
+ Tok.is(tok::utf8_string_literal) || // u8"foo"
+ Tok.is(tok::utf16_string_literal) || // u"foo"
+ Tok.is(tok::utf32_string_literal) || // U"foo"
+ Tok.is(tok::char_constant) || // 'x'
+ Tok.is(tok::wide_char_constant) || // L'x'.
+ Tok.is(tok::utf16_char_constant) || // u'x'.
+ Tok.is(tok::utf32_char_constant)) { // U'x'.
bool Invalid = false;
std::string TokStr = PP.getSpelling(Tok, &Invalid);
if (!Invalid) {
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 212ffeef1b1..383c6f5aa16 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -777,7 +777,7 @@ void Preprocessor::HandleLineDirective(Token &Tok) {
} else {
// Parse and validate the string, converting it into a unique ID.
StringLiteralParser Literal(&StrTok, 1, *this);
- assert(!Literal.AnyWide && "Didn't allow wide strings in");
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
if (Literal.hadError)
return DiscardUntilEndOfDirective();
if (Literal.Pascal) {
@@ -910,7 +910,7 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) {
} else {
// Parse and validate the string, converting it into a unique ID.
StringLiteralParser Literal(&StrTok, 1, *this);
- assert(!Literal.AnyWide && "Didn't allow wide strings in");
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
if (Literal.hadError)
return DiscardUntilEndOfDirective();
if (Literal.Pascal) {
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index 08e2705ef1b..25816923c80 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -236,7 +236,10 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
PP.LexNonComment(PeekTok);
return false;
}
- case tok::char_constant: { // 'x'
+ case tok::char_constant: // 'x'
+ case tok::wide_char_constant: // L'x'
+ case tok::utf16_char_constant: // u'x'
+ case tok::utf32_char_constant: { // U'x'
llvm::SmallString<32> CharBuffer;
bool CharInvalid = false;
StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
@@ -244,7 +247,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
return true;
CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(),
- PeekTok.getLocation(), PP);
+ PeekTok.getLocation(), PP, PeekTok.getKind());
if (Literal.hadError())
return true; // A diagnostic was already emitted.
@@ -255,6 +258,10 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
NumBits = TI.getIntWidth();
else if (Literal.isWide())
NumBits = TI.getWCharWidth();
+ else if (Literal.isUTF16())
+ NumBits = TI.getChar16Width();
+ else if (Literal.isUTF32())
+ NumBits = TI.getChar32Width();
else
NumBits = TI.getCharWidth();
@@ -262,8 +269,9 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
llvm::APSInt Val(NumBits);
// Set the value.
Val = Literal.getValue();
- // Set the signedness.
- Val.setIsUnsigned(!PP.getLangOptions().CharIsSigned);
+ // Set the signedness. UTF-16 and UTF-32 are always unsigned
+ if (!Literal.isUTF16() && !Literal.isUTF32())
+ Val.setIsUnsigned(!PP.getLangOptions().CharIsSigned);
if (Result.Val.getBitWidth() > Val.getBitWidth()) {
Result.Val = Val.extend(Result.Val.getBitWidth());
diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
index d94e2e8305f..1d0b5e4f2d0 100644
--- a/clang/lib/Lex/Pragma.cpp
+++ b/clang/lib/Lex/Pragma.cpp
@@ -444,7 +444,7 @@ void Preprocessor::HandlePragmaComment(Token &Tok) {
// Concatenate and parse the strings.
StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
- assert(!Literal.AnyWide && "Didn't allow wide strings in");
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
if (Literal.hadError)
return;
if (Literal.Pascal) {
@@ -520,7 +520,7 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) {
// Concatenate and parse the strings.
StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
- assert(!Literal.AnyWide && "Didn't allow wide strings in");
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
if (Literal.hadError)
return;
if (Literal.Pascal) {
@@ -902,7 +902,7 @@ public:
// Concatenate and parse the strings.
StringLiteralParser Literal(&StrToks[0], StrToks.size(), PP);
- assert(!Literal.AnyWide && "Didn't allow wide strings in");
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
if (Literal.hadError)
return;
if (Literal.Pascal) {
diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp
index 3e9e8550313..19baf80aad3 100644
--- a/clang/lib/Lex/TokenConcatenation.cpp
+++ b/clang/lib/Lex/TokenConcatenation.cpp
@@ -17,42 +17,39 @@
using namespace clang;
-/// StartsWithL - Return true if the spelling of this token starts with 'L'.
-bool TokenConcatenation::StartsWithL(const Token &Tok) const {
- if (!Tok.needsCleaning()) {
- SourceManager &SM = PP.getSourceManager();
- return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
- }
-
- if (Tok.getLength() < 256) {
- char Buffer[256];
- const char *TokPtr = Buffer;
- PP.getSpelling(Tok, TokPtr);
- return TokPtr[0] == 'L';
- }
-
- return PP.getSpelling(Tok)[0] == 'L';
-}
+/// IsIdentifierStringPrefix - Return true if the spelling of the token
+/// is literally 'L' or, in C++0x mode only, 'u', 'U', or 'u8'.
+bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
+ const LangOptions &LangOpts = PP.getLangOptions();
-/// IsIdentifierL - Return true if the spelling of this token is literally
-/// 'L'.
-bool TokenConcatenation::IsIdentifierL(const Token &Tok) const {
if (!Tok.needsCleaning()) {
- if (Tok.getLength() != 1)
+ if (Tok.getLength() != 1 && Tok.getLength() != 2)
return false;
SourceManager &SM = PP.getSourceManager();
- return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
+ const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
+ if (Tok.getLength() == 1)
+ return Ptr[0] == 'L' ||
+ (LangOpts.CPlusPlus0x && (Ptr[0] == 'u' || Ptr[0] == 'U'));
+ if (Tok.getLength() == 2)
+ return LangOpts.CPlusPlus0x && Ptr[0] == 'u' && Ptr[1] == '8';
}
if (Tok.getLength() < 256) {
char Buffer[256];
const char *TokPtr = Buffer;
- if (PP.getSpelling(Tok, TokPtr) != 1)
- return false;
- return TokPtr[0] == 'L';
+ unsigned length = PP.getSpelling(Tok, TokPtr);
+ if (length == 1)
+ return TokPtr[0] == 'L' ||
+ (LangOpts.CPlusPlus0x && (TokPtr[0] == 'u' || TokPtr[0] == 'U'));
+ if (length == 2)
+ return LangOpts.CPlusPlus0x && TokPtr[0] == 'u' && TokPtr[1] == '8';
+ return false;
}
- return PP.getSpelling(Tok) == "L";
+ std::string TokStr = PP.getSpelling(Tok);
+ return TokStr == "L" || (LangOpts.CPlusPlus0x && (TokStr == "u8" ||
+ TokStr == "u" ||
+ TokStr == "U"));
}
TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
@@ -179,24 +176,19 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
if (Tok.is(tok::numeric_constant))
return GetFirstChar(PP, Tok) != '.';
- if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) /* ||
- Tok.is(tok::wide_char_literal)*/)
+ if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) ||
+ Tok.is(tok::utf8_string_literal) || Tok.is(tok::utf16_string_literal) ||
+ Tok.is(tok::utf32_string_literal) || Tok.is(tok::wide_char_constant) ||
+ Tok.is(tok::utf16_char_constant) || Tok.is(tok::utf32_char_constant))
return true;
// If this isn't identifier + string, we're done.
if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
return false;
- // FIXME: need a wide_char_constant!
-
- // If the string was a wide string L"foo" or wide char L'f', it would
- // concat with the previous identifier into fooL"bar". Avoid this.
- if (StartsWithL(Tok))
- return true;
-
// Otherwise, this is a narrow character or string. If the *identifier*
- // is a literal 'L', avoid pasting L "foo" -> L"foo".
- return IsIdentifierL(PrevTok);
+ // is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo".
+ return IsIdentifierStringPrefix(PrevTok);
case tok::numeric_constant:
return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
diff --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp
index f5c69981ca3..e16448080c7 100644
--- a/clang/lib/Parse/ParseCXXInlineMethods.cpp
+++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp
@@ -553,6 +553,9 @@ bool Parser::ConsumeAndStoreUntil(tok::TokenKind T1, tok::TokenKind T2,
case tok::string_literal:
case tok::wide_string_literal:
+ case tok::utf8_string_literal:
+ case tok::utf16_string_literal:
+ case tok::utf32_string_literal:
Toks.push_back(Tok);
ConsumeStringToken();
break;
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 869d9de47e1..3cd1f3987aa 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -769,6 +769,9 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
break;
}
case tok::char_constant: // constant: character-constant
+ case tok::wide_char_constant:
+ case tok::utf16_char_constant:
+ case tok::utf32_char_constant:
Res = Actions.ActOnCharacterConstant(Tok);
ConsumeToken();
break;
@@ -780,6 +783,9 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
break;
case tok::string_literal: // primary-expression: string-literal
case tok::wide_string_literal:
+ case tok::utf8_string_literal:
+ case tok::utf16_string_literal:
+ case tok::utf32_string_literal:
Res = ParseStringLiteralExpression();
break;
case tok::kw__Generic: // primary-expression: generic-selection [C1X 6.5.1]
diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp
index 2ba0fc673f6..3f245a376cc 100644
--- a/clang/lib/Parse/ParseTentative.cpp
+++ b/clang/lib/Parse/ParseTentative.cpp
@@ -605,8 +605,14 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) {
// Obviously starts an expression.
case tok::numeric_constant:
case tok::char_constant:
+ case tok::wide_char_constant:
+ case tok::utf16_char_constant:
+ case tok::utf32_char_constant:
case tok::string_literal:
case tok::wide_string_literal:
+ case tok::utf8_string_literal:
+ case tok::utf16_string_literal:
+ case tok::utf32_string_literal:
case tok::l_square:
case tok::l_paren:
case tok::amp:
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index 9dc867c92bb..5bb4165fbb5 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -298,6 +298,9 @@ bool Parser::SkipUntil(const tok::TokenKind *Toks, unsigned NumToks,
case tok::string_literal:
case tok::wide_string_literal:
+ case tok::utf8_string_literal:
+ case tok::utf16_string_literal:
+ case tok::utf32_string_literal:
ConsumeStringToken();
break;
diff --git a/clang/lib/Rewrite/HTMLRewrite.cpp b/clang/lib/Rewrite/HTMLRewrite.cpp
index 27f383f46ce..ad2491c8fbd 100644
--- a/clang/lib/Rewrite/HTMLRewrite.cpp
+++ b/clang/lib/Rewrite/HTMLRewrite.cpp
@@ -397,8 +397,15 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
"<span class='comment'>", "</span>");
break;
+ case tok::utf8_string_literal:
+ // Chop off the u part of u8 prefix
+ ++TokOffs;
+ --TokLen;
+ // FALL THROUGH to chop the 8
case tok::wide_string_literal:
- // Chop off the L prefix
+ case tok::utf16_string_literal:
+ case tok::utf32_string_literal:
+ // Chop off the L, u, U or 8 prefix
++TokOffs;
--TokLen;
// FALL THROUGH.
diff --git a/clang/lib/Rewrite/RewriteObjC.cpp b/clang/lib/Rewrite/RewriteObjC.cpp
index 585b43cf548..a8fefb01656 100644
--- a/clang/lib/Rewrite/RewriteObjC.cpp
+++ b/clang/lib/Rewrite/RewriteObjC.cpp
@@ -2111,8 +2111,8 @@ Stmt *RewriteObjC::RewriteAtEncode(ObjCEncodeExpr *Exp) {
std::string StrEncoding;
Context->getObjCEncodingForType(Exp->getEncodedType(), StrEncoding);
Expr *Replacement = StringLiteral::Create(*Context, StrEncoding,
- false, false, StrType,
- SourceLocation());
+ StringLiteral::Ascii, false,
+ StrType, SourceLocation());
ReplaceStmt(Exp, Replacement);
// Replace this subexpr in the parent.
@@ -2129,8 +2129,8 @@ Stmt *RewriteObjC::RewriteAtSelector(ObjCSelectorExpr *Exp) {
QualType argType = Context->getPointerType(Context->CharTy);
SelExprs.push_back(StringLiteral::Create(*Context,
Exp->getSelector().getAsString(),
- false, false, argType,
- SourceLocation()));
+ StringLiteral::Ascii, false,
+ argType, SourceLocation()));
CallExpr *SelExp = SynthesizeCallToFunctionDecl(SelGetUidFunctionDecl,
&SelExprs[0], SelExprs.size());
ReplaceStmt(Exp, SelExp);
@@ -2797,7 +2797,8 @@ Stmt *RewriteObjC::SynthMessageExpr(ObjCMessageExpr *Exp,
QualType argType = Context->getPointerType(Context->CharTy);
ClsExprs.push_back(StringLiteral::Create(*Context,
ClassDecl->getIdentifier()->getName(),
- false, false, argType, SourceLocation()));
+ StringLiteral::Ascii, false,
+ argType, SourceLocation()));
CallExpr *Cls = SynthesizeCallToFunctionDecl(GetMetaClassFunctionDecl,
&ClsExprs[0],
ClsExprs.size(),
@@ -2875,7 +2876,7 @@ Stmt *RewriteObjC::SynthMessageExpr(ObjCMessageExpr *Exp,
IdentifierInfo *clsName = Class->getIdentifier();
ClsExprs.push_back(StringLiteral::Create(*Context,
clsName->getName(),
- false, false,
+ StringLiteral::Ascii, false,
argType, SourceLocation()));
CallExpr *Cls = SynthesizeCallToFunctionDecl(GetClassFunctionDecl,
&ClsExprs[0],
@@ -2906,7 +2907,8 @@ Stmt *RewriteObjC::SynthMessageExpr(ObjCMessageExpr *Exp,
QualType argType = Context->getPointerType(Context->CharTy);
ClsExprs.push_back(StringLiteral::Create(*Context,
ClassDecl->getIdentifier()->getName(),
- false, false, argType, SourceLocation()));
+ StringLiteral::Ascii, false, argType,
+ SourceLocation()));
CallExpr *Cls = SynthesizeCallToFunctionDecl(GetClassFunctionDecl,
&ClsExprs[0],
ClsExprs.size(),
@@ -2987,7 +2989,8 @@ Stmt *RewriteObjC::SynthMessageExpr(ObjCMessageExpr *Exp,
QualType argType = Context->getPointerType(Context->CharTy);
SelExprs.push_back(StringLiteral::Create(*Context,
Exp->getSelector().getAsString(),
- false, false, argType, SourceLocation()));
+ StringLiteral::Ascii, false,
+ argType, SourceLocation()));
CallExpr *SelExp = SynthesizeCallToFunctionDecl(SelGetUidFunctionDecl,
&SelExprs[0], SelExprs.size(),
StartLoc,
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 2e4198b5202..28085ef6eab 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -605,7 +605,7 @@ bool Sema::CheckObjCString(Expr *Arg) {
Arg = Arg->IgnoreParenCasts();
StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
- if (!Literal || Literal->isWide()) {
+ if (!Literal || !Literal->isAscii()) {
Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant)
<< Arg->getSourceRange();
return true;
@@ -1805,7 +1805,7 @@ void Sema::CheckFormatString(const StringLiteral *FExpr,
bool isPrintf) {
// CHECK: is the format string a wide literal?
- if (FExpr->isWide()) {
+ if (!FExpr->isAscii()) {
Diag(FExpr->getLocStart(),
diag::warn_format_string_is_wide_literal)
<< OrigFormatExpr->getSourceRange();
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 9e20bc90184..2cbd83a8fd8 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -712,7 +712,7 @@ static void handleWeakRefAttr(Sema &S, Decl *D, const AttributeList &Attr) {
Arg = Arg->IgnoreParenCasts();
StringLiteral *Str = dyn_cast<StringLiteral>(Arg);
- if (Str == 0 || Str->isWide()) {
+ if (!Str || !Str->isAscii()) {
S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_not_string)
<< "weakref" << 1;
return;
@@ -737,7 +737,7 @@ static void handleAliasAttr(Sema &S, Decl *D, const AttributeList &Attr) {
Arg = Arg->IgnoreParenCasts();
StringLiteral *Str = dyn_cast<StringLiteral>(Arg);
- if (Str == 0 || Str->isWide()) {
+ if (!Str || !Str->isAscii()) {
S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_not_string)
<< "alias" << 1;
return;
@@ -1162,7 +1162,7 @@ static void handleVisibilityAttr(Sema &S, Decl *D, const AttributeList &Attr) {
Arg = Arg->IgnoreParenCasts();
StringLiteral *Str = dyn_cast<StringLiteral>(Arg);
- if (Str == 0 || Str->isWide()) {
+ if (!Str || !Str->isAscii()) {
S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_not_string)
<< "visibility" << 1;
return;
@@ -2464,7 +2464,7 @@ static void handleCallConvAttr(Sema &S, Decl *D, const AttributeList &Attr) {
case AttributeList::AT_pcs: {
Expr *Arg = Attr.getArg(0);
StringLiteral *Str = dyn_cast<StringLiteral>(Arg);
- if (Str == 0 || Str->isWide()) {
+ if (!Str || !Str->isAscii()) {
S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_not_string)
<< "pcs" << 1;
Attr.setInvalid();
@@ -2519,7 +2519,7 @@ bool Sema::CheckCallingConvAttr(const AttributeList &attr, CallingConv &CC) {
case AttributeList::AT_pcs: {
Expr *Arg = attr.getArg(0);
StringLiteral *Str = dyn_cast<StringLiteral>(Arg);
- if (Str == 0 || Str->isWide()) {
+ if (!Str || !Str->isAscii()) {
Diag(attr.getLoc(), diag::err_attribute_argument_n_not_string)
<< "pcs" << 1;
attr.setInvalid();
@@ -2868,7 +2868,7 @@ static void handleUuidAttr(Sema &S, Decl *D, const AttributeList &Attr) {
Expr *Arg = Attr.getArg(0);
StringLiteral *Str = dyn_cast<StringLiteral>(Arg);
- if (Str == 0 || Str->isWide()) {
+ if (!Str || !Str->isAscii()) {
S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_not_string)
<< "uuid" << 1;
return;
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 4a9b4bcfdf0..dedf7b0d778 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -997,11 +997,25 @@ Sema::ActOnStringLiteral(const Token *StringToks, unsigned NumStringToks) {
StringTokLocs.push_back(StringToks[i].getLocation());
QualType StrTy = Context.CharTy;
- if (Literal.AnyWide)
+ if (Literal.isWide())
StrTy = Context.getWCharType();
+ else if (Literal.isUTF16())
+ StrTy = Context.Char16Ty;
+ else if (Literal.isUTF32())
+ StrTy = Context.Char32Ty;
else if (Literal.Pascal)
StrTy = Context.UnsignedCharTy;
+ StringLiteral::StringKind Kind = StringLiteral::Ascii;
+ if (Literal.isWide())
+ Kind = StringLiteral::Wide;
+ else if (Literal.isUTF8())
+ Kind = StringLiteral::UTF8;
+ else if (Literal.isUTF16())
+ Kind = StringLiteral::UTF16;
+ else if (Literal.isUTF32())
+ Kind = StringLiteral::UTF32;
+
// A C++ string literal has a const-qualified element type (C++ 2.13.4p1).
if (getLangOptions().CPlusPlus || getLangOptions().ConstStrings)
StrTy.addConst();
@@ -1015,7 +1029,7 @@ Sema::ActOnStringLiteral(const Token *StringToks, unsigned NumStringToks) {
// Pass &StringTokLocs[0], StringTokLocs.size() to factory!
return Owned(StringLiteral::Create(Context, Literal.GetString(),
- Literal.AnyWide, Literal.Pascal, StrTy,
+ Kind, Literal.Pascal, StrTy,
&StringTokLocs[0],
StringTokLocs.size()));
}
@@ -2412,7 +2426,7 @@ ExprResult Sema::ActOnCharacterConstant(const Token &Tok) {
return ExprError();
CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(), Tok.getLocation(),
- PP);
+ PP, Tok.getKind());
if (Literal.hadError())
return ExprError();
@@ -2421,14 +2435,25 @@ ExprResult Sema::ActOnCharacterConstant(const Token &Tok) {
Ty = Context.IntTy; // 'x' and L'x' -> int in C.
else if (Literal.isWide())
Ty = Context.WCharTy; // L'x' -> wchar_t in C++.
+ else if (Literal.isUTF16())
+ Ty = Context.Char16Ty; // u'x' -> char16_t in C++0x.
+ else if (Literal.isUTF32())
+ Ty = Context.Char32Ty; // U'x' -> char32_t in C++0x.
else if (Literal.isMultiChar())
Ty = Context.IntTy; // 'wxyz' -> int in C++.
else
Ty = Context.CharTy; // 'x' -> char in C++
- return Owned(new (Context) CharacterLiteral(Literal.getValue(),
- Literal.isWide(),
- Ty, Tok.getLocation()));
+ CharacterLiteral::CharacterKind Kind = CharacterLiteral::Ascii;
+ if (Literal.isWide())
+ Kind = CharacterLiteral::Wide;
+ else if (Literal.isUTF16())
+ Kind = CharacterLiteral::UTF16;
+ else if (Literal.isUTF32())
+ Kind = CharacterLiteral::UTF32;
+
+ return Owned(new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty,
+ Tok.getLocation()));
}
ExprResult Sema::ActOnNumericConstant(const Token &Tok) {
@@ -8624,7 +8649,7 @@ static void MakeObjCStringLiteralFixItHint(Sema& SemaRef, QualType DstType,
// Strip off any parens and casts.
StringLiteral *SL = dyn_cast<StringLiteral>(SrcExpr->IgnoreParenCasts());
- if (!SL || SL->isWide())
+ if (!SL || !SL->isAscii())
return;
Hint = FixItHint::CreateInsertion(SL->getLocStart(), "@");
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 94a5bafa7c7..1812510942e 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -2041,12 +2041,20 @@ Sema::IsStringLiteralToNonConstPointerConversion(Expr *From, QualType ToType) {
= ToPtrType->getPointeeType()->getAs<BuiltinType>()) {
// This conversion is considered only when there is an
// explicit appropriate pointer target type (C++ 4.2p2).
- if (!ToPtrType->getPointeeType().hasQualifiers() &&
- ((StrLit->isWide() && ToPointeeType->isWideCharType()) ||
- (!StrLit->isWide() &&
- (ToPointeeType->getKind() == BuiltinType::Char_U ||
- ToPointeeType->getKind() == BuiltinType::Char_S))))
- return true;
+ if (!ToPtrType->getPointeeType().hasQualifiers()) {
+ switch (StrLit->getKind()) {
+ case StringLiteral::UTF8:
+ case StringLiteral::UTF16:
+ case StringLiteral::UTF32:
+ // We don't allow UTF literals to be implicitly converted
+ break;
+ case StringLiteral::Ascii:
+ return (ToPointeeType->getKind() == BuiltinType::Char_U ||
+ ToPointeeType->getKind() == BuiltinType::Char_S);
+ case StringLiteral::Wide:
+ return ToPointeeType->isWideCharType();
+ }
+ }
}
return false;
diff --git a/clang/lib/Sema/SemaExprObjC.cpp b/clang/lib/Sema/SemaExprObjC.cpp
index fccea7c0e1b..e88726b6d2a 100644
--- a/clang/lib/Sema/SemaExprObjC.cpp
+++ b/clang/lib/Sema/SemaExprObjC.cpp
@@ -47,8 +47,8 @@ ExprResult Sema::ParseObjCStringLiteral(SourceLocation *AtLocs,
for (unsigned i = 0; i != NumStrings; ++i) {
S = Strings[i];
- // ObjC strings can't be wide.
- if (S->isWide()) {
+ // ObjC strings can't be wide or UTF.
+ if (!S->isAscii()) {
Diag(S->getLocStart(), diag::err_cfstring_literal_not_string_constant)
<< S->getSourceRange();
return true;
@@ -64,7 +64,7 @@ ExprResult Sema::ParseObjCStringLiteral(SourceLocation *AtLocs,
// Create the aggregate string with the appropriate content and location
// information.
S = StringLiteral::Create(Context, StrBuf,
- /*Wide=*/false, /*Pascal=*/false,
+ StringLiteral::Ascii, /*Pascal=*/false,
Context.getPointerType(Context.CharTy),
&StrLocs[0], StrLocs.size());
}
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index adf88c62ccd..c406ad98405 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -49,20 +49,30 @@ static Expr *IsStringInit(Expr *Init, const ArrayType *AT,
if (SL == 0) return 0;
QualType ElemTy = Context.getCanonicalType(AT->getElementType());
- // char array can be initialized with a narrow string.
- // Only allow char x[] = "foo"; not char x[] = L"foo";
- if (!SL->isWide())
+
+ switch (SL->getKind()) {
+ case StringLiteral::Ascii:
+ case StringLiteral::UTF8:
+ // char array can be initialized with a narrow string.
+ // Only allow char x[] = "foo"; not char x[] = L"foo";
return ElemTy->isCharType() ? Init : 0;
+ case StringLiteral::UTF16:
+ return ElemTy->isChar16Type() ? Init : 0;
+ case StringLiteral::UTF32:
+ return ElemTy->isChar32Type() ? Init : 0;
+ case StringLiteral::Wide:
+ // wchar_t array can be initialized with a wide string: C99 6.7.8p15 (with
+ // correction from DR343): "An array with element type compatible with a
+ // qualified or unqualified version of wchar_t may be initialized by a wide
+ // string literal, optionally enclosed in braces."
+ if (Context.typesAreCompatible(Context.getWCharType(),
+ ElemTy.getUnqualifiedType()))
+ return Init;
- // wchar_t array can be initialized with a wide string: C99 6.7.8p15 (with
- // correction from DR343): "An array with element type compatible with a
- // qualified or unqualified version of wchar_t may be initialized by a wide
- // string literal, optionally enclosed in braces."
- if (Context.typesAreCompatible(Context.getWCharType(),
- ElemTy.getUnqualifiedType()))
- return Init;
+ return 0;
+ }
- return 0;
+ llvm_unreachable("missed a StringLiteral kind?");
}
static Expr *IsStringInit(Expr *init, QualType declType, ASTContext &Context) {
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 0fd3f039823..56161ed9b45 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -1952,13 +1952,13 @@ StmtResult Sema::ActOnAsmStmt(SourceLocation AsmLoc, bool IsSimple,
SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos;
// The parser verifies that there is a string literal here.
- if (AsmString->isWide())
+ if (!AsmString->isAscii())
return StmtError(Diag(AsmString->getLocStart(),diag::err_asm_wide_character)
<< AsmString->getSourceRange());
for (unsigned i = 0; i != NumOutputs; i++) {
StringLiteral *Literal = Constraints[i];
- if (Literal->isWide())
+ if (!Literal->isAscii())
return StmtError(Diag(Literal->getLocStart(),diag::err_asm_wide_character)
<< Literal->getSourceRange());
@@ -1987,7 +1987,7 @@ StmtResult Sema::ActOnAsmStmt(SourceLocation AsmLoc, bool IsSimple,
for (unsigned i = NumOutputs, e = NumOutputs + NumInputs; i != e; i++) {
StringLiteral *Literal = Constraints[i];
- if (Literal->isWide())
+ if (!Literal->isAscii())
return StmtError(Diag(Literal->getLocStart(),diag::err_asm_wide_character)
<< Literal->getSourceRange());
@@ -2034,7 +2034,7 @@ StmtResult Sema::ActOnAsmStmt(SourceLocation AsmLoc, bool IsSimple,
// Check that the clobbers are valid.
for (unsigned i = 0; i != NumClobbers; i++) {
StringLiteral *Literal = Clobbers[i];
- if (Literal->isWide())
+ if (!Literal->isAscii())
return StmtError(Diag(Literal->getLocStart(),diag::err_asm_wide_character)
<< Literal->getSourceRange());
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index ceab7e93ac6..006017f5a47 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -4131,10 +4131,22 @@ Sema::BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg,
assert(Arg.getKind() == TemplateArgument::Integral &&
"Operation is only valid for integral template arguments");
QualType T = Arg.getIntegralType();
- if (T->isCharType() || T->isWideCharType())
+ if (T->isAnyCharacterType()) {
+ CharacterLiteral::CharacterKind Kind;
+ if (T->isWideCharType())
+ Kind = CharacterLiteral::Wide;
+ else if (T->isChar16Type())
+ Kind = CharacterLiteral::UTF16;
+ else if (T->isChar32Type())
+ Kind = CharacterLiteral::UTF32;
+ else
+ Kind = CharacterLiteral::Ascii;
+
return Owned(new (Context) CharacterLiteral(
- Arg.getAsIntegral()->getZExtValue(),
- T->isWideCharType(), T, Loc));
+ Arg.getAsIntegral()->getZExtValue(),
+ Kind, T, Loc));
+ }
+
if (T->isBooleanType())
return Owned(new (Context) CXXBoolLiteralExpr(
Arg.getAsIntegral()->getBoolValue(),
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 3559ccecd32..7a3c589c211 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -371,7 +371,7 @@ void ASTStmtReader::VisitStringLiteral(StringLiteral *E) {
assert(Record[Idx] == E->getNumConcatenated() &&
"Wrong number of concatenated tokens!");
++Idx;
- E->IsWide = Record[Idx++];
+ E->Kind = static_cast<StringLiteral::StringKind>(Record[Idx++]);
E->IsPascal = Record[Idx++];
// Read string data
@@ -388,7 +388,7 @@ void ASTStmtReader::VisitCharacterLiteral(CharacterLiteral *E) {
VisitExpr(E);
E->setValue(Record[Idx++]);
E->setLocation(ReadSourceLocation(Record, Idx));
- E->setWide(Record[Idx++]);
+ E->setKind(static_cast<CharacterLiteral::CharacterKind>(Record[Idx++]));
}
void ASTStmtReader::VisitParenExpr(ParenExpr *E) {
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 0b5bc1fcfe2..f0636a1aa1c 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -324,7 +324,7 @@ void ASTStmtWriter::VisitStringLiteral(StringLiteral *E) {
VisitExpr(E);
Record.push_back(E->getByteLength());
Record.push_back(E->getNumConcatenated());
- Record.push_back(E->isWide());
+ Record.push_back(E->getKind());
Record.push_back(E->isPascal());
// FIXME: String data should be stored as a blob at the end of the
// StringLiteral. However, we can't do so now because we have no
@@ -340,7 +340,7 @@ void ASTStmtWriter::VisitCharacterLiteral(CharacterLiteral *E) {
VisitExpr(E);
Record.push_back(E->getValue());
Writer.AddSourceLocation(E->getLocation(), Record);
- Record.push_back(E->isWide());
+ Record.push_back(E->getKind());
AbbrevToUse = Writer.getCharacterLiteralAbbrev();
diff --git a/clang/test/CXX/lex/lex.literal/lex.ccon/p1.cpp b/clang/test/CXX/lex/lex.literal/lex.ccon/p1.cpp
index 7b65f7ee832..6df035d63f1 100644
--- a/clang/test/CXX/lex/lex.literal/lex.ccon/p1.cpp
+++ b/clang/test/CXX/lex/lex.literal/lex.ccon/p1.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -std=c++0x -fsyntax-only -verify %s
+// Runs in c++0x mode so that char16_t and char32_t are available.
// Check types of char literals
extern char a;
@@ -7,3 +8,7 @@ extern int b;
extern __typeof('asdf') b;
extern wchar_t c;
extern __typeof(L'a') c;
+extern char16_t d;
+extern __typeof(u'a') d;
+extern char32_t e;
+extern __typeof(U'a') e;
diff --git a/clang/test/CodeGen/char-literal.c b/clang/test/CodeGen/char-literal.c
index 322041c0049..014f6eb4fb0 100644
--- a/clang/test/CodeGen/char-literal.c
+++ b/clang/test/CodeGen/char-literal.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
-// Runs in c++ mode so that wchar_t is available.
+// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// Runs in c++0x mode so that wchar_t, char16_t, and char32_t are available.
int main() {
// CHECK: store i8 97
@@ -16,6 +16,20 @@ int main() {
// CHECK: store i32 98
wchar_t wb = L'ab';
+ // CHECK: store i16 97
+ char16_t ua = u'a';
+
+ // Should pick second character.
+ // CHECK: store i16 98
+ char16_t ub = u'ab';
+
+ // CHECK: store i32 97
+ char32_t Ua = U'a';
+
+ // Should pick second character.
+ // CHECK: store i32 98
+ char32_t Ub = U'ab';
+
// Should pick last character and store its lowest byte.
// This does not match gcc, which takes the last character, converts it to
// utf8, and then picks the second-lowest byte of that (they probably store
@@ -26,10 +40,36 @@ int main() {
// CHECK: store i32 61451
wchar_t wc = L'\uF00B';
+ // -4085 == 0xf00b
+ // CHECK: store i16 -4085
+ char16_t uc = u'\uF00B';
+
+ // CHECK: store i32 61451
+ char32_t Uc = U'\uF00B';
+
// CHECK: store i32 1110027
wchar_t wd = L'\U0010F00B';
+ // Should take lower word of the 4byte UCN sequence. This does not match
+ // gcc. I don't understand what gcc does (it looks like it converts to utf16,
+ // then takes the second (!) utf16 word, swaps the lower two nibbles, and
+ // stores that?).
+ // CHECK: store i16 -4085
+ char16_t ud = u'\U0010F00B'; // has utf16 encoding dbfc dc0b
+
+ // CHECK: store i32 1110027
+ char32_t Ud = U'\U0010F00B';
+
// Should pick second character.
// CHECK: store i32 1110027
wchar_t we = L'\u1234\U0010F00B';
+
+ // Should pick second character.
+ // CHECK: store i16 -4085
+ char16_t ue = u'\u1234\U0010F00B';
+
+ // Should pick second character.
+ // CHECK: store i32 1110027
+ char32_t Ue = U'\u1234\U0010F00B';
+
}
diff --git a/clang/test/CodeGen/string-literal.c b/clang/test/CodeGen/string-literal.c
index cc6c0943d95..6d14330a0b9 100644
--- a/clang/test/CodeGen/string-literal.c
+++ b/clang/test/CodeGen/string-literal.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// Runs in c++0x mode so that wchar_t, char16_t, and char32_t are available.
int main() {
// CHECK: internal unnamed_addr constant [10 x i8] c"abc\00\00\00\00\00\00\00", align 1
@@ -9,8 +10,24 @@ int main() {
char b[10] = "\u1120\u0220\U00102030";
// CHECK: private unnamed_addr constant [12 x i8] c"A\00\00\00B\00\00\00\00\00\00\00", align 1
- void *foo = L"AB";
+ const wchar_t *foo = L"AB";
// CHECK: private unnamed_addr constant [12 x i8] c"4\12\00\00\0B\F0\10\00\00\00\00\00", align 1
- void *bar = L"\u1234\U0010F00B";
+ const wchar_t *bar = L"\u1234\U0010F00B";
+
+ // CHECK: private unnamed_addr constant [12 x i8] c"C\00\00\00D\00\00\00\00\00\00\00", align 1
+ const char32_t *c = U"CD";
+
+ // CHECK: private unnamed_addr constant [12 x i8] c"5\12\00\00\0C\F0\10\00\00\00\00\00", align 1
+ const char32_t *d = U"\u1235\U0010F00C";
+
+ // CHECK: private unnamed_addr constant [6 x i8] c"E\00F\00\00\00", align 1
+ const char16_t *e = u"EF";
+
+ // This should convert to utf16.
+ // CHECK: private unnamed_addr constant [10 x i8] c" \11 \02\C8\DB0\DC\00\00", align 1
+ const char16_t *f = u"\u1120\u0220\U00102030";
+
+ // CHECK: private unnamed_addr constant [4 x i8] c"def\00", align 1
+ const char *g = u8"def";
}
diff --git a/clang/test/Lexer/wchar.c b/clang/test/Lexer/wchar.c
index ac82c1f73b4..648a38ef3f9 100644
--- a/clang/test/Lexer/wchar.c
+++ b/clang/test/Lexer/wchar.c
@@ -5,8 +5,8 @@ void f() {
(void)L'\U00010000'; // expected-warning {{character unicode escape sequence too long for its type}}
- (void)L'ab'; // expected-warning {{extraneous characters in wide character constant ignored}}
+ (void)L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
- (void)L'a\u1000'; // expected-warning {{extraneous characters in wide character constant ignored}}
+ (void)L'a\u1000'; // expected-warning {{extraneous characters in character constant ignored}}
}
diff --git a/clang/test/Parser/char-literal-printing.c b/clang/test/Parser/char-literal-printing.c
index 5843e5f4015..a0cafd64410 100644
--- a/clang/test/Parser/char-literal-printing.c
+++ b/clang/test/Parser/char-literal-printing.c
@@ -1,6 +1,5 @@
-// RUN: %clang_cc1 -ast-print %s
-
-#include <stddef.h>
+// RUN: %clang_cc1 -x c++ -std=c++0x -ast-print %s
+// Runs in c++0x mode so that wchar_t, char16_t, and char32_t are available.
char test1(void) { return '\\'; }
wchar_t test2(void) { return L'\\'; }
@@ -29,3 +28,35 @@ char test23(void) { return '\x3'; }
wchar_t test24(void) { return L'\x3'; }
wchar_t test25(void) { return L'\x333'; }
+
+char16_t test26(void) { return u'\\'; }
+char16_t test27(void) { return u'\''; }
+char16_t test28(void) { return u'\a'; }
+char16_t test29(void) { return u'\b'; }
+char16_t test30(void) { return u'\e'; }
+char16_t test31(void) { return u'\f'; }
+char16_t test32(void) { return u'\n'; }
+char16_t test33(void) { return u'\r'; }
+char16_t test34(void) { return u'\t'; }
+char16_t test35(void) { return u'\v'; }
+
+char16_t test36(void) { return u'c'; }
+char16_t test37(void) { return u'\x3'; }
+
+char16_t test38(void) { return u'\x333'; }
+
+char32_t test39(void) { return U'\\'; }
+char32_t test40(void) { return U'\''; }
+char32_t test41(void) { return U'\a'; }
+char32_t test42(void) { return U'\b'; }
+char32_t test43(void) { return U'\e'; }
+char32_t test44(void) { return U'\f'; }
+char32_t test45(void) { return U'\n'; }
+char32_t test46(void) { return U'\r'; }
+char32_t test47(void) { return U'\t'; }
+char32_t test48(void) { return U'\v'; }
+
+char32_t test49(void) { return U'c'; }
+char32_t test50(void) { return U'\x3'; }
+
+char32_t test51(void) { return U'\x333'; }
diff --git a/clang/test/SemaCXX/type-convert-construct.cpp b/clang/test/SemaCXX/type-convert-construct.cpp
index 479af21476b..a367633e485 100644
--- a/clang/test/SemaCXX/type-convert-construct.cpp
+++ b/clang/test/SemaCXX/type-convert-construct.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -std=gnu++0x -fsyntax-only -verify %s
+// Runs in c++0x mode so that char16_t and char32_t are available.
void f() {
float v1 = float(1);
@@ -14,4 +15,8 @@ void f() {
str = "a string"; // expected-warning{{conversion from string literal to 'char *' is deprecated}}
wchar_t *wstr;
wstr = L"a wide string"; // expected-warning{{conversion from string literal to 'wchar_t *' is deprecated}}
+ char16_t *ustr;
+ ustr = u"a UTF-16 string"; // expected-error {{assigning to 'char16_t *' from incompatible type 'const char16_t [16]'}}
+ char32_t *Ustr;
+ Ustr = U"a UTF-32 string"; // expected-error {{assigning to 'char32_t *' from incompatible type 'const char32_t [16]'}}
}
OpenPOWER on IntegriCloud