summaryrefslogtreecommitdiffstats
path: root/clang
diff options
context:
space:
mode:
Diffstat (limited to 'clang')
-rw-r--r--clang/include/clang/AST/ASTContext.h1
-rw-r--r--clang/include/clang/AST/BuiltinTypes.def3
-rw-r--r--clang/include/clang/AST/Type.h1
-rw-r--r--clang/include/clang/Basic/DiagnosticSemaKinds.td10
-rw-r--r--clang/include/clang/Basic/LangOptions.def1
-rw-r--r--clang/include/clang/Basic/Specifiers.h1
-rw-r--r--clang/include/clang/Basic/TokenKinds.def4
-rw-r--r--clang/include/clang/Driver/Options.td4
-rw-r--r--clang/include/clang/Sema/DeclSpec.h1
-rw-r--r--clang/include/clang/Sema/Initialization.h6
-rw-r--r--clang/include/clang/Serialization/ASTBitCodes.h3
-rw-r--r--clang/lib/AST/ASTContext.cpp6
-rw-r--r--clang/lib/AST/ExprConstant.cpp1
-rw-r--r--clang/lib/AST/ItaniumMangle.cpp3
-rw-r--r--clang/lib/AST/MicrosoftMangle.cpp1
-rw-r--r--clang/lib/AST/NSAPI.cpp1
-rw-r--r--clang/lib/AST/Type.cpp10
-rw-r--r--clang/lib/AST/TypeLoc.cpp2
-rw-r--r--clang/lib/Analysis/PrintfFormatString.cpp1
-rw-r--r--clang/lib/Basic/IdentifierTable.cpp16
-rw-r--r--clang/lib/CodeGen/CGDebugInfo.cpp1
-rw-r--r--clang/lib/CodeGen/CodeGenTypes.cpp1
-rw-r--r--clang/lib/CodeGen/ItaniumCXXABI.cpp4
-rw-r--r--clang/lib/Driver/ToolChains/Clang.cpp3
-rw-r--r--clang/lib/Format/FormatToken.cpp1
-rw-r--r--clang/lib/Frontend/CompilerInvocation.cpp1
-rw-r--r--clang/lib/Frontend/InitPreprocessor.cpp6
-rw-r--r--clang/lib/Index/USRGeneration.cpp2
-rw-r--r--clang/lib/Lex/PPExpressions.cpp2
-rw-r--r--clang/lib/Parse/ParseDecl.cpp7
-rw-r--r--clang/lib/Parse/ParseExpr.cpp1
-rw-r--r--clang/lib/Parse/ParseExprCXX.cpp3
-rw-r--r--clang/lib/Parse/ParseTentative.cpp3
-rw-r--r--clang/lib/Sema/DeclSpec.cpp6
-rw-r--r--clang/lib/Sema/SemaDecl.cpp3
-rw-r--r--clang/lib/Sema/SemaDeclCXX.cpp7
-rw-r--r--clang/lib/Sema/SemaExpr.cpp4
-rw-r--r--clang/lib/Sema/SemaInit.cpp48
-rw-r--r--clang/lib/Sema/SemaOverload.cpp2
-rw-r--r--clang/lib/Sema/SemaTemplate.cpp4
-rw-r--r--clang/lib/Sema/SemaTemplateVariadic.cpp1
-rw-r--r--clang/lib/Sema/SemaType.cpp5
-rw-r--r--clang/lib/Serialization/ASTCommon.cpp3
-rw-r--r--clang/lib/Serialization/ASTReader.cpp3
-rw-r--r--clang/test/CodeGenCXX/char8_t.cpp8
-rw-r--r--clang/test/Lexer/char8_t.cpp17
-rw-r--r--clang/test/Lexer/cxx-features.cpp7
-rw-r--r--clang/test/SemaCXX/char8_t.cpp44
48 files changed, 254 insertions, 19 deletions
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 40561b39d11..af39a1f3573 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -999,6 +999,7 @@ public:
CanQualType WCharTy; // [C++ 3.9.1p5].
CanQualType WideCharTy; // Same as WCharTy in C++, integer type in C99.
CanQualType WIntTy; // [C99 7.24.1], integer type unchanged by default promotions.
+ CanQualType Char8Ty; // [C++20 proposal]
CanQualType Char16Ty; // [C++0x 3.9.1p5], integer type in C99.
CanQualType Char32Ty; // [C++0x 3.9.1p5], integer type in C99.
CanQualType SignedCharTy, ShortTy, IntTy, LongTy, LongLongTy, Int128Ty;
diff --git a/clang/include/clang/AST/BuiltinTypes.def b/clang/include/clang/AST/BuiltinTypes.def
index e4f5f7db2f7..4d4ed792208 100644
--- a/clang/include/clang/AST/BuiltinTypes.def
+++ b/clang/include/clang/AST/BuiltinTypes.def
@@ -72,6 +72,9 @@ UNSIGNED_TYPE(UChar, UnsignedCharTy)
// 'wchar_t' for targets where it's unsigned
SHARED_SINGLETON_TYPE(UNSIGNED_TYPE(WChar_U, WCharTy))
+// 'char8_t' in C++20 (proposed)
+UNSIGNED_TYPE(Char8, Char8Ty)
+
// 'char16_t' in C++
UNSIGNED_TYPE(Char16, Char16Ty)
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index ab6e113f2bd..95e4cad1558 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -1777,6 +1777,7 @@ public:
bool isBooleanType() const;
bool isCharType() const;
bool isWideCharType() const;
+ bool isChar8Type() const;
bool isChar16Type() const;
bool isChar32Type() const;
bool isAnyCharacterType() const;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index cb3f606cc14..4410c324695 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2421,6 +2421,9 @@ def err_template_different_associated_constraints : Error<
def warn_cxx98_compat_unicode_type : Warning<
"'%0' type specifier is incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
+def warn_cxx17_compat_unicode_type : Warning<
+ "'char8_t' type specifier is incompatible with C++ standards before C++20">,
+ InGroup<CXXPre2aCompat>, DefaultIgnore;
// __make_integer_seq
def err_integer_sequence_negative_length : Error<
@@ -5822,6 +5825,13 @@ def err_array_init_wide_string_into_char : Error<
"initializing char array with wide string literal">;
def err_array_init_incompat_wide_string_into_wchar : Error<
"initializing wide char array with incompatible wide string literal">;
+def err_array_init_plain_string_into_char8_t : Error<
+ "initializing 'char8_t' array with plain string literal">;
+def note_array_init_plain_string_into_char8_t : Note<
+ "add 'u8' prefix to form a 'char8_t' string literal">;
+def err_array_init_utf8_string_into_char : Error<
+ "initialization of char array with UTF-8 string literal is not permitted "
+ "by '-fchar8_t'">;
def err_array_init_different_type : Error<
"cannot initialize array %diff{of type $ with array of type $|"
"with different type of array}0,1">;
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 87173c8d547..f868110719e 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -106,6 +106,7 @@ LANGOPT(LineComment , 1, 0, "'//' comments")
LANGOPT(Bool , 1, 0, "bool, true, and false keywords")
LANGOPT(Half , 1, 0, "half keyword")
LANGOPT(WChar , 1, CPlusPlus, "wchar_t keyword")
+LANGOPT(Char8 , 1, 0, "char8_t keyword")
LANGOPT(DeclSpecKeyword , 1, 0, "__declspec keyword")
BENIGN_LANGOPT(DollarIdents , 1, 1, "'$' in identifiers")
BENIGN_LANGOPT(AsmPreprocessor, 1, 0, "preprocessor in asm mode")
diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h
index 377534baab0..d17e5a8145a 100644
--- a/clang/include/clang/Basic/Specifiers.h
+++ b/clang/include/clang/Basic/Specifiers.h
@@ -47,6 +47,7 @@ namespace clang {
TST_void,
TST_char,
TST_wchar, // C++ wchar_t
+ TST_char8, // C++20 char8_t (proposed)
TST_char16, // C++11 char16_t
TST_char32, // C++11 char32_t
TST_int,
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index f18e06f5b7d..6bf945b7856 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -260,6 +260,7 @@ PUNCTUATOR(caretcaret, "^^")
// BOOLSUPPORT - This is a keyword if 'bool' is a built-in type
// HALFSUPPORT - This is a keyword if 'half' is a built-in type
// WCHARSUPPORT - This is a keyword if 'wchar_t' is a built-in type
+// CHAR8SUPPORT - This is a keyword if 'char8_t' is a built-in type
//
KEYWORD(auto , KEYALL)
KEYWORD(break , KEYALL)
@@ -380,6 +381,9 @@ KEYWORD(co_yield , KEYCOROUTINES)
MODULES_KEYWORD(module)
MODULES_KEYWORD(import)
+// C++ char8_t proposal
+KEYWORD(char8_t , CHAR8SUPPORT)
+
// C11 Extension
KEYWORD(_Float16 , KEYALL)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 80facb79a48..944a51ce161 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1525,6 +1525,10 @@ def frtti : Flag<["-"], "frtti">, Group<f_Group>;
def : Flag<["-"], "fsched-interblock">, Group<clang_ignored_f_Group>;
def fshort_enums : Flag<["-"], "fshort-enums">, Group<f_Group>, Flags<[CC1Option]>,
HelpText<"Allocate to an enum type only as many bytes as it needs for the declared range of possible values">;
+def fchar8__t : Flag<["-"], "fchar8_t">, Group<f_Group>, Flags<[CC1Option]>,
+ HelpText<"Enable C++ builtin type char8_t">;
+def fno_char8__t : Flag<["-"], "fno-char8_t">, Group<f_Group>,
+ HelpText<"Disable C++ builtin type char8_t">;
def fshort_wchar : Flag<["-"], "fshort-wchar">, Group<f_Group>,
HelpText<"Force wchar_t to be a short unsigned int">;
def fno_short_wchar : Flag<["-"], "fno-short-wchar">, Group<f_Group>,
diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h
index e9b116fb71e..03fd90c32bc 100644
--- a/clang/include/clang/Sema/DeclSpec.h
+++ b/clang/include/clang/Sema/DeclSpec.h
@@ -273,6 +273,7 @@ public:
static const TST TST_void = clang::TST_void;
static const TST TST_char = clang::TST_char;
static const TST TST_wchar = clang::TST_wchar;
+ static const TST TST_char8 = clang::TST_char8;
static const TST TST_char16 = clang::TST_char16;
static const TST TST_char32 = clang::TST_char32;
static const TST TST_int = clang::TST_int;
diff --git a/clang/include/clang/Sema/Initialization.h b/clang/include/clang/Sema/Initialization.h
index d11c1ee2a0f..c0f38420d2b 100644
--- a/clang/include/clang/Sema/Initialization.h
+++ b/clang/include/clang/Sema/Initialization.h
@@ -952,6 +952,12 @@ public:
/// literal.
FK_IncompatWideStringIntoWideChar,
+ /// \brief Initializing char8_t array with plain string literal.
+ FK_PlainStringIntoUTF8Char,
+
+ /// \brief Initializing char array with UTF-8 string literal.
+ FK_UTF8StringIntoPlainChar,
+
/// \brief Array type mismatch.
FK_ArrayTypeMismatch,
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 1f4e0347bc4..4dbcd573bbb 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -936,6 +936,9 @@ namespace serialization {
/// \brief The '_Float16' type
PREDEF_TYPE_FLOAT16_ID = 44,
+ /// \brief The C++ 'char8_t' type.
+ PREDEF_TYPE_CHAR8_ID = 45,
+
/// \brief OpenCL image types with auto numeration
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
PREDEF_TYPE_##Id##_ID,
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index d61ca589d5b..d51071440e7 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -1151,6 +1151,9 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
WIntTy = getFromTargetType(Target.getWIntType());
+ // C++20 (proposed)
+ InitBuiltinType(Char8Ty, BuiltinType::Char8);
+
if (LangOpts.CPlusPlus) // C++0x 3.9.1p5, extension for C++
InitBuiltinType(Char16Ty, BuiltinType::Char16);
else // C99
@@ -1739,6 +1742,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
case BuiltinType::Char_U:
case BuiltinType::UChar:
case BuiltinType::SChar:
+ case BuiltinType::Char8:
Width = Target->getCharWidth();
Align = Target->getCharAlign();
break;
@@ -5456,6 +5460,7 @@ QualType ASTContext::getPromotedIntegerType(QualType Promotable) const {
// FIXME: Is there some better way to compute this?
if (BT->getKind() == BuiltinType::WChar_S ||
BT->getKind() == BuiltinType::WChar_U ||
+ BT->getKind() == BuiltinType::Char8 ||
BT->getKind() == BuiltinType::Char16 ||
BT->getKind() == BuiltinType::Char32) {
bool FromIsSigned = BT->getKind() == BuiltinType::WChar_S;
@@ -6202,6 +6207,7 @@ static char getObjCEncodingForPrimitiveKind(const ASTContext *C,
switch (kind) {
case BuiltinType::Void: return 'v';
case BuiltinType::Bool: return 'B';
+ case BuiltinType::Char8:
case BuiltinType::Char_U:
case BuiltinType::UChar: return 'C';
case BuiltinType::Char16:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 81577248612..c540dfbbf07 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -7326,6 +7326,7 @@ static int EvaluateBuiltinClassifyType(const CallExpr *E,
return pointer_type_class;
case BuiltinType::WChar_U:
+ case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::ObjCId:
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 609d0ebc849..610400d8322 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -2525,6 +2525,9 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
case BuiltinType::WChar_U:
Out << 'w';
break;
+ case BuiltinType::Char8:
+ Out << "Du";
+ break;
case BuiltinType::Char16:
Out << "Ds";
break;
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index 1039ae87ab6..e72804b2c3b 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -1918,6 +1918,7 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers,
Out << "$$T";
break;
+ case BuiltinType::Char8:
case BuiltinType::Float16:
mangleArtificalTagType(TTK_Struct, "_Float16", {"__clang"});
break;
diff --git a/clang/lib/AST/NSAPI.cpp b/clang/lib/AST/NSAPI.cpp
index 8adaef1fb64..eb807f0cc53 100644
--- a/clang/lib/AST/NSAPI.cpp
+++ b/clang/lib/AST/NSAPI.cpp
@@ -436,6 +436,7 @@ NSAPI::getNSNumberFactoryMethodKind(QualType T) const {
case BuiltinType::Void:
case BuiltinType::WChar_U:
case BuiltinType::WChar_S:
+ case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::Int128:
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index a2a60772b77..571dc2abfa0 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -1763,6 +1763,12 @@ bool Type::isWideCharType() const {
return false;
}
+bool Type::isChar8Type() const {
+ if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
+ return BT->getKind() == BuiltinType::Char8;
+ return false;
+}
+
bool Type::isChar16Type() const {
if (const auto *BT = dyn_cast<BuiltinType>(CanonicalType))
return BT->getKind() == BuiltinType::Char16;
@@ -1785,6 +1791,7 @@ bool Type::isAnyCharacterType() const {
case BuiltinType::Char_U:
case BuiltinType::UChar:
case BuiltinType::WChar_U:
+ case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::Char_S:
@@ -2419,6 +2426,7 @@ bool Type::isPromotableIntegerType() const {
case BuiltinType::UShort:
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
+ case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
return true;
@@ -2655,6 +2663,8 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const {
case WChar_S:
case WChar_U:
return Policy.MSWChar ? "__wchar_t" : "wchar_t";
+ case Char8:
+ return "char8_t";
case Char16:
return "char16_t";
case Char32:
diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp
index 0ac50b31ace..57349b46f54 100644
--- a/clang/lib/AST/TypeLoc.cpp
+++ b/clang/lib/AST/TypeLoc.cpp
@@ -317,6 +317,8 @@ TypeSpecifierType BuiltinTypeLoc::getWrittenTypeSpec() const {
case BuiltinType::Char_U:
case BuiltinType::Char_S:
return TST_char;
+ case BuiltinType::Char8:
+ return TST_char8;
case BuiltinType::Char16:
return TST_char16;
case BuiltinType::Char32:
diff --git a/clang/lib/Analysis/PrintfFormatString.cpp b/clang/lib/Analysis/PrintfFormatString.cpp
index dfaed26564e..2043970ccd4 100644
--- a/clang/lib/Analysis/PrintfFormatString.cpp
+++ b/clang/lib/Analysis/PrintfFormatString.cpp
@@ -647,6 +647,7 @@ bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
case BuiltinType::Bool:
case BuiltinType::WChar_U:
case BuiltinType::WChar_S:
+ case BuiltinType::Char8: // FIXME: Treat like 'char'?
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::UInt128:
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index 6b0133208ba..025104c63be 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -115,14 +115,15 @@ namespace {
KEYNOOPENCL = 0x02000,
WCHARSUPPORT = 0x04000,
HALFSUPPORT = 0x08000,
- KEYCONCEPTS = 0x10000,
- KEYOBJC2 = 0x20000,
- KEYZVECTOR = 0x40000,
- KEYCOROUTINES = 0x80000,
- KEYMODULES = 0x100000,
- KEYCXX2A = 0x200000,
+ CHAR8SUPPORT = 0x10000,
+ KEYCONCEPTS = 0x20000,
+ KEYOBJC2 = 0x40000,
+ KEYZVECTOR = 0x80000,
+ KEYCOROUTINES = 0x100000,
+ KEYMODULES = 0x200000,
+ KEYCXX2A = 0x400000,
KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX2A,
- KEYALL = (0x3fffff & ~KEYNOMS18 &
+ KEYALL = (0x7fffff & ~KEYNOMS18 &
~KEYNOOPENCL) // KEYNOMS18 and KEYNOOPENCL are used to exclude.
};
@@ -151,6 +152,7 @@ static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
if (LangOpts.Bool && (Flags & BOOLSUPPORT)) return KS_Enabled;
if (LangOpts.Half && (Flags & HALFSUPPORT)) return KS_Enabled;
if (LangOpts.WChar && (Flags & WCHARSUPPORT)) return KS_Enabled;
+ if (LangOpts.Char8 && (Flags & CHAR8SUPPORT)) return KS_Enabled;
if (LangOpts.AltiVec && (Flags & KEYALTIVEC)) return KS_Enabled;
if (LangOpts.OpenCL && (Flags & KEYOPENCL)) return KS_Enabled;
if (!LangOpts.CPlusPlus && (Flags & KEYNOCXX)) return KS_Enabled;
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 474018c065e..12209723999 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -665,6 +665,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
case BuiltinType::SChar:
Encoding = llvm::dwarf::DW_ATE_signed_char;
break;
+ case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
Encoding = llvm::dwarf::DW_ATE_UTF;
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index ccb6df9a580..ce1fdf9b125 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -437,6 +437,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
case BuiltinType::ULongLong:
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
+ case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
ResultType = llvm::IntegerType::get(getLLVMContext(),
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index f92d7ec0632..0e35633e1d6 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2706,6 +2706,7 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
case BuiltinType::LongDouble:
case BuiltinType::Float16:
case BuiltinType::Float128:
+ case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::Int128:
@@ -3567,7 +3568,8 @@ void ItaniumCXXABI::EmitFundamentalRTTIDescriptors(bool DLLExport) {
getContext().UnsignedInt128Ty, getContext().HalfTy,
getContext().FloatTy, getContext().DoubleTy,
getContext().LongDoubleTy, getContext().Float128Ty,
- getContext().Char16Ty, getContext().Char32Ty
+ getContext().Char8Ty, getContext().Char16Ty,
+ getContext().Char32Ty
};
for (const QualType &FundamentalType : FundamentalTypes)
EmitFundamentalRTTIDescriptor(FundamentalType, DLLExport);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 331b4beaa6b..5e5dfdeecae 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2687,6 +2687,9 @@ static void RenderCharacterOptions(const ArgList &Args, const llvm::Triple &T,
CmdArgs.push_back("-fno-signed-char");
}
+ if (Args.hasFlag(options::OPT_fchar8__t, options::OPT_fno_char8__t, false))
+ CmdArgs.push_back("-fchar8_t");
+
if (const Arg *A = Args.getLastArg(options::OPT_fshort_wchar,
options::OPT_fno_short_wchar)) {
if (A->getOption().matches(options::OPT_fshort_wchar)) {
diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp
index 10ac392abbf..c63f0129c48 100644
--- a/clang/lib/Format/FormatToken.cpp
+++ b/clang/lib/Format/FormatToken.cpp
@@ -57,6 +57,7 @@ bool FormatToken::isSimpleTypeSpecifier() const {
case tok::kw_bool:
case tok::kw___underlying_type:
case tok::annot_typename:
+ case tok::kw_char8_t:
case tok::kw_char16_t:
case tok::kw_char32_t:
case tok::kw_typeof:
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 44f29ba3b10..a6dc1678d1d 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2362,6 +2362,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
Opts.ImplicitModules = !Args.hasArg(OPT_fno_implicit_modules);
Opts.CharIsSigned = Opts.OpenCL || !Args.hasArg(OPT_fno_signed_char);
Opts.WChar = Opts.CPlusPlus && !Args.hasArg(OPT_fno_wchar);
+ Opts.Char8 = Args.hasArg(OPT_fchar8__t);
if (const Arg *A = Args.getLastArg(OPT_fwchar_type_EQ)) {
Opts.WCharSize = llvm::StringSwitch<unsigned>(A->getValue())
.Case("char", 1)
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 192862db657..8a87b9f35e3 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -559,6 +559,10 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
Builder.defineMacro("__cpp_experimental_concepts", "1");
if (LangOpts.CoroutinesTS)
Builder.defineMacro("__cpp_coroutines", "201703L");
+
+ // Potential future breaking changes.
+ if (LangOpts.Char8)
+ Builder.defineMacro("__cpp_char8_t", "201803");
}
static void InitializePredefinedMacros(const TargetInfo &TI,
@@ -939,6 +943,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
InlineWidthBits));
DEFINE_LOCK_FREE_MACRO(BOOL, Bool);
DEFINE_LOCK_FREE_MACRO(CHAR, Char);
+ if (LangOpts.Char8)
+ DEFINE_LOCK_FREE_MACRO(CHAR8_T, Char); // Treat char8_t like char.
DEFINE_LOCK_FREE_MACRO(CHAR16_T, Char16);
DEFINE_LOCK_FREE_MACRO(CHAR32_T, Char32);
DEFINE_LOCK_FREE_MACRO(WCHAR_T, WChar);
diff --git a/clang/lib/Index/USRGeneration.cpp b/clang/lib/Index/USRGeneration.cpp
index ee1c950eb69..ba536c748c3 100644
--- a/clang/lib/Index/USRGeneration.cpp
+++ b/clang/lib/Index/USRGeneration.cpp
@@ -650,6 +650,8 @@ void USRGenerator::VisitType(QualType T) {
c = 'b'; break;
case BuiltinType::UChar:
c = 'c'; break;
+ case BuiltinType::Char8:
+ c = 'u'; break; // FIXME: Check this doesn't collide
case BuiltinType::Char16:
c = 'q'; break;
case BuiltinType::Char32:
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index d8431827e9c..b1ed0e10c6f 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -363,7 +363,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
NumBits = TI.getChar16Width();
else if (Literal.isUTF32())
NumBits = TI.getChar32Width();
- else
+ else // char or char8_t
NumBits = TI.getCharWidth();
// Set the width.
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index fc0ca613c73..e0948b37170 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -3587,6 +3587,10 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
isInvalid = DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec,
DiagID, Policy);
break;
+ case tok::kw_char8_t:
+ isInvalid = DS.SetTypeSpecType(DeclSpec::TST_char8, Loc, PrevSpec,
+ DiagID, Policy);
+ break;
case tok::kw_char16_t:
isInvalid = DS.SetTypeSpecType(DeclSpec::TST_char16, Loc, PrevSpec,
DiagID, Policy);
@@ -4585,6 +4589,7 @@ bool Parser::isKnownToBeTypeSpecifier(const Token &Tok) const {
case tok::kw_void:
case tok::kw_char:
case tok::kw_wchar_t:
+ case tok::kw_char8_t:
case tok::kw_char16_t:
case tok::kw_char32_t:
case tok::kw_int:
@@ -4661,6 +4666,7 @@ bool Parser::isTypeSpecifierQualifier() {
case tok::kw_void:
case tok::kw_char:
case tok::kw_wchar_t:
+ case tok::kw_char8_t:
case tok::kw_char16_t:
case tok::kw_char32_t:
case tok::kw_int:
@@ -4817,6 +4823,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
case tok::kw_void:
case tok::kw_char:
case tok::kw_wchar_t:
+ case tok::kw_char8_t:
case tok::kw_char16_t:
case tok::kw_char32_t:
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index ca5d3bce9cf..1cc98568ded 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -1224,6 +1224,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
case tok::annot_decltype:
case tok::kw_char:
case tok::kw_wchar_t:
+ case tok::kw_char8_t:
case tok::kw_char16_t:
case tok::kw_char32_t:
case tok::kw_bool:
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 8e39adf8a5d..276dea1add1 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -1962,6 +1962,9 @@ void Parser::ParseCXXSimpleTypeSpecifier(DeclSpec &DS) {
case tok::kw_wchar_t:
DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec, DiagID, Policy);
break;
+ case tok::kw_char8_t:
+ DS.SetTypeSpecType(DeclSpec::TST_char8, Loc, PrevSpec, DiagID, Policy);
+ break;
case tok::kw_char16_t:
DS.SetTypeSpecType(DeclSpec::TST_char16, Loc, PrevSpec, DiagID, Policy);
break;
diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp
index ebd6f0f5b8e..17ff9f9be6d 100644
--- a/clang/lib/Parse/ParseTentative.cpp
+++ b/clang/lib/Parse/ParseTentative.cpp
@@ -1052,6 +1052,7 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) {
case tok::kw_class:
case tok::kw_typename:
case tok::kw_wchar_t:
+ case tok::kw_char8_t:
case tok::kw_char16_t:
case tok::kw_char32_t:
case tok::kw__Decimal32:
@@ -1523,6 +1524,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
case tok::kw_char:
case tok::kw_wchar_t:
+ case tok::kw_char8_t:
case tok::kw_char16_t:
case tok::kw_char32_t:
case tok::kw_bool:
@@ -1614,6 +1616,7 @@ bool Parser::isCXXDeclarationSpecifierAType() {
// simple-type-specifier
case tok::kw_char:
case tok::kw_wchar_t:
+ case tok::kw_char8_t:
case tok::kw_char16_t:
case tok::kw_char32_t:
case tok::kw_bool:
diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp
index 2fad5a18ba6..26d62021380 100644
--- a/clang/lib/Sema/DeclSpec.cpp
+++ b/clang/lib/Sema/DeclSpec.cpp
@@ -329,6 +329,7 @@ bool Declarator::isDeclarationOfFunction() const {
case TST_auto_type:
case TST_bool:
case TST_char:
+ case TST_char8:
case TST_char16:
case TST_char32:
case TST_class:
@@ -499,6 +500,7 @@ const char *DeclSpec::getSpecifierName(DeclSpec::TST T,
case DeclSpec::TST_void: return "void";
case DeclSpec::TST_char: return "char";
case DeclSpec::TST_wchar: return Policy.MSWChar ? "__wchar_t" : "wchar_t";
+ case DeclSpec::TST_char8: return "char8_t";
case DeclSpec::TST_char16: return "char16_t";
case DeclSpec::TST_char32: return "char32_t";
case DeclSpec::TST_int: return "int";
@@ -1202,7 +1204,9 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) {
StorageClassSpec == SCS_auto)
S.Diag(StorageClassSpecLoc, diag::warn_auto_storage_class)
<< FixItHint::CreateRemoval(StorageClassSpecLoc);
- if (TypeSpecType == TST_char16 || TypeSpecType == TST_char32)
+ if (TypeSpecType == TST_char8)
+ S.Diag(TSTLoc, diag::warn_cxx17_compat_unicode_type);
+ else if (TypeSpecType == TST_char16 || TypeSpecType == TST_char32)
S.Diag(TSTLoc, diag::warn_cxx98_compat_unicode_type)
<< (TypeSpecType == TST_char16 ? "char16_t" : "char32_t");
if (Constexpr_specified)
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 58676101d82..732853de48c 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -148,6 +148,9 @@ bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const {
case tok::kw_decltype:
return getLangOpts().CPlusPlus;
+ case tok::kw_char8_t:
+ return getLangOpts().Char8;
+
default:
break;
}
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 2cf16221f75..4339c9a8de1 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -13187,6 +13187,7 @@ bool Sema::CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl) {
ParamType->isSpecificBuiltinType(BuiltinType::LongDouble) ||
Context.hasSameType(ParamType, Context.CharTy) ||
Context.hasSameType(ParamType, Context.WideCharTy) ||
+ Context.hasSameType(ParamType, Context.Char8Ty) ||
Context.hasSameType(ParamType, Context.Char16Ty) ||
Context.hasSameType(ParamType, Context.Char32Ty)) {
} else if (const PointerType *Ptr = ParamType->getAs<PointerType>()) {
@@ -13247,10 +13248,12 @@ bool Sema::CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl) {
}
QualType InnerType = PointeeType.getUnqualifiedType();
- // Only const char *, const wchar_t*, const char16_t*, and const char32_t*
- // are allowed as the first parameter to a two-parameter function
+ // Only const char *, const wchar_t*, const char8_t*, const char16_t*, and
+ // const char32_t* are allowed as the first parameter to a two-parameter
+ // function
if (!(Context.hasSameType(InnerType, Context.CharTy) ||
Context.hasSameType(InnerType, Context.WideCharTy) ||
+ Context.hasSameType(InnerType, Context.Char8Ty) ||
Context.hasSameType(InnerType, Context.Char16Ty) ||
Context.hasSameType(InnerType, Context.Char32Ty))) {
Diag((*Param)->getSourceRange().getBegin(),
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index b1ecbfad4c3..58e70a4ceaa 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -1535,6 +1535,8 @@ Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) {
CharTy = Context.getWideCharType();
Kind = StringLiteral::Wide;
} else if (Literal.isUTF8()) {
+ if (getLangOpts().Char8)
+ CharTy = Context.Char8Ty;
Kind = StringLiteral::UTF8;
} else if (Literal.isUTF16()) {
CharTy = Context.Char16Ty;
@@ -3094,6 +3096,8 @@ ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) {
QualType Ty;
if (Literal.isWide())
Ty = Context.WideCharTy; // L'x' -> wchar_t in C and C++.
+ else if (Literal.isUTF8() && getLangOpts().Char8)
+ Ty = Context.Char8Ty; // u8'x' -> char8_t when it exists.
else if (Literal.isUTF16())
Ty = Context.Char16Ty; // u'x' -> char16_t in C11 and C++11.
else if (Literal.isUTF32())
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index e44eaa5162a..be33326cd42 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -49,6 +49,8 @@ enum StringInitFailureKind {
SIF_NarrowStringIntoWideChar,
SIF_WideStringIntoChar,
SIF_IncompatWideStringIntoWideChar,
+ SIF_UTF8StringIntoPlainChar,
+ SIF_PlainStringIntoUTF8Char,
SIF_Other
};
@@ -77,12 +79,21 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT,
Context.getCanonicalType(AT->getElementType()).getUnqualifiedType();
switch (SL->getKind()) {
- case StringLiteral::Ascii:
case StringLiteral::UTF8:
+ // char8_t array can be initialized with a UTF-8 string.
+ if (ElemTy->isChar8Type())
+ return SIF_None;
+ LLVM_FALLTHROUGH;
+ case StringLiteral::Ascii:
// char array can be initialized with a narrow string.
// Only allow char x[] = "foo"; not char x[] = L"foo";
if (ElemTy->isCharType())
- return SIF_None;
+ return (SL->getKind() == StringLiteral::UTF8 &&
+ Context.getLangOpts().Char8)
+ ? SIF_UTF8StringIntoPlainChar
+ : SIF_None;
+ if (ElemTy->isChar8Type())
+ return SIF_PlainStringIntoUTF8Char;
if (IsWideCharCompatible(ElemTy, Context))
return SIF_NarrowStringIntoWideChar;
return SIF_Other;
@@ -94,7 +105,7 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT,
case StringLiteral::UTF16:
if (Context.typesAreCompatible(Context.Char16Ty, ElemTy))
return SIF_None;
- if (ElemTy->isCharType())
+ if (ElemTy->isCharType() || ElemTy->isChar8Type())
return SIF_WideStringIntoChar;
if (IsWideCharCompatible(ElemTy, Context))
return SIF_IncompatWideStringIntoWideChar;
@@ -102,7 +113,7 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT,
case StringLiteral::UTF32:
if (Context.typesAreCompatible(Context.Char32Ty, ElemTy))
return SIF_None;
- if (ElemTy->isCharType())
+ if (ElemTy->isCharType() || ElemTy->isChar8Type())
return SIF_WideStringIntoChar;
if (IsWideCharCompatible(ElemTy, Context))
return SIF_IncompatWideStringIntoWideChar;
@@ -110,7 +121,7 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT,
case StringLiteral::Wide:
if (Context.typesAreCompatible(Context.getWideCharType(), ElemTy))
return SIF_None;
- if (ElemTy->isCharType())
+ if (ElemTy->isCharType() || ElemTy->isChar8Type())
return SIF_WideStringIntoChar;
if (IsWideCharCompatible(ElemTy, Context))
return SIF_IncompatWideStringIntoWideChar;
@@ -3185,6 +3196,8 @@ bool InitializationSequence::isAmbiguous() const {
case FK_NarrowStringIntoWideCharArray:
case FK_WideStringIntoCharArray:
case FK_IncompatWideStringIntoWideChar:
+ case FK_PlainStringIntoUTF8Char:
+ case FK_UTF8StringIntoPlainChar:
case FK_AddressOfOverloadFailed: // FIXME: Could do better
case FK_NonConstLValueReferenceBindingToTemporary:
case FK_NonConstLValueReferenceBindingToBitfield:
@@ -5362,6 +5375,12 @@ void InitializationSequence::InitializeFrom(Sema &S,
case SIF_IncompatWideStringIntoWideChar:
SetFailed(FK_IncompatWideStringIntoWideChar);
return;
+ case SIF_PlainStringIntoUTF8Char:
+ SetFailed(FK_PlainStringIntoUTF8Char);
+ return;
+ case SIF_UTF8StringIntoPlainChar:
+ SetFailed(FK_UTF8StringIntoPlainChar);
+ return;
case SIF_Other:
break;
}
@@ -7591,6 +7610,17 @@ bool InitializationSequence::Diagnose(Sema &S,
S.Diag(Kind.getLocation(),
diag::err_array_init_incompat_wide_string_into_wchar);
break;
+ case FK_PlainStringIntoUTF8Char:
+ S.Diag(Kind.getLocation(),
+ diag::err_array_init_plain_string_into_char8_t);
+ S.Diag(Args.front()->getLocStart(),
+ diag::note_array_init_plain_string_into_char8_t)
+ << FixItHint::CreateInsertion(Args.front()->getLocStart(), "u8");
+ break;
+ case FK_UTF8StringIntoPlainChar:
+ S.Diag(Kind.getLocation(),
+ diag::err_array_init_utf8_string_into_char);
+ break;
case FK_ArrayTypeMismatch:
case FK_NonConstantArrayInit:
S.Diag(Kind.getLocation(),
@@ -8000,6 +8030,14 @@ void InitializationSequence::dump(raw_ostream &OS) const {
OS << "incompatible wide string into wide char array";
break;
+ case FK_PlainStringIntoUTF8Char:
+ OS << "plain string literal into char8_t array";
+ break;
+
+ case FK_UTF8StringIntoPlainChar:
+ OS << "u8 string literal into char array";
+ break;
+
case FK_ArrayTypeMismatch:
OS << "array type mismatch";
break;
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 64cd52c1f65..4b5898883b3 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -7707,6 +7707,8 @@ class BuiltinOperatorOverloadBuilder {
ArithmeticTypes.push_back(S.Context.BoolTy);
ArithmeticTypes.push_back(S.Context.CharTy);
ArithmeticTypes.push_back(S.Context.WCharTy);
+ if (S.Context.getLangOpts().Char8)
+ ArithmeticTypes.push_back(S.Context.Char8Ty);
ArithmeticTypes.push_back(S.Context.Char16Ty);
ArithmeticTypes.push_back(S.Context.Char32Ty);
ArithmeticTypes.push_back(S.Context.SignedCharTy);
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index d98176ad2e5..04fdfaea75f 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -6771,11 +6771,11 @@ Sema::BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg,
Expr *E;
if (T->isAnyCharacterType()) {
- // This does not need to handle u8 character literals because those are
- // of type char, and so can also be covered by an ASCII character literal.
CharacterLiteral::CharacterKind Kind;
if (T->isWideCharType())
Kind = CharacterLiteral::Wide;
+ else if (T->isChar8Type() && getLangOpts().Char8)
+ Kind = CharacterLiteral::UTF8;
else if (T->isChar16Type())
Kind = CharacterLiteral::UTF16;
else if (T->isChar32Type())
diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp
index 7aa0e317df0..37a9c260034 100644
--- a/clang/lib/Sema/SemaTemplateVariadic.cpp
+++ b/clang/lib/Sema/SemaTemplateVariadic.cpp
@@ -822,6 +822,7 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) {
case TST_void:
case TST_char:
case TST_wchar:
+ case TST_char8:
case TST_char16:
case TST_char32:
case TST_int:
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 0e71047f00e..a6491e56acd 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -1277,6 +1277,11 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
Result = Context.getUnsignedWCharType();
}
break;
+ case DeclSpec::TST_char8:
+ assert(DS.getTypeSpecSign() == DeclSpec::TSS_unspecified &&
+ "Unknown TSS value");
+ Result = Context.Char8Ty;
+ break;
case DeclSpec::TST_char16:
assert(DS.getTypeSpecSign() == DeclSpec::TSS_unspecified &&
"Unknown TSS value");
diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp
index 535aacb8c49..54cea92042a 100644
--- a/clang/lib/Serialization/ASTCommon.cpp
+++ b/clang/lib/Serialization/ASTCommon.cpp
@@ -100,6 +100,9 @@ serialization::TypeIdxFromBuiltin(const BuiltinType *BT) {
case BuiltinType::NullPtr:
ID = PREDEF_TYPE_NULLPTR_ID;
break;
+ case BuiltinType::Char8:
+ ID = PREDEF_TYPE_CHAR8_ID;
+ break;
case BuiltinType::Char16:
ID = PREDEF_TYPE_CHAR16_ID;
break;
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index a1de22b68ec..4b0220abc30 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -6841,6 +6841,9 @@ QualType ASTReader::GetType(TypeID ID) {
case PREDEF_TYPE_NULLPTR_ID:
T = Context.NullPtrTy;
break;
+ case PREDEF_TYPE_CHAR8_ID:
+ T = Context.Char8Ty;
+ break;
case PREDEF_TYPE_CHAR16_ID:
T = Context.Char16Ty;
break;
diff --git a/clang/test/CodeGenCXX/char8_t.cpp b/clang/test/CodeGenCXX/char8_t.cpp
new file mode 100644
index 00000000000..e4dba584a6e
--- /dev/null
+++ b/clang/test/CodeGenCXX/char8_t.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -std=c++17 -emit-llvm -fchar8_t -triple x86_64-linux %s -o - | FileCheck %s
+
+// CHECK: define void @_Z1fDu(
+void f(char8_t c) {}
+
+// CHECK: define void @_Z1gIiEvDTplplcvT__ELA4_KDuELDu114EE
+template<typename T> void g(decltype(T() + u8"foo" + u8'r')) {}
+template void g<int>(const char8_t*);
diff --git a/clang/test/Lexer/char8_t.cpp b/clang/test/Lexer/char8_t.cpp
new file mode 100644
index 00000000000..20f820e2401
--- /dev/null
+++ b/clang/test/Lexer/char8_t.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -std=c++2a -verify %s
+// RUN: %clang_cc1 -std=c++2a -verify %s -fchar8_t
+
+#if defined(__cpp_char8_t) && __is_identifier(char8_t)
+#error char8_t is an identifier under -fchar8_t
+#endif
+
+#if !defined(__cpp_char8_t) && !__is_identifier(char8_t)
+#error char8_t is a keyword under -fno-char8_t
+#endif
+
+char8_t c8t;
+#ifndef __cpp_char8_t
+// expected-error@-2 {{unknown type}}
+#else
+// expected-no-diagnostics
+#endif
diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp
index a7d12e2e14f..352f08e73b3 100644
--- a/clang/test/Lexer/cxx-features.cpp
+++ b/clang/test/Lexer/cxx-features.cpp
@@ -6,6 +6,7 @@
// RUN: %clang_cc1 -std=c++1z -fcxx-exceptions -fsized-deallocation -fconcepts-ts -DCONCEPTS_TS=1 -verify %s
// RUN: %clang_cc1 -fno-rtti -fno-threadsafe-statics -verify %s -DNO_EXCEPTIONS -DNO_RTTI -DNO_THREADSAFE_STATICS -fsized-deallocation
// RUN: %clang_cc1 -fcoroutines-ts -DNO_EXCEPTIONS -DCOROUTINES -verify -fsized-deallocation %s
+// RUN: %clang_cc1 -fchar8_t -DNO_EXCEPTIONS -DCHAR8_T -verify -fsized-deallocation %s
// expected-no-diagnostics
@@ -242,3 +243,9 @@
#if defined(COROUTINES) ? check(coroutines, 201703L, 201703L, 201703L, 201703L) : check(coroutines, 0, 0, 0, 0)
#error "wrong value for __cpp_coroutines"
#endif
+
+// --- not-yet-standard features --
+
+#if defined(CHAR8_T) ? check(char8_t, 201803, 201803, 201803, 201803) : check(char8_t, 0, 0, 0, 0)
+#error "wrong value for __cpp_char8_t"
+#endif
diff --git a/clang/test/SemaCXX/char8_t.cpp b/clang/test/SemaCXX/char8_t.cpp
new file mode 100644
index 00000000000..5eb3d7062d2
--- /dev/null
+++ b/clang/test/SemaCXX/char8_t.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -fchar8_t -std=c++2a -verify %s
+
+char8_t a = u8'a';
+char8_t b[] = u8"foo";
+char8_t c = 'a';
+char8_t d[] = "foo"; // expected-error {{initializing 'char8_t' array with plain string literal}} expected-note {{add 'u8' prefix}}
+
+char e = u8'a';
+char f[] = u8"foo"; // expected-error {{initialization of char array with UTF-8 string literal is not permitted by '-fchar8_t'}}
+char g = 'a';
+char h[] = "foo";
+
+void disambig() {
+ char8_t (a) = u8'x';
+}
+
+void operator""_a(char);
+void operator""_a(const char*, decltype(sizeof(0)));
+
+void test_udl1() {
+ int &x = u8'a'_a; // expected-error {{no matching literal operator}}
+ float &y = u8"a"_a; // expected-error {{no matching literal operator}}
+}
+
+int &operator""_a(char8_t);
+float &operator""_a(const char8_t*, decltype(sizeof(0)));
+
+void test_udl2() {
+ int &x = u8'a'_a;
+ float &y = u8"a"_a;
+}
+
+template<typename E, typename T> void check(T &&t) {
+ using Check = E;
+ using Check = T;
+}
+void check_deduction() {
+ check<char8_t>(u8'a');
+ check<const char8_t(&)[5]>(u8"a\u1000");
+}
+
+static_assert(sizeof(char8_t) == 1);
+static_assert(char8_t(-1) > 0);
+static_assert(u8"\u0080"[0] > 0);
OpenPOWER on IntegriCloud