diff options
| author | Richard Smith <richard-llvm@metafoo.co.uk> | 2018-05-01 05:02:45 +0000 |
|---|---|---|
| committer | Richard Smith <richard-llvm@metafoo.co.uk> | 2018-05-01 05:02:45 +0000 |
| commit | 3a8244df6fb88a6670470e603445c72f224db9e3 (patch) | |
| tree | 197016a8600cd39037941ce7079e0d9867edacb0 /clang/lib | |
| parent | 33dc01d105c08644c5b08c8c37879c6528edfdea (diff) | |
| download | bcm5719-llvm-3a8244df6fb88a6670470e603445c72f224db9e3.tar.gz bcm5719-llvm-3a8244df6fb88a6670470e603445c72f224db9e3.zip | |
Implement P0482R2, support for char8_t type.
This is not yet part of any C++ working draft, and so is controlled by the flag
-fchar8_t rather than a -std= flag. (The GCC implementation is controlled by a
flag with the same name.)
This implementation is experimental, and will be removed or revised
substantially to match the proposal as it makes its way through the C++
committee.
llvm-svn: 331244
Diffstat (limited to 'clang/lib')
33 files changed, 143 insertions, 19 deletions
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index d61ca589d5b..d51071440e7 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1151,6 +1151,9 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, WIntTy = getFromTargetType(Target.getWIntType()); + // C++20 (proposed) + InitBuiltinType(Char8Ty, BuiltinType::Char8); + if (LangOpts.CPlusPlus) // C++0x 3.9.1p5, extension for C++ InitBuiltinType(Char16Ty, BuiltinType::Char16); else // C99 @@ -1739,6 +1742,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { case BuiltinType::Char_U: case BuiltinType::UChar: case BuiltinType::SChar: + case BuiltinType::Char8: Width = Target->getCharWidth(); Align = Target->getCharAlign(); break; @@ -5456,6 +5460,7 @@ QualType ASTContext::getPromotedIntegerType(QualType Promotable) const { // FIXME: Is there some better way to compute this? if (BT->getKind() == BuiltinType::WChar_S || BT->getKind() == BuiltinType::WChar_U || + BT->getKind() == BuiltinType::Char8 || BT->getKind() == BuiltinType::Char16 || BT->getKind() == BuiltinType::Char32) { bool FromIsSigned = BT->getKind() == BuiltinType::WChar_S; @@ -6202,6 +6207,7 @@ static char getObjCEncodingForPrimitiveKind(const ASTContext *C, switch (kind) { case BuiltinType::Void: return 'v'; case BuiltinType::Bool: return 'B'; + case BuiltinType::Char8: case BuiltinType::Char_U: case BuiltinType::UChar: return 'C'; case BuiltinType::Char16: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 81577248612..c540dfbbf07 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -7326,6 +7326,7 @@ static int EvaluateBuiltinClassifyType(const CallExpr *E, return pointer_type_class; case BuiltinType::WChar_U: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::ObjCId: diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 609d0ebc849..610400d8322 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2525,6 +2525,9 @@ void CXXNameMangler::mangleType(const BuiltinType *T) { case BuiltinType::WChar_U: Out << 'w'; break; + case BuiltinType::Char8: + Out << "Du"; + break; case BuiltinType::Char16: Out << "Ds"; break; diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 1039ae87ab6..e72804b2c3b 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -1918,6 +1918,7 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers, Out << "$$T"; break; + case BuiltinType::Char8: case BuiltinType::Float16: mangleArtificalTagType(TTK_Struct, "_Float16", {"__clang"}); break; diff --git a/clang/lib/AST/NSAPI.cpp b/clang/lib/AST/NSAPI.cpp index 8adaef1fb64..eb807f0cc53 100644 --- a/clang/lib/AST/NSAPI.cpp +++ b/clang/lib/AST/NSAPI.cpp @@ -436,6 +436,7 @@ NSAPI::getNSNumberFactoryMethodKind(QualType T) const { case BuiltinType::Void: case BuiltinType::WChar_U: case BuiltinType::WChar_S: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::Int128: diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index a2a60772b77..571dc2abfa0 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -1763,6 +1763,12 @@ bool Type::isWideCharType() const { return false; } +bool Type::isChar8Type() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() == BuiltinType::Char8; + return false; +} + bool Type::isChar16Type() const { if (const auto *BT = dyn_cast<BuiltinType>(CanonicalType)) return BT->getKind() == BuiltinType::Char16; @@ -1785,6 +1791,7 @@ bool Type::isAnyCharacterType() const { case BuiltinType::Char_U: case BuiltinType::UChar: case BuiltinType::WChar_U: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::Char_S: @@ -2419,6 +2426,7 @@ bool Type::isPromotableIntegerType() const { case BuiltinType::UShort: case BuiltinType::WChar_S: case BuiltinType::WChar_U: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: return true; @@ -2655,6 +2663,8 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const { case WChar_S: case WChar_U: return Policy.MSWChar ? "__wchar_t" : "wchar_t"; + case Char8: + return "char8_t"; case Char16: return "char16_t"; case Char32: diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp index 0ac50b31ace..57349b46f54 100644 --- a/clang/lib/AST/TypeLoc.cpp +++ b/clang/lib/AST/TypeLoc.cpp @@ -317,6 +317,8 @@ TypeSpecifierType BuiltinTypeLoc::getWrittenTypeSpec() const { case BuiltinType::Char_U: case BuiltinType::Char_S: return TST_char; + case BuiltinType::Char8: + return TST_char8; case BuiltinType::Char16: return TST_char16; case BuiltinType::Char32: diff --git a/clang/lib/Analysis/PrintfFormatString.cpp b/clang/lib/Analysis/PrintfFormatString.cpp index dfaed26564e..2043970ccd4 100644 --- a/clang/lib/Analysis/PrintfFormatString.cpp +++ b/clang/lib/Analysis/PrintfFormatString.cpp @@ -647,6 +647,7 @@ bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, case BuiltinType::Bool: case BuiltinType::WChar_U: case BuiltinType::WChar_S: + case BuiltinType::Char8: // FIXME: Treat like 'char'? case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::UInt128: diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 6b0133208ba..025104c63be 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -115,14 +115,15 @@ namespace { KEYNOOPENCL = 0x02000, WCHARSUPPORT = 0x04000, HALFSUPPORT = 0x08000, - KEYCONCEPTS = 0x10000, - KEYOBJC2 = 0x20000, - KEYZVECTOR = 0x40000, - KEYCOROUTINES = 0x80000, - KEYMODULES = 0x100000, - KEYCXX2A = 0x200000, + CHAR8SUPPORT = 0x10000, + KEYCONCEPTS = 0x20000, + KEYOBJC2 = 0x40000, + KEYZVECTOR = 0x80000, + KEYCOROUTINES = 0x100000, + KEYMODULES = 0x200000, + KEYCXX2A = 0x400000, KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX2A, - KEYALL = (0x3fffff & ~KEYNOMS18 & + KEYALL = (0x7fffff & ~KEYNOMS18 & ~KEYNOOPENCL) // KEYNOMS18 and KEYNOOPENCL are used to exclude. }; @@ -151,6 +152,7 @@ static KeywordStatus getKeywordStatus(const LangOptions &LangOpts, if (LangOpts.Bool && (Flags & BOOLSUPPORT)) return KS_Enabled; if (LangOpts.Half && (Flags & HALFSUPPORT)) return KS_Enabled; if (LangOpts.WChar && (Flags & WCHARSUPPORT)) return KS_Enabled; + if (LangOpts.Char8 && (Flags & CHAR8SUPPORT)) return KS_Enabled; if (LangOpts.AltiVec && (Flags & KEYALTIVEC)) return KS_Enabled; if (LangOpts.OpenCL && (Flags & KEYOPENCL)) return KS_Enabled; if (!LangOpts.CPlusPlus && (Flags & KEYNOCXX)) return KS_Enabled; diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 474018c065e..12209723999 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -665,6 +665,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::SChar: Encoding = llvm::dwarf::DW_ATE_signed_char; break; + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: Encoding = llvm::dwarf::DW_ATE_UTF; diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index ccb6df9a580..ce1fdf9b125 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -437,6 +437,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::ULongLong: case BuiltinType::WChar_S: case BuiltinType::WChar_U: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: ResultType = llvm::IntegerType::get(getLLVMContext(), diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index f92d7ec0632..0e35633e1d6 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2706,6 +2706,7 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::LongDouble: case BuiltinType::Float16: case BuiltinType::Float128: + case BuiltinType::Char8: case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::Int128: @@ -3567,7 +3568,8 @@ void ItaniumCXXABI::EmitFundamentalRTTIDescriptors(bool DLLExport) { getContext().UnsignedInt128Ty, getContext().HalfTy, getContext().FloatTy, getContext().DoubleTy, getContext().LongDoubleTy, getContext().Float128Ty, - getContext().Char16Ty, getContext().Char32Ty + getContext().Char8Ty, getContext().Char16Ty, + getContext().Char32Ty }; for (const QualType &FundamentalType : FundamentalTypes) EmitFundamentalRTTIDescriptor(FundamentalType, DLLExport); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 331b4beaa6b..5e5dfdeecae 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2687,6 +2687,9 @@ static void RenderCharacterOptions(const ArgList &Args, const llvm::Triple &T, CmdArgs.push_back("-fno-signed-char"); } + if (Args.hasFlag(options::OPT_fchar8__t, options::OPT_fno_char8__t, false)) + CmdArgs.push_back("-fchar8_t"); + if (const Arg *A = Args.getLastArg(options::OPT_fshort_wchar, options::OPT_fno_short_wchar)) { if (A->getOption().matches(options::OPT_fshort_wchar)) { diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index 10ac392abbf..c63f0129c48 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -57,6 +57,7 @@ bool FormatToken::isSimpleTypeSpecifier() const { case tok::kw_bool: case tok::kw___underlying_type: case tok::annot_typename: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_typeof: diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 44f29ba3b10..a6dc1678d1d 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2362,6 +2362,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Opts.ImplicitModules = !Args.hasArg(OPT_fno_implicit_modules); Opts.CharIsSigned = Opts.OpenCL || !Args.hasArg(OPT_fno_signed_char); Opts.WChar = Opts.CPlusPlus && !Args.hasArg(OPT_fno_wchar); + Opts.Char8 = Args.hasArg(OPT_fchar8__t); if (const Arg *A = Args.getLastArg(OPT_fwchar_type_EQ)) { Opts.WCharSize = llvm::StringSwitch<unsigned>(A->getValue()) .Case("char", 1) diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 192862db657..8a87b9f35e3 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -559,6 +559,10 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, Builder.defineMacro("__cpp_experimental_concepts", "1"); if (LangOpts.CoroutinesTS) Builder.defineMacro("__cpp_coroutines", "201703L"); + + // Potential future breaking changes. + if (LangOpts.Char8) + Builder.defineMacro("__cpp_char8_t", "201803"); } static void InitializePredefinedMacros(const TargetInfo &TI, @@ -939,6 +943,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI, InlineWidthBits)); DEFINE_LOCK_FREE_MACRO(BOOL, Bool); DEFINE_LOCK_FREE_MACRO(CHAR, Char); + if (LangOpts.Char8) + DEFINE_LOCK_FREE_MACRO(CHAR8_T, Char); // Treat char8_t like char. DEFINE_LOCK_FREE_MACRO(CHAR16_T, Char16); DEFINE_LOCK_FREE_MACRO(CHAR32_T, Char32); DEFINE_LOCK_FREE_MACRO(WCHAR_T, WChar); diff --git a/clang/lib/Index/USRGeneration.cpp b/clang/lib/Index/USRGeneration.cpp index ee1c950eb69..ba536c748c3 100644 --- a/clang/lib/Index/USRGeneration.cpp +++ b/clang/lib/Index/USRGeneration.cpp @@ -650,6 +650,8 @@ void USRGenerator::VisitType(QualType T) { c = 'b'; break; case BuiltinType::UChar: c = 'c'; break; + case BuiltinType::Char8: + c = 'u'; break; // FIXME: Check this doesn't collide case BuiltinType::Char16: c = 'q'; break; case BuiltinType::Char32: diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index d8431827e9c..b1ed0e10c6f 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -363,7 +363,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, NumBits = TI.getChar16Width(); else if (Literal.isUTF32()) NumBits = TI.getChar32Width(); - else + else // char or char8_t NumBits = TI.getCharWidth(); // Set the width. diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index fc0ca613c73..e0948b37170 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -3587,6 +3587,10 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, isInvalid = DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec, DiagID, Policy); break; + case tok::kw_char8_t: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_char8, Loc, PrevSpec, + DiagID, Policy); + break; case tok::kw_char16_t: isInvalid = DS.SetTypeSpecType(DeclSpec::TST_char16, Loc, PrevSpec, DiagID, Policy); @@ -4585,6 +4589,7 @@ bool Parser::isKnownToBeTypeSpecifier(const Token &Tok) const { case tok::kw_void: case tok::kw_char: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_int: @@ -4661,6 +4666,7 @@ bool Parser::isTypeSpecifierQualifier() { case tok::kw_void: case tok::kw_char: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_int: @@ -4817,6 +4823,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { case tok::kw_void: case tok::kw_char: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index ca5d3bce9cf..1cc98568ded 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -1224,6 +1224,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, case tok::annot_decltype: case tok::kw_char: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_bool: diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 8e39adf8a5d..276dea1add1 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1962,6 +1962,9 @@ void Parser::ParseCXXSimpleTypeSpecifier(DeclSpec &DS) { case tok::kw_wchar_t: DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec, DiagID, Policy); break; + case tok::kw_char8_t: + DS.SetTypeSpecType(DeclSpec::TST_char8, Loc, PrevSpec, DiagID, Policy); + break; case tok::kw_char16_t: DS.SetTypeSpecType(DeclSpec::TST_char16, Loc, PrevSpec, DiagID, Policy); break; diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp index ebd6f0f5b8e..17ff9f9be6d 100644 --- a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -1052,6 +1052,7 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) { case tok::kw_class: case tok::kw_typename: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw__Decimal32: @@ -1523,6 +1524,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult, case tok::kw_char: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_bool: @@ -1614,6 +1616,7 @@ bool Parser::isCXXDeclarationSpecifierAType() { // simple-type-specifier case tok::kw_char: case tok::kw_wchar_t: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_bool: diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp index 2fad5a18ba6..26d62021380 100644 --- a/clang/lib/Sema/DeclSpec.cpp +++ b/clang/lib/Sema/DeclSpec.cpp @@ -329,6 +329,7 @@ bool Declarator::isDeclarationOfFunction() const { case TST_auto_type: case TST_bool: case TST_char: + case TST_char8: case TST_char16: case TST_char32: case TST_class: @@ -499,6 +500,7 @@ const char *DeclSpec::getSpecifierName(DeclSpec::TST T, case DeclSpec::TST_void: return "void"; case DeclSpec::TST_char: return "char"; case DeclSpec::TST_wchar: return Policy.MSWChar ? "__wchar_t" : "wchar_t"; + case DeclSpec::TST_char8: return "char8_t"; case DeclSpec::TST_char16: return "char16_t"; case DeclSpec::TST_char32: return "char32_t"; case DeclSpec::TST_int: return "int"; @@ -1202,7 +1204,9 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) { StorageClassSpec == SCS_auto) S.Diag(StorageClassSpecLoc, diag::warn_auto_storage_class) << FixItHint::CreateRemoval(StorageClassSpecLoc); - if (TypeSpecType == TST_char16 || TypeSpecType == TST_char32) + if (TypeSpecType == TST_char8) + S.Diag(TSTLoc, diag::warn_cxx17_compat_unicode_type); + else if (TypeSpecType == TST_char16 || TypeSpecType == TST_char32) S.Diag(TSTLoc, diag::warn_cxx98_compat_unicode_type) << (TypeSpecType == TST_char16 ? "char16_t" : "char32_t"); if (Constexpr_specified) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 58676101d82..732853de48c 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -148,6 +148,9 @@ bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const { case tok::kw_decltype: return getLangOpts().CPlusPlus; + case tok::kw_char8_t: + return getLangOpts().Char8; + default: break; } diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 2cf16221f75..4339c9a8de1 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -13187,6 +13187,7 @@ bool Sema::CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl) { ParamType->isSpecificBuiltinType(BuiltinType::LongDouble) || Context.hasSameType(ParamType, Context.CharTy) || Context.hasSameType(ParamType, Context.WideCharTy) || + Context.hasSameType(ParamType, Context.Char8Ty) || Context.hasSameType(ParamType, Context.Char16Ty) || Context.hasSameType(ParamType, Context.Char32Ty)) { } else if (const PointerType *Ptr = ParamType->getAs<PointerType>()) { @@ -13247,10 +13248,12 @@ bool Sema::CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl) { } QualType InnerType = PointeeType.getUnqualifiedType(); - // Only const char *, const wchar_t*, const char16_t*, and const char32_t* - // are allowed as the first parameter to a two-parameter function + // Only const char *, const wchar_t*, const char8_t*, const char16_t*, and + // const char32_t* are allowed as the first parameter to a two-parameter + // function if (!(Context.hasSameType(InnerType, Context.CharTy) || Context.hasSameType(InnerType, Context.WideCharTy) || + Context.hasSameType(InnerType, Context.Char8Ty) || Context.hasSameType(InnerType, Context.Char16Ty) || Context.hasSameType(InnerType, Context.Char32Ty))) { Diag((*Param)->getSourceRange().getBegin(), diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index b1ecbfad4c3..58e70a4ceaa 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1535,6 +1535,8 @@ Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) { CharTy = Context.getWideCharType(); Kind = StringLiteral::Wide; } else if (Literal.isUTF8()) { + if (getLangOpts().Char8) + CharTy = Context.Char8Ty; Kind = StringLiteral::UTF8; } else if (Literal.isUTF16()) { CharTy = Context.Char16Ty; @@ -3094,6 +3096,8 @@ ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) { QualType Ty; if (Literal.isWide()) Ty = Context.WideCharTy; // L'x' -> wchar_t in C and C++. + else if (Literal.isUTF8() && getLangOpts().Char8) + Ty = Context.Char8Ty; // u8'x' -> char8_t when it exists. else if (Literal.isUTF16()) Ty = Context.Char16Ty; // u'x' -> char16_t in C11 and C++11. else if (Literal.isUTF32()) diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index e44eaa5162a..be33326cd42 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -49,6 +49,8 @@ enum StringInitFailureKind { SIF_NarrowStringIntoWideChar, SIF_WideStringIntoChar, SIF_IncompatWideStringIntoWideChar, + SIF_UTF8StringIntoPlainChar, + SIF_PlainStringIntoUTF8Char, SIF_Other }; @@ -77,12 +79,21 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT, Context.getCanonicalType(AT->getElementType()).getUnqualifiedType(); switch (SL->getKind()) { - case StringLiteral::Ascii: case StringLiteral::UTF8: + // char8_t array can be initialized with a UTF-8 string. + if (ElemTy->isChar8Type()) + return SIF_None; + LLVM_FALLTHROUGH; + case StringLiteral::Ascii: // char array can be initialized with a narrow string. // Only allow char x[] = "foo"; not char x[] = L"foo"; if (ElemTy->isCharType()) - return SIF_None; + return (SL->getKind() == StringLiteral::UTF8 && + Context.getLangOpts().Char8) + ? SIF_UTF8StringIntoPlainChar + : SIF_None; + if (ElemTy->isChar8Type()) + return SIF_PlainStringIntoUTF8Char; if (IsWideCharCompatible(ElemTy, Context)) return SIF_NarrowStringIntoWideChar; return SIF_Other; @@ -94,7 +105,7 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT, case StringLiteral::UTF16: if (Context.typesAreCompatible(Context.Char16Ty, ElemTy)) return SIF_None; - if (ElemTy->isCharType()) + if (ElemTy->isCharType() || ElemTy->isChar8Type()) return SIF_WideStringIntoChar; if (IsWideCharCompatible(ElemTy, Context)) return SIF_IncompatWideStringIntoWideChar; @@ -102,7 +113,7 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT, case StringLiteral::UTF32: if (Context.typesAreCompatible(Context.Char32Ty, ElemTy)) return SIF_None; - if (ElemTy->isCharType()) + if (ElemTy->isCharType() || ElemTy->isChar8Type()) return SIF_WideStringIntoChar; if (IsWideCharCompatible(ElemTy, Context)) return SIF_IncompatWideStringIntoWideChar; @@ -110,7 +121,7 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT, case StringLiteral::Wide: if (Context.typesAreCompatible(Context.getWideCharType(), ElemTy)) return SIF_None; - if (ElemTy->isCharType()) + if (ElemTy->isCharType() || ElemTy->isChar8Type()) return SIF_WideStringIntoChar; if (IsWideCharCompatible(ElemTy, Context)) return SIF_IncompatWideStringIntoWideChar; @@ -3185,6 +3196,8 @@ bool InitializationSequence::isAmbiguous() const { case FK_NarrowStringIntoWideCharArray: case FK_WideStringIntoCharArray: case FK_IncompatWideStringIntoWideChar: + case FK_PlainStringIntoUTF8Char: + case FK_UTF8StringIntoPlainChar: case FK_AddressOfOverloadFailed: // FIXME: Could do better case FK_NonConstLValueReferenceBindingToTemporary: case FK_NonConstLValueReferenceBindingToBitfield: @@ -5362,6 +5375,12 @@ void InitializationSequence::InitializeFrom(Sema &S, case SIF_IncompatWideStringIntoWideChar: SetFailed(FK_IncompatWideStringIntoWideChar); return; + case SIF_PlainStringIntoUTF8Char: + SetFailed(FK_PlainStringIntoUTF8Char); + return; + case SIF_UTF8StringIntoPlainChar: + SetFailed(FK_UTF8StringIntoPlainChar); + return; case SIF_Other: break; } @@ -7591,6 +7610,17 @@ bool InitializationSequence::Diagnose(Sema &S, S.Diag(Kind.getLocation(), diag::err_array_init_incompat_wide_string_into_wchar); break; + case FK_PlainStringIntoUTF8Char: + S.Diag(Kind.getLocation(), + diag::err_array_init_plain_string_into_char8_t); + S.Diag(Args.front()->getLocStart(), + diag::note_array_init_plain_string_into_char8_t) + << FixItHint::CreateInsertion(Args.front()->getLocStart(), "u8"); + break; + case FK_UTF8StringIntoPlainChar: + S.Diag(Kind.getLocation(), + diag::err_array_init_utf8_string_into_char); + break; case FK_ArrayTypeMismatch: case FK_NonConstantArrayInit: S.Diag(Kind.getLocation(), @@ -8000,6 +8030,14 @@ void InitializationSequence::dump(raw_ostream &OS) const { OS << "incompatible wide string into wide char array"; break; + case FK_PlainStringIntoUTF8Char: + OS << "plain string literal into char8_t array"; + break; + + case FK_UTF8StringIntoPlainChar: + OS << "u8 string literal into char array"; + break; + case FK_ArrayTypeMismatch: OS << "array type mismatch"; break; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 64cd52c1f65..4b5898883b3 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -7707,6 +7707,8 @@ class BuiltinOperatorOverloadBuilder { ArithmeticTypes.push_back(S.Context.BoolTy); ArithmeticTypes.push_back(S.Context.CharTy); ArithmeticTypes.push_back(S.Context.WCharTy); + if (S.Context.getLangOpts().Char8) + ArithmeticTypes.push_back(S.Context.Char8Ty); ArithmeticTypes.push_back(S.Context.Char16Ty); ArithmeticTypes.push_back(S.Context.Char32Ty); ArithmeticTypes.push_back(S.Context.SignedCharTy); diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index d98176ad2e5..04fdfaea75f 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -6771,11 +6771,11 @@ Sema::BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg, Expr *E; if (T->isAnyCharacterType()) { - // This does not need to handle u8 character literals because those are - // of type char, and so can also be covered by an ASCII character literal. CharacterLiteral::CharacterKind Kind; if (T->isWideCharType()) Kind = CharacterLiteral::Wide; + else if (T->isChar8Type() && getLangOpts().Char8) + Kind = CharacterLiteral::UTF8; else if (T->isChar16Type()) Kind = CharacterLiteral::UTF16; else if (T->isChar32Type()) diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp index 7aa0e317df0..37a9c260034 100644 --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -822,6 +822,7 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) { case TST_void: case TST_char: case TST_wchar: + case TST_char8: case TST_char16: case TST_char32: case TST_int: diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 0e71047f00e..a6491e56acd 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -1277,6 +1277,11 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { Result = Context.getUnsignedWCharType(); } break; + case DeclSpec::TST_char8: + assert(DS.getTypeSpecSign() == DeclSpec::TSS_unspecified && + "Unknown TSS value"); + Result = Context.Char8Ty; + break; case DeclSpec::TST_char16: assert(DS.getTypeSpecSign() == DeclSpec::TSS_unspecified && "Unknown TSS value"); diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp index 535aacb8c49..54cea92042a 100644 --- a/clang/lib/Serialization/ASTCommon.cpp +++ b/clang/lib/Serialization/ASTCommon.cpp @@ -100,6 +100,9 @@ serialization::TypeIdxFromBuiltin(const BuiltinType *BT) { case BuiltinType::NullPtr: ID = PREDEF_TYPE_NULLPTR_ID; break; + case BuiltinType::Char8: + ID = PREDEF_TYPE_CHAR8_ID; + break; case BuiltinType::Char16: ID = PREDEF_TYPE_CHAR16_ID; break; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a1de22b68ec..4b0220abc30 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6841,6 +6841,9 @@ QualType ASTReader::GetType(TypeID ID) { case PREDEF_TYPE_NULLPTR_ID: T = Context.NullPtrTy; break; + case PREDEF_TYPE_CHAR8_ID: + T = Context.Char8Ty; + break; case PREDEF_TYPE_CHAR16_ID: T = Context.Char16Ty; break; |

