diff options
Diffstat (limited to 'clang')
-rw-r--r-- | clang/include/clang/AST/Mangle.h | 5 | ||||
-rw-r--r-- | clang/lib/AST/ItaniumMangle.cpp | 10 | ||||
-rw-r--r-- | clang/lib/AST/MicrosoftMangle.cpp | 169 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 78 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.h | 5 | ||||
-rw-r--r-- | clang/test/CodeGen/wchar-const.c | 2 |
6 files changed, 248 insertions, 21 deletions
diff --git a/clang/include/clang/AST/Mangle.h b/clang/include/clang/AST/Mangle.h index 6281716c962..28bcd8b30bd 100644 --- a/clang/include/clang/AST/Mangle.h +++ b/clang/include/clang/AST/Mangle.h @@ -31,9 +31,10 @@ namespace clang { class FunctionDecl; class NamedDecl; class ObjCMethodDecl; - class VarDecl; + class StringLiteral; struct ThisAdjustment; struct ThunkInfo; + class VarDecl; /// MangleBuffer - a convenient class for storing a name which is /// either the result of a mangling or is a constant string with @@ -117,6 +118,7 @@ public: bool shouldMangleDeclName(const NamedDecl *D); virtual bool shouldMangleCXXName(const NamedDecl *D) = 0; + virtual bool shouldMangleStringLiteral(const StringLiteral *SL) = 0; // FIXME: consider replacing raw_ostream & with something like SmallString &. void mangleName(const NamedDecl *D, raw_ostream &); @@ -135,6 +137,7 @@ public: raw_ostream &) = 0; virtual void mangleCXXDtor(const CXXDestructorDecl *D, CXXDtorType Type, raw_ostream &) = 0; + virtual void mangleStringLiteral(const StringLiteral *SL, raw_ostream &) = 0; void mangleGlobalBlock(const BlockDecl *BD, const NamedDecl *ID, diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index a1106e781fc..cec24615ebd 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -21,6 +21,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" +#include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/TypeLoc.h" @@ -126,6 +127,9 @@ public: /// @{ bool shouldMangleCXXName(const NamedDecl *D) override; + bool shouldMangleStringLiteral(const StringLiteral *) override { + return false; + } void mangleCXXName(const NamedDecl *D, raw_ostream &) override; void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk, raw_ostream &) override; @@ -153,6 +157,8 @@ public: void mangleItaniumThreadLocalWrapper(const VarDecl *D, 
raw_ostream &) override; + void mangleStringLiteral(const StringLiteral *, raw_ostream &) override; + bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) { // Lambda closure types are already numbered. if (isLambda(ND)) @@ -3774,6 +3780,10 @@ void ItaniumMangleContextImpl::mangleTypeName(QualType Ty, raw_ostream &Out) { mangleCXXRTTIName(Ty, Out); } +void ItaniumMangleContextImpl::mangleStringLiteral(const StringLiteral *, raw_ostream &) { + llvm_unreachable("Can't mangle string literals"); +} + ItaniumMangleContext * ItaniumMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags) { return new ItaniumMangleContextImpl(Context, Diags); diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index d982df04fe9..f5b9e23fa7a 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -20,6 +20,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" +#include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/VTableBuilder.h" #include "clang/Basic/ABI.h" @@ -27,6 +28,9 @@ #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Support/MathExtras.h" + +#include <algorithm> using namespace clang; @@ -93,6 +97,7 @@ public: MicrosoftMangleContextImpl(ASTContext &Context, DiagnosticsEngine &Diags) : MicrosoftMangleContext(Context, Diags) {} bool shouldMangleCXXName(const NamedDecl *D) override; + bool shouldMangleStringLiteral(const StringLiteral *SL) override; void mangleCXXName(const NamedDecl *D, raw_ostream &Out) override; void mangleVirtualMemPtrThunk(const CXXMethodDecl *MD, raw_ostream &) override; void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk, @@ -118,6 +123,7 @@ public: void mangleDynamicInitializer(const VarDecl *D, raw_ostream &Out) override; void mangleDynamicAtExitDestructor(const VarDecl *D, raw_ostream &Out) override; + void 
mangleStringLiteral(const StringLiteral *SL, raw_ostream &Out) override; bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) { // Lambda closure types are already numbered. if (isLambda(ND)) @@ -321,6 +327,13 @@ bool MicrosoftMangleContextImpl::shouldMangleCXXName(const NamedDecl *D) { return true; } +bool +MicrosoftMangleContextImpl::shouldMangleStringLiteral(const StringLiteral *SL) { + return SL->isAscii() || SL->isWide(); + // TODO: This needs to be updated when MSVC gains support for Unicode + // literals. +} + void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, StringRef Prefix) { // MSVC doesn't mangle C++ names the same way it mangles extern "C" names. @@ -2315,6 +2328,162 @@ MicrosoftMangleContextImpl::mangleDynamicAtExitDestructor(const VarDecl *D, mangleInitFiniStub(D, Out, 'F'); } +void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL, + raw_ostream &Out) { + // <char-type> ::= 0 # char + // ::= 1 # wchar_t + // ::= ??? # char16_t/char32_t will need a mangling too... + // + // <literal-length> ::= <non-negative integer> # the length of the literal + // + // <encoded-crc> ::= <hex digit>+ @ # crc of the literal including + // # null-terminator + // + // <encoded-string> ::= <simple character> # uninteresting character + // ::= '?$' [A-P] [A-P] # these two letters, each + // # 'A' + nibble, encode the + // # byte for the character + // ::= '?' [a-z] # \xe1 - \xfa + // ::= '?' [A-Z] # \xc1 - \xda + // ::= '?' [0-9] # [,/\:. \n\t'-] + // + // <literal> ::= '??_C@_' <char-type> <literal-length> <encoded-crc> + // <encoded-string> '@' + MicrosoftCXXNameMangler Mangler(*this, Out); + Mangler.getStream() << "\01??_C@_"; + + // <char-type>: The "kind" of string literal is encoded into the mangled name. + // TODO: This needs to be updated when MSVC gains support for Unicode + // literals.
+ if (SL->isAscii()) + Mangler.getStream() << '0'; + else if (SL->isWide()) + Mangler.getStream() << '1'; + else + llvm_unreachable("unexpected string literal kind!"); + + // <literal-length>: The next part of the mangled name consists of the length + // of the string. + // The StringLiteral does not consider the NUL terminator byte(s) but the + // mangling does. + // N.B. The length is in terms of bytes, not characters. + Mangler.mangleNumber(SL->getByteLength() + SL->getCharByteWidth()); + + // We will use the "Rocksoft^tm Model CRC Algorithm" to describe the + // properties of our CRC: + // Width : 32 + // Poly : 04C11DB7 + // Init : FFFFFFFF + // RefIn : True + // RefOut : True + // XorOut : 00000000 + // Check : 340BC6D9 + uint32_t CRC = 0xFFFFFFFFU; + + auto UpdateCRC = [&CRC](char Byte) { + for (unsigned i = 0; i < 8; ++i) { + bool Bit = CRC & 0x80000000U; + if (Byte & (1U << i)) + Bit = !Bit; + CRC <<= 1; + if (Bit) + CRC ^= 0x04C11DB7U; + } + }; + + // CRC all the bytes of the StringLiteral. + for (char Byte : SL->getBytes()) + UpdateCRC(Byte); + + // The NUL terminator byte(s) were not present earlier, + // we need to manually process those bytes into the CRC. + for (unsigned NullTerminator = 0; NullTerminator < SL->getCharByteWidth(); + ++NullTerminator) + UpdateCRC('\x00'); + + // The literature refers to the process of reversing the bits in the final CRC + // output as "reflection". + CRC = llvm::reverseBits(CRC); + + // <encoded-crc>: The CRC is encoded utilizing the standard number mangling + // scheme. + Mangler.mangleNumber(CRC); + + // <encoded-string>: The mangled name also contains the first 32 _characters_ + // (including null-terminator bytes) of the StringLiteral. + // Each character is encoded by splitting it into bytes and then encoding + // the constituent bytes. + auto MangleCharacter = [&Mangler](char Byte) { + // There are five different manglings for characters: + // - ?[0-9]: The set of [,/\:. \n\t'-].
+ // - [a-zA-Z0-9_$]: A one-to-one mapping. + // - ?[a-z]: The range from \xe1 to \xfa. + // - ?[A-Z]: The range from \xc1 to \xda. + // - ?$XX: A fallback which maps nibbles. + static const char SpecialMap[] = {',', '/', '\\', ':', '.', + ' ', '\n', '\t', '\'', '-'}; + const char *SpecialMapI = + std::find(std::begin(SpecialMap), std::end(SpecialMap), Byte); + if (SpecialMapI != std::end(SpecialMap)) { + Mangler.getStream() << '?' << SpecialMapI - SpecialMap; + } else if ((Byte >= 'a' && Byte <= 'z') || (Byte >= 'A' && Byte <= 'Z') || + (Byte >= '0' && Byte <= '9') || Byte == '_' || Byte == '$') { + Mangler.getStream() << Byte; + } else if (Byte >= '\xe1' && Byte <= '\xfa') { + Mangler.getStream() << '?' << (char)('a' + Byte - '\xe1'); + } else if (Byte >= '\xc1' && Byte <= '\xda') { + Mangler.getStream() << '?' << (char)('A' + Byte - '\xc1'); + } else { + Mangler.getStream() << "?$"; + Mangler.getStream() << (char)('A' + ((Byte >> 4) & 0xf)); + Mangler.getStream() << (char)('A' + (Byte & 0xf)); + } + }; + + // Enforce our 32 character max. + unsigned MaxBytes = 32 * SL->getCharByteWidth(); + StringRef Bytes = SL->getBytes().substr(0, MaxBytes); + size_t BytesLength = Bytes.size(); + + if (SL->isAscii()) { + // A character maps directly to a byte for ASCII StringLiterals. + for (char Byte : Bytes) + MangleCharacter(Byte); + } else if (SL->isWide()) { + // The ordering of bytes in a wide StringLiteral is like so: + // A B C D ... + // However, they are mangled in the following order: + // B A D C ... + for (size_t i = 0; i != BytesLength;) { + char FirstByte = Bytes[i]; + ++i; + if (i != BytesLength) { + char SecondByte = Bytes[i]; + ++i; + MangleCharacter(SecondByte); + } + MangleCharacter(FirstByte); + } + } else { + llvm_unreachable("unexpected string literal kind!"); + // TODO: This needs to be updated when MSVC gains support for Unicode + // literals. + } + + // We should also encode the NUL terminator(s) if we encoded less than 32 + // characters. 
+ if (BytesLength < MaxBytes) { + size_t PaddingBytes = SL->getCharByteWidth(); + size_t BytesLeft = MaxBytes - BytesLength; + if (BytesLeft < PaddingBytes) + PaddingBytes = BytesLeft; + for (unsigned i = 0; i < PaddingBytes; ++i) + MangleCharacter('\x00'); + } + + Mangler.getStream() << '@'; +} + MicrosoftMangleContext * MicrosoftMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags) { return new MicrosoftMangleContextImpl(Context, Diags); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 53e7d56a8d9..097e30c71d1 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2572,25 +2572,67 @@ CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) { llvm::Constant * CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S) { CharUnits Align = getContext().getAlignOfGlobalVarInChars(S->getType()); - if (S->isAscii() || S->isUTF8()) { - SmallString<64> Str(S->getString()); - - // Resize the string to the right size, which is indicated by its type. 
- const ConstantArrayType *CAT = Context.getAsConstantArrayType(S->getType()); - Str.resize(CAT->getSize().getZExtValue()); - return GetAddrOfConstantString(Str, /*GlobalName*/ 0, Align.getQuantity()); + + llvm::StringMapEntry<llvm::GlobalVariable *> *Entry = nullptr; + llvm::GlobalVariable *GV = nullptr; + if (!LangOpts.WritableStrings) { + llvm::StringMap<llvm::GlobalVariable *> *ConstantStringMap = nullptr; + switch (S->getCharByteWidth()) { + case 1: + ConstantStringMap = &Constant1ByteStringMap; + break; + case 2: + ConstantStringMap = &Constant2ByteStringMap; + break; + case 4: + ConstantStringMap = &Constant4ByteStringMap; + break; + default: + llvm_unreachable("unhandled byte width!"); + } + Entry = &ConstantStringMap->GetOrCreateValue(S->getBytes()); + GV = Entry->getValue(); + } + + if (!GV) { + StringRef GlobalVariableName; + llvm::GlobalValue::LinkageTypes LT; + if (!LangOpts.WritableStrings && + getCXXABI().getMangleContext().shouldMangleStringLiteral(S)) { + LT = llvm::GlobalValue::LinkOnceODRLinkage; + + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + getCXXABI().getMangleContext().mangleStringLiteral(S, Out); + Out.flush(); + + size_t Length = Buffer.size(); + char *Name = MangledNamesAllocator.Allocate<char>(Length); + std::copy(Buffer.begin(), Buffer.end(), Name); + GlobalVariableName = StringRef(Name, Length); + } else { + LT = llvm::GlobalValue::PrivateLinkage;; + GlobalVariableName = ".str"; + } + + // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. 
+ unsigned AddrSpace = 0; + if (getLangOpts().OpenCL) + AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_constant); + + llvm::Constant *C = GetConstantArrayFromStringLiteral(S); + GV = new llvm::GlobalVariable( + getModule(), C->getType(), !LangOpts.WritableStrings, LT, C, + GlobalVariableName, /*InsertBefore=*/nullptr, + llvm::GlobalVariable::NotThreadLocal, AddrSpace); + GV->setUnnamedAddr(true); + if (Entry) + Entry->setValue(GV); } - // FIXME: the following does not memoize wide strings. - llvm::Constant *C = GetConstantArrayFromStringLiteral(S); - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(getModule(),C->getType(), - !LangOpts.WritableStrings, - llvm::GlobalValue::PrivateLinkage, - C,".str"); + if (Align.getQuantity() > GV->getAlignment()) + GV->setAlignment(Align.getQuantity()); - GV->setAlignment(Align.getQuantity()); - GV->setUnnamedAddr(true); return GV; } @@ -2615,7 +2657,7 @@ static llvm::GlobalVariable *GenerateStringLiteral(StringRef str, llvm::Constant *C = llvm::ConstantDataArray::getString(CGM.getLLVMContext(), str, false); - // OpenCL v1.1 s6.5.3: a string literal is in the constant address space. + // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. 
unsigned AddrSpace = 0; if (CGM.getLangOpts().OpenCL) AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant); @@ -2654,7 +2696,7 @@ llvm::Constant *CodeGenModule::GetAddrOfConstantString(StringRef Str, return GenerateStringLiteral(Str, false, *this, GlobalName, Alignment); llvm::StringMapEntry<llvm::GlobalVariable *> &Entry = - ConstantStringMap.GetOrCreateValue(Str); + Constant1ByteStringMap.GetOrCreateValue(Str); if (llvm::GlobalVariable *GV = Entry.getValue()) { if (Alignment > GV->getAlignment()) { diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 4bd8b7a3de7..7694eed1a1b 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -319,7 +319,10 @@ class CodeGenModule : public CodeGenTypeCache { llvm::StringMap<llvm::Constant*> AnnotationStrings; llvm::StringMap<llvm::Constant*> CFConstantStringMap; - llvm::StringMap<llvm::GlobalVariable*> ConstantStringMap; + + llvm::StringMap<llvm::GlobalVariable *> Constant1ByteStringMap; + llvm::StringMap<llvm::GlobalVariable *> Constant2ByteStringMap; + llvm::StringMap<llvm::GlobalVariable *> Constant4ByteStringMap; llvm::DenseMap<const Decl*, llvm::Constant *> StaticLocalDeclMap; llvm::DenseMap<const Decl*, llvm::GlobalVariable*> StaticLocalDeclGuardMap; llvm::DenseMap<const Expr*, llvm::Constant *> MaterializedGlobalTemporaryMap; diff --git a/clang/test/CodeGen/wchar-const.c b/clang/test/CodeGen/wchar-const.c index 2e9af53a14d..34da249639e 100644 --- a/clang/test/CodeGen/wchar-const.c +++ b/clang/test/CodeGen/wchar-const.c @@ -15,7 +15,7 @@ typedef __WCHAR_TYPE__ wchar_t; // CHECK-DAR: private unnamed_addr constant [18 x i32] [i32 84, -// CHECK-WIN: private unnamed_addr constant [18 x i16] [i16 84, +// CHECK-WIN: linkonce_odr unnamed_addr constant [18 x i16] [i16 84, extern void foo(const wchar_t* p); int main (int argc, const char * argv[]) { |