diff options
author | David Majnemer <david.majnemer@gmail.com> | 2014-03-24 21:43:36 +0000 |
---|---|---|
committer | David Majnemer <david.majnemer@gmail.com> | 2014-03-24 21:43:36 +0000 |
commit | 58e5bee17aa1a0ea0c690f9909f5522541e17f95 (patch) | |
tree | 56194f2a89abe9de002cb39bf42115400d4d89eb /clang/lib | |
parent | c89450e0546154f69e1afc9b0bdf5a660ee4d558 (diff) | |
download | bcm5719-llvm-58e5bee17aa1a0ea0c690f9909f5522541e17f95.tar.gz bcm5719-llvm-58e5bee17aa1a0ea0c690f9909f5522541e17f95.zip |
MS ABI: Eliminate Duplicate Strings
COFF doesn't have mergeable sections so LLVM/clang's normal tactics for
string deduplication will not have any effect.
To remedy this we place each string inside its own section and mark
the section as IMAGE_COMDAT_SELECT_ANY. However, we can only do this if the
string has an external name that we can generate from its contents.
To be compatible with MSVC, we must use their scheme. Otherwise identical
strings in translation units from clang may not be deduplicated with
translation units in MSVC.
This fixes PR18248.
N.B. We will not attempt to do anything with a string literal which is not of
type 'char' or 'wchar_t' because MSVC's compiler does not support Unicode
string literals as of this date. Further, we avoid doing this if
either -fwritable-strings or -fsanitize=address is present.
This reverts commit r204596.
llvm-svn: 204675
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/AST/ItaniumMangle.cpp | 10 | ||||
-rw-r--r-- | clang/lib/AST/MicrosoftMangle.cpp | 174 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 78 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.h | 5 |
4 files changed, 248 insertions, 19 deletions
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index a1106e781fc..cec24615ebd 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -21,6 +21,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" +#include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/TypeLoc.h" @@ -126,6 +127,9 @@ public: /// @{ bool shouldMangleCXXName(const NamedDecl *D) override; + bool shouldMangleStringLiteral(const StringLiteral *) override { + return false; + } void mangleCXXName(const NamedDecl *D, raw_ostream &) override; void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk, raw_ostream &) override; @@ -153,6 +157,8 @@ public: void mangleItaniumThreadLocalWrapper(const VarDecl *D, raw_ostream &) override; + void mangleStringLiteral(const StringLiteral *, raw_ostream &) override; + bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) { // Lambda closure types are already numbered. 
if (isLambda(ND)) @@ -3774,6 +3780,10 @@ void ItaniumMangleContextImpl::mangleTypeName(QualType Ty, raw_ostream &Out) { mangleCXXRTTIName(Ty, Out); } +void ItaniumMangleContextImpl::mangleStringLiteral(const StringLiteral *, raw_ostream &) { + llvm_unreachable("Can't mangle string literals"); +} + ItaniumMangleContext * ItaniumMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags) { return new ItaniumMangleContextImpl(Context, Diags); diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index d982df04fe9..9dfd2af8bb6 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -20,6 +20,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" +#include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/VTableBuilder.h" #include "clang/Basic/ABI.h" @@ -27,6 +28,7 @@ #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Support/MathExtras.h" using namespace clang; @@ -93,6 +95,7 @@ public: MicrosoftMangleContextImpl(ASTContext &Context, DiagnosticsEngine &Diags) : MicrosoftMangleContext(Context, Diags) {} bool shouldMangleCXXName(const NamedDecl *D) override; + bool shouldMangleStringLiteral(const StringLiteral *SL) override; void mangleCXXName(const NamedDecl *D, raw_ostream &Out) override; void mangleVirtualMemPtrThunk(const CXXMethodDecl *MD, raw_ostream &) override; void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk, @@ -118,6 +121,7 @@ public: void mangleDynamicInitializer(const VarDecl *D, raw_ostream &Out) override; void mangleDynamicAtExitDestructor(const VarDecl *D, raw_ostream &Out) override; + void mangleStringLiteral(const StringLiteral *SL, raw_ostream &Out) override; bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) { // Lambda closure types are already numbered. 
if (isLambda(ND)) @@ -321,6 +325,13 @@ bool MicrosoftMangleContextImpl::shouldMangleCXXName(const NamedDecl *D) { return true; } +bool +MicrosoftMangleContextImpl::shouldMangleStringLiteral(const StringLiteral *SL) { + return SL->isAscii() || SL->isWide(); + // TODO: This needs to be updated when MSVC gains support for Unicode + // literals. +} + void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, StringRef Prefix) { // MSVC doesn't mangle C++ names the same way it mangles extern "C" names. @@ -2315,6 +2326,169 @@ MicrosoftMangleContextImpl::mangleDynamicAtExitDestructor(const VarDecl *D, mangleInitFiniStub(D, Out, 'F'); } +void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL, + raw_ostream &Out) { + // <char-type> ::= 0 # char + // ::= 1 # wchar_t + // ::= ??? # char16_t/char32_t will need a mangling too... + // + // <literal-length> ::= <non-negative integer> # the length of the literal + // + // <encoded-crc> ::= <hex digit>+ @ # crc of the literal including + // # null-terminator + // + // <encoded-string> ::= <simple character> # uninteresting character + // ::= '?$' <hex digit> <hex digit> # these two nibbles + // # encode the byte for the + // # character + // ::= '?' [a-z] # \xe1 - \xfa + // ::= '?' [A-Z] # \xc1 - \xda + // ::= '?' [0-9] # [,/\:. \n\t'-] + // + // <literal> ::= '??_C@_' <char-type> <literal-length> <encoded-crc> + // <encoded-string> '@' + MicrosoftCXXNameMangler Mangler(*this, Out); + Mangler.getStream() << "\01??_C@_"; + + // <char-type>: The "kind" of string literal is encoded into the mangled name. + // TODO: This needs to be updated when MSVC gains support for unicode + // literals. + if (SL->isAscii()) + Mangler.getStream() << '0'; + else if (SL->isWide()) + Mangler.getStream() << '1'; + else + llvm_unreachable("unexpected string literal kind!"); + + // <literal-length>: The next part of the mangled name consists of the length + // of the string. 
+ // The StringLiteral does not consider the NUL terminator byte(s) but the + // mangling does. + // N.B. The length is in terms of bytes, not characters. + Mangler.mangleNumber(SL->getByteLength() + SL->getCharByteWidth()); + + // We will use the "Rocksoft^tm Model CRC Algorithm" to describe the + // properties of our CRC: + // Width : 32 + // Poly : 04C11DB7 + // Init : FFFFFFFF + // RefIn : True + // RefOut : True + // XorOut : 00000000 + // Check : 340BC6D9 + uint32_t CRC = 0xFFFFFFFFU; + + auto UpdateCRC = [&CRC](char Byte) { + for (unsigned i = 0; i < 8; ++i) { + bool Bit = CRC & 0x80000000U; + if (Byte & (1U << i)) + Bit = !Bit; + CRC <<= 1; + if (Bit) + CRC ^= 0x04C11DB7U; + } + }; + + // CRC all the bytes of the StringLiteral. + for (char Byte : SL->getBytes()) + UpdateCRC(Byte); + + // The NUL terminator byte(s) were not present earlier, + // we need to manually process those bytes into the CRC. + for (unsigned NullTerminator = 0; NullTerminator < SL->getCharByteWidth(); + ++NullTerminator) + UpdateCRC('\x00'); + + // The literature refers to the process of reversing the bits in the final CRC + // output as "reflection". + CRC = llvm::reverseBits(CRC); + + // <encoded-crc>: The CRC is encoded utilizing the standard number mangling + // scheme. + Mangler.mangleNumber(CRC); + + // <encoded-crc>: The mangled name also contains the first 32 _characters_ + // (including null-terminator bytes) of the StringLiteral. + // Each character is encoded by splitting them into bytes and then encoding + // the constituent bytes. + auto MangleByte = [&Mangler](char Byte) { + // There are five different manglings for characters: + // - [a-zA-Z0-9_$]: A one-to-one mapping. + // - ?[a-z]: The range from \xe1 to \xfa. + // - ?[A-Z]: The range from \xc1 to \xda. + // - ?[0-9]: The set of [,/\:. \n\t'-]. + // - ?$XX: A fallback which maps nibbles. 
+ if ((Byte >= 'a' && Byte <= 'z') || (Byte >= 'A' && Byte <= 'Z') || + (Byte >= '0' && Byte <= '9') || Byte == '_' || Byte == '$') { + Mangler.getStream() << Byte; + } else if (Byte >= '\xe1' && Byte <= '\xfa') { + Mangler.getStream() << '?' << static_cast<char>('a' + (Byte - '\xe1')); + } else if (Byte >= '\xc1' && Byte <= '\xda') { + Mangler.getStream() << '?' << static_cast<char>('A' + (Byte - '\xc1')); + } else { + switch (Byte) { + case ',': + Mangler.getStream() << "?0"; + break; + case '/': + Mangler.getStream() << "?1"; + break; + case '\\': + Mangler.getStream() << "?2"; + break; + case ':': + Mangler.getStream() << "?3"; + break; + case '.': + Mangler.getStream() << "?4"; + break; + case ' ': + Mangler.getStream() << "?5"; + break; + case '\n': + Mangler.getStream() << "?6"; + break; + case '\t': + Mangler.getStream() << "?7"; + break; + case '\'': + Mangler.getStream() << "?8"; + break; + case '-': + Mangler.getStream() << "?9"; + break; + default: + Mangler.getStream() << "?$"; + Mangler.getStream() << static_cast<char>('A' + ((Byte >> 4) & 0xf)); + Mangler.getStream() << static_cast<char>('A' + (Byte & 0xf)); + break; + } + } + }; + + auto MangleChar = [&Mangler, &MangleByte, &SL](uint32_t CodeUnit) { + if (SL->getCharByteWidth() == 1) { + MangleByte(static_cast<char>(CodeUnit)); + } else if (SL->getCharByteWidth() == 2) { + MangleByte(static_cast<char>((CodeUnit >> 16) & 0xff)); + MangleByte(static_cast<char>(CodeUnit & 0xff)); + } else { + llvm_unreachable("unsupported CharByteWidth"); + } + }; + + // Enforce our 32 character max. + unsigned NumCharsToMangle = std::min(32U, SL->getLength()); + for (unsigned i = 0; i < NumCharsToMangle; ++i) + MangleChar(SL->getCodeUnit(i)); + + // Encode the NUL terminator if there is room. 
+ if (NumCharsToMangle < 32) + MangleChar(0); + + Mangler.getStream() << '@'; +} + MicrosoftMangleContext * MicrosoftMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags) { return new MicrosoftMangleContextImpl(Context, Diags); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 53e7d56a8d9..c0fbdbecd4e 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2572,25 +2572,67 @@ CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) { llvm::Constant * CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S) { CharUnits Align = getContext().getAlignOfGlobalVarInChars(S->getType()); - if (S->isAscii() || S->isUTF8()) { - SmallString<64> Str(S->getString()); - - // Resize the string to the right size, which is indicated by its type. - const ConstantArrayType *CAT = Context.getAsConstantArrayType(S->getType()); - Str.resize(CAT->getSize().getZExtValue()); - return GetAddrOfConstantString(Str, /*GlobalName*/ 0, Align.getQuantity()); + + llvm::StringMapEntry<llvm::GlobalVariable *> *Entry = nullptr; + llvm::GlobalVariable *GV = nullptr; + if (!LangOpts.WritableStrings) { + llvm::StringMap<llvm::GlobalVariable *> *ConstantStringMap = nullptr; + switch (S->getCharByteWidth()) { + case 1: + ConstantStringMap = &Constant1ByteStringMap; + break; + case 2: + ConstantStringMap = &Constant2ByteStringMap; + break; + case 4: + ConstantStringMap = &Constant4ByteStringMap; + break; + default: + llvm_unreachable("unhandled byte width!"); + } + Entry = &ConstantStringMap->GetOrCreateValue(S->getBytes()); + GV = Entry->getValue(); + } + + if (!GV) { + SmallString<256> MangledNameBuffer; + StringRef GlobalVariableName; + llvm::GlobalValue::LinkageTypes LT; + + // Mangle the string literal if the ABI allows for it. However, we cannot + // do this if we are compiling with ASan or -fwritable-strings because they + // rely on strings having normal linkage. 
+ if (!LangOpts.WritableStrings && !SanOpts.Address && + getCXXABI().getMangleContext().shouldMangleStringLiteral(S)) { + llvm::raw_svector_ostream Out(MangledNameBuffer); + getCXXABI().getMangleContext().mangleStringLiteral(S, Out); + Out.flush(); + + LT = llvm::GlobalValue::LinkOnceODRLinkage; + GlobalVariableName = MangledNameBuffer; + } else { + LT = llvm::GlobalValue::PrivateLinkage;; + GlobalVariableName = ".str"; + } + + // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. + unsigned AddrSpace = 0; + if (getLangOpts().OpenCL) + AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_constant); + + llvm::Constant *C = GetConstantArrayFromStringLiteral(S); + GV = new llvm::GlobalVariable( + getModule(), C->getType(), !LangOpts.WritableStrings, LT, C, + GlobalVariableName, /*InsertBefore=*/nullptr, + llvm::GlobalVariable::NotThreadLocal, AddrSpace); + GV->setUnnamedAddr(true); + if (Entry) + Entry->setValue(GV); } - // FIXME: the following does not memoize wide strings. - llvm::Constant *C = GetConstantArrayFromStringLiteral(S); - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(getModule(),C->getType(), - !LangOpts.WritableStrings, - llvm::GlobalValue::PrivateLinkage, - C,".str"); + if (Align.getQuantity() > GV->getAlignment()) + GV->setAlignment(Align.getQuantity()); - GV->setAlignment(Align.getQuantity()); - GV->setUnnamedAddr(true); return GV; } @@ -2615,7 +2657,7 @@ static llvm::GlobalVariable *GenerateStringLiteral(StringRef str, llvm::Constant *C = llvm::ConstantDataArray::getString(CGM.getLLVMContext(), str, false); - // OpenCL v1.1 s6.5.3: a string literal is in the constant address space. + // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. 
unsigned AddrSpace = 0; if (CGM.getLangOpts().OpenCL) AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant); @@ -2654,7 +2696,7 @@ llvm::Constant *CodeGenModule::GetAddrOfConstantString(StringRef Str, return GenerateStringLiteral(Str, false, *this, GlobalName, Alignment); llvm::StringMapEntry<llvm::GlobalVariable *> &Entry = - ConstantStringMap.GetOrCreateValue(Str); + Constant1ByteStringMap.GetOrCreateValue(Str); if (llvm::GlobalVariable *GV = Entry.getValue()) { if (Alignment > GV->getAlignment()) { diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 4bd8b7a3de7..7694eed1a1b 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -319,7 +319,10 @@ class CodeGenModule : public CodeGenTypeCache { llvm::StringMap<llvm::Constant*> AnnotationStrings; llvm::StringMap<llvm::Constant*> CFConstantStringMap; - llvm::StringMap<llvm::GlobalVariable*> ConstantStringMap; + + llvm::StringMap<llvm::GlobalVariable *> Constant1ByteStringMap; + llvm::StringMap<llvm::GlobalVariable *> Constant2ByteStringMap; + llvm::StringMap<llvm::GlobalVariable *> Constant4ByteStringMap; llvm::DenseMap<const Decl*, llvm::Constant *> StaticLocalDeclMap; llvm::DenseMap<const Decl*, llvm::GlobalVariable*> StaticLocalDeclGuardMap; llvm::DenseMap<const Expr*, llvm::Constant *> MaterializedGlobalTemporaryMap; |