diff options
Diffstat (limited to 'clang/lib/AST/MicrosoftMangle.cpp')
-rw-r--r-- | clang/lib/AST/MicrosoftMangle.cpp | 174 |
1 files changed, 174 insertions, 0 deletions
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index d982df04fe9..9dfd2af8bb6 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -20,6 +20,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" +#include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/VTableBuilder.h" #include "clang/Basic/ABI.h" @@ -27,6 +28,7 @@ #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Support/MathExtras.h" using namespace clang; @@ -93,6 +95,7 @@ public: MicrosoftMangleContextImpl(ASTContext &Context, DiagnosticsEngine &Diags) : MicrosoftMangleContext(Context, Diags) {} bool shouldMangleCXXName(const NamedDecl *D) override; + bool shouldMangleStringLiteral(const StringLiteral *SL) override; void mangleCXXName(const NamedDecl *D, raw_ostream &Out) override; void mangleVirtualMemPtrThunk(const CXXMethodDecl *MD, raw_ostream &) override; void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk, @@ -118,6 +121,7 @@ public: void mangleDynamicInitializer(const VarDecl *D, raw_ostream &Out) override; void mangleDynamicAtExitDestructor(const VarDecl *D, raw_ostream &Out) override; + void mangleStringLiteral(const StringLiteral *SL, raw_ostream &Out) override; bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) { // Lambda closure types are already numbered. if (isLambda(ND)) @@ -321,6 +325,13 @@ bool MicrosoftMangleContextImpl::shouldMangleCXXName(const NamedDecl *D) { return true; } +bool +MicrosoftMangleContextImpl::shouldMangleStringLiteral(const StringLiteral *SL) { + return SL->isAscii() || SL->isWide(); + // TODO: This needs to be updated when MSVC gains support for Unicode + // literals. +} + void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, StringRef Prefix) { // MSVC doesn't mangle C++ names the same way it mangles extern "C" names. @@ -2315,6 +2326,169 @@ MicrosoftMangleContextImpl::mangleDynamicAtExitDestructor(const VarDecl *D, mangleInitFiniStub(D, Out, 'F'); } +void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL, + raw_ostream &Out) { + // <char-type> ::= 0 # char + // ::= 1 # wchar_t + // ::= ??? # char16_t/char32_t will need a mangling too... + // + // <literal-length> ::= <non-negative integer> # the length of the literal + // + // <encoded-crc> ::= <hex digit>+ @ # crc of the literal including + // # null-terminator + // + // <encoded-string> ::= <simple character> # uninteresting character + // ::= '?$' <hex digit> <hex digit> # these two nibbles + // # encode the byte for the + // # character + // ::= '?' [a-z] # \xe1 - \xfa + // ::= '?' [A-Z] # \xc1 - \xda + // ::= '?' [0-9] # [,/\:. \n\t'-] + // + // <literal> ::= '??_C@_' <char-type> <literal-length> <encoded-crc> + // <encoded-string> '@' + MicrosoftCXXNameMangler Mangler(*this, Out); + Mangler.getStream() << "\01??_C@_"; + + // <char-type>: The "kind" of string literal is encoded into the mangled name. + // TODO: This needs to be updated when MSVC gains support for unicode + // literals. + if (SL->isAscii()) + Mangler.getStream() << '0'; + else if (SL->isWide()) + Mangler.getStream() << '1'; + else + llvm_unreachable("unexpected string literal kind!"); + + // <literal-length>: The next part of the mangled name consists of the length + // of the string. + // The StringLiteral does not consider the NUL terminator byte(s) but the + // mangling does. + // N.B. The length is in terms of bytes, not characters. + Mangler.mangleNumber(SL->getByteLength() + SL->getCharByteWidth()); + + // We will use the "Rocksoft^tm Model CRC Algorithm" to describe the + // properties of our CRC: + // Width : 32 + // Poly : 04C11DB7 + // Init : FFFFFFFF + // RefIn : True + // RefOut : True + // XorOut : 00000000 + // Check : 340BC6D9 + uint32_t CRC = 0xFFFFFFFFU; + + auto UpdateCRC = [&CRC](char Byte) { + for (unsigned i = 0; i < 8; ++i) { + bool Bit = CRC & 0x80000000U; + if (Byte & (1U << i)) + Bit = !Bit; + CRC <<= 1; + if (Bit) + CRC ^= 0x04C11DB7U; + } + }; + + // CRC all the bytes of the StringLiteral. + for (char Byte : SL->getBytes()) + UpdateCRC(Byte); + + // The NUL terminator byte(s) were not present earlier, + // we need to manually process those bytes into the CRC. + for (unsigned NullTerminator = 0; NullTerminator < SL->getCharByteWidth(); + ++NullTerminator) + UpdateCRC('\x00'); + + // The literature refers to the process of reversing the bits in the final CRC + // output as "reflection". + CRC = llvm::reverseBits(CRC); + + // <encoded-crc>: The CRC is encoded utilizing the standard number mangling + // scheme. + Mangler.mangleNumber(CRC); + + // <encoded-crc>: The mangled name also contains the first 32 _characters_ + // (including null-terminator bytes) of the StringLiteral. + // Each character is encoded by splitting them into bytes and then encoding + // the constituent bytes. + auto MangleByte = [&Mangler](char Byte) { + // There are five different manglings for characters: + // - [a-zA-Z0-9_$]: A one-to-one mapping. + // - ?[a-z]: The range from \xe1 to \xfa. + // - ?[A-Z]: The range from \xc1 to \xda. + // - ?[0-9]: The set of [,/\:. \n\t'-]. + // - ?$XX: A fallback which maps nibbles. + if ((Byte >= 'a' && Byte <= 'z') || (Byte >= 'A' && Byte <= 'Z') || + (Byte >= '0' && Byte <= '9') || Byte == '_' || Byte == '$') { + Mangler.getStream() << Byte; + } else if (Byte >= '\xe1' && Byte <= '\xfa') { + Mangler.getStream() << '?' << static_cast<char>('a' + (Byte - '\xe1')); + } else if (Byte >= '\xc1' && Byte <= '\xda') { + Mangler.getStream() << '?' << static_cast<char>('A' + (Byte - '\xc1')); + } else { + switch (Byte) { + case ',': + Mangler.getStream() << "?0"; + break; + case '/': + Mangler.getStream() << "?1"; + break; + case '\\': + Mangler.getStream() << "?2"; + break; + case ':': + Mangler.getStream() << "?3"; + break; + case '.': + Mangler.getStream() << "?4"; + break; + case ' ': + Mangler.getStream() << "?5"; + break; + case '\n': + Mangler.getStream() << "?6"; + break; + case '\t': + Mangler.getStream() << "?7"; + break; + case '\'': + Mangler.getStream() << "?8"; + break; + case '-': + Mangler.getStream() << "?9"; + break; + default: + Mangler.getStream() << "?$"; + Mangler.getStream() << static_cast<char>('A' + ((Byte >> 4) & 0xf)); + Mangler.getStream() << static_cast<char>('A' + (Byte & 0xf)); + break; + } + } + }; + + auto MangleChar = [&Mangler, &MangleByte, &SL](uint32_t CodeUnit) { + if (SL->getCharByteWidth() == 1) { + MangleByte(static_cast<char>(CodeUnit)); + } else if (SL->getCharByteWidth() == 2) { + MangleByte(static_cast<char>((CodeUnit >> 16) & 0xff)); + MangleByte(static_cast<char>(CodeUnit & 0xff)); + } else { + llvm_unreachable("unsupported CharByteWidth"); + } + }; + + // Enforce our 32 character max. + unsigned NumCharsToMangle = std::min(32U, SL->getLength()); + for (unsigned i = 0; i < NumCharsToMangle; ++i) + MangleChar(SL->getCodeUnit(i)); + + // Encode the NUL terminator if there is room. + if (NumCharsToMangle < 32) + MangleChar(0); + + Mangler.getStream() << '@'; +} + MicrosoftMangleContext * MicrosoftMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags) { return new MicrosoftMangleContextImpl(Context, Diags); |