diff options
author | Zachary Turner <zturner@google.com> | 2018-07-06 21:01:42 +0000 |
---|---|---|
committer | Zachary Turner <zturner@google.com> | 2018-07-06 21:01:42 +0000 |
commit | 648bebdc67f6dffa7de4b7bfe028498357ebf74d (patch) | |
tree | 61124208eab6f417f30602de6fb94b06ee47bb45 /llvm/lib | |
parent | 975c711358a1b91d9feba2200d1d85c2cc8cbd87 (diff) | |
download | bcm5719-llvm-648bebdc67f6dffa7de4b7bfe028498357ebf74d.tar.gz bcm5719-llvm-648bebdc67f6dffa7de4b7bfe028498357ebf74d.zip |
[PDB] One more fix for hasing GSI records.
The reference implementation uses a case-insensitive string
comparison for strings of equal length. This will cause the
string "tEo" to compare less than "VUo". However we were using
a case sensitive comparison, which would generate the opposite
outcome. Switch to a case insensitive comparison. Also, when
one of the strings contains non-ascii characters, fallback to
a straight memcmp.
The only way to really test this is with a DIA test. Before this
patch, the test will fail (but succeed if link.exe is used instead
of lld-link). After the patch, it succeeds even with lld-link.
llvm-svn: 336464
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp | 35 |
1 files changed, 27 insertions, 8 deletions
diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp index eaea24a05f2..c8437d2ef9e 100644 --- a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp @@ -82,6 +82,26 @@ Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) { return Error::success(); } +static bool isAsciiString(StringRef S) { + return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; }); +} + +// See `caseInsensitiveComparePchPchCchCch` in gsi.cpp +static bool gsiRecordLess(StringRef S1, StringRef S2) { + size_t LS = S1.size(); + size_t RS = S2.size(); + // Shorter strings always compare less than longer strings. + if (LS != RS) + return LS < RS; + + // If either string contains non ascii characters, memcmp them. + if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2))) + return memcmp(S1.data(), S2.data(), LS) < 0; + + // Both strings are ascii, use memicmp. + return memicmp(S1.data(), S2.data(), LS) < 0; +} + void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) { std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1> TmpBuckets; @@ -118,17 +138,16 @@ void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) { ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc); HashBuckets.push_back(ChainStartOff); - // Sort each bucket by memcmp of the symbol's name. + // Sort each bucket by memcmp of the symbol's name. It's important that + // we use the same sorting algorithm as is used by the reference + // implementation to ensure that the search for a record within a bucket + // can properly early-out when it detects the record won't be found. The + // algorithm used here corredsponds to the function + // caseInsensitiveComparePchPchCchCch in the reference implementation. std::sort(Bucket.begin(), Bucket.end(), [](const std::pair<StringRef, PSHashRecord> &Left, const std::pair<StringRef, PSHashRecord> &Right) { - size_t LS = Left.first.size(); - size_t RS = Right.first.size(); - if (LS < RS) - return true; - if (LS > RS) - return false; - return Left.first < Right.first; + return gsiRecordLess(Left.first, Right.first); }); for (const auto &Entry : Bucket) |