6 files changed, 263 insertions, 87 deletions
diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
index 4802cc6e819..db703809f7c 100644
--- a/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -146,7 +146,7 @@ Error DbiStream::reload() {
 
   if (ECSubstream.getLength() > 0) {
     BinaryStreamReader ECReader(ECSubstream);
-    if (auto EC = ECNames.load(ECReader))
+    if (auto EC = ECNames.reload(ECReader))
       return EC;
   }
 
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
index f158c5c5386..859295d2c7d 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -338,7 +338,7 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
 }
 
 Expected<PDBStringTable &> PDBFile::getStringTable() {
-  if (!Strings || !PDBStringTableStream) {
+  if (!Strings) {
     auto IS = getPDBInfoStream();
     if (!IS)
       return IS.takeError();
@@ -350,12 +350,13 @@ Expected<PDBStringTable &> PDBFile::getStringTable() {
     if (!NS)
       return NS.takeError();
 
-    BinaryStreamReader Reader(**NS);
     auto N = llvm::make_unique<PDBStringTable>();
-    if (auto EC = N->load(Reader))
+    BinaryStreamReader Reader(**NS);
+    if (auto EC = N->reload(Reader))
       return std::move(EC);
+    assert(Reader.bytesRemaining() == 0);
+    StringTableStream = std::move(*NS);
     Strings = std::move(N);
-    PDBStringTableStream = std::move(*NS);
   }
   return *Strings;
 }
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index 972c995bf4b..4dd965c6907 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -80,9 +80,9 @@ Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) {
 }
 
 Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() {
-  uint32_t PDBStringTableSize = Strings.finalize();
+  uint32_t StringsLen = Strings.calculateSerializedSize();
 
-  if (auto EC = addNamedStream("/names", PDBStringTableSize))
+  if (auto EC = addNamedStream("/names", StringsLen))
     return std::move(EC);
   if (auto EC = addNamedStream("/LinkInfo", 0))
     return std::move(EC);
@@ -109,6 +109,13 @@ Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() {
   return Msf->build();
 }
 
+Expected<uint32_t> PDBFileBuilder::getNamedStreamIndex(StringRef Name) const {
+  uint32_t SN = 0;
+  if (!NamedStreams.get(Name, SN))
+    return llvm::make_error<pdb::RawError>(raw_error_code::no_stream);
+  return SN;
+}
+
 Error PDBFileBuilder::commit(StringRef Filename) {
   auto ExpectedLayout = finalizeMsfLayout();
   if (!ExpectedLayout)
@@ -146,12 +153,12 @@ Error PDBFileBuilder::commit(StringRef Filename) {
       return EC;
   }
 
-  uint32_t PDBStringTableStreamNo = 0;
-  if (!NamedStreams.get("/names", PDBStringTableStreamNo))
-    return llvm::make_error<pdb::RawError>(raw_error_code::no_stream);
+  auto ExpectedSN = getNamedStreamIndex("/names");
+  if (!ExpectedSN)
+    return ExpectedSN.takeError();
 
-  auto NS = WritableMappedBlockStream::createIndexedStream(
-      Layout, Buffer, PDBStringTableStreamNo);
+  auto NS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer,
+                                                           *ExpectedSN);
   BinaryStreamWriter NSWriter(*NS);
   if (auto EC = Strings.commit(NSWriter))
     return EC;
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
index fd3e69db86f..ee32f61f16f 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
@@ -1,5 +1,4 @@
-//===- PDBStringTable.cpp - PDB String Table -----------------------*- C++
-//-*-===//
+//===- PDBStringTable.cpp - PDB String Table ---------------------*- C++-*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,6 +10,7 @@
 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/Hash.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
@@ -21,71 +21,91 @@ using namespace llvm;
 using namespace llvm::support;
 using namespace llvm::pdb;
 
-PDBStringTable::PDBStringTable() {}
-
-Error PDBStringTable::load(BinaryStreamReader &Stream) {
-  ByteSize = Stream.getLength();
+uint32_t PDBStringTable::getByteSize() const { return ByteSize; }
+uint32_t PDBStringTable::getNameCount() const { return NameCount; }
+uint32_t PDBStringTable::getHashVersion() const { return Header->HashVersion; }
+uint32_t PDBStringTable::getSignature() const { return Header->Signature; }
 
-  const PDBStringTableHeader *H;
-  if (auto EC = Stream.readObject(H))
+Error PDBStringTable::readHeader(BinaryStreamReader &Reader) {
+  if (auto EC = Reader.readObject(Header))
     return EC;
 
-  if (H->Signature != PDBStringTableSignature)
+  if (Header->Signature != PDBStringTableSignature)
     return make_error<RawError>(raw_error_code::corrupt_file,
                                 "Invalid hash table signature");
-  if (H->HashVersion != 1 && H->HashVersion != 2)
+  if (Header->HashVersion != 1 && Header->HashVersion != 2)
     return make_error<RawError>(raw_error_code::corrupt_file,
                                 "Unsupported hash version");
 
-  Signature = H->Signature;
-  HashVersion = H->HashVersion;
-  if (auto EC = Stream.readStreamRef(NamesBuffer, H->ByteSize))
+  assert(Reader.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Error PDBStringTable::readStrings(BinaryStreamReader &Reader) {
+  if (auto EC = Strings.initialize(Reader)) {
     return joinErrors(std::move(EC),
                       make_error<RawError>(raw_error_code::corrupt_file,
                                            "Invalid hash table byte length"));
+  }
+
+  assert(Reader.bytesRemaining() == 0);
+  return Error::success();
+}
 
+Error PDBStringTable::readHashTable(BinaryStreamReader &Reader) {
   const support::ulittle32_t *HashCount;
-  if (auto EC = Stream.readObject(HashCount))
+  if (auto EC = Reader.readObject(HashCount))
     return EC;
 
-  if (auto EC = Stream.readArray(IDs, *HashCount))
+  if (auto EC = Reader.readArray(IDs, *HashCount)) {
     return joinErrors(std::move(EC),
                       make_error<RawError>(raw_error_code::corrupt_file,
                                            "Could not read bucket array"));
+  }
 
-  if (Stream.bytesRemaining() < sizeof(support::ulittle32_t))
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Missing name count");
+  return Error::success();
+}
 
-  if (auto EC = Stream.readInteger(NameCount))
+Error PDBStringTable::readEpilogue(BinaryStreamReader &Reader) {
+  if (auto EC = Reader.readInteger(NameCount))
     return EC;
 
-  if (Stream.bytesRemaining() > 0)
-    return make_error<RawError>(raw_error_code::stream_too_long,
-                                "Unexpected bytes found in string table");
-
+  assert(Reader.bytesRemaining() == 0);
   return Error::success();
 }
 
-uint32_t PDBStringTable::getByteSize() const { return ByteSize; }
+Error PDBStringTable::reload(BinaryStreamReader &Reader) {
+
+  BinaryStreamReader SectionReader;
+
+  std::tie(SectionReader, Reader) = Reader.split(sizeof(PDBStringTableHeader));
+  if (auto EC = readHeader(SectionReader))
+    return EC;
+
+  std::tie(SectionReader, Reader) = Reader.split(Header->ByteSize);
+  if (auto EC = readStrings(SectionReader))
+    return EC;
+
+  // We don't know how long the hash table is until we parse it, so let the
+  // function responsible for doing that figure it out.
+  if (auto EC = readHashTable(Reader))
+    return EC;
+
+  std::tie(SectionReader, Reader) = Reader.split(sizeof(uint32_t));
+  if (auto EC = readEpilogue(SectionReader))
+    return EC;
+
+  assert(Reader.bytesRemaining() == 0);
+  return Error::success();
+}
 
 StringRef PDBStringTable::getStringForID(uint32_t ID) const {
-  if (ID == IDs[0])
-    return StringRef();
-
-  // NamesBuffer is a buffer of null terminated strings back to back.  ID is
-  // the starting offset of the string we're looking for.  So just seek into
-  // the desired offset and a read a null terminated stream from that offset.
-  StringRef Result;
-  BinaryStreamReader NameReader(NamesBuffer);
-  NameReader.setOffset(ID);
-  if (auto EC = NameReader.readCString(Result))
-    consumeError(std::move(EC));
-  return Result;
+  return Strings.getString(ID);
 }
 
 uint32_t PDBStringTable::getIDForString(StringRef Str) const {
-  uint32_t Hash = (HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str);
+  uint32_t Hash =
+      (Header->HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str);
   size_t Count = IDs.size();
   uint32_t Start = Hash % Count;
   for (size_t I = 0; I < Count; ++I) {
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
index 4add6ead212..a472181a489 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
@@ -8,31 +8,23 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
+
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/Hash.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
 #include "llvm/Support/BinaryStreamWriter.h"
 #include "llvm/Support/Endian.h"
 
 using namespace llvm;
+using namespace llvm::msf;
 using namespace llvm::support;
 using namespace llvm::support::endian;
 using namespace llvm::pdb;
 
 uint32_t PDBStringTableBuilder::insert(StringRef S) {
-  auto P = Strings.insert({S, StringSize});
-
-  // If a given string didn't exist in the string table, we want to increment
-  // the string table size.
-  if (P.second)
-    StringSize += S.size() + 1; // +1 for '\0'
-  return P.first->second;
-}
-
-uint32_t PDBStringTableBuilder::getStringIndex(StringRef S) {
-  auto Iter = Strings.find(S);
-  assert(Iter != Strings.end());
-  return Iter->second;
+  return Strings.insert(S);
 }
 
 static uint32_t computeBucketCount(uint32_t NumStrings) {
@@ -44,49 +36,52 @@ static uint32_t computeBucketCount(uint32_t NumStrings) {
   return (NumStrings + 1) * 1.25;
 }
 
-uint32_t PDBStringTableBuilder::finalize() {
-  uint32_t Size = 0;
-  Size += sizeof(PDBStringTableHeader);
-  Size += StringSize;
-  Size += sizeof(uint32_t); // Hash table begins with 4-byte size field.
+uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
+  uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
+  Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
 
-  uint32_t BucketCount = computeBucketCount(Strings.size());
-  Size += BucketCount * sizeof(uint32_t);
+  return Size;
+}
 
-  Size +=
-      sizeof(uint32_t); // The /names stream ends with the number of strings.
+uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
+  uint32_t Size = 0;
+  Size += sizeof(PDBStringTableHeader);
+  Size += Strings.calculateSerializedSize();
+  Size += calculateHashTableSize();
+  Size += sizeof(uint32_t); // The /names stream ends with the string count.
   return Size;
 }
 
-Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
+Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
   // Write a header
   PDBStringTableHeader H;
   H.Signature = PDBStringTableSignature;
   H.HashVersion = 1;
-  H.ByteSize = StringSize;
+  H.ByteSize = Strings.calculateSerializedSize();
   if (auto EC = Writer.writeObject(H))
     return EC;
+  assert(Writer.bytesRemaining() == 0);
+  return Error::success();
+}
 
-  // Write a string table.
-  uint32_t StringStart = Writer.getOffset();
-  for (auto Pair : Strings) {
-    StringRef S = Pair.first;
-    uint32_t Offset = Pair.second;
-    Writer.setOffset(StringStart + Offset);
-    if (auto EC = Writer.writeCString(S))
-      return EC;
-  }
-  Writer.setOffset(StringStart + StringSize);
+Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
+  if (auto EC = Strings.commit(Writer))
+    return EC;
+
+  assert(Writer.bytesRemaining() == 0);
+  return Error::success();
+}
 
+Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
   // Write a hash table.
   uint32_t BucketCount = computeBucketCount(Strings.size());
   if (auto EC = Writer.writeInteger(BucketCount))
     return EC;
   std::vector<ulittle32_t> Buckets(BucketCount);
 
-  for (auto Pair : Strings) {
-    StringRef S = Pair.first;
-    uint32_t Offset = Pair.second;
+  for (auto &Pair : Strings) {
+    StringRef S = Pair.getKey();
+    uint32_t Offset = Pair.getValue();
     uint32_t Hash = hashStringV1(S);
 
     for (uint32_t I = 0; I != BucketCount; ++I) {
@@ -102,7 +97,37 @@ Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
 
   if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
     return EC;
-  if (auto EC = Writer.writeInteger(static_cast<uint32_t>(Strings.size())))
+
+  assert(Writer.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
+  if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
+    return EC;
+  assert(Writer.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
+  BinaryStreamWriter SectionWriter;
+
+  std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
+  if (auto EC = writeHeader(SectionWriter))
+    return EC;
+
+  std::tie(SectionWriter, Writer) =
+      Writer.split(Strings.calculateSerializedSize());
+  if (auto EC = writeStrings(SectionWriter))
     return EC;
+
+  std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
+  if (auto EC = writeHashTable(SectionWriter))
+    return EC;
+
+  std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
+  if (auto EC = writeEpilogue(SectionWriter))
+    return EC;
+
   return Error::success();
 }
diff --git a/llvm/lib/DebugInfo/PDB/Native/StringTableStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/StringTableStreamBuilder.cpp
new file mode 100644
index 00000000000..b19440961bc
--- /dev/null
+++ b/llvm/lib/DebugInfo/PDB/Native/StringTableStreamBuilder.cpp
@@ -0,0 +1,123 @@
+//===- StringTableStreamBuilder.cpp - PDB String Table ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/StringTableStreamBuilder.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::support;
+using namespace llvm::support::endian;
+using namespace llvm::pdb;
+
+uint32_t StringTableStreamBuilder::insert(StringRef S) {
+  return Strings.insert(S);
+}
+
+static uint32_t computeBucketCount(uint32_t NumStrings) {
+  // The /names stream is basically an on-disk open-addressing hash table.
+  // Hash collisions are resolved by linear probing. We cannot make
+  // utilization 100% because it will make the linear probing extremely
+  // slow. But lower utilization wastes disk space. As a reasonable
+  // load factor, we choose 80%. We need +1 because slot 0 is reserved.
+  return (NumStrings + 1) * 1.25;
+}
+
+uint32_t StringTableStreamBuilder::hashTableSize() const {
+  uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
+
+  Size += computeBucketCount(Strings.size()) * sizeof(uint32_t);
+  return Size;
+}
+
+uint32_t StringTableStreamBuilder::calculateSerializedSize() const {
+  uint32_t Size = 0;
+  Size += sizeof(StringTableHeader);
+  Size += Strings.calculateSerializedSize();
+  Size += hashTableSize();
+  Size += sizeof(uint32_t); // The table ends with the number of strings.
+  return Size;
+}
+
+Error StringTableStreamBuilder::writeHeader(BinaryStreamWriter &Writer) const {
+  // Write a header
+  StringTableHeader H;
+  H.Signature = StringTableSignature;
+  H.HashVersion = 1;
+  H.ByteSize = Strings.calculateSerializedSize();
+  if (auto EC = Writer.writeObject(H))
+    return EC;
+
+  assert(Writer.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Error StringTableStreamBuilder::writeStrings(BinaryStreamWriter &Writer) const {
+  if (auto EC = Strings.commit(Writer))
+    return EC;
+
+  assert(Writer.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Error StringTableStreamBuilder::writeHashTable(
+    BinaryStreamWriter &Writer) const {
+  // Write a hash table.
+  uint32_t BucketCount = computeBucketCount(Strings.size());
+  if (auto EC = Writer.writeInteger(BucketCount))
+    return EC;
+
+  std::vector<ulittle32_t> Buckets(BucketCount);
+
+  for (auto &Pair : Strings) {
+    StringRef S = Pair.getKey();
+    uint32_t Offset = Pair.getValue();
+    uint32_t Hash = hashStringV1(S);
+
+    for (uint32_t I = 0; I != BucketCount; ++I) {
+      uint32_t Slot = (Hash + I) % BucketCount;
+      if (Slot == 0)
+        continue; // Skip reserved slot
+      if (Buckets[Slot] != 0)
+        continue;
+      Buckets[Slot] = Offset;
+      break;
+    }
+  }
+
+  if (auto EC = Writer.writeArray(makeArrayRef(Buckets)))
+    return EC;
+  assert(Writer.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Error StringTableStreamBuilder::commit(BinaryStreamWriter &Writer) const {
+  BinaryStreamWriter Section;
+
+  std::tie(Section, Writer) = Writer.split(sizeof(StringTableHeader));
+  if (auto EC = writeHeader(Section))
+    return EC;
+
+  std::tie(Section, Writer) = Writer.split(Strings.calculateSerializedSize());
+  if (auto EC = writeStrings(Section))
+    return EC;
+
+  std::tie(Section, Writer) = Writer.split(hashTableSize());
+  if (auto EC = writeHashTable(Section))
+    return EC;
+
+  if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
+    return EC;
+
+  assert(Writer.bytesRemaining() == 0);
+  return Error::success();
+}