From 6e47204b0cf3e63e38d94a1d93e36e8ed65d0ed0 Mon Sep 17 00:00:00 2001
From: Eric Christopher <echristo@apple.com>
Date: Mon, 7 Nov 2011 09:18:42 +0000
Subject: Add a new dwarf accelerator table prototype with the goal of
 replacing the pubnames and pubtypes tables. LLDB can currently use this
 format and a full spec is forthcoming and submission for standardization is
 planned.

A basic summary:

The dwarf accelerator tables are an indirect hash table optimized
for null lookup rather than access to known data. They are output into
an on-disk format that looks like this:

.-------------.
|  HEADER     |
|-------------|
|  BUCKETS    |
|-------------|
|  HASHES     |
|-------------|
|  OFFSETS    |
|-------------|
|  DATA       |
`-------------'

where the header contains a magic number, version, type of hash function,
the number of buckets, total number of hashes, and room for a special
struct of data and the length of that struct.

The buckets contain an index (e.g. 6) into the hashes array. The hashes
section contains all of the 32-bit hash values in contiguous memory, and
the offsets contain the offset into the data area for the particular
hash.

For a lookup example, we could hash a function name and take it modulo the
number of buckets giving us our bucket. From there we take the bucket value
as an index into the hashes table and look at each successive hash as long
as the hash value is still the same modulo result (bucket value) as earlier.
If we have a match we look at that same entry in the offsets table and
grab the offset in the data for our final match.

llvm-svn: 143921
---
 llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 250 ++++++++++++++++++++++++
 1 file changed, 250 insertions(+)
 create mode 100644 llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp

(limited to 'llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp')

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
new file mode 100644
index 00000000000..b7c8c6ee47c
--- /dev/null
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -0,0 +1,250 @@
+//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "DwarfAccelTable.h"
+#include "DwarfDebug.h"
+#include "DIE.h"
+
+using namespace llvm;
+
+const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
+  switch (AT) {
+  default: llvm_unreachable("invalid AtomType!");
+  case eAtomTypeNULL: return "eAtomTypeNULL";
+  case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset";
+  case eAtomTypeCUOffset: return "eAtomTypeCUOffset";
+  case eAtomTypeTag: return "eAtomTypeTag";
+  case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
+  case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
+  } 
+}
+
+// The general case would need to have a less hard coded size for the
+// length of the HeaderData, however, if we're constructing based on a
+// single Atom then we know it will always be: 4 + 4 + 2 + 2.
+DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) :
+  Header(12),
+  HeaderData(atom) {
+}
+
+void DwarfAccelTable::AddName(StringRef Name, DIE* die) {
+  // If the string is in the list already then add this die to the list
+  // otherwise add a new one.
+  DIEArray &DIEs = Entries[Name];
+  DIEs.push_back(die);
+}
+
+void DwarfAccelTable::ComputeBucketCount(void) {
+  // First get the number of unique hashes.
+  std::vector<uint32_t> uniques;
+  uniques.resize(Data.size());
+  for (size_t i = 0; i < Data.size(); ++i)
+    uniques[i] = Data[i]->HashValue;
+  std::sort(uniques.begin(), uniques.end());
+  std::vector<uint32_t>::iterator p =
+    std::unique(uniques.begin(), uniques.end());
+  uint32_t num = std::distance(uniques.begin(), p);
+
+  // Then compute the bucket size, minimum of 1 bucket.
+  if (num > 1024) Header.bucket_count = num/4;
+  if (num > 16) Header.bucket_count = num/2;
+  else Header.bucket_count = num > 0 ? num : 1;
+
+  Header.hashes_count = num;
+}
+
+void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) {
+  // Create the individual hash data outputs.
+  for (StringMap<DIEArray>::const_iterator
+         EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+    struct HashData *Entry = new HashData((*EI).getKeyData());
+    for (DIEArray::const_iterator DI = (*EI).second.begin(),
+           DE = (*EI).second.end();
+         DI != DE; ++DI)
+      Entry->addOffset((*DI)->getOffset());
+    Data.push_back(Entry);
+  }
+
+  // Figure out how many buckets we need, then compute the bucket
+  // contents and the final ordering. We'll emit the hashes and offsets
+  // by doing a walk during the emission phase. We add temporary
+  // symbols to the data so that we can reference them during the offset
+  // later, we'll emit them when we emit the data.
+  ComputeBucketCount();
+
+  // Compute bucket contents and final ordering.
+  Buckets.resize(Header.bucket_count);
+  for (size_t i = 0; i < Data.size(); ++i) {
+    uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
+    Buckets[bucket].push_back(Data[i]);
+    Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
+  }
+}
+
+// Emits the header for the table via the AsmPrinter.
+void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
+  Asm->OutStreamer.AddComment("Header Magic");
+  Asm->EmitInt32(Header.magic);
+  Asm->OutStreamer.AddComment("Header Version");
+  Asm->EmitInt16(Header.version);
+  Asm->OutStreamer.AddComment("Header Hash Function");
+  Asm->EmitInt16(Header.hash_function);
+  Asm->OutStreamer.AddComment("Header Bucket Count");
+  Asm->EmitInt32(Header.bucket_count);
+  Asm->OutStreamer.AddComment("Header Hash Count");
+  Asm->EmitInt32(Header.hashes_count);
+  Asm->OutStreamer.AddComment("Header Data Length");
+  Asm->EmitInt32(Header.header_data_len);
+  Asm->OutStreamer.AddComment("HeaderData Die Offset Base");
+  Asm->EmitInt32(HeaderData.die_offset_base);
+  Asm->OutStreamer.AddComment("HeaderData Atom Count");
+  Asm->EmitInt32(HeaderData.Atoms.size());
+  for (size_t i = 0; i < HeaderData.Atoms.size(); i++) {
+    Atom A = HeaderData.Atoms[i];
+    Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type));
+    Asm->EmitInt16(A.type);
+    Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form));
+    Asm->EmitInt16(A.form);
+  }
+}
+
+// Walk through and emit the buckets for the table. This will look
+// like a list of numbers of how many elements are in each bucket.
+void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
+  unsigned index = 0;
+  for (size_t i = 0; i < Buckets.size(); ++i) {
+    Twine Comment = Twine("Bucket ") + Twine(i);
+    Asm->OutStreamer.AddComment(Comment);
+    if (Buckets[i].size() != 0)
+      Asm->EmitInt32(index);
+    else
+      Asm->EmitInt32(UINT32_MAX);
+    index += Buckets[i].size();
+  }
+}
+
+// Walk through the buckets and emit the individual hashes for each
+// bucket.
+void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+  for (size_t i = 0; i < Buckets.size(); ++i) {
+    for (HashList::const_iterator HI = Buckets[i].begin(),
+           HE = Buckets[i].end(); HI != HE; ++HI) {
+      Twine Comment = Twine("Hash in Bucket ") + Twine(i);
+      Asm->OutStreamer.AddComment(Comment);
+      Asm->EmitInt32((*HI)->HashValue);
+    } 
+  }
+}
+
+// Walk through the buckets and emit the individual offsets for each
+// element in each bucket. This is done via a symbol subtraction from the
+// beginning of the section. The non-section symbol will be output later
+// when we emit the actual data.
+void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+  for (size_t i = 0; i < Buckets.size(); ++i) {
+    for (HashList::const_iterator HI = Buckets[i].begin(),
+           HE = Buckets[i].end(); HI != HE; ++HI) {
+      Twine Comment = Twine("Offset in Bucket ") + Twine(i);
+      Asm->OutStreamer.AddComment(Comment);
+      MCContext &Context = Asm->OutStreamer.getContext();
+      const MCExpr *Sub =
+        MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context),
+                                MCSymbolRefExpr::Create(SecBegin, Context),
+                                Context);
+      Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0);
+    }
+  }
+}
+
+// Walk through the buckets and emit the full data for each element in
+// the bucket. For the string case emit the dies and the various offsets.
+// Terminate each HashData bucket with 0.
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
+  uint64_t PrevHash = UINT64_MAX;
+  for (size_t i = 0; i < Buckets.size(); ++i) {
+    for (HashList::const_iterator HI = Buckets[i].begin(),
+           HE = Buckets[i].end(); HI != HE; ++HI) {
+      // Remember to emit the label for our offset.
+      Asm->OutStreamer.EmitLabel((*HI)->Sym);
+      Asm->OutStreamer.AddComment((*HI)->Str);
+      Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str),
+                             D->getDwarfStrSectionSym());
+      Asm->OutStreamer.AddComment("Num DIEs");
+      Asm->EmitInt32((*HI)->DIEOffsets.size());
+      for (std::vector<uint32_t>::const_iterator
+             DI = (*HI)->DIEOffsets.begin(), DE = (*HI)->DIEOffsets.end();
+           DI != DE; ++DI) {
+        Asm->EmitInt32((*DI));
+      }
+      // Emit a 0 to terminate the data unless we have a hash collision.
+      if (PrevHash != (*HI)->HashValue)
+        Asm->EmitInt32(0);
+      PrevHash = (*HI)->HashValue;
+    }
+  }
+}
+
+// Emit the entire data structure to the output file.
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin,
+                           DwarfDebug *D) {
+  // Emit the header.
+  EmitHeader(Asm);
+
+  // Emit the buckets.
+  EmitBuckets(Asm);
+
+  // Emit the hashes.
+  EmitHashes(Asm);
+
+  // Emit the offsets.
+  EmitOffsets(Asm, SecBegin);
+
+  // Emit the hash data.
+  EmitData(Asm, D);
+}
+
+#ifndef NDEBUG
+void DwarfAccelTable::print(raw_ostream &O) {
+
+  Header.print(O);
+  HeaderData.print(O);
+
+  O << "Entries: \n";
+  for (StringMap<DIEArray>::const_iterator
+         EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+    O << "Name: " << (*EI).getKeyData() << "\n";
+    for (DIEArray::const_iterator DI = (*EI).second.begin(),
+           DE = (*EI).second.end();
+         DI != DE; ++DI)
+      (*DI)->print(O);
+  }
+
+  O << "Buckets and Hashes: \n";
+  for (size_t i = 0; i < Buckets.size(); ++i)
+    for (HashList::const_iterator HI = Buckets[i].begin(),
+           HE = Buckets[i].end(); HI != HE; ++HI)
+      (*HI)->print(O);
+
+  O << "Data: \n";
+    for (std::vector<HashData*>::const_iterator
+           DI = Data.begin(), DE = Data.end(); DI != DE; ++DI)
+      (*DI)->print(O);
+  
+
+}
+#endif
-- 
cgit v1.2.3