lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298

//===-- HashedNameToDIE.cpp -------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "HashedNameToDIE.h"
#include "lldb/Core/ConstString.h"
#include "lldb/Core/DataExtractor.h"
#include "lldb/Core/Stream.h"
#include "lldb/Core/StreamString.h"
#include "lldb/Core/RegularExpression.h"
#include "lldb/Symbol/ObjectFile.h"

#include "DWARFCompileUnit.h"
#include "DWARFDebugInfo.h"
#include "DWARFDebugInfoEntry.h"
#include "DWARFDefines.h"
#include "SymbolFileDWARF.h"
using namespace lldb;
using namespace lldb_private;

// Hash a NUL terminated C string: start from 5381 and, for every byte of
// the string (taken as an unsigned value), multiply the running hash by
// 33 and add the byte. Arithmetic wraps at 32 bits.
static uint32_t
dl_new_hash (const char *s)
{
    uint32_t hash_value = 5381;
    const unsigned char *cursor = reinterpret_cast<const unsigned char *>(s);

    while (*cursor != 0)
    {
        hash_value = hash_value * 33 + *cursor;
        ++cursor;
    }

    return hash_value;
}


// Print every field of the table header to "s", one field per line.
//
// Each format string is newline terminated (as the prologue dump in this
// file already does); without the terminator all of the header fields run
// together on a single line of output.
void
HashedNameToDIE::Header::Dump (Stream &s)
{
    s.Printf ("header.magic              = 0x%8.8x\n", magic);
    s.Printf ("header.version            = 0x%4.4x\n", version);
    s.Printf ("header.addr_bytesize      = 0x%2.2x\n", addr_bytesize);
    s.Printf ("header.hash_function      = 0x%2.2x\n", hash_function);
    // The counts and the prologue length are shown in both hex and decimal.
    s.Printf ("header.bucket_count       = 0x%8.8x %u\n", bucket_count, bucket_count);
    s.Printf ("header.hashes_count       = 0x%8.8x %u\n", hashes_count, hashes_count);
    s.Printf ("header.prologue_length    = 0x%8.8x %u\n", prologue_length, prologue_length);
}

// Decode the table header from "data" starting at "offset".
//
// Returns the offset of the first byte after the header on success.
// Returns UINT32_MAX when the magic value is not HASH_MAGIC (in which case
// "version" is reset to zero) or when the version is anything other than 1
// (in which case "version" keeps the value that was read and none of the
// remaining fields are touched).
uint32_t
HashedNameToDIE::Header::Read (const DataExtractor &data, uint32_t offset)
{
    magic = data.GetU32 (&offset);
    if (magic == HASH_MAGIC)
    {
        version = data.GetU16 (&offset);
        if (version == 1)
        {
            // Only version 1 tables are understood; the remaining fields
            // follow in this exact order.
            addr_bytesize       = data.GetU8  (&offset);
            hash_function       = data.GetU8  (&offset);
            bucket_count        = data.GetU32 (&offset);
            hashes_count        = data.GetU32 (&offset);
            prologue_length     = data.GetU32 (&offset);
            return offset;
        }

        // Unsupported version
        return UINT32_MAX;
    }

    // Magic bytes didn't match
    version = 0;
    return UINT32_MAX;
}

// Print the complete DWARF table header to "s": the fields handled by
// HashedNameToDIE::Header::Dump() first, followed by the contents of
// "dwarf_prologue".
void
HashedNameToDIE::DWARF::Header::Dump (Stream &s)
{
    HashedNameToDIE::Header::Dump (s);
    dwarf_prologue.Dump (s);
}

// Decode the generic header followed by the DWARF prologue.
//
// If HashedNameToDIE::Header::Read() reports failure (UINT32_MAX) the
// prologue is cleared and UINT32_MAX is returned. Otherwise the prologue is
// decoded from the offset where the generic header ended, and whatever
// offset the prologue reader returns is handed back to the caller.
uint32_t
HashedNameToDIE::DWARF::Header::Read (const DataExtractor &data, uint32_t offset)
{
    const uint32_t prologue_offset = HashedNameToDIE::Header::Read (data, offset);
    if (prologue_offset == UINT32_MAX)
    {
        dwarf_prologue.Clear();
        return UINT32_MAX;
    }
    return dwarf_prologue.Read (data, prologue_offset);
}

// Print the prologue to "s": the DIE base offset on the first line, then
// one line per atom showing the atom's index, its type name and the name
// of its DW_FORM.
void
HashedNameToDIE::DWARF::Prologue::Dump (Stream &s)
{
    s.Printf ("dwarf_prologue.die_base_offset    = 0x%8.8x\n", die_base_offset);

    size_t atom_idx = 0;
    const size_t atom_count = atoms.size();
    while (atom_idx < atom_count)
    {
        s.Printf ("dwarf_prologue.atom[%zi] = %17s %s\n",
                  atom_idx,
                  GetAtomTypeName (atoms[atom_idx].type),
                  DW_FORM_value_to_name(atoms[atom_idx].form));
        ++atom_idx;
    }
}

uint32_t
HashedNameToDIE::DWARF::Prologue::Read (const DataExtractor &data, uint32_t offset)
{
    Clear();
    die_base_offset = data.GetU32 (&offset);
    Atom atom;
    while (offset != UINT32_MAX)
    {
        atom.type = data.GetU16 (&offset);
        atom.form = data.GetU16 (&offset);
        if (atom.type == eAtomTypeNULL)
            break;
        atoms.push_back(atom);
    }
    return offset;
}


// Construct a table over "data".
//
// dwarf          - Used only to fetch the string table via
//                  get_debug_str_data(). It is dereferenced unconditionally,
//                  so it must not be NULL.
// data           - The raw bytes of the hashed name table.
// is_apple_names - Selects which section name Dump() prints
//                  (".apple_names" when true, ".apple_types" otherwise).
//
// The header is only default constructed here; nothing is parsed until
// Initialize() is called.
HashedNameToDIE::MemoryTable::MemoryTable (SymbolFileDWARF *dwarf, 
                                           const lldb_private::DataExtractor &data,
                                           bool is_apple_names) :
    m_data (data),
    m_string_table (dwarf->get_debug_str_data ()),
    m_is_apple_names (is_apple_names),
    m_header  ()
{
}

// Parse the table header from the start of the table data.
//
// Returns true only when the header that was read reports version 1, which
// is the same condition Find() and Dump() test before touching the table.
bool
HashedNameToDIE::MemoryTable::Initialize ()
{
    // The header sits at offset zero. The offset returned by Read() is not
    // needed here; the decoded version field tells us whether it succeeded.
    m_header.Read (m_data, 0);
    return m_header.version == 1;
}


// Look up "name_cstr" in the hash table and append the offset of every DIE
// recorded for that exact name to "die_ofsets".
//
// Returns the number of DIE offsets that were appended. Zero is returned
// when the header is not version 1, when the name is NULL or empty, when
// the table has no buckets, or when the name is not present.
size_t
HashedNameToDIE::MemoryTable::Find (const char *name_cstr, DIEArray &die_ofsets) const
{
    if (m_header.version != 1)
        return 0;

    if (!name_cstr || name_cstr[0] == '\0')
        return 0;

    const uint32_t bucket_count = m_header.bucket_count;
    const uint32_t hashes_count = m_header.hashes_count;

    // A table without buckets cannot contain anything, and the modulo
    // below would otherwise divide by zero.
    if (bucket_count == 0)
        return 0;

    // Hash the C string and find the bucket for the hash value
    const uint32_t name_hash = dl_new_hash (name_cstr);
    const uint32_t bucket_idx = name_hash % bucket_count;

    // Extract the bucket entry which is a hash index. If the hash index
    // is UINT32_MAX, then the bucket is empty. If it isn't, it is the
    // index of the first hash in the hashes array that belongs to this
    // bucket.
    uint32_t bucket_offset = GetOffsetOfBucketEntry (bucket_idx);
    uint32_t hash_idx = m_data.GetU32 (&bucket_offset);
    if (hash_idx == UINT32_MAX)
        return 0;

    const size_t initial_size = die_ofsets.size();

    // Walk consecutive hashes for as long as we stay inside the hashes
    // array and the hash still belongs to "bucket_idx".
    uint32_t hash_offset = GetOffsetOfHashValue (hash_idx);
    for (; hash_idx < hashes_count; ++hash_idx)
    {
        const uint32_t hash = m_data.GetU32 (&hash_offset);
        if ((hash % bucket_count) != bucket_idx)
            break;

        if (hash != name_hash)
            continue;

        // The hash matches, but we still need to verify that the C string
        // matches in case we have a hash collision. Figure out the offset
        // for the data associated with this hash entry.
        uint32_t data_offset_offset = GetOffsetOfHashDataOffset (hash_idx);
        uint32_t hash_data_offset = m_data.GetU32 (&data_offset_offset);

        // The data for one hash is a sequence of records, one per string
        // that has this 32 bit hash:
        //
        // uint32_t stroff;     // string offset in .debug_str table
        // uint32_t num_dies;   // Number of DIE offsets that follow
        // uint32_t die_offsets[num_dies]; // An array of DIE offsets
        //
        // When a "stroff" is read and it is zero, then the data for this
        // hash is terminated.
        uint32_t str_offset;
        while ((str_offset = m_data.GetU32 (&hash_data_offset)) != 0)
        {
            // The DIE count is always consumed so that "hash_data_offset"
            // stays aligned on record boundaries whether or not this
            // string turns out to be the one we want.
            const uint32_t die_count = m_data.GetU32 (&hash_data_offset);

            // Extract the C string and compare it
            const char *cstr_name = m_string_table.PeekCStr(str_offset);
            if (cstr_name && strcmp(name_cstr, cstr_name) == 0)
            {
                // We have a match: extract "die_count" DIE offsets and put
                // them into the results
                for (uint32_t die_idx = 0; die_idx < die_count; ++die_idx)
                    die_ofsets.push_back(m_data.GetU32 (&hash_data_offset));
            }
            else
            {
                // Not our string (hash collision) or the string could not
                // be resolved: step over this record's DIE offsets so the
                // next read lands on the next record's string offset.
                hash_data_offset += die_count * static_cast<uint32_t>(sizeof(uint32_t));
            }
        }
    }

    return die_ofsets.size() - initial_size;
}

// Print the whole table to "s": a section title, the header, and then for
// every bucket either "EMPTY" or each hash in the bucket together with the
// strings and DIE offsets stored for that hash. Summary statistics (empty
// bucket percentage, average hashes per non-empty bucket and the number of
// hashes shared by more than one string) are printed last.
//
// Nothing is printed unless the header version is 1.
void
HashedNameToDIE::MemoryTable::Dump (Stream &s)
{
    if (m_header.version != 1)
        return;

    if (m_is_apple_names)
        s.PutCString (".apple_names contents:\n");
    else
        s.PutCString (".apple_types contents:\n");

    m_header.Dump (s);

    uint32_t empty_bucket_count = 0;
    uint32_t hash_collisions = 0;
    uint32_t hash_idx_offset = GetOffsetOfBucketEntry (0);
    const uint32_t bucket_count = m_header.bucket_count;
    const uint32_t hashes_count = m_header.hashes_count;

    for (uint32_t bucket_idx = 0; bucket_idx < bucket_count; ++bucket_idx)
    {
        // Each bucket entry is the index of the bucket's first hash, or
        // UINT32_MAX when the bucket is empty.
        uint32_t hash_idx = m_data.GetU32 (&hash_idx_offset);
        s.Printf("bucket[%u] ", bucket_idx);

        if (hash_idx != UINT32_MAX)
        {
            s.Printf(" => hash[%u]\n", hash_idx);

            uint32_t hash_offset = GetOffsetOfHashValue (hash_idx);
            uint32_t data_offset = GetOffsetOfHashDataOffset (hash_idx);

            // Walk consecutive hashes for as long as we stay inside the
            // hashes array and the hash still belongs to this bucket. The
            // index has to advance together with the two offsets so that
            // the printed hash index is right and the bound can trigger.
            for (; hash_idx < hashes_count; ++hash_idx)
            {
                const uint32_t hash = m_data.GetU32 (&hash_offset);
                if ((hash % bucket_count) != bucket_idx)
                    break;

                uint32_t hash_data_offset = m_data.GetU32 (&data_offset);
                s.Printf("  hash[%u] = 0x%8.8x\n", hash_idx, hash);

                // The data for a hash is a list of records, each a string
                // offset, a DIE count and that many DIE offsets, terminated
                // by a zero string offset.
                uint32_t string_count = 0;
                uint32_t strp_offset;
                while ((strp_offset = m_data.GetU32 (&hash_data_offset)) != 0)
                {
                    const uint32_t num_die_offsets = m_data.GetU32 (&hash_data_offset);
                    s.Printf("   str[%u] = 0x%8.8x \"%s\", dies[%u] = {",
                             string_count,
                             strp_offset,
                             m_string_table.PeekCStr(strp_offset),
                             num_die_offsets);
                    ++string_count;

                    for (uint32_t die_idx = 0; die_idx < num_die_offsets; ++die_idx)
                    {
                        const uint32_t die_offset = m_data.GetU32 (&hash_data_offset);
                        s.Printf(" 0x%8.8x", die_offset);
                    }
                    s.PutCString (" }\n");
                }

                // More than one string under the same hash is a collision
                if (string_count > 1)
                    ++hash_collisions;
            }
        }
        else
        {
            s.PutCString(" EMPTY\n");
            ++empty_bucket_count;
        }
        s.EOL();
    }
    s.EOL();

    // Guard both ratios: a table may have no buckets at all, or every
    // bucket may be empty.
    const uint32_t used_bucket_count = bucket_count - empty_bucket_count;
    const float empty_percent = bucket_count
        ? (static_cast<float>(empty_bucket_count) / static_cast<float>(bucket_count)) * 100.0f
        : 0.0f;
    const float average_hashes = used_bucket_count
        ? static_cast<float>(hashes_count) / static_cast<float>(used_bucket_count)
        : 0.0f;

    s.Printf ("%u of %u buckets empty (%2.1f%%)\n", empty_bucket_count, bucket_count, empty_percent);
    // This value is a plain ratio, so it is printed without a percent sign.
    s.Printf ("Average hashes/non-empty bucket = %2.1f\n", average_hashes);
    s.Printf ("Hash collisions = %u\n", hash_collisions);
}