lld/lib/ReaderWriter/Native/NativeFileFormat.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257

//===- lib/ReaderWriter/Native/NativeFileFormat.h -------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLD_READER_WRITER_NATIVE_NATIVE_FILE_FORMAT_H
#define LLD_READER_WRITER_NATIVE_NATIVE_FILE_FORMAT_H

#include "llvm/Support/DataTypes.h"
#include <cstdint>

namespace lld {

//
// Overview:
//
// The number one design goal of this file format is enable the linker to
// read object files into in-memory Atom objects extremely quickly.
// The second design goal is to enable future modifications to the
// Atom attribute model.
//
// The llvm native object file format is not like traditional object file
// formats (e.g. ELF, COFF, mach-o).  There is no symbol table and no
// sections.  Instead the file is essentially an array of archived Atoms.
// It is *not* serialized Atoms which would require deserialization into
// in memory objects.  Instead it is an array of read-only info about each
// Atom.  The NativeReader bulk creates in-memory Atoms which just have
// an ivar which points to the read-only info for that Atom. No additional
// processing is done to construct the in-memory Atoms. All Atom attribute
// getter methods are virtual calls which dig up the info they need from the
// ivar data.
//
// To support the gradual evolution of Atom attributes, the Atom read-only
// data is versioned. The NativeReader chooses which in-memory Atom class
// to use based on the version. What this means is that if new attributes
// are added (or changed) in the Atom model, a new native atom class and
// read-only atom info struct needs to be defined.  Then, all the existing
// native reader atom classes need to be modified to do their best effort
// to map their old style read-only data to the new Atom model.  At some point
// some classes to support old versions may be dropped.
//
//
// Details:
//
// The native object file format consists of a header that specifies the
// endianness of the file and the architecture along with a list of "chunks"
// in the file.  A Chunk is simply a tagged range of the file.  There is
// one chunk for the array of atom infos.  There is another chunk for the
// string pool, and another for the content pool.
//
// It turns out there most atoms have very similar sets of attributes, only
// the name and content attribute vary. To exploit this fact to reduce the file
// size, the atom read-only info contains just the name and content info plus
// a reference to which attribute set it uses.  The attribute sets are stored
// in another chunk.
//


//
// An entry in the NativeFileHeader that describes one chunk of the file.
//
struct NativeChunk {
  uint32_t    signature;
  uint32_t    fileOffset;
  uint32_t    fileSize;
  uint32_t    elementCount;
};


//
// The header in a native object file
//
struct NativeFileHeader {
  uint8_t     magic[16];
  uint32_t    endian;
  uint32_t    architecture;
  uint32_t    fileSize;
  uint32_t    chunkCount;
  // NativeChunk chunks[]
};

//
// Possible values for NativeChunk.signature field
//
enum NativeChunkSignatures {
  NCS_DefinedAtomsV1 = 1,
  NCS_AttributesArrayV1 = 2,
  NCS_AbsoluteAttributesV1 = 12,
  NCS_UndefinedAtomsV1 = 3,
  NCS_SharedLibraryAtomsV1 = 4,
  NCS_AbsoluteAtomsV1 = 5,
  NCS_Strings = 6,
  NCS_ReferencesArrayV1 = 7,
  NCS_ReferencesArrayV2 = 8,
  NCS_TargetsTable = 9,
  NCS_AddendsTable = 10,
  NCS_Content = 11,
};

//
// The 16-bytes at the start of a native object file
//
#define NATIVE_FILE_HEADER_MAGIC "llvm nat obj v1 "

//
// Possible values for the NativeFileHeader.endian field
//
enum {
  NFH_BigEndian     = 0x42696745,
  NFH_LittleEndian  = 0x4574696c
};


//
// Possible values for the NativeFileHeader.architecture field
//
enum {
  NFA_x86     = 1,
  NFA_x86_64  = 2,
  NFA_armv6   = 3,
  NFA_armv7   = 4,
};


//
// The NCS_DefinedAtomsV1 chunk contains an array of these structs
//
struct NativeDefinedAtomIvarsV1 {
  uint32_t  nameOffset;
  uint32_t  attributesOffset;
  uint32_t  referencesStartIndex;
  uint32_t  referencesCount;
  uint32_t  contentOffset;
  uint32_t  contentSize;
  uint64_t  sectionSize;
};


//
// The NCS_AttributesArrayV1 chunk contains an array of these structs
//
struct NativeAtomAttributesV1 {
  uint32_t  sectionNameOffset;
  uint16_t  align2;
  uint16_t  alignModulus;
  uint8_t   scope;
  uint8_t   interposable;
  uint8_t   merge;
  uint8_t   contentType;
  uint8_t   sectionChoice;
  uint8_t   deadStrip;
  uint8_t   dynamicExport;
  uint8_t   permissions;
  uint8_t   alias;
  uint8_t   codeModel;
};


//
// The NCS_UndefinedAtomsV1 chunk contains an array of these structs
//
struct NativeUndefinedAtomIvarsV1 {
  uint32_t  nameOffset;
  uint32_t  flags;
  uint32_t  fallbackNameOffset;
};


//
// The NCS_SharedLibraryAtomsV1 chunk contains an array of these structs
//
struct NativeSharedLibraryAtomIvarsV1 {
  uint64_t  size;
  uint32_t  nameOffset;
  uint32_t  loadNameOffset;
  uint32_t  type;
  uint32_t  flags;
};


//
// The NCS_AbsoluteAtomsV1 chunk contains an array of these structs
//
struct NativeAbsoluteAtomIvarsV1 {
  uint32_t  nameOffset;
  uint32_t  attributesOffset;
  uint32_t  reserved;
  uint64_t  value;
};


//
// The NCS_ReferencesArrayV1 chunk contains an array of these structs
//
struct NativeReferenceIvarsV1 {
  enum {
    noTarget = UINT16_MAX
  };
  uint32_t  offsetInAtom;
  uint16_t  kindValue;
  uint8_t   kindNamespace;
  uint8_t   kindArch;
  uint16_t  targetIndex;
  uint16_t  addendIndex;
};


//
// The NCS_ReferencesArrayV2 chunk contains an array of these structs
//
struct NativeReferenceIvarsV2 {
  enum : unsigned {
    noTarget = UINT32_MAX
  };
  uint64_t  offsetInAtom;
  int64_t   addend;
  uint16_t  kindValue;
  uint8_t   kindNamespace;
  uint8_t   kindArch;
  uint32_t  targetIndex;
};


//
// The NCS_TargetsTable chunk contains an array of uint32_t entries.
// The C++ class Reference has a target() method that returns a
// pointer to another Atom.  We can't have pointers in object files,
// so instead  NativeReferenceIvarsV1 contains an index to the target.
// The index is into this NCS_TargetsTable of uint32_t entries.
// The values in this table are the index of the (target) atom in this file.
// For DefinedAtoms the value is from 0 to NCS_DefinedAtomsV1.elementCount.
// For UndefinedAtoms the value is from NCS_DefinedAtomsV1.elementCount to
// NCS_DefinedAtomsV1.elementCount+NCS_UndefinedAtomsV1.elementCount.
//


//
// The NCS_AddendsTable chunk contains an array of int64_t entries.
// If we allocated space for addends directly in NativeReferenceIvarsV1
// it would double the size of that struct.  But since addends are rare,
// we instead just keep a pool of addends and have NativeReferenceIvarsV1
// (if it needs an addend) just store the index (into the pool) of the
// addend it needs.
//


} // namespace lld

#endif // LLD_READER_WRITER_NATIVE_NATIVE_FILE_FORMAT_H