1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
|
//===- FuzzerInternal.h - Internal header for the Fuzzer --------*- C++ -* ===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Define the main class fuzzer::Fuzzer and most functions.
//===----------------------------------------------------------------------===//
#ifndef LLVM_FUZZER_INTERNAL_H
#define LLVM_FUZZER_INTERNAL_H
#include <cassert>
#include <chrono>
#include <climits>
#include <cstddef>
#include <cstdlib>
#include <random>
#include <string.h>
#include <string>
#include <unordered_set>
#include <vector>
#include "FuzzerInterface.h"
namespace fuzzer {
using namespace std::chrono;
typedef std::vector<uint8_t> Unit;
// A simple POD sized array of bytes.
template <size_t kMaxSize> class FixedWord {
public:
FixedWord() {}
FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); }
void Set(const uint8_t *B, uint8_t S) {
assert(S <= kMaxSize);
memcpy(Data, B, S);
Size = S;
}
bool operator==(const FixedWord<kMaxSize> &w) const {
return Size == w.Size && 0 == memcmp(Data, w.Data, Size);
}
bool operator<(const FixedWord<kMaxSize> &w) const {
if (Size != w.Size)
return Size < w.Size;
return memcmp(Data, w.Data, Size) < 0;
}
static size_t GetMaxSize() { return kMaxSize; }
const uint8_t *data() const { return Data; }
uint8_t size() const { return Size; }
private:
uint8_t Size = 0;
uint8_t Data[kMaxSize];
};
typedef FixedWord<27> Word; // 28 bytes.
bool IsFile(const std::string &Path);
std::string FileToString(const std::string &Path);
Unit FileToVector(const std::string &Path);
void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V,
long *Epoch);
void WriteToFile(const Unit &U, const std::string &Path);
void CopyFileToErr(const std::string &Path);
// Returns "Dir/FileName" or equivalent for the current OS.
std::string DirPlusFile(const std::string &DirPath,
const std::string &FileName);
void Printf(const char *Fmt, ...);
void PrintHexArray(const Unit &U, const char *PrintAfter = "");
void PrintHexArray(const uint8_t *Data, size_t Size,
const char *PrintAfter = "");
void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter = "");
void PrintASCII(const Unit &U, const char *PrintAfter = "");
void PrintASCII(const Word &W, const char *PrintAfter = "");
std::string Hash(const Unit &U);
void SetTimer(int Seconds);
std::string Base64(const Unit &U);
int ExecuteCommand(const std::string &Command);
// Private copy of SHA1 implementation.
static const int kSHA1NumBytes = 20;
// Computes SHA1 hash of 'Len' bytes in 'Data', writes kSHA1NumBytes to 'Out'.
void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out);
// Changes U to contain only ASCII (isprint+isspace) characters.
// Returns true iff U has been changed.
bool ToASCII(Unit &U);
bool IsASCII(const Unit &U);
int NumberOfCpuCores();
int GetPid();
// Dictionary.
// Parses one dictionary entry.
// If successfull, write the enty to Unit and returns true,
// otherwise returns false.
bool ParseOneDictionaryEntry(const std::string &Str, Unit *U);
// Parses the dictionary file, fills Units, returns true iff all lines
// were parsed succesfully.
bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units);
class MutationDispatcher {
public:
MutationDispatcher(FuzzerRandomBase &Rand);
~MutationDispatcher();
/// Indicate that we are about to start a new sequence of mutations.
void StartMutationSequence();
/// Print the current sequence of mutations.
void PrintMutationSequence();
/// Indicate that the current sequence of mutations was successfull.
void RecordSuccessfulMutationSequence();
/// Mutates data by shuffling bytes.
size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize);
/// Mutates data by erasing a byte.
size_t Mutate_EraseByte(uint8_t *Data, size_t Size, size_t MaxSize);
/// Mutates data by inserting a byte.
size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize);
/// Mutates data by chanding one byte.
size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize);
/// Mutates data by chanding one bit.
size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize);
/// Mutates data by adding a word from the manual dictionary.
size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size,
size_t MaxSize);
/// Mutates data by adding a word from the temporary automatic dictionary.
size_t Mutate_AddWordFromTemporaryAutoDictionary(uint8_t *Data, size_t Size,
size_t MaxSize);
/// Mutates data by adding a word from the persistent automatic dictionary.
size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size,
size_t MaxSize);
/// Tries to find an ASCII integer in Data, changes it to another ASCII int.
size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize);
/// CrossOver Data with some other element of the corpus.
size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize);
/// Applies one of the above mutations.
/// Returns the new size of data which could be up to MaxSize.
size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize);
/// Creates a cross-over of two pieces of Data, returns its size.
size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2,
size_t Size2, uint8_t *Out, size_t MaxOutSize);
void AddWordToManualDictionary(const Word &W);
void AddWordToAutoDictionary(const Word &W, size_t PositionHint);
void ClearAutoDictionary();
void PrintRecommendedDictionary();
void SetCorpus(const std::vector<Unit> *Corpus);
private:
FuzzerRandomBase &Rand;
struct Impl;
Impl *MDImpl;
};
class Fuzzer {
public:
struct FuzzingOptions {
int Verbosity = 1;
int MaxLen = 0;
int UnitTimeoutSec = 300;
bool AbortOnTimeout = false;
int TimeoutExitCode = 77;
int MaxTotalTimeSec = 0;
bool DoCrossOver = true;
int MutateDepth = 5;
bool ExitOnFirst = false;
bool UseCounters = false;
bool UseIndirCalls = true;
bool UseTraces = false;
bool UseMemcmp = true;
bool UseFullCoverageSet = false;
bool Reload = true;
bool ShuffleAtStartUp = true;
int PreferSmallDuringInitialShuffle = -1;
size_t MaxNumberOfRuns = ULONG_MAX;
int SyncTimeout = 600;
int ReportSlowUnits = 10;
bool OnlyASCII = false;
std::string OutputCorpus;
std::string SyncCommand;
std::string ArtifactPrefix = "./";
std::string ExactArtifactPath;
bool SaveArtifacts = true;
bool PrintNEW = true; // Print a status line when new units are found;
bool OutputCSV = false;
bool PrintNewCovPcs = false;
};
Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options);
void AddToCorpus(const Unit &U) {
Corpus.push_back(U);
UpdateCorpusDistribution();
}
size_t ChooseUnitIdxToMutate();
const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; };
void Loop();
void Drill();
void ShuffleAndMinimize();
void InitializeTraceState();
void AssignTaintLabels(uint8_t *Data, size_t Size);
size_t CorpusSize() const { return Corpus.size(); }
void ReadDir(const std::string &Path, long *Epoch) {
Printf("Loading corpus: %s\n", Path.c_str());
ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch);
}
void RereadOutputCorpus();
// Save the current corpus to OutputCorpus.
void SaveCorpus();
size_t secondsSinceProcessStartUp() {
return duration_cast<seconds>(system_clock::now() - ProcessStartTime)
.count();
}
size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; }
static void StaticAlarmCallback();
void ExecuteCallback(const Unit &U);
// Merge Corpora[1:] into Corpora[0].
void Merge(const std::vector<std::string> &Corpora);
private:
void AlarmCallback();
void MutateAndTestOne();
void ReportNewCoverage(const Unit &U);
bool RunOne(const Unit &U);
void RunOneAndUpdateCorpus(Unit &U);
void WriteToOutputCorpus(const Unit &U);
void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
void PrintStats(const char *Where, const char *End = "\n");
void PrintStatusForNewUnit(const Unit &U);
// Updates the probability distribution for the units in the corpus.
// Must be called whenever the corpus or unit weights are changed.
void UpdateCorpusDistribution();
void SyncCorpus();
size_t RecordBlockCoverage();
size_t RecordCallerCalleeCoverage();
void PrepareCoverageBeforeRun();
bool CheckCoverageAfterRun();
// Trace-based fuzzing: we run a unit with some kind of tracing
// enabled and record potentially useful mutations. Then
// We apply these mutations one by one to the unit and run it again.
// Start tracing; forget all previously proposed mutations.
void StartTraceRecording();
// Stop tracing.
void StopTraceRecording();
void SetDeathCallback();
static void StaticDeathCallback();
void DeathCallback();
uint8_t *CurrentUnitData;
size_t CurrentUnitSize;
size_t TotalNumberOfRuns = 0;
size_t TotalNumberOfExecutedTraceBasedMutations = 0;
std::vector<Unit> Corpus;
std::unordered_set<std::string> UnitHashesAddedToCorpus;
// For UseCounters
std::vector<uint8_t> CounterBitmap;
size_t TotalBits() { // Slow. Call it only for printing stats.
size_t Res = 0;
for (auto x : CounterBitmap)
Res += __builtin_popcount(x);
return Res;
}
// TODO(krasin): remove GetRand from UserSuppliedFuzzer,
// and fully rely on the generator and the seed.
// The user supplied fuzzer will have a way to access the
// generator for its own purposes (like seeding the custom
// PRNG).
std::mt19937 Generator;
std::piecewise_constant_distribution<double> CorpusDistribution;
UserSuppliedFuzzer &USF;
FuzzingOptions Options;
system_clock::time_point ProcessStartTime = system_clock::now();
system_clock::time_point LastExternalSync = system_clock::now();
system_clock::time_point UnitStartTime;
long TimeOfLongestUnitInSeconds = 0;
long EpochOfLastReadOfOutputCorpus = 0;
size_t LastRecordedBlockCoverage = 0;
size_t LastRecordedCallerCalleeCoverage = 0;
size_t LastCoveragePcBufferLen = 0;
};
class SimpleUserSuppliedFuzzer : public UserSuppliedFuzzer {
public:
SimpleUserSuppliedFuzzer(FuzzerRandomBase *Rand, UserCallback Callback)
: UserSuppliedFuzzer(Rand), Callback(Callback) {}
virtual int TargetFunction(const uint8_t *Data, size_t Size) override {
return Callback(Data, Size);
}
private:
UserCallback Callback = nullptr;
};
}; // namespace fuzzer
#endif // LLVM_FUZZER_INTERNAL_H
|