diff options
author | Ivan Krasin <krasin@chromium.org> | 2016-01-22 22:28:27 +0000 |
---|---|---|
committer | Ivan Krasin <krasin@chromium.org> | 2016-01-22 22:28:27 +0000 |
commit | df91910bd4d4afa8339ba27cd7f354760171bb5a (patch) | |
tree | a205f251a9d87efc5e6873dcbe2c8f20538e6065 /llvm/lib | |
parent | 13e7cb294c95eb0192728179f9eb0a97150f306a (diff) | |
download | bcm5719-llvm-df91910bd4d4afa8339ba27cd7f354760171bb5a.tar.gz bcm5719-llvm-df91910bd4d4afa8339ba27cd7f354760171bb5a.zip |
Use std::piecewise_constant_distribution instead of ad-hoc binary search.
Summary:
Fix the issue with the most recently discovered unit receiving much less attention.
Note: this is the second attempt (prev: r258473). Now, libc++ build is fixed.
Reviewers: aizatsky, kcc
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D16487
llvm-svn: 258571
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Fuzzer/FuzzerInternal.h | 67 | ||||
-rw-r--r-- | llvm/lib/Fuzzer/FuzzerLoop.cpp | 80 | ||||
-rw-r--r-- | llvm/lib/Fuzzer/test/FuzzerUnittest.cpp | 22 |
3 files changed, 106 insertions, 63 deletions
diff --git a/llvm/lib/Fuzzer/FuzzerInternal.h b/llvm/lib/Fuzzer/FuzzerInternal.h index bd513d019cb..500dc63fc44 100644 --- a/llvm/lib/Fuzzer/FuzzerInternal.h +++ b/llvm/lib/Fuzzer/FuzzerInternal.h @@ -13,14 +13,15 @@ #define LLVM_FUZZER_INTERNAL_H #include <cassert> -#include <climits> #include <chrono> +#include <climits> #include <cstddef> #include <cstdlib> -#include <string> +#include <random> #include <string.h> -#include <vector> +#include <string> #include <unordered_set> +#include <vector> #include "FuzzerInterface.h" @@ -29,10 +30,8 @@ using namespace std::chrono; typedef std::vector<uint8_t> Unit; // A simple POD sized array of bytes. -template<size_t kMaxSize> -class FixedWord { - public: - +template <size_t kMaxSize> class FixedWord { +public: FixedWord() : Size(0) {} FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); } @@ -42,12 +41,13 @@ class FixedWord { Size = S; } - bool operator == (const FixedWord<kMaxSize> &w) const { + bool operator==(const FixedWord<kMaxSize> &w) const { return Size == w.Size && 0 == memcmp(Data, w.Data, Size); } - bool operator < (const FixedWord<kMaxSize> &w) const { - if (Size != w.Size) return Size < w.Size; + bool operator<(const FixedWord<kMaxSize> &w) const { + if (Size != w.Size) + return Size < w.Size; return memcmp(Data, w.Data, Size) < 0; } @@ -55,12 +55,12 @@ class FixedWord { const uint8_t *data() const { return Data; } uint8_t size() const { return Size; } - private: +private: uint8_t Size; uint8_t Data[kMaxSize]; }; -typedef FixedWord<27> Word; // 28 bytes. +typedef FixedWord<27> Word; // 28 bytes. std::string FileToString(const std::string &Path); Unit FileToVector(const std::string &Path); @@ -108,7 +108,7 @@ bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units); class MutationDispatcher { - public: +public: MutationDispatcher(FuzzerRandomBase &Rand); ~MutationDispatcher(); /// Indicate that we are about to start a new sequence of mutations. @@ -162,27 +162,27 @@ class MutationDispatcher { void SetCorpus(const std::vector<Unit> *Corpus); - private: +private: FuzzerRandomBase &Rand; struct Impl; Impl *MDImpl; }; class Fuzzer { - public: +public: struct FuzzingOptions { int Verbosity = 1; int MaxLen = 0; int UnitTimeoutSec = 300; int MaxTotalTimeSec = 0; bool DoCrossOver = true; - int MutateDepth = 5; + int MutateDepth = 5; bool ExitOnFirst = false; bool UseCounters = false; bool UseIndirCalls = true; bool UseTraces = false; bool UseMemcmp = true; - bool UseFullCoverageSet = false; + bool UseFullCoverageSet = false; bool Reload = true; bool ShuffleAtStartUp = true; int PreferSmallDuringInitialShuffle = -1; @@ -195,12 +195,15 @@ class Fuzzer { std::string ArtifactPrefix = "./"; std::string ExactArtifactPath; bool SaveArtifacts = true; - bool PrintNEW = true; // Print a status line when new units are found; + bool PrintNEW = true; // Print a status line when new units are found; bool OutputCSV = false; bool PrintNewCovPcs = false; }; Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options); - void AddToCorpus(const Unit &U) { Corpus.push_back(U); } + void AddToCorpus(const Unit &U) { + Corpus.push_back(U); + UpdateCorpusDistribution(); + } size_t ChooseUnitIdxToMutate(); const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; }; void Loop(); @@ -231,7 +234,7 @@ class Fuzzer { // Merge Corpora[1:] into Corpora[0]. void Merge(const std::vector<std::string> &Corpora); - private: +private: void AlarmCallback(); void MutateAndTestOne(); void ReportNewCoverage(const Unit &U); @@ -241,6 +244,9 @@ class Fuzzer { void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); void PrintStats(const char *Where, const char *End = "\n"); void PrintStatusForNewUnit(const Unit &U); + // Updates the probability distribution for the units in the corpus. + // Must be called whenever the corpus or unit weights are changed. + void UpdateCorpusDistribution(); void SyncCorpus(); @@ -249,7 +255,6 @@ class Fuzzer { void PrepareCoverageBeforeRun(); bool CheckCoverageAfterRun(); - // Trace-based fuzzing: we run a unit with some kind of tracing // enabled and record potentially useful mutations. Then // We apply these mutations one by one to the unit and run it again. @@ -274,12 +279,20 @@ class Fuzzer { // For UseCounters std::vector<uint8_t> CounterBitmap; - size_t TotalBits() { // Slow. Call it only for printing stats. + size_t TotalBits() { // Slow. Call it only for printing stats. size_t Res = 0; - for (auto x : CounterBitmap) Res += __builtin_popcount(x); + for (auto x : CounterBitmap) + Res += __builtin_popcount(x); return Res; } + // TODO(krasin): remove GetRand from UserSuppliedFuzzer, + // and fully rely on the generator and the seed. + // The user supplied fuzzer will have a way to access the + // generator for its own purposes (like seeding the custom + // PRNG). + std::mt19937 Generator; + std::piecewise_constant_distribution<double> CorpusDistribution; UserSuppliedFuzzer &USF; FuzzingOptions Options; system_clock::time_point ProcessStartTime = system_clock::now(); @@ -292,8 +305,8 @@ class Fuzzer { size_t LastCoveragePcBufferLen = 0; }; -class SimpleUserSuppliedFuzzer: public UserSuppliedFuzzer { - public: +class SimpleUserSuppliedFuzzer : public UserSuppliedFuzzer { +public: SimpleUserSuppliedFuzzer(FuzzerRandomBase *Rand, UserCallback Callback) : UserSuppliedFuzzer(Rand), Callback(Callback) {} @@ -301,10 +314,10 @@ class SimpleUserSuppliedFuzzer: public UserSuppliedFuzzer { return Callback(Data, Size); } - private: +private: UserCallback Callback = nullptr; }; -}; // namespace fuzzer +}; // namespace fuzzer #endif // LLVM_FUZZER_INTERNAL_H diff --git a/llvm/lib/Fuzzer/FuzzerLoop.cpp b/llvm/lib/Fuzzer/FuzzerLoop.cpp index 7f3b0f59918..a84c2348b53 100644 --- a/llvm/lib/Fuzzer/FuzzerLoop.cpp +++ b/llvm/lib/Fuzzer/FuzzerLoop.cpp @@ -15,9 +15,9 @@ #include <memory> #if defined(__has_include) -# if __has_include(<sanitizer/coverage_interface.h>) -# include <sanitizer/coverage_interface.h> -# endif +#if __has_include(<sanitizer / coverage_interface.h>) +#include <sanitizer/coverage_interface.h> +#endif #endif extern "C" { @@ -28,11 +28,11 @@ __attribute__((weak)) void __sanitizer_print_stack_trace(); __attribute__((weak)) void __sanitizer_reset_coverage(); __attribute__((weak)) size_t __sanitizer_get_total_unique_caller_callee_pairs(); __attribute__((weak)) size_t __sanitizer_get_total_unique_coverage(); -__attribute__((weak)) -void __sanitizer_set_death_callback(void (*callback)(void)); +__attribute__((weak)) void +__sanitizer_set_death_callback(void (*callback)(void)); __attribute__((weak)) size_t __sanitizer_get_number_of_counters(); -__attribute__((weak)) -uintptr_t __sanitizer_update_counter_bitset_and_clear_counters(uint8_t *bitset); +__attribute__((weak)) uintptr_t +__sanitizer_update_counter_bitset_and_clear_counters(uint8_t *bitset); __attribute__((weak)) uintptr_t __sanitizer_get_coverage_pc_buffer(uintptr_t **data); } @@ -42,7 +42,8 @@ static const size_t kMaxUnitSizeToPrint = 256; static void MissingWeakApiFunction(const char *FnName) { Printf("ERROR: %s is not defined. Exiting.\n" - "Did you use -fsanitize-coverage=... to build your code?\n", FnName); + "Did you use -fsanitize-coverage=... to build your code?\n", + FnName); exit(1); } @@ -56,7 +57,7 @@ static void MissingWeakApiFunction(const char *FnName) { static Fuzzer *F; Fuzzer::Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options) - : USF(USF), Options(Options) { + : Generator(USF.GetRand().Rand()), USF(USF), Options(Options) { SetDeathCallback(); InitializeTraceState(); assert(!F); @@ -92,7 +93,8 @@ void Fuzzer::AlarmCallback() { assert(Options.UnitTimeoutSec > 0); size_t Seconds = duration_cast<seconds>(system_clock::now() - UnitStartTime).count(); - if (Seconds == 0) return; + if (Seconds == 0) + return; if (Options.Verbosity >= 2) Printf("AlarmCallback %zd\n", Seconds); if (Seconds >= (size_t)Options.UnitTimeoutSec) { @@ -146,7 +148,8 @@ void Fuzzer::PrintStats(const char *Where, const char *End) { } void Fuzzer::RereadOutputCorpus() { - if (Options.OutputCorpus.empty()) return; + if (Options.OutputCorpus.empty()) + return; std::vector<Unit> AdditionalCorpus; ReadDirToVectorOfUnits(Options.OutputCorpus.c_str(), &AdditionalCorpus, &EpochOfLastReadOfOutputCorpus); @@ -154,15 +157,17 @@ void Fuzzer::RereadOutputCorpus() { Corpus = AdditionalCorpus; return; } - if (!Options.Reload) return; + if (!Options.Reload) + return; if (Options.Verbosity >= 2) - Printf("Reload: read %zd new units.\n", AdditionalCorpus.size()); + Printf("Reload: read %zd new units.\n", AdditionalCorpus.size()); for (auto &X : AdditionalCorpus) { if (X.size() > (size_t)Options.MaxLen) X.resize(Options.MaxLen); if (UnitHashesAddedToCorpus.insert(Hash(X)).second) { if (RunOne(X)) { Corpus.push_back(X); + UpdateCorpusDistribution(); PrintStats("RELOAD"); } } @@ -200,6 +205,7 @@ void Fuzzer::ShuffleAndMinimize() { } } Corpus = NewCorpus; + UpdateCorpusDistribution(); for (auto &X : Corpus) UnitHashesAddedToCorpus.insert(Hash(X)); PrintStats("INITED"); @@ -304,7 +310,8 @@ bool Fuzzer::CheckCoverageAfterRun() { } void Fuzzer::WriteToOutputCorpus(const Unit &U) { - if (Options.OutputCorpus.empty()) return; + if (Options.OutputCorpus.empty()) + return; std::string Path = DirPlusFile(Options.OutputCorpus, Hash(U)); WriteToFile(U, Path); if (Options.Verbosity >= 2) @@ -317,7 +324,7 @@ void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) { return; std::string Path = Options.ArtifactPrefix + Prefix + Hash(U); if (!Options.ExactArtifactPath.empty()) - Path = Options.ExactArtifactPath; // Overrides ArtifactPrefix. + Path = Options.ExactArtifactPath; // Overrides ArtifactPrefix. WriteToFile(U, Path); Printf("artifact_prefix='%s'; Test unit written to %s\n", Options.ArtifactPrefix.c_str(), Path.c_str()); @@ -326,7 +333,8 @@ void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) { } void Fuzzer::SaveCorpus() { - if (Options.OutputCorpus.empty()) return; + if (Options.OutputCorpus.empty()) + return; for (const auto &U : Corpus) WriteToFile(U, DirPlusFile(Options.OutputCorpus, Hash(U))); if (Options.Verbosity) @@ -347,6 +355,7 @@ void Fuzzer::PrintStatusForNewUnit(const Unit &U) { void Fuzzer::ReportNewCoverage(const Unit &U) { Corpus.push_back(U); + UpdateCorpusDistribution(); UnitHashesAddedToCorpus.insert(Hash(U)); USF.GetMD().RecordSuccessfulMutationSequence(); PrintStatusForNewUnit(U); @@ -409,22 +418,11 @@ void Fuzzer::MutateAndTestOne() { // Returns an index of random unit from the corpus to mutate. // Hypothesis: units added to the corpus last are more likely to be interesting. -// This function gives more wieght to the more recent units. +// This function gives more weight to the more recent units. size_t Fuzzer::ChooseUnitIdxToMutate() { - size_t N = Corpus.size(); - size_t Total = (N + 1) * N / 2; - size_t R = USF.GetRand()(Total); - size_t IdxBeg = 0, IdxEnd = N; - // Binary search. - while (IdxEnd - IdxBeg >= 2) { - size_t Idx = IdxBeg + (IdxEnd - IdxBeg) / 2; - if (R > (Idx + 1) * Idx / 2) - IdxBeg = Idx; - else - IdxEnd = Idx; - } - assert(IdxBeg < N); - return IdxBeg; + size_t Idx = static_cast<size_t>(CorpusDistribution(Generator)); + assert(Idx < Corpus.size()); + return Idx; } // Experimental search heuristic: drilling. @@ -437,7 +435,7 @@ size_t Fuzzer::ChooseUnitIdxToMutate() { void Fuzzer::Drill() { // The corpus is already read, shuffled, and minimized. assert(!Corpus.empty()); - Options.PrintNEW = false; // Don't print NEW status lines when drilling. + Options.PrintNEW = false; // Don't print NEW status lines when drilling. Unit U = ChooseUnitToMutate(); @@ -447,6 +445,7 @@ void Fuzzer::Drill() { std::vector<Unit> SavedCorpus; SavedCorpus.swap(Corpus); Corpus.push_back(U); + UpdateCorpusDistribution(); assert(Corpus.size() == 1); RunOne(U); PrintStats("DRILL "); @@ -490,7 +489,7 @@ void Fuzzer::Loop() { break; if (Options.MaxTotalTimeSec > 0 && secondsSinceProcessStartUp() > - static_cast<size_t>(Options.MaxTotalTimeSec)) + static_cast<size_t>(Options.MaxTotalTimeSec)) break; // Perform several mutations and runs. MutateAndTestOne(); @@ -501,7 +500,8 @@ void Fuzzer::Loop() { } void Fuzzer::SyncCorpus() { - if (Options.SyncCommand.empty() || Options.OutputCorpus.empty()) return; + if (Options.SyncCommand.empty() || Options.OutputCorpus.empty()) + return; auto Now = system_clock::now(); if (duration_cast<seconds>(Now - LastExternalSync).count() < Options.SyncTimeout) @@ -510,4 +510,14 @@ void Fuzzer::SyncCorpus() { ExecuteCommand(Options.SyncCommand + " " + Options.OutputCorpus); } -} // namespace fuzzer +void Fuzzer::UpdateCorpusDistribution() { + size_t N = Corpus.size(); + std::vector<double> Intervals(N + 1); + std::vector<double> Weights(N); + std::iota(Intervals.begin(), Intervals.end(), 0); + std::iota(Weights.begin(), Weights.end(), 1); + CorpusDistribution = std::piecewise_constant_distribution<double>( + Intervals.begin(), Intervals.end(), Weights.begin()); +} + +} // namespace fuzzer diff --git a/llvm/lib/Fuzzer/test/FuzzerUnittest.cpp b/llvm/lib/Fuzzer/test/FuzzerUnittest.cpp index 9512e167ab9..e0cca7d396c 100644 --- a/llvm/lib/Fuzzer/test/FuzzerUnittest.cpp +++ b/llvm/lib/Fuzzer/test/FuzzerUnittest.cpp @@ -6,7 +6,7 @@ using namespace fuzzer; // For now, have LLVMFuzzerTestOneInput just to make it link. // Later we may want to make unittests that actually call LLVMFuzzerTestOneInput. -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { abort(); } @@ -400,3 +400,23 @@ TEST(FuzzerUtil, Base64) { EXPECT_EQ("YWJjeHk=", Base64({'a', 'b', 'c', 'x', 'y'})); EXPECT_EQ("YWJjeHl6", Base64({'a', 'b', 'c', 'x', 'y', 'z'})); } + +TEST(Corpus, Distribution) { + FuzzerRandomLibc Rand(0); + SimpleUserSuppliedFuzzer USF(&Rand, LLVMFuzzerTestOneInput); + Fuzzer::FuzzingOptions Options; + Fuzzer Fuzz(USF, Options); + size_t N = 10; + size_t TriesPerUnit = 1<<20; + for (size_t i = 0; i < N; i++) { + Fuzz.AddToCorpus(Unit{ static_cast<uint8_t>(i) }); + } + std::vector<size_t> Hist(N); + for (size_t i = 0; i < N * TriesPerUnit; i++) { + Hist[Fuzz.ChooseUnitIdxToMutate()]++; + } + for (size_t i = 0; i < N; i++) { + // A weak sanity check that every unit gets invoked. + EXPECT_GT(Hist[i], TriesPerUnit / N / 3); + } +} |