diff options
Diffstat (limited to 'compiler-rt/lib')
-rw-r--r-- | compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp | 72 | ||||
-rw-r--r-- | compiler-rt/lib/fuzzer/FuzzerFork.cpp | 19 | ||||
-rw-r--r-- | compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp | 120 |
3 files changed, 136 insertions, 75 deletions
diff --git a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp index 311f53a3130..1fba3913c96 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp @@ -120,6 +120,12 @@ static Vector<uint8_t> DFTStringToVector(const std::string &DFTString) { return DFT; } +static std::ostream &operator<<(std::ostream &OS, const Vector<uint8_t> &DFT) { + for (auto B : DFT) + OS << (B ? "1" : "0"); + return OS; +} + static bool ParseError(const char *Err, const std::string &Line) { Printf("DataFlowTrace: parse error: %s: Line: %s\n", Err, Line.c_str()); return false; @@ -240,24 +246,74 @@ int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath, const Vector<SizedFile> &CorporaFiles) { Printf("INFO: collecting data flow: bin: %s dir: %s files: %zd\n", DFTBinary.c_str(), DirPath.c_str(), CorporaFiles.size()); - setenv("DFSAN_OPTIONS", "fast16labels=1:warn_unimplemented=0", 1); MkDir(DirPath); + auto Temp = TempPath(".dft"); for (auto &F : CorporaFiles) { // For every input F we need to collect the data flow and the coverage. // Data flow collection may fail if we request too many DFSan tags at once. // So, we start from requesting all tags in range [0,Size) and if that fails // we then request tags in [0,Size/2) and [Size/2, Size), and so on. // Function number => DFT. - auto OutPath = DirPlusFile(DirPath, Hash(FileToVector(F.File))); std::unordered_map<size_t, Vector<uint8_t>> DFTMap; std::unordered_set<std::string> Cov; - Command Cmd; - Cmd.addArgument(DFTBinary); - Cmd.addArgument(F.File); - Cmd.addArgument(OutPath); - Printf("CMD: %s\n", Cmd.toString().c_str()); - ExecuteCommand(Cmd); + std::queue<std::pair<size_t, size_t>> Q; + Q.push({0, F.Size}); + while (!Q.empty()) { + auto R = Q.front(); + Printf("\n\n\n********* Trying: [%zd, %zd)\n", R.first, R.second); + Q.pop(); + Command Cmd; + Cmd.addArgument(DFTBinary); + Cmd.addArgument(std::to_string(R.first)); + Cmd.addArgument(std::to_string(R.second)); + Cmd.addArgument(F.File); + Cmd.addArgument(Temp); + Printf("CMD: %s\n", Cmd.toString().c_str()); + if (ExecuteCommand(Cmd)) { + // DFSan has failed, collect tags for two subsets. + if (R.second - R.first >= 2) { + size_t Mid = (R.second + R.first) / 2; + Q.push({R.first, Mid}); + Q.push({Mid, R.second}); + } + } else { + Printf("********* Success: [%zd, %zd)\n", R.first, R.second); + std::ifstream IF(Temp); + std::string L; + while (std::getline(IF, L, '\n')) { + // Data flow collection has succeeded. + // Merge the results with the other runs. + if (L.empty()) continue; + if (L[0] == 'C') { + // Take coverage lines as is, they will be the same in all attempts. + Cov.insert(L); + } else if (L[0] == 'F') { + size_t FunctionNum = 0; + std::string DFTString; + if (ParseDFTLine(L, &FunctionNum, &DFTString)) { + auto &DFT = DFTMap[FunctionNum]; + if (DFT.empty()) { + // Haven't seen this function before, take DFT as is. + DFT = DFTStringToVector(DFTString); + } else if (DFT.size() == DFTString.size()) { + // Have seen this function already, merge DFTs. + DFTStringAppendToVector(&DFT, DFTString); + } + } + } + } + } + } + auto OutPath = DirPlusFile(DirPath, Hash(FileToVector(F.File))); + // Dump combined DFT to disk. + Printf("Producing DFT for %s\n", OutPath.c_str()); + std::ofstream OF(OutPath); + for (auto &DFT: DFTMap) + OF << "F" << DFT.first << " " << DFT.second << std::endl; + for (auto &C : Cov) + OF << C << std::endl; } + RemoveFile(Temp); // Write functions.txt if it's currently empty or doesn't exist. auto FunctionsTxtPath = DirPlusFile(DirPath, kFunctionsTxt); if (FileToString(FunctionsTxtPath).empty()) { diff --git a/compiler-rt/lib/fuzzer/FuzzerFork.cpp b/compiler-rt/lib/fuzzer/FuzzerFork.cpp index 5c4855f8284..870a2244850 100644 --- a/compiler-rt/lib/fuzzer/FuzzerFork.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerFork.cpp @@ -89,7 +89,6 @@ struct GlobalEnv { std::string DFTDir; std::string DataFlowBinary; Set<uint32_t> Features, Cov; - Set<std::string> FilesWithDFT; Vector<std::string> Files; Random *Rand; std::chrono::system_clock::time_point ProcessStartTime; @@ -127,13 +126,10 @@ struct GlobalEnv { auto Job = new FuzzJob; std::string Seeds; if (size_t CorpusSubsetSize = - std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) { - for (size_t i = 0; i < CorpusSubsetSize; i++) { - auto &SF = Files[Rand->SkewTowardsLast(Files.size())]; - Seeds += (Seeds.empty() ? "" : ",") + SF; - CollectDFT(SF); - } - } + std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) + for (size_t i = 0; i < CorpusSubsetSize; i++) + Seeds += (Seeds.empty() ? "" : ",") + + Files[Rand->SkewTowardsLast(Files.size())]; if (!Seeds.empty()) { Job->SeedListPath = DirPlusFile(TempDir, std::to_string(JobId) + ".seeds"); @@ -200,6 +196,7 @@ struct GlobalEnv { auto NewPath = DirPlusFile(MainCorpusDir, Hash(U)); WriteToFile(U, NewPath); Files.push_back(NewPath); + CollectDFT(NewPath); } Features.insert(NewFeatures.begin(), NewFeatures.end()); Cov.insert(NewCov.begin(), NewCov.end()); @@ -220,7 +217,6 @@ struct GlobalEnv { void CollectDFT(const std::string &InputPath) { if (DataFlowBinary.empty()) return; - if (!FilesWithDFT.insert(InputPath).second) return; Command Cmd(Args); Cmd.removeFlag("fork"); Cmd.removeFlag("runs"); @@ -230,7 +226,7 @@ struct GlobalEnv { Cmd.removeArgument(C); Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log")); Cmd.combineOutAndErr(); - // Printf("CollectDFT: %s\n", Cmd.toString().c_str()); + // Printf("CollectDFT: %s %s\n", InputPath.c_str(), Cmd.toString().c_str()); ExecuteCommand(Cmd); } @@ -300,6 +296,9 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, {}, &Env.Cov, CFPath, false); + for (auto &F : Env.Files) + Env.CollectDFT(F); + RemoveFile(CFPath); Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, Env.Files.size(), Env.TempDir.c_str()); diff --git a/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp b/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp index 8a5d695094d..989675e81d9 100644 --- a/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp +++ b/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp @@ -35,8 +35,7 @@ // Run: // # Collect data flow and coverage for INPUT_FILE // # write to OUTPUT_FILE (default: stdout) -// export DFSAN_OPTIONS=fast16labels=1:warn_unimplemented=0 -// ./a.out INPUT_FILE [OUTPUT_FILE] +// ./a.out FIRST_LABEL LAST_LABEL INPUT_FILE [OUTPUT_FILE] // // # Print all instrumented functions. llvm-symbolizer must be present in PATH // ./a.out @@ -49,6 +48,8 @@ // C1 8 // =============== // "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on. +// The byte string is LEN+1 bytes. The last byte is set if the function +// depends on the input length. // "CN X Y Z T": tells that a function N has basic blocks X, Y, and Z covered // in addition to the function's entry block, out of T total instrumented // blocks. @@ -71,20 +72,22 @@ __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv); } // extern "C" static size_t InputLen; -static size_t NumIterations; +static size_t InputLabelBeg; +static size_t InputLabelEnd; +static size_t InputSizeLabel; static size_t NumFuncs, NumGuards; static uint32_t *GuardsBeg, *GuardsEnd; static const uintptr_t *PCsBeg, *PCsEnd; -static __thread size_t CurrentFunc, CurrentIteration; -static dfsan_label **FuncLabels; // NumFuncs x NumIterations. +static __thread size_t CurrentFunc; +static dfsan_label *FuncLabels; // Array of NumFuncs elements. static bool *BBExecuted; // Array of NumGuards elements. +static char *PrintableStringForLabel; // InputLen + 2 bytes. +static bool LabelSeen[1 << 8 * sizeof(dfsan_label)]; enum { PCFLAG_FUNC_ENTRY = 1, }; -const int kNumLabels = 16; - static inline bool BlockIsEntry(size_t BlockIdx) { return PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY; } @@ -109,32 +112,35 @@ static int PrintFunctions() { return 0; } -static void PrintBinary(FILE *Out, dfsan_label L, size_t Len) { - char buf[kNumLabels + 1]; - assert(Len <= kNumLabels); - for (int i = 0; i < kNumLabels; i++) - buf[i] = (L & (1 << i)) ? '1' : '0'; - buf[Len] = 0; - fprintf(Out, "%s", buf); +extern "C" +void SetBytesForLabel(dfsan_label L, char *Bytes) { + if (LabelSeen[L]) + return; + LabelSeen[L] = true; + assert(L); + if (L < InputSizeLabel) { + Bytes[L + InputLabelBeg - 1] = '1'; + } else if (L == InputSizeLabel) { + Bytes[InputLen] = '1'; + } else { + auto *DLI = dfsan_get_label_info(L); + SetBytesForLabel(DLI->l1, Bytes); + SetBytesForLabel(DLI->l2, Bytes); + } +} + +static char *GetPrintableStringForLabel(dfsan_label L) { + memset(PrintableStringForLabel, '0', InputLen + 1); + PrintableStringForLabel[InputLen + 1] = 0; + memset(LabelSeen, 0, sizeof(LabelSeen)); + SetBytesForLabel(L, PrintableStringForLabel); + return PrintableStringForLabel; } static void PrintDataFlow(FILE *Out) { - for (size_t Func = 0; Func < NumFuncs; Func++) { - bool HasAny = false; - for (size_t Iter = 0; Iter < NumIterations; Iter++) - if (FuncLabels[Func][Iter]) - HasAny = true; - if (!HasAny) - continue; - fprintf(Out, "F%zd ", Func); - size_t LenOfLastIteration = kNumLabels; - if (auto Tail = InputLen % kNumLabels) - LenOfLastIteration = Tail; - for (size_t Iter = 0; Iter < NumIterations; Iter++) - PrintBinary(Out, FuncLabels[Func][Iter], - Iter == NumIterations - 1 ? LenOfLastIteration : kNumLabels); - fprintf(Out, "\n"); - } + for (size_t I = 0; I < NumFuncs; I++) + if (FuncLabels[I]) + fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I])); } static void PrintCoverage(FILE *Out) { @@ -163,9 +169,12 @@ int main(int argc, char **argv) { LLVMFuzzerInitialize(&argc, &argv); if (argc == 1) return PrintFunctions(); - assert(argc == 2 || argc == 3); + assert(argc == 4 || argc == 5); + InputLabelBeg = atoi(argv[1]); + InputLabelEnd = atoi(argv[2]); + assert(InputLabelBeg < InputLabelEnd); - const char *Input = argv[1]; + const char *Input = argv[3]; fprintf(stderr, "INFO: reading '%s'\n", Input); FILE *In = fopen(Input, "r"); assert(In); @@ -175,35 +184,30 @@ int main(int argc, char **argv) { unsigned char *Buf = (unsigned char*)malloc(InputLen); size_t NumBytesRead = fread(Buf, 1, InputLen, In); assert(NumBytesRead == InputLen); + PrintableStringForLabel = (char*)malloc(InputLen + 2); fclose(In); - NumIterations = (NumBytesRead + kNumLabels - 1) / kNumLabels; - FuncLabels = (dfsan_label**)calloc(NumFuncs, sizeof(dfsan_label*)); - for (size_t Func = 0; Func < NumFuncs; Func++) - FuncLabels[Func] = - (dfsan_label *)calloc(NumIterations, sizeof(dfsan_label)); - - for (CurrentIteration = 0; CurrentIteration < NumIterations; - CurrentIteration++) { - fprintf(stderr, "INFO: running '%s' %zd/%zd\n", Input, CurrentIteration, - NumIterations); - dfsan_flush(); - dfsan_set_label(0, Buf, InputLen); - - size_t BaseIdx = CurrentIteration * kNumLabels; - size_t LastIdx = BaseIdx + kNumLabels < NumBytesRead ? BaseIdx + kNumLabels - : NumBytesRead; - assert(BaseIdx < LastIdx); - for (size_t Idx = BaseIdx; Idx < LastIdx; Idx++) - dfsan_set_label(1 << (Idx - BaseIdx), Buf + Idx, 1); - LLVMFuzzerTestOneInput(Buf, InputLen); + fprintf(stderr, "INFO: running '%s'\n", Input); + for (size_t I = 1; I <= InputLen; I++) { + size_t Idx = I - 1; + if (Idx >= InputLabelBeg && Idx < InputLabelEnd) { + dfsan_label L = dfsan_create_label("", nullptr); + assert(L == I - InputLabelBeg); + dfsan_set_label(L, Buf + Idx, 1); + } } + dfsan_label SizeL = dfsan_create_label("", nullptr); + InputSizeLabel = SizeL; + assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1); + dfsan_set_label(SizeL, &InputLen, sizeof(InputLen)); + + LLVMFuzzerTestOneInput(Buf, InputLen); free(Buf); - bool OutIsStdout = argc == 2; + bool OutIsStdout = argc == 4; fprintf(stderr, "INFO: writing dataflow to %s\n", - OutIsStdout ? "<stdout>" : argv[2]); - FILE *Out = OutIsStdout ? stdout : fopen(argv[2], "w"); + OutIsStdout ? "<stdout>" : argv[4]); + FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w"); PrintDataFlow(Out); PrintCoverage(Out); if (!OutIsStdout) fclose(Out); @@ -233,6 +237,7 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, GuardsBeg[i] = NumFuncs; } } + FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label)); BBExecuted = (bool*)calloc(NumGuards, sizeof(bool)); fprintf(stderr, "INFO: %zd instrumented function(s) observed " "and %zd basic blocks\n", NumFuncs, NumGuards); @@ -253,13 +258,14 @@ void __sanitizer_cov_trace_pc_guard(uint32_t *guard) { void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases, dfsan_label L1, dfsan_label UnusedL) { assert(CurrentFunc < NumFuncs); - FuncLabels[CurrentFunc][CurrentIteration] |= L1; + FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1); } #define HOOK(Name, Type) \ void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \ assert(CurrentFunc < NumFuncs); \ - FuncLabels[CurrentFunc][CurrentIteration] |= L1 | L2; \ + FuncLabels[CurrentFunc] = \ + dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2)); \ } HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t) |