summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKostya Serebryany <kcc@google.com>2018-06-06 01:23:29 +0000
committerKostya Serebryany <kcc@google.com>2018-06-06 01:23:29 +0000
commit1fd005f552595ceb2a10f2deacc6b64a50019afb (patch)
tree224b67c41f607b9c7564e49515ba03cfa5beaf85
parentf3914b74c1825f4deece3317542743109dabbf56 (diff)
downloadbcm5719-llvm-1fd005f552595ceb2a10f2deacc6b64a50019afb.tar.gz
bcm5719-llvm-1fd005f552595ceb2a10f2deacc6b64a50019afb.zip
[libFuzzer] initial implementation of -data_flow_trace. It parses the data flow trace and prints the summary, but doesn't use the information in any other way yet
llvm-svn: 334058
-rw-r--r--compiler-rt/lib/fuzzer/CMakeLists.txt1
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp90
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h40
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerDriver.cpp2
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerFlags.def2
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerIO.cpp8
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerIO.h2
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerInternal.h2
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerLoop.cpp1
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerOptions.h1
-rw-r--r--compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp8
-rwxr-xr-xcompiler-rt/lib/fuzzer/scripts/collect_data_flow.py25
-rw-r--r--compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp8
-rw-r--r--compiler-rt/test/fuzzer/ThreeFunctionsTest.cpp4
-rw-r--r--compiler-rt/test/fuzzer/dataflow.test11
15 files changed, 202 insertions, 3 deletions
diff --git a/compiler-rt/lib/fuzzer/CMakeLists.txt b/compiler-rt/lib/fuzzer/CMakeLists.txt
index 7e696de12f8..7ec0dd551d0 100644
--- a/compiler-rt/lib/fuzzer/CMakeLists.txt
+++ b/compiler-rt/lib/fuzzer/CMakeLists.txt
@@ -1,5 +1,6 @@
set(LIBFUZZER_SOURCES
FuzzerCrossOver.cpp
+ FuzzerDataFlowTrace.cpp
FuzzerDriver.cpp
FuzzerExtFunctionsDlsym.cpp
FuzzerExtFunctionsDlsymWin.cpp
diff --git a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp
new file mode 100644
index 00000000000..69efd6f38b5
--- /dev/null
+++ b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp
@@ -0,0 +1,90 @@
+//===- FuzzerDataFlowTrace.cpp - DataFlowTrace ---*- C++ -* ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// fuzzer::DataFlowTrace
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerDataFlowTrace.h"
+#include "FuzzerIO.h"
+
+#include <cstdlib>
+#include <fstream>
+#include <string>
+#include <vector>
+
+namespace fuzzer {
+
+// Loads the data-flow traces stored in DirPath and prints a short summary.
+//
+// DirPath is expected to contain 'functions.txt' (one function name per line)
+// and, next to it, one trace file per input. Every trace line is parsed as
+//   F<index> <string of '0' and '1'>
+// where <index> is the zero-based line number of a function in functions.txt.
+// In each trace file only the first line that refers to FocusFunction is
+// examined; the rest of that file is skipped.
+//
+// The function does nothing when DirPath is empty, and returns silently when
+// functions.txt is empty/unreadable, when FocusFunction is not listed there,
+// or when the directory holds no trace files. On the first malformed trace
+// line a diagnostic is printed and loading stops (no summary is printed).
+void DataFlowTrace::Init(const std::string &DirPath,
+                         const std::string &FocusFunction) {
+  if (DirPath.empty()) return;
+  const char *kFunctionsTxt = "functions.txt";
+  Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath.c_str());
+  Vector<SizedFile> Files;
+  GetSizedFilesFromDir(DirPath, &Files);
+  // Current line; shared with ParseError below so it can be reported.
+  std::string L;
+
+  // Read functions.txt and remember the index of the focus function.
+  std::ifstream FunctionsIF(DirPlusFile(DirPath, kFunctionsTxt));
+  size_t FocusFuncIdx = SIZE_MAX;
+  size_t NumFunctions = 0;
+  while (std::getline(FunctionsIF, L, '\n')) {
+    if (FocusFunction == L)
+      FocusFuncIdx = NumFunctions;
+    NumFunctions++;
+  }
+  // Files.size() <= 1 means there is nothing in the dir besides functions.txt.
+  if (!NumFunctions || FocusFuncIdx == SIZE_MAX || Files.size() <= 1)
+    return;
+
+  // Read traces.
+  size_t NumTraceFiles = 0;
+  size_t NumTracesWithFocusFunction = 0;
+  for (auto &SF : Files) {
+    auto Name = Basename(SF.File);
+    if (Name == kFunctionsTxt) continue;
+    auto ParseError = [&](const char *Err) {
+      Printf("DataFlowTrace: parse error: %s\n File: %s\n Line: %s\n", Err,
+             Name.c_str(), L.c_str());
+    };
+    NumTraceFiles++;
+    std::ifstream TraceIF(SF.File);
+    while (std::getline(TraceIF, L, '\n')) {
+      size_t SpacePos = L.find(' ');
+      if (SpacePos == std::string::npos)
+        return ParseError("no space in the trace line");
+      // L contains a space, hence it is non-empty and L[0] is valid.
+      if (L[0] != 'F')
+        return ParseError("the trace line doesn't start with 'F'");
+      size_t N = std::atol(L.c_str() + 1);
+      if (N >= NumFunctions)
+        return ParseError("N is greater than the number of functions");
+      if (N != FocusFuncIdx)
+        continue;
+
+      const char *Beg = L.c_str() + SpacePos + 1;
+      const size_t Len = L.size() - SpacePos - 1;
+      // The trace comes from a file on disk, so report bad content as a parse
+      // error instead of asserting on it.
+      if (!Len)
+        return ParseError("the trace is empty");
+      // The parsed bits are not consumed yet; see "Current status" in the
+      // header.
+      Vector<bool> V(Len);
+      for (size_t I = 0; I < Len; I++) {
+        // Stop at the first bad character, like every other parse error,
+        // rather than printing one diagnostic per offending byte.
+        if (Beg[I] != '0' && Beg[I] != '1')
+          return ParseError("the trace should contain only 0 or 1");
+        V[I] = Beg[I] == '1';
+      }
+      NumTracesWithFocusFunction++;
+      // Print just a few small traces.
+      if (NumTracesWithFocusFunction <= 3 && Len <= 16)
+        Printf("%s => |%s|\n", Name.c_str(), Beg);
+      break; // No need to parse the following lines.
+    }
+  }
+  // Every file except functions.txt must have been visited.
+  assert(NumTraceFiles == Files.size() - 1);
+  Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, "
+         "%zd traces with focus function\n",
+         NumTraceFiles, NumFunctions, NumTracesWithFocusFunction);
+}
+
+} // namespace fuzzer
+
diff --git a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h
new file mode 100644
index 00000000000..2b7b71fdbfa
--- /dev/null
+++ b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h
@@ -0,0 +1,40 @@
+//===- FuzzerDataFlowTrace.h - Internal header for the Fuzzer ---*- C++ -* ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// fuzzer::DataFlowTrace; reads and handles a data-flow trace.
+//
+// A data flow trace is generated by e.g. dataflow/DataFlow.cpp
+// and is stored on disk in a separate directory.
+//
+// The trace dir contains a file 'functions.txt' which lists function names,
+// one per line, e.g.
+// ==> functions.txt <==
+// Func2
+// LLVMFuzzerTestOneInput
+// Func1
+//
+// All other files in the dir are the traces, see dataflow/DataFlow.cpp.
+// The name of the file is sha1 of the input used to generate the trace.
+//
+// Current status:
+// the data is parsed and the summary is printed, but the data is not yet
+// used in any other way.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_DATA_FLOW_TRACE
+#define LLVM_FUZZER_DATA_FLOW_TRACE
+
+#include "FuzzerDefs.h"
+
+namespace fuzzer {
+// Reads a data-flow trace directory (see the file header for its layout).
+struct DataFlowTrace {
+ // Parses the traces found in DirPath with respect to FocusFunction.
+ // The definition lives in FuzzerDataFlowTrace.cpp.
+ void Init(const std::string &DirPath, const std::string &FocusFunction);
+};
+} // namespace fuzzer
+
+#endif // LLVM_FUZZER_DATA_FLOW_TRACE
diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
index dfb3d492ced..d7b95734991 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
@@ -623,6 +623,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
Options.ExitOnItem = Flags.exit_on_item;
if (Flags.focus_function)
Options.FocusFunction = Flags.focus_function;
+ if (Flags.data_flow_trace)
+ Options.DataFlowTrace = Flags.data_flow_trace;
unsigned Seed = Flags.seed;
// Initialize Seed.
diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def
index 139e6187f3a..5be6d2641ba 100644
--- a/compiler-rt/lib/fuzzer/FuzzerFlags.def
+++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def
@@ -153,3 +153,5 @@ FUZZER_DEPRECATED_FLAG(use_equivalence_server)
FUZZER_FLAG_INT(analyze_dict, 0, "Experimental")
FUZZER_DEPRECATED_FLAG(use_clang_coverage)
FUZZER_FLAG_INT(use_feature_frequency, 0, "Experimental/internal")
+
+FUZZER_FLAG_STRING(data_flow_trace, "Experimental: use the data flow trace")
diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.cpp b/compiler-rt/lib/fuzzer/FuzzerIO.cpp
index dac5ec658f1..f3ead0ec535 100644
--- a/compiler-rt/lib/fuzzer/FuzzerIO.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerIO.cpp
@@ -100,6 +100,14 @@ std::string DirPlusFile(const std::string &DirPath,
return DirPath + GetSeparator() + FileName;
}
+// Returns the part of Path that follows the last occurrence of Separator.
+// A Path without any Separator is returned unchanged; a Path that ends with
+// Separator yields an empty string.
+std::string Basename(const std::string &Path, char Separator) {
+  const size_t LastSep = Path.rfind(Separator);
+  const bool HasSep = LastSep != std::string::npos;
+  // rfind() can only report an index that lies inside the string.
+  assert(!HasSep || LastSep < Path.size());
+  return HasSep ? Path.substr(LastSep + 1) : Path;
+}
+
void DupAndCloseStderr() {
int OutputFd = DuplicateFile(2);
if (OutputFd > 0) {
diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.h b/compiler-rt/lib/fuzzer/FuzzerIO.h
index ea9f0d5a670..6d7757435b7 100644
--- a/compiler-rt/lib/fuzzer/FuzzerIO.h
+++ b/compiler-rt/lib/fuzzer/FuzzerIO.h
@@ -67,6 +67,8 @@ struct SizedFile {
void GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V);
char GetSeparator();
+// Similar to the basename utility: returns the file name w/o the dir prefix.
+std::string Basename(const std::string &Path, char Separator = GetSeparator());
FILE* OpenFile(int Fd, const char *Mode);
diff --git a/compiler-rt/lib/fuzzer/FuzzerInternal.h b/compiler-rt/lib/fuzzer/FuzzerInternal.h
index 2b2638f1f8f..ec098a78f27 100644
--- a/compiler-rt/lib/fuzzer/FuzzerInternal.h
+++ b/compiler-rt/lib/fuzzer/FuzzerInternal.h
@@ -12,6 +12,7 @@
#ifndef LLVM_FUZZER_INTERNAL_H
#define LLVM_FUZZER_INTERNAL_H
+#include "FuzzerDataFlowTrace.h"
#include "FuzzerDefs.h"
#include "FuzzerExtFunctions.h"
#include "FuzzerInterface.h"
@@ -134,6 +135,7 @@ private:
InputCorpus &Corpus;
MutationDispatcher &MD;
FuzzingOptions Options;
+ DataFlowTrace DFT;
system_clock::time_point ProcessStartTime = system_clock::now();
system_clock::time_point UnitStartTime, UnitStopTime;
diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
index 9c19ba91320..27bd5ee6551 100644
--- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
@@ -160,6 +160,7 @@ Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD,
CurrentUnitSize = 0;
memset(BaseSha1, 0, sizeof(BaseSha1));
TPC.SetFocusFunction(Options.FocusFunction);
+ DFT.Init(Options.DataFlowTrace, Options.FocusFunction);
}
Fuzzer::~Fuzzer() {}
diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h
index 946f0b9d60b..7a52d362451 100644
--- a/compiler-rt/lib/fuzzer/FuzzerOptions.h
+++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h
@@ -46,6 +46,7 @@ struct FuzzingOptions {
std::string ExitOnSrcPos;
std::string ExitOnItem;
std::string FocusFunction;
+ std::string DataFlowTrace;
bool SaveArtifacts = true;
bool PrintNEW = true; // Print a status line when new units are found;
bool PrintNewCovPcs = false;
diff --git a/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp b/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp
index 99863074d72..a79c796ac45 100644
--- a/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp
+++ b/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp
@@ -69,6 +69,7 @@ static const uintptr_t *FuncsBeg;
static __thread size_t CurrentFunc;
static dfsan_label *FuncLabels; // Array of NumFuncs elements.
static char *PrintableStringForLabel; // InputLen + 2 bytes.
+static bool LabelSeen[1 << 8 * sizeof(dfsan_label)];
// Prints all instrumented functions.
static int PrintFunctions() {
@@ -89,7 +90,11 @@ static int PrintFunctions() {
return 0;
}
-static void SetBytesForLabel(dfsan_label L, char *Bytes) {
+extern "C"
+void SetBytesForLabel(dfsan_label L, char *Bytes) {
+ if (LabelSeen[L])
+ return;
+ LabelSeen[L] = true;
assert(L);
if (L <= InputLen + 1) {
Bytes[L - 1] = '1';
@@ -103,6 +108,7 @@ static void SetBytesForLabel(dfsan_label L, char *Bytes) {
static char *GetPrintableStringForLabel(dfsan_label L) {
memset(PrintableStringForLabel, '0', InputLen + 1);
PrintableStringForLabel[InputLen + 1] = 0;
+ memset(LabelSeen, 0, sizeof(LabelSeen));
SetBytesForLabel(L, PrintableStringForLabel);
return PrintableStringForLabel;
}
diff --git a/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py b/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py
index d13f6dcc411..c3faf71c0af 100755
--- a/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py
+++ b/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py
@@ -11,9 +11,15 @@
# the complete trace for all input bytes (running it on all bytes at once
# may fail if DFSan runs out of labels).
# Usage:
-# collect_data_flow.py BINARY INPUT [RESULT]
+#
+# # Collect dataflow for one input, store it in OUTPUT (default is stdout)
+# collect_data_flow.py BINARY INPUT [OUTPUT]
+#
+# # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR
+# collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR
#===------------------------------------------------------------------------===#
import atexit
+import hashlib
import sys
import os
import subprocess
@@ -26,9 +32,26 @@ def cleanup(d):
print "removing: ", d
shutil.rmtree(d)
+def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir):
+ print "Collecting dataflow for corpus:", corpus_dir, \
+ "output_dir:", output_dir
+ assert not os.path.exists(output_dir)
+ os.mkdir(output_dir)
+ for root, dirs, files in os.walk(corpus_dir):
+ for f in files:
+ path = os.path.join(root, f)
+ sha1 = hashlib.sha1(open(path).read()).hexdigest()
+ output = os.path.join(output_dir, sha1)
+ subprocess.call([self, exe, path, output])
+ functions_txt = open(os.path.join(output_dir, "functions.txt"), "w")
+ subprocess.call([exe], stdout=functions_txt)
+
+
def main(argv):
exe = argv[1]
inp = argv[2]
+ if os.path.isdir(inp):
+ return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3])
size = os.path.getsize(inp)
q = [[0, size]]
tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
diff --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
index a38a45344e9..0b8673876a9 100644
--- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
+++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
@@ -28,6 +28,14 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
abort();
}
+// Checks that Basename() returns whatever follows the last separator.
+// NOTE(review): the paths use '/', so this presumably assumes GetSeparator()
+// returns '/' on the platform running the test -- confirm for Windows.
+TEST(Fuzzer, Basename) {
+ EXPECT_EQ(Basename("foo/bar"), "bar");
+ EXPECT_EQ(Basename("bar"), "bar"); // No separator: returned as is.
+ EXPECT_EQ(Basename("/bar"), "bar"); // Leading separator.
+ EXPECT_EQ(Basename("foo/x"), "x");
+ EXPECT_EQ(Basename("foo/"), ""); // Trailing separator: empty name.
+}
+
TEST(Fuzzer, CrossOver) {
std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
fuzzer::EF = t.get();
diff --git a/compiler-rt/test/fuzzer/ThreeFunctionsTest.cpp b/compiler-rt/test/fuzzer/ThreeFunctionsTest.cpp
index 0ff682abc95..1278cb05633 100644
--- a/compiler-rt/test/fuzzer/ThreeFunctionsTest.cpp
+++ b/compiler-rt/test/fuzzer/ThreeFunctionsTest.cpp
@@ -8,12 +8,14 @@
#include <cstdlib>
#include <cstdio>
+extern "C"
__attribute__((noinline))
-static bool Func1(const uint8_t *Data, size_t Size) {
+bool Func1(const uint8_t *Data, size_t Size) {
// assumes Size >= 5, doesn't check it.
return Data[4] == 'M';
}
+extern "C"
__attribute__((noinline))
bool Func2(const uint8_t *Data, size_t Size) {
return Size >= 6 && Data[5] == 'E';
diff --git a/compiler-rt/test/fuzzer/dataflow.test b/compiler-rt/test/fuzzer/dataflow.test
index 7162b06f6d2..7b85c6f9e45 100644
--- a/compiler-rt/test/fuzzer/dataflow.test
+++ b/compiler-rt/test/fuzzer/dataflow.test
@@ -5,6 +5,7 @@ REQUIRES: linux
RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fsanitize=dataflow %S/../../lib/fuzzer/dataflow/DataFlow.cpp -o %t-DataFlow.o
RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp %S/ThreeFunctionsTest.cpp %t-DataFlow.o -o %t-ThreeFunctionsTestDF
RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp %S/ExplodeDFSanLabelsTest.cpp %t-DataFlow.o -o %t-ExplodeDFSanLabelsTestDF
+RUN: %cpp_compiler %S/ThreeFunctionsTest.cpp -o %t-ThreeFunctionsTest
# Dump the function list.
RUN: %t-ThreeFunctionsTestDF 2>&1 | FileCheck %s --check-prefix=FUNC_LIST
@@ -70,3 +71,13 @@ RUN: %t-ExplodeDFSanLabelsTestDF 2 4 %t/IN/1234567890123456
RUN: %t-ExplodeDFSanLabelsTestDF 4 6 %t/IN/1234567890123456
# Or we can use collect_data_flow
RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ExplodeDFSanLabelsTestDF %t/IN/1234567890123456
+
+# Test that we can run collect_data_flow on the entire corpus dir
+RUN: rm -rf %t/OUT
+RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN %t/OUT
+RUN: %t-ThreeFunctionsTest -data_flow_trace=%t/OUT -runs=0 -focus_function=Func2 2>&1 | FileCheck %s --check-prefix=USE_DATA_FLOW_TRACE
+USE_DATA_FLOW_TRACE: INFO: Focus function is set to 'Func2'
+USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: reading from {{.*}}/OUT
+USE_DATA_FLOW_TRACE-DAG: a8eefe2fd5d6b32028f355fafa3e739a6bf5edc => |000001|
USE_DATA_FLOW_TRACE-DAG: d28cb407e8e1a702c72d25473f0553d3ec172262 => |0000011|
+USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: 6 trace files, 3 functions, 2 traces with focus function
OpenPOWER on IntegriCloud