diff options
| author | Max Moroz <mmoroz@chromium.org> | 2019-04-12 21:00:12 +0000 |
|---|---|---|
| committer | Max Moroz <mmoroz@chromium.org> | 2019-04-12 21:00:12 +0000 |
| commit | b6e6d3c740a4b94a64ad62745a18571f1a9cb3cb (patch) | |
| tree | b157a324eb0d9219f0c09b16245c0297c48321c0 /compiler-rt | |
| parent | 9e27514996e7dd183cd84b59656ac64b7514776f (diff) | |
| download | bcm5719-llvm-b6e6d3c740a4b94a64ad62745a18571f1a9cb3cb.tar.gz bcm5719-llvm-b6e6d3c740a4b94a64ad62745a18571f1a9cb3cb.zip | |
[libFuzzer] Fix DataFlow.cpp logic when tracing long inputs.
Summary:
1. Do not create DFSan labels for the bytes which we do not trace. This is where we run out of labels at the first place.
2. When dumping the traces on the disk, make sure to offset the label identifiers by the number of the first byte in the trace range.
3. For the last label, make sure to write it at the last position of the trace bit string, as that label represents the input size, not any particular byte.
Also fixed the bug with division in python which I've introduced when migrated the scripts to Python3 (`//` is required for integral division).
Otherwise, the scripts are wasting too much time unsuccessfully trying to
collect and process traces from the long inputs. For more context, see
https://github.com/google/oss-fuzz/issues/1632#issuecomment-481761789
Reviewers: kcc
Reviewed By: kcc
Subscribers: delcypher, #sanitizers, llvm-commits
Tags: #llvm, #sanitizers
Differential Revision: https://reviews.llvm.org/D60538
llvm-svn: 358311
Diffstat (limited to 'compiler-rt')
| -rw-r--r-- | compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp | 25 | ||||
| -rwxr-xr-x | compiler-rt/lib/fuzzer/scripts/collect_data_flow.py | 4 | ||||
| -rw-r--r-- | compiler-rt/test/fuzzer/dataflow.test | 11 |
3 files changed, 29 insertions, 11 deletions
diff --git a/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp b/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp index 742e956e784..187a8e52cd5 100644 --- a/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp +++ b/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp @@ -63,6 +63,9 @@ __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv); } // extern "C" static size_t InputLen; +static size_t InputLabelBeg; +static size_t InputLabelEnd; +static size_t InputSizeLabel; static size_t NumFuncs; static const uintptr_t *FuncsBeg; static __thread size_t CurrentFunc; @@ -95,8 +98,10 @@ void SetBytesForLabel(dfsan_label L, char *Bytes) { return; LabelSeen[L] = true; assert(L); - if (L <= InputLen + 1) { - Bytes[L - 1] = '1'; + if (L < InputSizeLabel) { + Bytes[L + InputLabelBeg - 1] = '1'; + } else if (L == InputSizeLabel) { + Bytes[InputLen] = '1'; } else { auto *DLI = dfsan_get_label_info(L); SetBytesForLabel(DLI->l1, Bytes); @@ -124,9 +129,9 @@ int main(int argc, char **argv) { if (argc == 1) return PrintFunctions(); assert(argc == 4 || argc == 5); - size_t Beg = atoi(argv[1]); - size_t End = atoi(argv[2]); - assert(Beg < End); + InputLabelBeg = atoi(argv[1]); + InputLabelEnd = atoi(argv[2]); + assert(InputLabelBeg < InputLabelEnd); const char *Input = argv[3]; fprintf(stderr, "INFO: reading '%s'\n", Input); @@ -143,14 +148,16 @@ int main(int argc, char **argv) { fprintf(stderr, "INFO: running '%s'\n", Input); for (size_t I = 1; I <= InputLen; I++) { - dfsan_label L = dfsan_create_label("", nullptr); - assert(L == I); size_t Idx = I - 1; - if (Idx >= Beg && Idx < End) + if (Idx >= InputLabelBeg && Idx < InputLabelEnd) { + dfsan_label L = dfsan_create_label("", nullptr); + assert(L == I - InputLabelBeg); dfsan_set_label(L, Buf + Idx, 1); + } } dfsan_label SizeL = dfsan_create_label("", nullptr); - assert(SizeL == InputLen + 1); + InputSizeLabel = SizeL; + assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1); dfsan_set_label(SizeL, &InputLen, sizeof(InputLen)); LLVMFuzzerTestOneInput(Buf, InputLen); diff --git a/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py b/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py index e8b56a71910..bd601eb63ab 100755 --- a/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py +++ b/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py @@ -65,8 +65,8 @@ def main(argv): tmpfile = os.path.join(tmpdir, str(r[0]) + "-" + str(r[1])) ret = subprocess.call([exe, str(r[0]), str(r[1]), inp, tmpfile]) if ret and r[1] - r[0] >= 2: - q.append([r[0], (r[1] + r[0]) / 2]) - q.append([(r[1] + r[0]) / 2, r[1]]) + q.append([r[0], (r[1] + r[0]) // 2]) + q.append([(r[1] + r[0]) // 2, r[1]]) else: outputs.append(tmpfile) print("******* Success: ", r) diff --git a/compiler-rt/test/fuzzer/dataflow.test b/compiler-rt/test/fuzzer/dataflow.test index 64f083735cb..36ec55881e8 100644 --- a/compiler-rt/test/fuzzer/dataflow.test +++ b/compiler-rt/test/fuzzer/dataflow.test @@ -82,3 +82,14 @@ USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: reading from {{.*}}/OUT USE_DATA_FLOW_TRACE-DAG: a8eefe2fd5d6b32028f355fafa3e739a6bf5edc => |000001| USE_DATA_FLOW_TRACE-DGA: d28cb407e8e1a702c72d25473f0553d3ec172262 => |0000011| USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: 6 trace files, 3 functions, 2 traces with focus function + +# Test that we can run collect_data_flow on a long input (>2**16 bytes) +RUN: rm -rf %t/OUT +RUN: printf "%0.sA" {1..150001} > %t/IN/very_long_input +RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN/very_long_input %t/OUT | FileCheck %s --check-prefix=COLLECT_TRACE_FOR_LONG_INPUT +RUN: rm %t/IN/very_long_input +COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[0, 150001] +COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[75000, 150001] +COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[112500, 150001] +COLLECT_TRACE_FOR_LONG_INPUT: ******* Success:{{[ ]+}}[{{[0123456789]+}}, 150001] +COLLECT_TRACE_FOR_LONG_INPUT: ******* Success:{{[ ]+}}[0, {{[0123456789]+}}] |

