diff options
Diffstat (limited to 'compiler-rt')
| -rw-r--r-- | compiler-rt/lib/esan/esan_circular_buffer.h | 6 | ||||
| -rw-r--r-- | compiler-rt/lib/esan/esan_flags.inc | 6 | ||||
| -rw-r--r-- | compiler-rt/lib/esan/working_set.cpp | 97 | ||||
| -rw-r--r-- | compiler-rt/test/esan/TestCases/workingset-samples.cpp | 19 | 
4 files changed, 112 insertions, 16 deletions
| diff --git a/compiler-rt/lib/esan/esan_circular_buffer.h b/compiler-rt/lib/esan/esan_circular_buffer.h index 98891109cb9..9ce102d04d8 100644 --- a/compiler-rt/lib/esan/esan_circular_buffer.h +++ b/compiler-rt/lib/esan/esan_circular_buffer.h @@ -28,9 +28,11 @@ class CircularBuffer {    explicit CircularBuffer() {}    CircularBuffer(uptr BufferCapacity) {      initialize(BufferCapacity); +    WasConstructed = true;    }    ~CircularBuffer() { -    free(); +    if (WasConstructed) // Else caller will call free() explicitly. +      free();    }    void initialize(uptr BufferCapacity) {      Capacity = BufferCapacity; @@ -38,6 +40,7 @@ class CircularBuffer {      Data = (T *)MmapOrDie(Capacity * sizeof(T), "CircularBuffer");      StartIdx = 0;      Count = 0; +    WasConstructed = false;    }    void free() {      UnmapOrDie(Data, Capacity * sizeof(T)); @@ -83,6 +86,7 @@ class CircularBuffer {    CircularBuffer(const CircularBuffer&);    void operator=(const CircularBuffer&); +  bool WasConstructed;    T *Data;    uptr Capacity;    uptr StartIdx; diff --git a/compiler-rt/lib/esan/esan_flags.inc b/compiler-rt/lib/esan/esan_flags.inc index f8a95267f91..eb9d0e5a903 100644 --- a/compiler-rt/lib/esan/esan_flags.inc +++ b/compiler-rt/lib/esan/esan_flags.inc @@ -39,3 +39,9 @@ ESAN_FLAG(bool, record_snapshots, true,  // To disable samples, turn off record_snapshots.  ESAN_FLAG(int, sample_freq, 20,            "Working set tool: sampling frequency in milliseconds.") + +// This controls the difference in frequency between each successive series +// of snapshots.  There are 8 in total, with number 0 using sample_freq. +// Number N samples number N-1 every (1 << snapshot_step) instance of N-1. +ESAN_FLAG(int, snapshot_step, 2, "Working set tool: the log of the sampling " +          "performed for the next-higher-frequency snapshot series.") diff --git a/compiler-rt/lib/esan/working_set.cpp b/compiler-rt/lib/esan/working_set.cpp index f4ed8a17e8b..9e7520f492e 100644 --- a/compiler-rt/lib/esan/working_set.cpp +++ b/compiler-rt/lib/esan/working_set.cpp @@ -14,6 +14,7 @@  #include "working_set.h"  #include "esan.h" +#include "esan_circular_buffer.h"  #include "esan_flags.h"  #include "esan_shadow.h"  #include "esan_sideline.h" @@ -24,9 +25,15 @@  //   cache line has ever been accessed.  // - The lowest bit of each shadow byte indicates whether the corresponding  //   cache line was accessed since the last sample. -// - The other bits can be used either for a single working set snapshot -//   between two consecutive samples, or an aggregate working set snapshot -//   over multiple sample periods (future work). +// - The other bits are used for working set snapshots at successively +//   lower frequencies, each bit to the left from the lowest bit stepping +//   down the frequency by 2 to the power of getFlags()->snapshot_step. +// Thus we have something like this: +//   Bit 0: Since last sample +//   Bit 1: Since last 2^2 samples +//   Bit 2: Since last 2^4 samples +//   Bit 3: ... +//   Bit 7: Ever accessed.  // We live with races in accessing each shadow byte.  typedef unsigned char byte; @@ -37,6 +44,10 @@ static const u32 CacheLineSize = 64;  // See the shadow byte layout description above.  static const u32 TotalWorkingSetBitIdx = 7; +// We accumulate to the left until we hit this bit. +// We don't need to accumulate to the final bit as it's set on each ref +// by the compiler instrumentation. +static const u32 MaxAccumBitIdx = 6;  static const u32 CurWorkingSetBitIdx = 0;  static const byte ShadowAccessedVal =    (1 << TotalWorkingSetBitIdx) | (1 << CurWorkingSetBitIdx); @@ -47,6 +58,26 @@ static SidelineThread Thread;  // may want to consider a 64-bit int.  static u32 SnapshotNum; +// We store the wset size for each of 8 different sampling frequencies. +static const u32 NumFreq = 8; // One for each bit of our shadow bytes. +// We cannot use static objects as the global destructor is called +// prior to our finalize routine. +// These are each circular buffers, sized up front. +CircularBuffer<u32> SizePerFreq[NumFreq]; +// We cannot rely on static initializers (they may run too late) but +// we record the size here for clarity: +u32 CircularBufferSizes[NumFreq] = { +  // These are each mmap-ed so our minimum is one page. +  32*1024, +  16*1024, +  8*1024, +  4*1024, +  4*1024, +  4*1024, +  4*1024, +  4*1024, +}; +  void processRangeAccessWorkingSet(uptr PC, uptr Addr, SIZE_T Size,                                    bool IsWrite) {    if (Size == 0) @@ -95,13 +126,17 @@ static u32 countAndClearShadowValues(u32 BitIdx, uptr ShadowStart,      ByteValue << 24;    // Get word aligned start.    ShadowStart = RoundDownTo(ShadowStart, sizeof(u32)); +  bool Accum = getFlags()->record_snapshots && BitIdx < MaxAccumBitIdx;    for (u32 *Ptr = (u32 *)ShadowStart; Ptr < (u32 *)ShadowEnd; ++Ptr) {      if ((*Ptr & WordValue) != 0) {        byte *BytePtr = (byte *)Ptr;        for (u32 j = 0; j < sizeof(u32); ++j) {          if (BytePtr[j] & ByteValue) {            ++WorkingSetSize; -          // TODO: Accumulate to the lower-frequency bit to the left. +          if (Accum) { +            // Accumulate to the lower-frequency bit to the left. +            BytePtr[j] |= (ByteValue << 1); +          }          }        }        // Clear this bit from every shadow byte. @@ -134,19 +169,41 @@ static u32 computeWorkingSizeAndReset(u32 BitIdx) {  // This is invoked from a signal handler but in a sideline thread doing nothing  // else so it is a little less fragile than a typical signal handler.  static void takeSample(void *Arg) { -  // FIXME: record the size and report at process end.  For now this simply -  // serves as a test of the sideline thread functionality. -  VReport(1, "%s: snapshot #%d: %u\n", SanitizerToolName, SnapshotNum, -          computeWorkingSizeAndReset(CurWorkingSetBitIdx)); -  ++SnapshotNum; +  u32 BitIdx = CurWorkingSetBitIdx; +  u32 Freq = 1; +  ++SnapshotNum; // Simpler to skip 0 whose mod matches everything. +  while (BitIdx <= MaxAccumBitIdx && (SnapshotNum % Freq) == 0) { +    u32 NumLines = computeWorkingSizeAndReset(BitIdx); +    VReport(1, "%s: snapshot #%5d bit %d freq %4d: %8u\n", SanitizerToolName, +            SnapshotNum, BitIdx, Freq, NumLines); +    SizePerFreq[BitIdx].push_back(NumLines); +    Freq = Freq << getFlags()->snapshot_step; +    BitIdx++; +  }  }  void initializeWorkingSet() {    CHECK(getFlags()->cache_line_size == CacheLineSize);    registerMemoryFaultHandler(); -  if (getFlags()->record_snapshots) +  if (getFlags()->record_snapshots) { +    for (u32 i = 0; i < NumFreq; ++i) +      SizePerFreq[i].initialize(CircularBufferSizes[i]);      Thread.launchThread(takeSample, nullptr, getFlags()->sample_freq); +  } +} + +static u32 getPeriodForPrinting(u32 MilliSec, const char *&Unit) { +  if (MilliSec > 600000) { +    Unit = "min"; +    return MilliSec / 60000; +  } else if (MilliSec > 10000) { +    Unit = "sec"; +    return MilliSec / 1000; +  } else { +    Unit = "ms"; +    return MilliSec; +  }  }  static u32 getSizeForPrinting(u32 NumOfCachelines, const char *&Unit) { @@ -167,12 +224,28 @@ static u32 getSizeForPrinting(u32 NumOfCachelines, const char *&Unit) {  }  int finalizeWorkingSet() { -  if (getFlags()->record_snapshots) +  const char *Unit; +  if (getFlags()->record_snapshots) {      Thread.joinThread(); +    u32 Freq = 1; +    Report(" Total number of samples: %u\n", SnapshotNum); +    for (u32 i = 0; i < NumFreq; ++i) { +      u32 Time = getPeriodForPrinting(getFlags()->sample_freq*Freq, Unit); +      Report(" Samples array #%d at period %u %s\n", i, Time, Unit); +      // FIXME: report whether we wrapped around and thus whether we +      // have data on the whole run or just the last N samples. +      for (u32 j = 0; j < SizePerFreq[i].size(); ++j) { +        u32 Size = getSizeForPrinting(SizePerFreq[i][j], Unit); +        Report("#%4d: %8u %s (%9u cache lines)\n", j, Size, Unit, +               SizePerFreq[i][j]); +      } +      Freq = Freq << getFlags()->snapshot_step; +      SizePerFreq[i].free(); +    } +  }    // Get the working set size for the entire execution.    u32 NumOfCachelines = computeWorkingSizeAndReset(TotalWorkingSetBitIdx); -  const char *Unit;    u32 Size = getSizeForPrinting(NumOfCachelines, Unit);    Report(" %s: the total working set size: %u %s (%u cache lines)\n",           SanitizerToolName, Size, Unit, NumOfCachelines); diff --git a/compiler-rt/test/esan/TestCases/workingset-samples.cpp b/compiler-rt/test/esan/TestCases/workingset-samples.cpp index cb0ce01e878..6ca1a2ccc23 100644 --- a/compiler-rt/test/esan/TestCases/workingset-samples.cpp +++ b/compiler-rt/test/esan/TestCases/workingset-samples.cpp @@ -1,5 +1,5 @@  // RUN: %clang_esan_wset -O0 %s -o %t 2>&1 -// RUN: %env_esan_opts=verbosity=1 %run %t 2>&1 | FileCheck %s +// RUN: %run %t 2>&1 | FileCheck %s  #include <sched.h>  #include <stdlib.h> @@ -19,8 +19,21 @@ int main(int argc, char **argv) {    for (int i = 0; i < size; ++i)      buf[i] = i;    munmap(buf, size); -  // CHECK:      {{.*}}EfficiencySanitizer: snapshot {{.*}} -  // CHECK-NEXT: {{.*}}EfficiencySanitizer: snapshot {{.*}} +  // We only check for a few samples here to reduce the chance of flakiness. +  // CHECK:      =={{[0-9]+}}== Total number of samples: {{[0-9]+}} +  // CHECK-NEXT: =={{[0-9]+}}== Samples array #0 at period 20 ms +  // CHECK-NEXT: =={{[0-9]+}}==#   0: {{[ 0-9]+}} KB ({{[ 0-9]+}} cache lines) +  // CHECK-NEXT: =={{[0-9]+}}==#   1: {{[ 0-9]+}} KB ({{[ 0-9]+}} cache lines) +  // CHECK-NEXT: =={{[0-9]+}}==#   2: {{[ 0-9]+}} KB ({{[ 0-9]+}} cache lines) +  // CHECK-NEXT: =={{[0-9]+}}==#   3: {{[ 0-9]+}} KB ({{[ 0-9]+}} cache lines) +  // CHECK:      =={{[0-9]+}}== Samples array #1 at period 80 ms +  // CHECK-NEXT: =={{[0-9]+}}==#   0: {{[ 0-9]+}} KB ({{[ 0-9]+}} cache lines) +  // CHECK:      =={{[0-9]+}}== Samples array #2 at period 320 ms +  // CHECK:      =={{[0-9]+}}== Samples array #3 at period 1280 ms +  // CHECK:      =={{[0-9]+}}== Samples array #4 at period 5120 ms +  // CHECK:      =={{[0-9]+}}== Samples array #5 at period 20 sec +  // CHECK:      =={{[0-9]+}}== Samples array #6 at period 81 sec +  // CHECK:      =={{[0-9]+}}== Samples array #7 at period 327 sec    // CHECK: {{.*}} EfficiencySanitizer: the total working set size: 32 MB (5242{{[0-9][0-9]}} cache lines)    return 0;  } | 

