summaryrefslogtreecommitdiffstats
path: root/compiler-rt/lib/esan/working_set.cpp
blob: dcd68373706bdcf15cd7830c07a634d7ce52b579 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
//===-- working_set.cpp ---------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of EfficiencySanitizer, a family of performance tuners.
//
// This file contains working-set-specific code.
//===----------------------------------------------------------------------===//

#include "working_set.h"
#include "esan.h"
#include "esan_flags.h"
#include "esan_shadow.h"
#include "sanitizer_common/sanitizer_procmaps.h"

// We shadow every cache line of app memory with one shadow byte.
// - The highest bit of each shadow byte indicates whether the corresponding
//   cache line has ever been accessed.
// - The lowest bit of each shadow byte indicates whether the corresponding
//   cache line was accessed since the last sample.
// - The other bits can be used either for a single working set snapshot
//   between two consecutive samples, or an aggregate working set snapshot
//   over multiple sample periods (future work).
// We live with races in accessing each shadow byte.
typedef unsigned char byte;

namespace __esan {

// Our shadow memory assumes that the line size is 64.
static const u32 CacheLineSize = 64;

// See the shadow byte layout description above.
static const u32 TotalWorkingSetBitIdx = 7;
static const u32 CurWorkingSetBitIdx = 0;
static const byte ShadowAccessedVal =
  (1 << TotalWorkingSetBitIdx) | (1 << CurWorkingSetBitIdx);

void processRangeAccessWorkingSet(uptr PC, uptr Addr, SIZE_T Size,
                                  bool IsWrite) {
  if (Size == 0)
    return;
  SIZE_T I = 0;
  uptr LineSize = getFlags()->cache_line_size;
  // As Addr+Size could overflow at the top of a 32-bit address space,
  // we avoid the simpler formula that rounds the start and end.
  SIZE_T NumLines = Size / LineSize +
    // Add any extra at the start or end adding on an extra line:
    (LineSize - 1 + Addr % LineSize + Size % LineSize) / LineSize;
  byte *Shadow = (byte *)appToShadow(Addr);
  // Write shadow bytes until we're word-aligned.
  while (I < NumLines && (uptr)Shadow % 4 != 0) {
    if ((*Shadow & ShadowAccessedVal) != ShadowAccessedVal)
      *Shadow |= ShadowAccessedVal;
    ++Shadow;
    ++I;
  }
  // Write whole shadow words at a time.
  // Using a word-stride loop improves the runtime of a microbenchmark of
  // memset calls by 10%.
  u32 WordValue = ShadowAccessedVal | ShadowAccessedVal << 8 |
    ShadowAccessedVal << 16 | ShadowAccessedVal << 24;
  while (I + 4 <= NumLines) {
    if ((*(u32*)Shadow & WordValue) != WordValue)
      *(u32*)Shadow |= WordValue;
    Shadow += 4;
    I += 4;
  }
  // Write any trailing shadow bytes.
  while (I < NumLines) {
    if ((*Shadow & ShadowAccessedVal) != ShadowAccessedVal)
      *Shadow |= ShadowAccessedVal;
    ++Shadow;
    ++I;
  }
}

// This routine will word-align ShadowStart and ShadowEnd prior to scanning.
static u32 countAndClearShadowValues(u32 BitIdx, uptr ShadowStart,
                                     uptr ShadowEnd) {
  u32 WorkingSetSize = 0;
  u32 ByteValue = 0x1 << BitIdx;
  u32 WordValue = ByteValue | ByteValue << 8 | ByteValue << 16 |
    ByteValue << 24;
  // Get word aligned start.
  ShadowStart = RoundDownTo(ShadowStart, sizeof(u32));
  for (u32 *Ptr = (u32 *)ShadowStart; Ptr < (u32 *)ShadowEnd; ++Ptr) {
    if ((*Ptr & WordValue) != 0) {
      byte *BytePtr = (byte *)Ptr;
      for (u32 j = 0; j < sizeof(u32); ++j) {
        if (BytePtr[j] & ByteValue) {
          ++WorkingSetSize;
          // TODO: Accumulate to the lower-frequency bit to the left.
        }
      }
      // Clear this bit from every shadow byte.
      *Ptr &= ~WordValue;
    }
  }
  return WorkingSetSize;
}

// Scan shadow memory to calculate the number of cache lines being accessed,
// i.e., the number of non-zero bits indexed by BitIdx in each shadow byte.
// We also clear the lowest bits (most recent working set snapshot).
static u32 computeWorkingSizeAndReset(u32 BitIdx) {
  u32 WorkingSetSize = 0;
  MemoryMappingLayout MemIter(true/*cache*/);
  uptr Start, End, Prot;
  while (MemIter.Next(&Start, &End, nullptr/*offs*/, nullptr/*file*/,
                      0/*file size*/, &Prot)) {
    VPrintf(4, "%s: considering %p-%p app=%d shadow=%d prot=%u\n",
            __FUNCTION__, Start, End, Prot, isAppMem(Start),
            isShadowMem(Start));
    if (isShadowMem(Start) && (Prot & MemoryMappingLayout::kProtectionWrite)) {
      VPrintf(3, "%s: walking %p-%p\n", __FUNCTION__, Start, End);
      WorkingSetSize += countAndClearShadowValues(BitIdx, Start, End);
    }
  }
  return WorkingSetSize;
}

void initializeWorkingSet() {
  CHECK(getFlags()->cache_line_size == CacheLineSize);
  registerMemoryFaultHandler();
}

static u32 getSizeForPrinting(u32 NumOfCachelines, const char *&Unit) {
  // We need a constant to avoid software divide support:
  static const u32 KilobyteCachelines = (0x1 << 10) / CacheLineSize;
  static const u32 MegabyteCachelines = KilobyteCachelines << 10;

  if (NumOfCachelines > 10 * MegabyteCachelines) {
    Unit = "MB";
    return NumOfCachelines / MegabyteCachelines;
  } else if (NumOfCachelines > 10 * KilobyteCachelines) {
    Unit = "KB";
    return NumOfCachelines / KilobyteCachelines;
  } else {
    Unit = "Bytes";
    return NumOfCachelines * CacheLineSize;
  }
}

int finalizeWorkingSet() {
  // Get the working set size for the entire execution.
  u32 NumOfCachelines = computeWorkingSizeAndReset(TotalWorkingSetBitIdx);
  const char *Unit;
  u32 Size = getSizeForPrinting(NumOfCachelines, Unit);
  Report(" %s: the total working set size: %u %s (%u cache lines)\n",
         SanitizerToolName, Size, Unit, NumOfCachelines);
  return 0;
}

} // namespace __esan
OpenPOWER on IntegriCloud