summary | refs | log | tree | commit | diff | stats
path: root/compiler-rt
diff options
context:
space:
mode:
Diffstat (limited to 'compiler-rt')
-rw-r--r-- compiler-rt/lib/profile/InstrProfiling.c 175
-rw-r--r-- compiler-rt/test/profile/instrprof-value-prof.c 183
2 files changed, 299 insertions, 59 deletions
diff --git a/compiler-rt/lib/profile/InstrProfiling.c b/compiler-rt/lib/profile/InstrProfiling.c
index 58097f7d3f5..216452953e1 100644
--- a/compiler-rt/lib/profile/InstrProfiling.c
+++ b/compiler-rt/lib/profile/InstrProfiling.c
@@ -9,8 +9,19 @@
#include "InstrProfiling.h"
#include <limits.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#define INSTR_PROF_VALUE_PROF_DATA
+#define INSTR_PROF_COMMON_API_IMPL
+#include "InstrProfData.inc"
+
+#define PROF_OOM(Msg) PROF_ERR(Msg ":%s\n", "Out of memory");
+#define PROF_OOM_RETURN(Msg) \
+ { \
+ PROF_OOM(Msg) \
+ return 0; \
+ }
LLVM_LIBRARY_VISIBILITY uint64_t __llvm_profile_get_magic(void) {
return sizeof(void *) == sizeof(uint64_t) ? (INSTR_PROF_RAW_MAGIC_64)
@@ -60,20 +71,29 @@ LLVM_LIBRARY_VISIBILITY void __llvm_profile_reset_counters(void) {
}
}
-/* Total number of value profile data in bytes. */
-static uint64_t TotalValueDataSize = 0;
-
-#ifdef _MIPS_ARCH
+/* This method is only used in value profiler mock testing. */
LLVM_LIBRARY_VISIBILITY void
-__llvm_profile_instrument_target(uint64_t TargetValue, void *Data_,
- uint32_t CounterIndex) {}
+__llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
+ uint32_t ValueKind, uint16_t NumValueSites) {
+ *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
+}
-#else
+/* This method is only used in value profiler mock testing. */
+LLVM_LIBRARY_VISIBILITY const __llvm_profile_data *
+__llvm_profile_iterate_data(const __llvm_profile_data *Data) {
+ return Data + 1;
+}
+
+/* This method is only used in value profiler mock testing. */
+LLVM_LIBRARY_VISIBILITY void *
+__llvm_get_function_addr(const __llvm_profile_data *Data) {
+ return Data->FunctionPointer;
+}
/* Allocate an array that holds the pointers to the linked lists of
* value profile counter nodes. The number of element of the array
* is the total number of value profile sites instrumented. Returns
- * 0 if allocation fails.
+ * 0 if allocation fails.
*/
static int allocateValueProfileCounters(__llvm_profile_data *Data) {
@@ -90,16 +110,27 @@ static int allocateValueProfileCounters(__llvm_profile_data *Data) {
free(Mem);
return 0;
}
- /* In the raw format, there will be an value count array preceding
- * the value profile data. The element type of the array is uint8_t,
- * and there is one element in array per value site. The element
- * stores the number of values profiled for the corresponding site.
- */
- uint8_t Padding = __llvm_profile_get_num_padding_bytes(NumVSites);
- __sync_fetch_and_add(&TotalValueDataSize, NumVSites + Padding);
return 1;
}
+static void deallocateValueProfileCounters(__llvm_profile_data *Data) {
+ uint64_t NumVSites = 0, I;
+ uint32_t VKI;
+ if (!Data->Values)
+ return;
+ for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
+ NumVSites += Data->NumValueSites[VKI];
+ for (I = 0; I < NumVSites; I++) {
+ ValueProfNode *Node = ((ValueProfNode **)Data->Values)[I];
+ while (Node) {
+ ValueProfNode *Next = Node->Next;
+ free(Node);
+ Node = Next;
+ }
+ }
+ free(Data->Values);
+}
+
LLVM_LIBRARY_VISIBILITY void
__llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
uint32_t CounterIndex) {
@@ -149,61 +180,87 @@ __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
free(CurrentVNode);
return;
}
- __sync_fetch_and_add(&TotalValueDataSize, Success * sizeof(ValueProfNode));
}
-#endif
+
+/* For multi-threaded programs, while the profile is being dumped, other
+ threads may still be updating the value profile data and creating new
+ value entries. To accommodate this, we need to add extra bytes to the
+ data buffer. The size of the extra space is controlled by the environment
+ variable LLVM_VALUE_PROF_BUFFER_EXTRA (1024 bytes when unset or empty). */
+static unsigned getVprofExtraBytes() {
+ const char *ExtraStr = getenv("LLVM_VALUE_PROF_BUFFER_EXTRA");
+ if (!ExtraStr || !ExtraStr[0])
+ return 1024;
+ return (unsigned)atoi(ExtraStr);
+}
LLVM_LIBRARY_VISIBILITY uint64_t
__llvm_profile_gather_value_data(uint8_t **VDataArray) {
+ size_t S = 0, RealSize = 0, BufferCapacity = 0, Extra = 0;
+ __llvm_profile_data *I;
+ if (!VDataArray)
+ PROF_OOM_RETURN("Failed to write value profile data ");
- if (!VDataArray || 0 == TotalValueDataSize)
- return 0;
-
- uint64_t NumData = TotalValueDataSize;
- *VDataArray = (uint8_t *)calloc(NumData, sizeof(uint8_t));
- if (!*VDataArray)
- return 0;
-
- uint8_t *VDataEnd = *VDataArray + NumData;
- uint8_t *PerSiteCountsHead = *VDataArray;
const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
- __llvm_profile_data *I;
- for (I = (__llvm_profile_data *)DataBegin; I != DataEnd; ++I) {
- uint64_t NumVSites = 0;
- uint32_t VKI, i;
+ /*
+ * Compute the total Size of the buffer to hold ValueProfData
+ * structures for functions with value profile data.
+ */
+ for (I = (__llvm_profile_data *)DataBegin; I != DataEnd; ++I) {
+ ValueProfRuntimeRecord R;
+ /* Extract the value profile data info from the runtime. */
+ if (initializeValueProfRuntimeRecord(&R, I->NumValueSites, I->Values))
+ PROF_OOM_RETURN("Failed to write value profile data ");
+ /* Compute the size of ValueProfData from this runtime record. */
+ if (getNumValueKindsRT(&R) != 0)
+ S += getValueProfDataSizeRT(&R);
+ finalizeValueProfRuntimeRecord(&R);
+ }
+ /* No value sites or no value profile data is collected. */
+ if (!S)
+ return 0;
- if (!I->Values)
+ Extra = getVprofExtraBytes();
+ BufferCapacity = S + Extra;
+ *VDataArray = calloc(BufferCapacity, sizeof(uint8_t));
+ if (!*VDataArray)
+ PROF_OOM_RETURN("Failed to write value profile data ");
+
+ ValueProfData *VD = (ValueProfData *)(*VDataArray);
+ /*
+ * Extract value profile data and write into ValueProfData structure
+ * one by one. Note that new value profile data added to any value
+ * site (from another thread) after the ValueProfRuntimeRecord is
+ * initialized (when the profile data snapshot is taken) won't be
+ collected. This is not a problem as those dropped values will have
+ very low taken counts.
+ */
+ for (I = (__llvm_profile_data *)DataBegin; I != DataEnd; ++I) {
+ ValueProfRuntimeRecord R;
+ if (initializeValueProfRuntimeRecord(&R, I->NumValueSites, I->Values))
+ PROF_OOM_RETURN("Failed to write value profile data ");
+ if (getNumValueKindsRT(&R) == 0)
continue;
- ValueProfNode **ValueCounters = (ValueProfNode **)I->Values;
-
- for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
- NumVSites += I->NumValueSites[VKI];
- uint8_t Padding = __llvm_profile_get_num_padding_bytes(NumVSites);
-
- uint8_t *PerSiteCountPtr = PerSiteCountsHead;
- InstrProfValueData *VDataPtr =
- (InstrProfValueData *)(PerSiteCountPtr + NumVSites + Padding);
-
- for (i = 0; i < NumVSites; ++i) {
-
- ValueProfNode *VNode = ValueCounters[i];
-
- uint8_t VDataCount = 0;
- while (VNode && ((uint8_t *)(VDataPtr + 1) <= VDataEnd)) {
- *VDataPtr = VNode->VData;
- VNode = VNode->Next;
- ++VDataPtr;
- if (++VDataCount == UCHAR_MAX)
- break;
- }
- *PerSiteCountPtr = VDataCount;
- ++PerSiteCountPtr;
+ /* Record R has taken a snapshot of the VP data at this point. Newly
+ added VP data for this function will be dropped. */
+ /* Check if there is enough space. */
+ if (BufferCapacity - RealSize < getValueProfDataSizeRT(&R)) {
+ PROF_ERR("Value profile data is dropped :%s \n",
+ "Out of buffer space. Use environment "
+ " LLVM_VALUE_PROF_BUFFER_EXTRA to allocate more");
+ I->Values = 0;
}
- I->Values = (void *)PerSiteCountsHead;
- PerSiteCountsHead = (uint8_t *)VDataPtr;
+
+ serializeValueProfDataFromRT(&R, VD);
+ deallocateValueProfileCounters(I);
+ I->Values = VD;
+ finalizeValueProfRuntimeRecord(&R);
+ RealSize += VD->TotalSize;
+ VD = (ValueProfData *)((char *)VD + VD->TotalSize);
}
- return PerSiteCountsHead - *VDataArray;
+
+ return RealSize;
}
diff --git a/compiler-rt/test/profile/instrprof-value-prof.c b/compiler-rt/test/profile/instrprof-value-prof.c
new file mode 100644
index 00000000000..b103ff8c4ee
--- /dev/null
+++ b/compiler-rt/test/profile/instrprof-value-prof.c
@@ -0,0 +1,183 @@
+// RUN: %clang_profgen -O2 -o %t %s
+// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t 1
+// RUN: env LLVM_PROFILE_FILE=%t-2.profraw %run %t
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: llvm-profdata merge -o %t-2.profdata %t-2.profraw
+// RUN: llvm-profdata merge -o %t-merged.profdata %t.profraw %t-2.profdata
+// RUN: llvm-profdata show --all-functions -ic-targets %t-2.profdata | FileCheck %s -check-prefix=NO-VALUE
+// RUN: llvm-profdata show --all-functions -ic-targets %t.profdata | FileCheck %s
+// Value profile merging currently sorts based on target values -- this destroys the order of the targets
+// in the list, leading to comparison problems. For now just check a small subset of the output.
+// RUN: llvm-profdata show --all-functions -ic-targets %t-merged.profdata | FileCheck %s -check-prefix=MERGE
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+typedef struct __llvm_profile_data __llvm_profile_data;
+const __llvm_profile_data *__llvm_profile_begin_data(void);
+const __llvm_profile_data *__llvm_profile_end_data(void);
+void __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
+ uint32_t ValueKind,
+ uint16_t NumValueSites);
+__llvm_profile_data *
+__llvm_profile_iterate_data(const __llvm_profile_data *Data);
+void *__llvm_get_function_addr(const __llvm_profile_data *Data);
+void __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
+ uint32_t CounterIndex);
+
+#define DEF_FUNC(x) \
+ void x() {}
+#define DEF_2_FUNCS(x) DEF_FUNC(x##_1) DEF_FUNC(x##_2)
+#define DEF_4_FUNCS(x) DEF_2_FUNCS(x##_1) DEF_2_FUNCS(x##_2)
+#define DEF_8_FUNCS(x) DEF_4_FUNCS(x##_1) DEF_4_FUNCS(x##_2)
+#define DEF_16_FUNCS(x) DEF_8_FUNCS(x##_1) DEF_8_FUNCS(x##_2)
+#define DEF_32_FUNCS(x) DEF_16_FUNCS(x##_1) DEF_16_FUNCS(x##_2)
+#define DEF_64_FUNCS(x) DEF_32_FUNCS(x##_1) DEF_32_FUNCS(x##_2)
+#define DEF_128_FUNCS(x) DEF_64_FUNCS(x##_1) DEF_64_FUNCS(x##_2)
+
+#define FUNC_ADDR(x) &x,
+#define FUNC_2_ADDRS(x) FUNC_ADDR(x##_1) FUNC_ADDR(x##_2)
+#define FUNC_4_ADDRS(x) FUNC_2_ADDRS(x##_1) FUNC_2_ADDRS(x##_2)
+#define FUNC_8_ADDRS(x) FUNC_4_ADDRS(x##_1) FUNC_4_ADDRS(x##_2)
+#define FUNC_16_ADDRS(x) FUNC_8_ADDRS(x##_1) FUNC_8_ADDRS(x##_2)
+#define FUNC_32_ADDRS(x) FUNC_16_ADDRS(x##_1) FUNC_16_ADDRS(x##_2)
+#define FUNC_64_ADDRS(x) FUNC_32_ADDRS(x##_1) FUNC_32_ADDRS(x##_2)
+#define FUNC_128_ADDRS(x) FUNC_64_ADDRS(x##_1) FUNC_64_ADDRS(x##_2)
+
+DEF_8_FUNCS(callee)
+DEF_128_FUNCS(caller)
+
+void *CallerAddrs[] = {FUNC_128_ADDRS(caller)};
+
+void *CalleeAddrs[] = {FUNC_8_ADDRS(callee)};
+
+static int cmpaddr(const void *p1, const void *p2) {
+ void *addr1 = *(void **)p1;
+ void *addr2 = *(void **)p2;
+ return (intptr_t)addr2 - (intptr_t)addr1;
+}
+
+int main(int argc, const char *argv[]) {
+ unsigned S, NS = 0, V, doInstrument = 1;
+ const __llvm_profile_data *Data, *DataEnd;
+
+ if (argc < 2)
+ doInstrument = 0;
+
+ qsort(CallerAddrs, sizeof(CallerAddrs) / sizeof(void *), sizeof(void *),
+ cmpaddr);
+
+ /* We will synthesize value profile data for 128 caller functions.
+ * The number of value sites per caller ranges from 0 to 127, and the
+ * number of values for each value site ranges from 0 to 7. */
+
+ Data = __llvm_profile_begin_data();
+ DataEnd = __llvm_profile_end_data();
+
+ for (; Data < DataEnd; Data = __llvm_profile_iterate_data(Data)) {
+ void *func = __llvm_get_function_addr(Data);
+ if (bsearch(&func, CallerAddrs, sizeof(CallerAddrs) / sizeof(void *),
+ sizeof(void *), cmpaddr)) {
+ __llvm_profile_set_num_value_sites((__llvm_profile_data *)Data,
+ 0 /*IPVK_IndirectCallTarget */, NS);
+ if (!doInstrument) {
+ NS++;
+ continue;
+ }
+ for (S = 0; S < NS; S++) {
+ for (V = 0; V < S % 8; V++) {
+ unsigned C;
+ for (C = 0; C < V + 1; C++)
+ __llvm_profile_instrument_target((uint64_t)CalleeAddrs[V],
+ (void *)Data, S);
+ }
+ }
+ NS++;
+ }
+ }
+}
+
+// NO-VALUE: Indirect Call Site Count: 127
+// NO-VALUE-NEXT: Indirect Target Results:
+// MERGE: Indirect Call Site Count: 127
+// MERGE-NEXT: Indirect Target Results:
+// MERGE-NEXT: [ 1, callee_1_1_1, 1 ]
+// CHECK: Indirect Call Site Count: 127
+// CHECK-NEXT: Indirect Target Results:
+// CHECK-NEXT: [ 1, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 2, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 2, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 3, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 3, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 3, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 4, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 4, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 4, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 4, callee_1_2_2, 4 ]
+// CHECK-NEXT: [ 5, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 5, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 5, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 5, callee_1_2_2, 4 ]
+// CHECK-NEXT: [ 5, callee_2_1_1, 5 ]
+// CHECK-NEXT: [ 6, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 6, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 6, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 6, callee_1_2_2, 4 ]
+// CHECK-NEXT: [ 6, callee_2_1_1, 5 ]
+// CHECK-NEXT: [ 6, callee_2_1_2, 6 ]
+// CHECK-NEXT: [ 7, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 7, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 7, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 7, callee_1_2_2, 4 ]
+// CHECK-NEXT: [ 7, callee_2_1_1, 5 ]
+// CHECK-NEXT: [ 7, callee_2_1_2, 6 ]
+// CHECK-NEXT: [ 7, callee_2_2_1, 7 ]
+// CHECK-NEXT: [ 9, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 10, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 10, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 11, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 11, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 11, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 12, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 12, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 12, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 12, callee_1_2_2, 4 ]
+// CHECK-NEXT: [ 13, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 13, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 13, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 13, callee_1_2_2, 4 ]
+// CHECK-NEXT: [ 13, callee_2_1_1, 5 ]
+// CHECK-NEXT: [ 14, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 14, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 14, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 14, callee_1_2_2, 4 ]
+// CHECK-NEXT: [ 14, callee_2_1_1, 5 ]
+// CHECK-NEXT: [ 14, callee_2_1_2, 6 ]
+// CHECK-NEXT: [ 15, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 15, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 15, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 15, callee_1_2_2, 4 ]
+// CHECK-NEXT: [ 15, callee_2_1_1, 5 ]
+// CHECK-NEXT: [ 15, callee_2_1_2, 6 ]
+// CHECK-NEXT: [ 15, callee_2_2_1, 7 ]
+// CHECK-NEXT: [ 17, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 18, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 18, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 19, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 19, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 19, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 20, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 20, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 20, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 20, callee_1_2_2, 4 ]
+// CHECK-NEXT: [ 21, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 21, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 21, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 21, callee_1_2_2, 4 ]
+// CHECK-NEXT: [ 21, callee_2_1_1, 5 ]
+// CHECK-NEXT: [ 22, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 22, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 22, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 22, callee_1_2_2, 4 ]
+// CHECK-NEXT: [ 22, callee_2_1_1, 5 ]
+
OpenPOWER on IntegriCloud