summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/perf_event_intel_ds.c
diff options
context:
space:
mode:
authorAndi Kleen <ak@linux.intel.com>2013-06-17 17:36:52 -0700
committerIngo Molnar <mingo@kernel.org>2013-06-19 14:43:35 +0200
commitf9134f36aed59ab55c0ab1a4618dd455f15aef5f (patch)
treebdccb1042167cb72e257127e1097c506c5b4468e /arch/x86/kernel/cpu/perf_event_intel_ds.c
parent135c5612c460f89657c4698fe2ea753f6f667963 (diff)
downloadtalos-op-linux-f9134f36aed59ab55c0ab1a4618dd455f15aef5f.tar.gz
talos-op-linux-f9134f36aed59ab55c0ab1a4618dd455f15aef5f.zip
perf/x86/intel: Add mem-loads/stores support for Haswell
mem-loads is basically the same as Sandy Bridge, but we use a separate string for changes later. Haswell doesn't support the full precise store mode, so we emulate it using the "DataLA" facility. This allows to do everything, but for data sources we can only detect L1 hit or not. There is no explicit enable bit anymore, so we have to tie it to a perf internal only flag. The address is supported for all memory related PEBS events with DataLA. Instead of only logging for the load and store events we allow logging it for all (it will be simply 0 if the current event does not support it) Signed-off-by: Andi Kleen <ak@linux.intel.com> Cc: Andi Kleen <ak@linux.jf.intel.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Link: http://lkml.kernel.org/r/1371515812-9646-7-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel_ds.c')
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c32
1 files changed, 25 insertions, 7 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index e83148ffe392..ed3e5533ce33 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status)
return val;
}
+static u64 precise_store_data_hsw(u64 status)
+{
+ union perf_mem_data_src dse;
+
+ dse.val = 0;
+ dse.mem_op = PERF_MEM_OP_STORE;
+ dse.mem_lvl = PERF_MEM_LVL_NA;
+ if (status & 1)
+ dse.mem_lvl = PERF_MEM_LVL_L1;
+ /* Nothing else supported. Sorry. */
+ return dse.val;
+}
+
static u64 load_latency_data(u64 status)
{
union intel_x86_pebs_dse dse;
@@ -566,13 +579,13 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
struct event_constraint intel_hsw_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+ INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
- INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
+ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */
/* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
/* MEM_UOPS_RETIRED.STLB_MISS_STORES */
@@ -582,7 +595,7 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
/* MEM_UOPS_RETIRED.SPLIT_STORES */
INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
- INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+ INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
@@ -759,7 +772,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
return;
fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
- fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
+ fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
+ PERF_X86_EVENT_PEBS_ST_HSW);
perf_sample_data_init(&data, 0, event->hw.last_period);
@@ -770,9 +784,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* if PEBS-LL or PreciseStore
*/
if (fll || fst) {
- if (sample_type & PERF_SAMPLE_ADDR)
- data.addr = pebs->dla;
-
/*
* Use latency for weight (only avail with PEBS-LL)
*/
@@ -785,6 +796,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
if (sample_type & PERF_SAMPLE_DATA_SRC) {
if (fll)
data.data_src.val = load_latency_data(pebs->dse);
+ else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+ data.data_src.val =
+ precise_store_data_hsw(pebs->dse);
else
data.data_src.val = precise_store_data(pebs->dse);
}
@@ -814,6 +828,10 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
else
regs.flags &= ~PERF_EFLAGS_EXACT;
+ if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
+ x86_pmu.intel_cap.pebs_format >= 1)
+ data.addr = pebs->dla;
+
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
OpenPOWER on IntegriCloud