From ca037701a025334e724e5c61b3b1082940c8b981 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Mar 2010 19:52:12 +0100 Subject: perf, x86: Add PEBS infrastructure This patch implements support for Intel Precise Event Based Sampling, which is an alternative counter mode in which the counter triggers a hardware assist to collect information on events. The hardware assist takes a trap like snapshot of a subset of the machine registers. This data is written to the Intel Debug-Store, which can be programmed with a data threshold at which to raise a PMI. With the PEBS hardware assist being trap like, the reported IP is always one instruction after the actual instruction that triggered the event. This implements a simple PEBS model that always takes a single PEBS event at a time. This is done so that the interaction with the rest of the system is as expected (freq adjust, period randomization, lbr, callchains, etc.). It adds an ABI element: perf_event_attr::precise, which indicates that we wish to use this (constrained, but precise) mode. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100304140100.392111285@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 223 ++++++++++++++------------------------- 1 file changed, 78 insertions(+), 145 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1d665a0b202c..0c03d5c1671f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -31,45 +31,6 @@ static u64 perf_event_mask __read_mostly; -/* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS 4 - -/* The size of a BTS record in bytes: */ -#define BTS_RECORD_SIZE 24 - -/* The size of a per-cpu BTS buffer in bytes: */ -#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) - -/* The BTS overflow threshold in bytes from the end of the buffer: */ -#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) - - -/* - * Bits in the debugctlmsr controlling branch tracing. - */ -#define X86_DEBUGCTL_TR (1 << 6) -#define X86_DEBUGCTL_BTS (1 << 7) -#define X86_DEBUGCTL_BTINT (1 << 8) -#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) -#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) - -/* - * A debug store configuration. - * - * We only support architectures that use 64bit fields. 
- */ -struct debug_store { - u64 bts_buffer_base; - u64 bts_index; - u64 bts_absolute_maximum; - u64 bts_interrupt_threshold; - u64 pebs_buffer_base; - u64 pebs_index; - u64 pebs_absolute_maximum; - u64 pebs_interrupt_threshold; - u64 pebs_event_reset[MAX_PEBS_EVENTS]; -}; - struct event_constraint { union { unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; @@ -88,17 +49,29 @@ struct amd_nb { }; struct cpu_hw_events { + /* + * Generic x86 PMC bits + */ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; int enabled; - struct debug_store *ds; int n_events; int n_added; int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ u64 tags[X86_PMC_IDX_MAX]; struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ + + /* + * Intel DebugStore bits + */ + struct debug_store *ds; + u64 pebs_enabled; + + /* + * AMD specific bits + */ struct amd_nb *amd_nb; }; @@ -112,12 +85,24 @@ struct cpu_hw_events { #define EVENT_CONSTRAINT(c, n, m) \ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) +/* + * Constraint on the Event code. + */ #define INTEL_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) +/* + * Constraint on the Event code + UMask + fixed-mask + */ #define FIXED_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) +/* + * Constraint on the Event code + UMask + */ +#define PEBS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) + #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) @@ -128,6 +113,9 @@ struct cpu_hw_events { * struct x86_pmu - generic x86 pmu */ struct x86_pmu { + /* + * Generic x86 PMC bits + */ const char *name; int version; int (*handle_irq)(struct pt_regs *); @@ -146,10 +134,6 @@ struct x86_pmu { u64 event_mask; int apic; u64 max_period; - u64 intel_ctrl; - void (*enable_bts)(u64 config); - void (*disable_bts)(void); - struct event_constraint * (*get_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); @@ -162,6 +146,19 @@ struct x86_pmu { void (*cpu_starting)(int cpu); void (*cpu_dying)(int cpu); void (*cpu_dead)(int cpu); + + /* + * Intel Arch Perfmon v2+ + */ + u64 intel_ctrl; + + /* + * Intel DebugStore bits + */ + int bts, pebs; + int pebs_record_size; + void (*drain_pebs)(struct pt_regs *regs); + struct event_constraint *pebs_constraints; }; static struct x86_pmu x86_pmu __read_mostly; @@ -293,110 +290,14 @@ static void release_pmc_hardware(void) #endif } -static inline bool bts_available(void) -{ - return x86_pmu.enable_bts != NULL; -} - -static void init_debug_store_on_cpu(int cpu) -{ - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, - (u32)((u64)(unsigned long)ds), - (u32)((u64)(unsigned long)ds >> 32)); -} - -static void fini_debug_store_on_cpu(int cpu) -{ - if (!per_cpu(cpu_hw_events, cpu).ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); -} - -static void release_bts_hardware(void) -{ - int cpu; - - if (!bts_available()) - return; - - get_online_cpus(); - - for_each_online_cpu(cpu) - fini_debug_store_on_cpu(cpu); - - for_each_possible_cpu(cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - continue; - - per_cpu(cpu_hw_events, cpu).ds = NULL; - - kfree((void *)(unsigned long)ds->bts_buffer_base); - kfree(ds); - } - - put_online_cpus(); -} - -static int reserve_bts_hardware(void) -{ - int cpu, err = 0; - - if (!bts_available()) - return 0; - - 
get_online_cpus(); - - for_each_possible_cpu(cpu) { - struct debug_store *ds; - void *buffer; - - err = -ENOMEM; - buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); - if (unlikely(!buffer)) - break; - - ds = kzalloc(sizeof(*ds), GFP_KERNEL); - if (unlikely(!ds)) { - kfree(buffer); - break; - } - - ds->bts_buffer_base = (u64)(unsigned long)buffer; - ds->bts_index = ds->bts_buffer_base; - ds->bts_absolute_maximum = - ds->bts_buffer_base + BTS_BUFFER_SIZE; - ds->bts_interrupt_threshold = - ds->bts_absolute_maximum - BTS_OVFL_TH; - - per_cpu(cpu_hw_events, cpu).ds = ds; - err = 0; - } - - if (err) - release_bts_hardware(); - else { - for_each_online_cpu(cpu) - init_debug_store_on_cpu(cpu); - } - - put_online_cpus(); - - return err; -} +static int reserve_ds_buffers(void); +static void release_ds_buffers(void); static void hw_perf_event_destroy(struct perf_event *event) { if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { release_pmc_hardware(); - release_bts_hardware(); + release_ds_buffers(); mutex_unlock(&pmc_reserve_mutex); } } @@ -459,7 +360,7 @@ static int __hw_perf_event_init(struct perf_event *event) if (!reserve_pmc_hardware()) err = -EBUSY; else - err = reserve_bts_hardware(); + err = reserve_ds_buffers(); } if (!err) atomic_inc(&active_events); @@ -537,7 +438,7 @@ static int __hw_perf_event_init(struct perf_event *event) if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && (hwc->sample_period == 1)) { /* BTS is not supported by this architecture. */ - if (!bts_available()) + if (!x86_pmu.bts) return -EOPNOTSUPP; /* BTS is currently only allowed for user-mode. */ @@ -995,6 +896,7 @@ static void x86_pmu_unthrottle(struct perf_event *event) void perf_event_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; + u64 pebs; struct cpu_hw_events *cpuc; unsigned long flags; int cpu, idx; @@ -1012,12 +914,14 @@ void perf_event_print_debug(void) rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); + rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); pr_info("\n"); pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); pr_info("CPU#%d: status: %016llx\n", cpu, status); pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); + pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); } pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); @@ -1333,6 +1237,7 @@ undo: #include "perf_event_amd.c" #include "perf_event_p6.c" +#include "perf_event_intel_ds.c" #include "perf_event_intel.c" static int __cpuinit @@ -1464,6 +1369,32 @@ static const struct pmu pmu = { .unthrottle = x86_pmu_unthrottle, }; +/* + * validate that we can schedule this event + */ +static int validate_event(struct perf_event *event) +{ + struct cpu_hw_events *fake_cpuc; + struct event_constraint *c; + int ret = 0; + + fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); + if (!fake_cpuc) + return -ENOMEM; + + c = x86_pmu.get_event_constraints(fake_cpuc, event); + + if (!c || !c->weight) + ret = -ENOSPC; + + if (x86_pmu.put_event_constraints) + x86_pmu.put_event_constraints(fake_cpuc, event); + + kfree(fake_cpuc); + + return ret; +} + /* * validate a single event group * @@ -1529,6 +1460,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) if (event->group_leader != event) err = validate_group(event); + else + err = validate_event(event); event->pmu = tmp; } -- cgit v1.2.1 From caff2befffe899e63df5cc760b7ed01cfd902685 Mon Sep 17 
00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Mar 2010 12:02:30 +0100 Subject: perf, x86: Implement simple LBR support Implement simple support for the Intel Last-Branch-Record (LBR); it supports all hardware that implements FREEZE_LBRS_ON_PMI, but does not (yet) implement the LBR config register. The Intel LBR is a FIFO of From,To addresses describing the last few branches the hardware took. This patch does not add a perf interface to the LBR, but merely provides an interface for internal use. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100304140100.544191154@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0c03d5c1671f..1badff6b6b28 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -48,6 +48,8 @@ struct amd_nb { struct event_constraint event_constraints[X86_PMC_IDX_MAX]; }; +#define MAX_LBR_ENTRIES 16 + struct cpu_hw_events { /* * Generic x86 PMC bits @@ -69,6 +71,14 @@ struct cpu_hw_events { struct debug_store *ds; u64 pebs_enabled; + /* + * Intel LBR bits + */ + int lbr_users; + void *lbr_context; + struct perf_branch_stack lbr_stack; + struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; + /* * AMD specific bits */ @@ -159,6 +169,13 @@ struct x86_pmu { int pebs_record_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; + + /* + * Intel LBR + */ + unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ + int lbr_nr; /* hardware stack size */ + int lbr_format; /* hardware format */ }; static struct x86_pmu x86_pmu __read_mostly; @@ -1237,6 +1254,7 @@ undo: #include "perf_event_amd.c" #include "perf_event_p6.c" +#include "perf_event_intel_lbr.c" #include "perf_event_intel_ds.c" #include "perf_event_intel.c" -- cgit v1.2.1 From ef21f683a045a79b6aa86ad81e5fdfc0d5ddd250 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Mar 2010 13:12:23 +0100 Subject: perf, x86: use LBR for PEBS IP+1 fixup Use the LBR to fix up the PEBS IP+1 issue. As said, PEBS reports the next instruction; here we use the LBR to find the last branch and from that construct the actual IP. If the IP matches the LBR-TO, we use LBR-FROM; otherwise we use the LBR-TO address as the beginning of the last basic block and decode forward. Once we find a match to the current IP, we use the previous location. This patch introduces a new ABI element: PERF_RECORD_MISC_EXACT, which conveys that the reported IP (PERF_SAMPLE_IP) is the exact instruction that caused the event (barring CPU errata).
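For illustration only (not part of these patches): a minimal sketch of the fixup decision described above, using the cpu_hw_events LBR fields added by the previous patch. The function name and the omitted instruction-decode loop are placeholders; the real fixup presumably lives in perf_event_intel_ds.c.

static int pebs_fixup_ip_sketch(struct pt_regs *regs, struct cpu_hw_events *cpuc)
{
	unsigned long ip = regs->ip;	/* PEBS-reported IP, one insn late */
	u64 from, to;

	if (!cpuc->lbr_stack.nr)	/* no LBR data, nothing we can do */
		return 0;

	from = cpuc->lbr_entries[0].from;
	to   = cpuc->lbr_entries[0].to;

	/*
	 * The last branch landed exactly on the reported IP, so the event
	 * was triggered by the branch itself: report its source instead.
	 */
	if (ip == to) {
		regs->ip = from;
		return 1;
	}

	/*
	 * Otherwise 'to' starts the last basic block: decode forward,
	 * instruction by instruction, until the reported IP is reached,
	 * and report the instruction before it (the x86 instruction-length
	 * decode loop is omitted from this sketch).
	 */
	return 0;
}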
The fixup can fail due to various reasons: 1) LBR contains invalid data (quite possible) 2) part of the basic block got paged out 3) the reported IP isn't part of the basic block (see 1) Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: "Zhang, Yanmin" Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100304140100.619375431@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 70 ++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 35 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1badff6b6b28..5cb4e8dcee4b 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -29,6 +29,41 @@ #include #include +/* + * best effort, GUP based copy_from_user() that assumes IRQ or NMI context + */ +static unsigned long +copy_from_user_nmi(void *to, const void __user *from, unsigned long n) +{ + unsigned long offset, addr = (unsigned long)from; + int type = in_nmi() ? KM_NMI : KM_IRQ0; + unsigned long size, len = 0; + struct page *page; + void *map; + int ret; + + do { + ret = __get_user_pages_fast(addr, 1, 0, &page); + if (!ret) + break; + + offset = addr & (PAGE_SIZE - 1); + size = min(PAGE_SIZE - offset, n - len); + + map = kmap_atomic(page, type); + memcpy(to, map+offset, size); + kunmap_atomic(map, type); + put_page(page); + + len += size; + to += size; + addr += size; + + } while (len < n); + + return len; +} + static u64 perf_event_mask __read_mostly; struct event_constraint { @@ -1550,41 +1585,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } -/* - * best effort, GUP based copy_from_user() that assumes IRQ or NMI context - */ -static unsigned long -copy_from_user_nmi(void *to, const void __user *from, unsigned long n) -{ - unsigned long offset, addr = (unsigned long)from; - int type = in_nmi() ? KM_NMI : KM_IRQ0; - unsigned long size, len = 0; - struct page *page; - void *map; - int ret; - - do { - ret = __get_user_pages_fast(addr, 1, 0, &page); - if (!ret) - break; - - offset = addr & (PAGE_SIZE - 1); - size = min(PAGE_SIZE - offset, n - len); - - map = kmap_atomic(page, type); - memcpy(to, map+offset, size); - kunmap_atomic(map, type); - put_page(page); - - len += size; - to += size; - addr += size; - - } while (len < n); - - return len; -} - static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) { unsigned long bytes; -- cgit v1.2.1 From 8db909a7e3c888b5d45aef7650d74ccebe3ce725 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Mar 2010 17:07:40 +0100 Subject: perf, x86: Clean up IA32_PERF_CAPABILITIES usage Saner PERF_CAPABILITIES support, which also exposes pebs_trap. Use that latter to make PEBS's use of LBR conditional since a fault-like pebs should already report the correct IP. 
( As of this writing there is no known hardware that implements !pebs_trap ) Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100304140100.770650663@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5cb4e8dcee4b..7b5430b2efe7 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -154,6 +154,17 @@ struct cpu_hw_events { #define for_each_event_constraint(e, c) \ for ((e) = (c); (e)->cmask; (e)++) +union perf_capabilities { + struct { + u64 lbr_format : 6; + u64 pebs_trap : 1; + u64 pebs_arch_reg : 1; + u64 pebs_format : 4; + u64 smm_freeze : 1; + }; + u64 capabilities; +}; + /* * struct x86_pmu - generic x86 pmu */ @@ -195,7 +206,8 @@ struct x86_pmu { /* * Intel Arch Perfmon v2+ */ - u64 intel_ctrl; + u64 intel_ctrl; + union perf_capabilities intel_cap; /* * Intel DebugStore bits @@ -210,7 +222,6 @@ struct x86_pmu { */ unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ int lbr_nr; /* hardware stack size */ - int lbr_format; /* hardware format */ }; static struct x86_pmu x86_pmu __read_mostly; -- cgit v1.2.1 From 3c44780b220e876b01e39d4028cd6f4205fbf5d6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Mar 2010 21:49:01 +0100 Subject: perf, x86: Disable PEBS on clovertown chips This CPU has just too many handycaps to be really useful. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100305154128.890278662@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 7b5430b2efe7..335ee1d38b79 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -197,6 +197,7 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); struct event_constraint *event_constraints; + void (*quirks)(void); void (*cpu_prepare)(int cpu); void (*cpu_starting)(int cpu); @@ -1373,6 +1374,9 @@ void __init init_hw_perf_events(void) pr_cont("%s PMU driver.\n", x86_pmu.name); + if (x86_pmu.quirks) + x86_pmu.quirks(); + if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", x86_pmu.num_events, X86_PMC_MAX_GENERIC); -- cgit v1.2.1 From 7645a24cbd01cbf4865d1273d5ddaa8d8c2ccb3a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 8 Mar 2010 13:51:31 +0100 Subject: perf, x86: Remove checking_{wr,rd}msr() usage We don't need checking_{wr,rd}msr() calls, since we should know what cpu we're running on and not use blindly poke at msrs. 
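For illustration only (not part of the patch): the difference between the two MSR write flavours mentioned above, sketched with a hypothetical helper. The checked variant catches the #GP a write to a bad MSR would raise and reports an error; plain wrmsrl() just writes and would fault on a bad MSR, which is fine once we know which CPU and MSRs we are dealing with.

static void msr_write_sketch(unsigned int msr, u64 val)
{
	/* checked: survives a #GP from an invalid MSR and reports failure */
	if (wrmsr_safe(msr, (u32)val, (u32)(val >> 32)))
		pr_warning("write to MSR %x failed\n", msr);

	/* unchecked: the variant used once we trust that the MSR exists */
	wrmsrl(msr, val);
}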
Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 335ee1d38b79..e24f6374f9f5 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -29,6 +29,17 @@ #include #include +#if 0 +#undef wrmsrl +#define wrmsrl(msr, val) \ +do { \ + trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ + (unsigned long)(val)); \ + native_write_msr((msr), (u32)((u64)(val)), \ + (u32)((u64)(val) >> 32)); \ +} while (0) +#endif + /* * best effort, GUP based copy_from_user() that assumes IRQ or NMI context */ @@ -821,14 +832,15 @@ void hw_perf_enable(void) static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) { - (void)checking_wrmsrl(hwc->config_base + hwc->idx, + wrmsrl(hwc->config_base + hwc->idx, hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); } static inline void x86_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); + + wrmsrl(hwc->config_base + hwc->idx, hwc->config); } static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); @@ -843,7 +855,7 @@ x86_perf_event_set_period(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; s64 left = atomic64_read(&hwc->period_left); s64 period = hwc->sample_period; - int err, ret = 0, idx = hwc->idx; + int ret = 0, idx = hwc->idx; if (idx == X86_PMC_IDX_FIXED_BTS) return 0; @@ -881,8 +893,8 @@ x86_perf_event_set_period(struct perf_event *event) */ atomic64_set(&hwc->prev_count, (u64)-left); - err = checking_wrmsrl(hwc->event_base + idx, - (u64)(-left) & x86_pmu.event_mask); + wrmsrl(hwc->event_base + idx, + (u64)(-left) & x86_pmu.event_mask); perf_event_update_userpage(event); @@ -987,7 +999,7 @@ void perf_event_print_debug(void) pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); } - pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); + pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); for (idx = 0; idx < x86_pmu.num_events; idx++) { rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); -- cgit v1.2.1 From a072738e04f0eb26370e39ec679e9a0d65e49aea Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 11 Mar 2010 19:54:39 +0300 Subject: perf, x86: Implement initial P4 PMU driver The netburst PMU is way different from the "architectural perfomance monitoring" specification that current CPUs use. P4 uses a tuple of ESCR+CCCR+COUNTER MSR registers to handle perfomance monitoring events. A few implementational details: 1) We need a separate x86_pmu::hw_config helper in struct x86_pmu since register bit-fields are quite different from P6, Core and later cpu series. 2) For the same reason is a x86_pmu::schedule_events helper introduced. 3) hw_perf_event::config consists of packed ESCR+CCCR values. It's allowed since in reality both registers only use a half of their size. Of course before making a real write into a particular MSR we need to unpack the value and extend it to a proper size. 
4) The tuple of packed ESCR+CCCR in hw_perf_event::config doesn't describe the memory address of ESCR MSR register so that we need to keep a mapping between these tuples used and available ESCR (various P4 events may use same ESCRs but not simultaneously), for this sake every active event has a per-cpu map of hw_perf_event::idx <--> ESCR addresses. 5) Since hw_perf_event::idx is an offset to counter/control register we need to lift X86_PMC_MAX_GENERIC up, otherwise kernel strips it down to 8 registers and event armed may never be turned off (ie the bit in active_mask is set but the loop never reaches this index to check), thanks to Peter Zijlstra Restrictions: - No cascaded counters support (do we ever need them?) - No dependent events support (so PERF_COUNT_HW_INSTRUCTIONS doesn't work for now) - There are events with same counters which can't work simultaneously (need to use intersected ones due to broken counter 1) - No PERF_COUNT_HW_CACHE_ events yet Todo: - Implement dependent events - Need proper hashing for event opcodes (no linear search, good for debugging stage but not in real loads) - Some events counted during a clock cycle -- need to set threshold for them and count every clock cycle just to get summary statistics (ie to behave the same way as other PMUs do) - Need to swicth to use event_constraints - To support RAW events we need to encode a global list of P4 events into p4_templates - Cache events need to be added Event support status matrix: Event status ----------------------------- cycles works cache-references works cache-misses works branch-misses works bus-cycles partially (does not work on 64bit cpu with HT enabled) instruction doesnt work (needs dependent event [mop tagging]) branches doesnt work Signed-off-by: Cyrill Gorcunov Signed-off-by: Lin Ming Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: Robert Richter Cc: Frederic Weisbecker LKML-Reference: <20100311165439.GB5129@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 46 +++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e24f6374f9f5..e6a3f5f81c96 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -190,6 +190,8 @@ struct x86_pmu { void (*enable_all)(void); void (*enable)(struct perf_event *); void (*disable)(struct perf_event *); + int (*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc); + int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; unsigned perfctr; u64 (*event_map)(int); @@ -415,6 +417,25 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) return 0; } +static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) +{ + /* + * Generate PMC IRQs: + * (keep 'enabled' bit clear for now) + */ + hwc->config = ARCH_PERFMON_EVENTSEL_INT; + + /* + * Count user and OS events unless requested not to + */ + if (!attr->exclude_user) + hwc->config |= ARCH_PERFMON_EVENTSEL_USR; + if (!attr->exclude_kernel) + hwc->config |= ARCH_PERFMON_EVENTSEL_OS; + + return 0; +} + /* * Setup the hardware configuration for a given attr_type */ @@ -446,23 +467,13 @@ static int __hw_perf_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; - /* - * Generate PMC IRQs: - * (keep 'enabled' bit clear for now) - */ - hwc->config = ARCH_PERFMON_EVENTSEL_INT; - 
hwc->idx = -1; hwc->last_cpu = -1; hwc->last_tag = ~0ULL; - /* - * Count user and OS events unless requested not to. - */ - if (!attr->exclude_user) - hwc->config |= ARCH_PERFMON_EVENTSEL_USR; - if (!attr->exclude_kernel) - hwc->config |= ARCH_PERFMON_EVENTSEL_OS; + /* Processor specifics */ + if (x86_pmu.hw_config(attr, hwc)) + return -EOPNOTSUPP; if (!hwc->sample_period) { hwc->sample_period = x86_pmu.max_period; @@ -517,7 +528,7 @@ static int __hw_perf_event_init(struct perf_event *event) return -EOPNOTSUPP; /* BTS is currently only allowed for user-mode. */ - if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + if (!attr->exclude_kernel) return -EOPNOTSUPP; } @@ -931,7 +942,7 @@ static int x86_pmu_enable(struct perf_event *event) if (n < 0) return n; - ret = x86_schedule_events(cpuc, n, assign); + ret = x86_pmu.schedule_events(cpuc, n, assign); if (ret) return ret; /* @@ -1263,7 +1274,7 @@ int hw_perf_group_sched_in(struct perf_event *leader, if (n0 < 0) return n0; - ret = x86_schedule_events(cpuc, n0, assign); + ret = x86_pmu.schedule_events(cpuc, n0, assign); if (ret) return ret; @@ -1313,6 +1324,7 @@ undo: #include "perf_event_amd.c" #include "perf_event_p6.c" +#include "perf_event_p4.c" #include "perf_event_intel_lbr.c" #include "perf_event_intel_ds.c" #include "perf_event_intel.c" @@ -1515,7 +1527,7 @@ static int validate_group(struct perf_event *event) fake_cpuc->n_events = n; - ret = x86_schedule_events(fake_cpuc, n, NULL); + ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); out_free: kfree(fake_cpuc); -- cgit v1.2.1 From 0b861225a5890f22445f08ca9cc7a87cff276ff7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 12 Mar 2010 00:50:16 +0300 Subject: x86, perf: Fix NULL deref on not assigned x86_pmu In case of not assigned x86_pmu and software events NULL dereference may being hit via x86_pmu::schedule_events method. Fix it by checking if x86_pmu is initialized at all. Signed-off-by: Cyrill Gorcunov Cc: Lin Ming Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Peter Zijlstra LKML-Reference: <20100311215016.GG25162@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e6a3f5f81c96..5586a02067d8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1269,6 +1269,9 @@ int hw_perf_group_sched_in(struct perf_event *leader, int assign[X86_PMC_IDX_MAX]; int n0, n1, ret; + if (!x86_pmu_initialized()) + return 0; + /* n0 = total number of events */ n0 = collect_events(cpuc, leader, true); if (n0 < 0) -- cgit v1.2.1 From 1d199b1ad606ae8b88acebd295b101c4e1cf2a57 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 16 Mar 2010 01:05:02 +0100 Subject: perf: Fix unexported generic perf_arch_fetch_caller_regs perf_arch_fetch_caller_regs() is exported for the overriden x86 version, but not for the generic weak version. As a general rule, weak functions should not have their symbol exported in the same file they are defined. So let's export it on trace_event_perf.c as it is used by trace events only. This fixes: ERROR: ".perf_arch_fetch_caller_regs" [fs/xfs/xfs.ko] undefined! ERROR: ".perf_arch_fetch_caller_regs" [arch/powerpc/platforms/cell/spufs/spufs.ko] undefined! -v2: And also only build it if trace events are enabled. 
-v3: Fix changelog mistake Reported-by: Stephen Rothwell Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Xiao Guangrong Cc: Paul Mackerras LKML-Reference: <1268697902-9518-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 978d297170a1..0d3466cf7f57 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1695,6 +1695,7 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } +#ifdef CONFIG_EVENT_TRACING void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) { regs->ip = ip; @@ -1706,4 +1707,4 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski regs->cs = __KERNEL_CS; local_save_flags(regs->flags); } -EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); +#endif -- cgit v1.2.1 From 984763cb90d4b5444baa0c3e43feff7926bf1834 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 16 Mar 2010 17:07:33 +0100 Subject: perf, x86: Report error code that returned from x86_pmu.hw_config() If x86_pmu.hw_config() fails a fixed error code (-EOPNOTSUPP) is returned even if a different error was reported. This patch fixes this. Signed-off-by: Robert Richter Acked-by: Cyrill Gorcunov Acked-by: Lin Ming Cc: acme@redhat.com Cc: eranian@google.com Cc: gorcunov@openvz.org Cc: peterz@infradead.org Cc: fweisbec@gmail.com LKML-Reference: <20100316160733.GR1585@erda.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0d3466cf7f57..5dacf63f913e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -472,8 +472,9 @@ static int __hw_perf_event_init(struct perf_event *event) hwc->last_tag = ~0ULL; /* Processor specifics */ - if (x86_pmu.hw_config(attr, hwc)) - return -EOPNOTSUPP; + err = x86_pmu.hw_config(attr, hwc); + if (err) + return err; if (!hwc->sample_period) { hwc->sample_period = x86_pmu.max_period; -- cgit v1.2.1 From b27ea29c6267889be255f2217fa7a6106e6a8b04 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 17 Mar 2010 12:49:10 +0100 Subject: perf/core, x86: Reduce number of CONFIG_X86_LOCAL_APIC macros The function reserve_pmc_hardware() and release_pmc_hardware() were hard to read. This patch improves readability of the code by removing most of the CONFIG_X86_LOCAL_APIC macros. 
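For illustration only (not part of the patch): the refactoring pattern on a made-up pair of functions (foo_reserve/foo_release are placeholders). Instead of wrapping individual statements inside one function body with #ifdef CONFIG_X86_LOCAL_APIC, the real implementations are built only when the option is enabled, and trivial stubs are used otherwise.

#ifdef CONFIG_X86_LOCAL_APIC

static bool foo_reserve(void)
{
	/* real reservation work, may fail */
	return true;
}

static void foo_release(void)
{
	/* real release work */
}

#else

static bool foo_reserve(void) { return true; }
static void foo_release(void) { }

#endif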
Signed-off-by: Robert Richter Cc: Stephane Eranian Cc: Peter Zijlstra LKML-Reference: <1268826553-19518-2-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5dacf63f913e..793e63f6c420 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -313,9 +313,10 @@ again: static atomic_t active_events; static DEFINE_MUTEX(pmc_reserve_mutex); +#ifdef CONFIG_X86_LOCAL_APIC + static bool reserve_pmc_hardware(void) { -#ifdef CONFIG_X86_LOCAL_APIC int i; if (nmi_watchdog == NMI_LOCAL_APIC) @@ -330,11 +331,9 @@ static bool reserve_pmc_hardware(void) if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) goto eventsel_fail; } -#endif return true; -#ifdef CONFIG_X86_LOCAL_APIC eventsel_fail: for (i--; i >= 0; i--) release_evntsel_nmi(x86_pmu.eventsel + i); @@ -349,12 +348,10 @@ perfctr_fail: enable_lapic_nmi_watchdog(); return false; -#endif } static void release_pmc_hardware(void) { -#ifdef CONFIG_X86_LOCAL_APIC int i; for (i = 0; i < x86_pmu.num_events; i++) { @@ -364,9 +361,15 @@ static void release_pmc_hardware(void) if (nmi_watchdog == NMI_LOCAL_APIC) enable_lapic_nmi_watchdog(); -#endif } +#else + +static bool reserve_pmc_hardware(void) { return true; } +static void release_pmc_hardware(void) {} + +#endif + static int reserve_ds_buffers(void); static void release_ds_buffers(void); -- cgit v1.2.1 From 10f1014d86fd4fe5087080d609b51183396c5e4c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 17 Mar 2010 12:49:12 +0100 Subject: perf/core, x86: Remove cpu_hw_events.interrupts This member in the struct is not used anymore and can be removed. Signed-off-by: Robert Richter Cc: Stephane Eranian Cc: Peter Zijlstra LKML-Reference: <1268826553-19518-4-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 793e63f6c420..104292a58c2b 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -102,7 +102,6 @@ struct cpu_hw_events { */ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long interrupts; int enabled; int n_events; -- cgit v1.2.1 From d6dc0b4ead6e8720096ecfa3d9e899b47ddbc8ed Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 17 Mar 2010 12:49:13 +0100 Subject: perf/core, x86: Remove duplicate perf_event_mask variable The same information is stored also in x86_pmu.intel_ctrl. This patch removes perf_event_mask and instead uses x86_pmu.intel_ctrl directly. 
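For illustration only (not part of the patch): how the bitmask kept in x86_pmu.intel_ctrl (and formerly duplicated in perf_event_mask) is composed, per the hunks below: one bit per generic counter in the low bits, plus one bit per fixed counter starting at X86_PMC_IDX_FIXED. The helper name is illustrative only.

static u64 counter_mask_sketch(int num_events, int num_events_fixed)
{
	u64 mask;

	mask  = (1ULL << num_events) - 1;				/* generic counters */
	mask |= ((1ULL << num_events_fixed) - 1) << X86_PMC_IDX_FIXED;	/* fixed counters */

	return mask;
}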
Signed-off-by: Robert Richter Cc: Stephane Eranian Cc: Peter Zijlstra LKML-Reference: <1268826553-19518-5-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 104292a58c2b..c97d5b52d12a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -75,8 +75,6 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) return len; } -static u64 perf_event_mask __read_mostly; - struct event_constraint { union { unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; @@ -1406,7 +1404,7 @@ void __init init_hw_perf_events(void) x86_pmu.num_events, X86_PMC_MAX_GENERIC); x86_pmu.num_events = X86_PMC_MAX_GENERIC; } - perf_event_mask = (1 << x86_pmu.num_events) - 1; + x86_pmu.intel_ctrl = (1 << x86_pmu.num_events) - 1; perf_max_events = x86_pmu.num_events; if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { @@ -1415,9 +1413,8 @@ void __init init_hw_perf_events(void) x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; } - perf_event_mask |= + x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; - x86_pmu.intel_ctrl = perf_event_mask; perf_events_lapic_init(); register_die_notifier(&perf_event_nmi_notifier); @@ -1442,7 +1439,7 @@ void __init init_hw_perf_events(void) pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); pr_info("... max period: %016Lx\n", x86_pmu.max_period); pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); - pr_info("... event mask: %016Lx\n", perf_event_mask); + pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); perf_cpu_notifier(x86_pmu_notifier); } -- cgit v1.2.1 From 7335f75e9ca166044e38a96abad422d8e6e364b5 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 17 Mar 2010 13:37:01 +0300 Subject: x86, perf: Use apic_write unconditionally Since apic_write() maps to a plain noop in the !CONFIG_X86_LOCAL_APIC case we're safe to remove this conditional compilation and clean up the code a bit. Signed-off-by: Cyrill Gorcunov Cc: fweisbec@gmail.com Cc: acme@redhat.com Cc: eranian@google.com Cc: peterz@infradead.org LKML-Reference: <20100317104356.232371479@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c97d5b52d12a..14eca80918dc 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1136,7 +1136,6 @@ void set_perf_event_pending(void) void perf_events_lapic_init(void) { -#ifdef CONFIG_X86_LOCAL_APIC if (!x86_pmu.apic || !x86_pmu_initialized()) return; @@ -1144,7 +1143,6 @@ void perf_events_lapic_init(void) * Always use NMI for PMU */ apic_write(APIC_LVTPC, APIC_DM_NMI); -#endif } static int __kprobes @@ -1168,9 +1166,7 @@ perf_event_nmi_handler(struct notifier_block *self, regs = args->regs; -#ifdef CONFIG_X86_LOCAL_APIC apic_write(APIC_LVTPC, APIC_DM_NMI); -#endif /* * Can't rely on the handled return value to say it was our NMI, two * events could trigger 'simultaneously' raising two back-to-back NMIs. 
-- cgit v1.2.1 From 4b24a88b35e15e04bd8f2c5dda65b5dc8ebca05f Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 17 Mar 2010 23:21:01 +0200 Subject: perf_events: Fix resource leak in x86 __hw_perf_event_init() If reserve_pmc_hardware() succeeds but reserve_ds_buffers() fails, then we need to call release_pmc_hardware(). It won't be done by the destroy() callback because we return before setting it in case of error. Signed-off-by: Stephane Eranian Cc: Cc: peterz@infradead.org Cc: paulus@samba.org Cc: davem@davemloft.net Cc: fweisbec@gmail.com Cc: robert.richter@amd.com Cc: perfmon2-devel@lists.sf.net LKML-Reference: <4ba1568b.15185e0a.182a.7802@mx.google.com> Signed-off-by: Ingo Molnar -- arch/x86/kernel/cpu/perf_event.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) --- arch/x86/kernel/cpu/perf_event.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 14eca80918dc..f571f514de2a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -455,8 +455,11 @@ static int __hw_perf_event_init(struct perf_event *event) if (atomic_read(&active_events) == 0) { if (!reserve_pmc_hardware()) err = -EBUSY; - else + else { err = reserve_ds_buffers(); + if (err) + release_pmc_hardware(); + } } if (!err) atomic_inc(&active_events); -- cgit v1.2.1 From 11164cd4f6dab326a88bdf27f2f8f7c11977e91a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Mar 2010 14:08:44 +0100 Subject: perf, x86: Add Nehalem PMU programming errata workaround Implement the workaround for Intel Errata AAK100 and AAP53. Also, remove the Core-i7 name for Nehalem events since there are also Westmere based i7 chips.
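For illustration only (not part of the patch): the shape of the workaround hook this enables. The enable_all(int added) signature change in the hunks below lets a Nehalem-specific callback run the errata sequence only when counters were actually added. The function names come from perf_event_intel.c, which is outside this log, so treat them as assumptions; the magic event programming itself is omitted.

static void intel_pmu_nhm_enable_all(int added)
{
	if (added) {
		/*
		 * Errata AAK100/AAP53 workaround: before re-enabling the
		 * PMU with newly scheduled counters, program and then clear
		 * a small set of magic events on counters 0-2 (the event
		 * codes are omitted from this sketch).
		 */
	}
	intel_pmu_enable_all(added);
}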
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <1269608924.12097.147.camel@laptop> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index f571f514de2a..6f66d4a845ff 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -184,7 +184,7 @@ struct x86_pmu { int version; int (*handle_irq)(struct pt_regs *); void (*disable_all)(void); - void (*enable_all)(void); + void (*enable_all)(int added); void (*enable)(struct perf_event *); void (*disable)(struct perf_event *); int (*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc); @@ -576,7 +576,7 @@ void hw_perf_disable(void) x86_pmu.disable_all(); } -static void x86_pmu_enable_all(void) +static void x86_pmu_enable_all(int added) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; @@ -784,7 +784,7 @@ void hw_perf_enable(void) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct perf_event *event; struct hw_perf_event *hwc; - int i; + int i, added = cpuc->n_added; if (!x86_pmu_initialized()) return; @@ -836,7 +836,7 @@ void hw_perf_enable(void) cpuc->enabled = 1; barrier(); - x86_pmu.enable_all(); + x86_pmu.enable_all(added); } static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) -- cgit v1.2.1 From 948b1bb89a44561560531394c18da4a99215f772 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 29 Mar 2010 18:36:50 +0200 Subject: perf, x86: Undo some *_counter* -> *_event* renames The big rename, commit cdd6c48 ("perf: Do the big rename: Performance Counters -> Performance Events"), accidentally renamed some members of structs that were named after registers in the spec. To avoid confusion this patch reverts some changes. The related specs are the MSR descriptions in AMD's BKDGs and the ARCHITECTURAL PERFORMANCE MONITORING section in the Intel 64 and IA-32 Architectures Software Developer's Manuals.
This patch does: $ sed -i -e 's:num_events:num_counters:g' \ arch/x86/include/asm/perf_event.h \ arch/x86/kernel/cpu/perf_event_amd.c \ arch/x86/kernel/cpu/perf_event.c \ arch/x86/kernel/cpu/perf_event_intel.c \ arch/x86/kernel/cpu/perf_event_p6.c \ arch/x86/kernel/cpu/perf_event_p4.c \ arch/x86/oprofile/op_model_ppro.c $ sed -i -e 's:event_bits:cntval_bits:g' -e 's:event_mask:cntval_mask:g' \ arch/x86/kernel/cpu/perf_event_amd.c \ arch/x86/kernel/cpu/perf_event.c \ arch/x86/kernel/cpu/perf_event_intel.c \ arch/x86/kernel/cpu/perf_event_p6.c \ arch/x86/kernel/cpu/perf_event_p4.c Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <1269880612-25800-2-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 74 ++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 37 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b53435661813..9daaa1ef504c 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -195,10 +195,10 @@ struct x86_pmu { u64 (*event_map)(int); u64 (*raw_event)(u64); int max_events; - int num_events; - int num_events_fixed; - int event_bits; - u64 event_mask; + int num_counters; + int num_counters_fixed; + int cntval_bits; + u64 cntval_mask; int apic; u64 max_period; struct event_constraint * @@ -268,7 +268,7 @@ static u64 x86_perf_event_update(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - int shift = 64 - x86_pmu.event_bits; + int shift = 64 - x86_pmu.cntval_bits; u64 prev_raw_count, new_raw_count; int idx = hwc->idx; s64 delta; @@ -320,12 +320,12 @@ static bool reserve_pmc_hardware(void) if (nmi_watchdog == NMI_LOCAL_APIC) disable_lapic_nmi_watchdog(); - for (i = 0; i < x86_pmu.num_events; i++) { + for (i = 0; i < x86_pmu.num_counters; i++) { if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) goto perfctr_fail; } - for (i = 0; i < x86_pmu.num_events; i++) { + for (i = 0; i < x86_pmu.num_counters; i++) { if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) goto eventsel_fail; } @@ -336,7 +336,7 @@ eventsel_fail: for (i--; i >= 0; i--) release_evntsel_nmi(x86_pmu.eventsel + i); - i = x86_pmu.num_events; + i = x86_pmu.num_counters; perfctr_fail: for (i--; i >= 0; i--) @@ -352,7 +352,7 @@ static void release_pmc_hardware(void) { int i; - for (i = 0; i < x86_pmu.num_events; i++) { + for (i = 0; i < x86_pmu.num_counters; i++) { release_perfctr_nmi(x86_pmu.perfctr + i); release_evntsel_nmi(x86_pmu.eventsel + i); } @@ -547,7 +547,7 @@ static void x86_pmu_disable_all(void) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; - for (idx = 0; idx < x86_pmu.num_events; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { u64 val; if (!test_bit(idx, cpuc->active_mask)) @@ -582,7 +582,7 @@ static void x86_pmu_enable_all(int added) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; - for (idx = 0; idx < x86_pmu.num_events; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { struct perf_event *event = cpuc->events[idx]; u64 val; @@ -657,14 +657,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) * assign events to counters starting with most * constrained events. 
*/ - wmax = x86_pmu.num_events; + wmax = x86_pmu.num_counters; /* * when fixed event counters are present, * wmax is incremented by 1 to account * for one more choice */ - if (x86_pmu.num_events_fixed) + if (x86_pmu.num_counters_fixed) wmax++; for (w = 1, num = n; num && w <= wmax; w++) { @@ -714,7 +714,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, struct perf_event *event; int n, max_count; - max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; + max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed; /* current number of events already accepted */ n = cpuc->n_events; @@ -904,7 +904,7 @@ x86_perf_event_set_period(struct perf_event *event) atomic64_set(&hwc->prev_count, (u64)-left); wrmsrl(hwc->event_base + idx, - (u64)(-left) & x86_pmu.event_mask); + (u64)(-left) & x86_pmu.cntval_mask); perf_event_update_userpage(event); @@ -987,7 +987,7 @@ void perf_event_print_debug(void) unsigned long flags; int cpu, idx; - if (!x86_pmu.num_events) + if (!x86_pmu.num_counters) return; local_irq_save(flags); @@ -1011,7 +1011,7 @@ void perf_event_print_debug(void) } pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); - for (idx = 0; idx < x86_pmu.num_events; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); rdmsrl(x86_pmu.perfctr + idx, pmc_count); @@ -1024,7 +1024,7 @@ void perf_event_print_debug(void) pr_info("CPU#%d: gen-PMC%d left: %016llx\n", cpu, idx, prev_left); } - for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { + for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", @@ -1089,7 +1089,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx < x86_pmu.num_events; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) continue; @@ -1097,7 +1097,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) hwc = &event->hw; val = x86_perf_event_update(event); - if (val & (1ULL << (x86_pmu.event_bits - 1))) + if (val & (1ULL << (x86_pmu.cntval_bits - 1))) continue; /* @@ -1401,46 +1401,46 @@ void __init init_hw_perf_events(void) if (x86_pmu.quirks) x86_pmu.quirks(); - if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { + if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", - x86_pmu.num_events, X86_PMC_MAX_GENERIC); - x86_pmu.num_events = X86_PMC_MAX_GENERIC; + x86_pmu.num_counters, X86_PMC_MAX_GENERIC); + x86_pmu.num_counters = X86_PMC_MAX_GENERIC; } - x86_pmu.intel_ctrl = (1 << x86_pmu.num_events) - 1; - perf_max_events = x86_pmu.num_events; + x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; + perf_max_events = x86_pmu.num_counters; - if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { + if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", - x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); - x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; + x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); + x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; } x86_pmu.intel_ctrl |= - ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; + ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; perf_events_lapic_init(); register_die_notifier(&perf_event_nmi_notifier); unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 
1, - 0, x86_pmu.num_events); + __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, + 0, x86_pmu.num_counters); if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { if (c->cmask != INTEL_ARCH_FIXED_MASK) continue; - c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; - c->weight += x86_pmu.num_events; + c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; + c->weight += x86_pmu.num_counters; } } pr_info("... version: %d\n", x86_pmu.version); - pr_info("... bit width: %d\n", x86_pmu.event_bits); - pr_info("... generic registers: %d\n", x86_pmu.num_events); - pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); + pr_info("... bit width: %d\n", x86_pmu.cntval_bits); + pr_info("... generic registers: %d\n", x86_pmu.num_counters); + pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); pr_info("... max period: %016Lx\n", x86_pmu.max_period); - pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); + pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); perf_cpu_notifier(x86_pmu_notifier); -- cgit v1.2.1 From a098f4484bc7dae23f5b62360954007b99b64600 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 30 Mar 2010 11:28:21 +0200 Subject: perf, x86: implement ARCH_PERFMON_EVENTSEL bit masks ARCH_PERFMON_EVENTSEL bit masks are often used in the kernel. This patch adds macros for the bit masks and removes local defines. The function intel_pmu_raw_event() becomes x86_pmu_raw_event() which is generic for x86 models and same also for p6. Duplicate code is removed. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <20100330092821.GH11907@erda.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9daaa1ef504c..1dd42c18f1cb 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -143,13 +143,21 @@ struct cpu_hw_events { * Constraint on the Event code. */ #define INTEL_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) + EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) /* * Constraint on the Event code + UMask + fixed-mask + * + * filter mask to validate fixed counter events. + * the following filters disqualify for fixed counters: + * - inv + * - edge + * - cnt-mask + * The other filters are supported by fixed counters. + * The any-thread option is supported starting with v3. 
*/ #define FIXED_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) + EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) /* * Constraint on the Event code + UMask @@ -437,6 +445,11 @@ static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc return 0; } +static u64 x86_pmu_raw_event(u64 hw_event) +{ + return hw_event & X86_RAW_EVENT_MASK; +} + /* * Setup the hardware configuration for a given attr_type */ @@ -1427,7 +1440,7 @@ void __init init_hw_perf_events(void) if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { - if (c->cmask != INTEL_ARCH_FIXED_MASK) + if (c->cmask != X86_RAW_EVENT_MASK) continue; c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; -- cgit v1.2.1 From b4cdc5c264b35c67007800dec3928e9547a9d70b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 30 Mar 2010 17:00:06 +0200 Subject: perf, x86: Fix up the ANY flag stuff Stephane noticed that the ANY flag was in generic arch code, and Cyrill reported that it broke the P4 code. Solve this by merging x86_pmu::raw_event into x86_pmu::hw_config and provide intel_pmu and amd_pmu specific versions of this callback. The intel_pmu one deals with the ANY flag, the amd_pmu adds the few extra event bits AMD64 has. Reported-by: Stephane Eranian Reported-by: Cyrill Gorcunov Acked-by: Robert Richter Acked-by: Cyrill Gorcunov Acked-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <1269968113.5258.442.camel@laptop> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1dd42c18f1cb..65e9c5efb618 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -196,12 +196,11 @@ struct x86_pmu { void (*enable_all)(int added); void (*enable)(struct perf_event *); void (*disable)(struct perf_event *); - int (*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc); + int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; unsigned perfctr; u64 (*event_map)(int); - u64 (*raw_event)(u64); int max_events; int num_counters; int num_counters_fixed; @@ -426,28 +425,26 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) return 0; } -static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) +static int x86_pmu_hw_config(struct perf_event *event) { /* * Generate PMC IRQs: * (keep 'enabled' bit clear for now) */ - hwc->config = ARCH_PERFMON_EVENTSEL_INT; + event->hw.config = ARCH_PERFMON_EVENTSEL_INT; /* * Count user and OS events unless requested not to */ - if (!attr->exclude_user) - hwc->config |= ARCH_PERFMON_EVENTSEL_USR; - if (!attr->exclude_kernel) - hwc->config |= ARCH_PERFMON_EVENTSEL_OS; + if (!event->attr.exclude_user) + event->hw.config |= ARCH_PERFMON_EVENTSEL_USR; + if (!event->attr.exclude_kernel) + event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; - return 0; -} + if (event->attr.type == PERF_TYPE_RAW) + event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; -static u64 x86_pmu_raw_event(u64 hw_event) -{ - return hw_event & X86_RAW_EVENT_MASK; + return 0; } /* @@ -489,7 +486,7 @@ static int __hw_perf_event_init(struct perf_event *event) hwc->last_tag = ~0ULL; /* Processor specifics */ - err = x86_pmu.hw_config(attr, 
hwc); + err = x86_pmu.hw_config(event); if (err) return err; @@ -508,16 +505,8 @@ static int __hw_perf_event_init(struct perf_event *event) return -EOPNOTSUPP; } - /* - * Raw hw_event type provide the config in the hw_event structure - */ - if (attr->type == PERF_TYPE_RAW) { - hwc->config |= x86_pmu.raw_event(attr->config); - if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) && - perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + if (attr->type == PERF_TYPE_RAW) return 0; - } if (attr->type == PERF_TYPE_HW_CACHE) return set_ext_hw_attr(hwc, attr); -- cgit v1.2.1 From 6f4dee06fbf0133917f3d76fa3fb50e18b10c1f5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Mar 2010 23:47:01 +0100 Subject: perf: Drop the frame reliablity check It is useless now that we have a pure stack frame walker, as given addr are always reliable. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 65e9c5efb618..353a174adb44 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1602,8 +1602,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - if (reliable) - callchain_store(entry, addr); + callchain_store(entry, addr); } static const struct stacktrace_ops backtrace_ops = { -- cgit v1.2.1 From 39447b386c846bbf1c56f6403c5282837486200f Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Mon, 19 Apr 2010 13:32:41 +0800 Subject: perf: Enhance perf to allow for guest statistic collection from host Below patch introduces perf_guest_info_callbacks and related register/unregister functions. Add more PERF_RECORD_MISC_XXX bits meaning guest kernel and guest user space. Signed-off-by: Zhang Yanmin Signed-off-by: Avi Kivity --- arch/x86/kernel/cpu/perf_event.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 626154a9f535..2ea78abf69d9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1720,6 +1720,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) { struct perf_callchain_entry *entry; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return NULL; + } + if (in_nmi()) entry = &__get_cpu_var(pmc_nmi_entry); else @@ -1743,3 +1748,29 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski regs->cs = __KERNEL_CS; local_save_flags(regs->flags); } + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + unsigned long ip; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + ip = perf_guest_cbs->get_guest_ip(); + else + ip = instruction_pointer(regs); + return ip; +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + int misc = 0; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + misc |= perf_guest_cbs->is_user_mode() ? + PERF_RECORD_MISC_GUEST_USER : + PERF_RECORD_MISC_GUEST_KERNEL; + } else + misc |= user_mode(regs) ? 
PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; + if (regs->flags & PERF_EFLAGS_EXACT) + misc |= PERF_RECORD_MISC_EXACT; + + return misc; +} -- cgit v1.2.1 From dcf46b9443ad48a227a61713adea001228925adf Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Tue, 20 Apr 2010 10:13:58 +0800 Subject: perf & kvm: Clean up some of the guest profiling callback API details Fix some build bugs and programming style issues: - use valid C - fix up various style details Signed-off-by: Zhang Yanmin Cc: Avi Kivity Cc: Peter Zijlstra Cc: Sheng Yang Cc: Marcelo Tosatti Cc: Joerg Roedel Cc: Jes Sorensen Cc: Gleb Natapov Cc: Zachary Amsden Cc: zhiteng.huang@intel.com Cc: tim.c.chen@intel.com Cc: Arnaldo Carvalho de Melo LKML-Reference: <1271729638.2078.624.camel@ymzhang.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2ea78abf69d9..7de70613e6c3 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1752,23 +1752,31 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski unsigned long perf_instruction_pointer(struct pt_regs *regs) { unsigned long ip; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) ip = perf_guest_cbs->get_guest_ip(); else ip = instruction_pointer(regs); + return ip; } unsigned long perf_misc_flags(struct pt_regs *regs) { int misc = 0; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - misc |= perf_guest_cbs->is_user_mode() ? - PERF_RECORD_MISC_GUEST_USER : - PERF_RECORD_MISC_GUEST_KERNEL; - } else - misc |= user_mode(regs) ? PERF_RECORD_MISC_USER : - PERF_RECORD_MISC_KERNEL; + if (perf_guest_cbs->is_user_mode()) + misc |= PERF_RECORD_MISC_GUEST_USER; + else + misc |= PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; + } + if (regs->flags & PERF_EFLAGS_EXACT) misc |= PERF_RECORD_MISC_EXACT; -- cgit v1.2.1 From 4261e0e0efd9e04b6c69e0773c3cf4d6f337c416 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 13 Apr 2010 22:23:10 +0200 Subject: perf, x86: Move perfctr init code to x86_setup_perfctr() Split __hw_perf_event_init() to configure pmu events other than perfctrs. Perfctr code is moved to a separate function x86_setup_perfctr(). This and the following patches refactor the code. Split in multiple patches for better review.
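To make this and the follow-up refactoring patches easier to follow, here is a reduced sketch of the call structure the series converges on. It is control flow only, not the literal kernel code; the real bodies are in the diffs below, and the final wiring of x86_setup_perfctr() into the .hw_config() callback only lands a few patches later.

	/* reduced sketch: generic bits in .hw_config(), counter setup split out */
	static int x86_setup_perfctr(struct perf_event *event)
	{
		/* sample period defaults, event map lookup, BTS checks ... */
		return 0;
	}

	static int x86_pmu_hw_config(struct perf_event *event)
	{
		/* generic EVENTSEL bits: INT, USR/OS, raw config ... */
		return x86_setup_perfctr(event);	/* wired up later in the series */
	}

	static int __hw_perf_event_init(struct perf_event *event)
	{
		/* reserve hardware, set per-event defaults ... */
		return x86_pmu.hw_config(event);	/* per-vendor callback */
	}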
Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <1271190201-25705-2-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 7de70613e6c3..801441a54245 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -426,6 +426,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) return 0; } +static int x86_setup_perfctr(struct perf_event *event); + static int x86_pmu_hw_config(struct perf_event *event) { /* @@ -453,9 +455,6 @@ static int x86_pmu_hw_config(struct perf_event *event) */ static int __hw_perf_event_init(struct perf_event *event) { - struct perf_event_attr *attr = &event->attr; - struct hw_perf_event *hwc = &event->hw; - u64 config; int err; if (!x86_pmu_initialized()) @@ -482,15 +481,24 @@ static int __hw_perf_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; - hwc->idx = -1; - hwc->last_cpu = -1; - hwc->last_tag = ~0ULL; + event->hw.idx = -1; + event->hw.last_cpu = -1; + event->hw.last_tag = ~0ULL; /* Processor specifics */ err = x86_pmu.hw_config(event); if (err) return err; + return x86_setup_perfctr(event); +} + +static int x86_setup_perfctr(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + u64 config; + if (!hwc->sample_period) { hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; -- cgit v1.2.1 From c1726f343b3bfc2ee037e191907c632a31903021 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 13 Apr 2010 22:23:11 +0200 Subject: perf, x86: Move x86_setup_perfctr() Move x86_setup_perfctr(), no other changes made. 
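The function being moved is also where the BTS constraints are enforced (see the checks in the diff below). As a hedged illustration only, not part of the patch, a user-space attribute set that satisfies those checks would look roughly like this; the field names come from the perf ABI of this era, and the period of 1 on branch instructions is what selects the BTS path:

	/* illustrative only: an attr that passes the BTS checks in x86_setup_perfctr() */
	struct perf_event_attr attr = {
		.size		= sizeof(attr),
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
		.sample_period	= 1,	/* period == 1 selects branch tracing (BTS) */
		.sample_type	= PERF_SAMPLE_IP,
		.exclude_kernel	= 1,	/* BTS is only allowed for user mode here */
	};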
Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <1271190201-25705-3-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 120 +++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 61 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 801441a54245..3d3bceb9e830 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -426,7 +426,65 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) return 0; } -static int x86_setup_perfctr(struct perf_event *event); +static int x86_setup_perfctr(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + u64 config; + + if (!hwc->sample_period) { + hwc->sample_period = x86_pmu.max_period; + hwc->last_period = hwc->sample_period; + atomic64_set(&hwc->period_left, hwc->sample_period); + } else { + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * events (user-space has to fall back and + * sample via a hrtimer based software event): + */ + if (!x86_pmu.apic) + return -EOPNOTSUPP; + } + + if (attr->type == PERF_TYPE_RAW) + return 0; + + if (attr->type == PERF_TYPE_HW_CACHE) + return set_ext_hw_attr(hwc, attr); + + if (attr->config >= x86_pmu.max_events) + return -EINVAL; + + /* + * The generic map: + */ + config = x86_pmu.event_map(attr->config); + + if (config == 0) + return -ENOENT; + + if (config == -1LL) + return -EINVAL; + + /* + * Branch tracing: + */ + if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && + (hwc->sample_period == 1)) { + /* BTS is not supported by this architecture. */ + if (!x86_pmu.bts) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (!attr->exclude_kernel) + return -EOPNOTSUPP; + } + + hwc->config |= config; + + return 0; +} static int x86_pmu_hw_config(struct perf_event *event) { @@ -493,66 +551,6 @@ static int __hw_perf_event_init(struct perf_event *event) return x86_setup_perfctr(event); } -static int x86_setup_perfctr(struct perf_event *event) -{ - struct perf_event_attr *attr = &event->attr; - struct hw_perf_event *hwc = &event->hw; - u64 config; - - if (!hwc->sample_period) { - hwc->sample_period = x86_pmu.max_period; - hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); - } else { - /* - * If we have a PMU initialized but no APIC - * interrupts, we cannot sample hardware - * events (user-space has to fall back and - * sample via a hrtimer based software event): - */ - if (!x86_pmu.apic) - return -EOPNOTSUPP; - } - - if (attr->type == PERF_TYPE_RAW) - return 0; - - if (attr->type == PERF_TYPE_HW_CACHE) - return set_ext_hw_attr(hwc, attr); - - if (attr->config >= x86_pmu.max_events) - return -EINVAL; - - /* - * The generic map: - */ - config = x86_pmu.event_map(attr->config); - - if (config == 0) - return -ENOENT; - - if (config == -1LL) - return -EINVAL; - - /* - * Branch tracing: - */ - if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && - (hwc->sample_period == 1)) { - /* BTS is not supported by this architecture. */ - if (!x86_pmu.bts) - return -EOPNOTSUPP; - - /* BTS is currently only allowed for user-mode. 
*/ - if (!attr->exclude_kernel) - return -EOPNOTSUPP; - } - - hwc->config |= config; - - return 0; -} - static void x86_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); -- cgit v1.2.1 From 9d0fcba67e47ff398a6fa86476d4884d472dc98a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 13 Apr 2010 22:23:12 +0200 Subject: perf, x86: Call x86_setup_perfctr() from .hw_config() The perfctr setup calls are in the corresponding .hw_config() functions now. This makes it possible to introduce config functions for other pmu events that are not perfctr specific. Also, all of a sudden the code looks much nicer. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <1271190201-25705-4-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3d3bceb9e830..c2c1e10f7b03 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -505,7 +505,7 @@ static int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == PERF_TYPE_RAW) event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; - return 0; + return x86_setup_perfctr(event); } /* @@ -543,12 +543,7 @@ static int __hw_perf_event_init(struct perf_event *event) event->hw.last_cpu = -1; event->hw.last_tag = ~0ULL; - /* Processor specifics */ - err = x86_pmu.hw_config(event); - if (err) - return err; - - return x86_setup_perfctr(event); + return x86_pmu.hw_config(event); } static void x86_pmu_disable_all(void) -- cgit v1.2.1 From 31fa58af57c41d2912debf62d47d5811062411f1 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 13 Apr 2010 22:23:14 +0200 Subject: perf, x86: Pass enable bit mask to __x86_pmu_enable_event() To reuse this function for events with different enable bit masks, this mask is part of the function's argument list now. The function will be used later to control ibs events too. 
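As a reading aid for the patch below, a minimal sketch of the two kinds of callers the parameterized helper allows. The IBS user does not exist yet at this point in the series, so IBS_ENABLE_BIT is a placeholder name, not a real constant:

	/* sketch: same helper, different enable bit per event type */
	static void enable_example(struct perf_event *event, bool is_ibs)
	{
		if (!is_ibs)
			/* architectural perfctr, as in x86_pmu_enable_event() */
			__x86_pmu_enable_event(&event->hw,
					       ARCH_PERFMON_EVENTSEL_ENABLE);
		else
			/* hypothetical future IBS caller with its own enable bit */
			__x86_pmu_enable_event(&event->hw, IBS_ENABLE_BIT);
	}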
Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <1271190201-25705-6-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c2c1e10f7b03..4e218d7ac852 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -844,10 +844,10 @@ void hw_perf_enable(void) x86_pmu.enable_all(added); } -static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) +static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, + u64 enable_mask) { - wrmsrl(hwc->config_base + hwc->idx, - hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); + wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); } static inline void x86_pmu_disable_event(struct perf_event *event) @@ -919,7 +919,8 @@ static void x86_pmu_enable_event(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); if (cpuc->enabled) - __x86_pmu_enable_event(&event->hw); + __x86_pmu_enable_event(&event->hw, + ARCH_PERFMON_EVENTSEL_ENABLE); } /* -- cgit v1.2.1 From a1f2b70a942b8d858a0ab02567da3999b60a99b2 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 13 Apr 2010 22:23:15 +0200 Subject: perf, x86: Use weight instead of cmask in for_each_event_constraint() There may exist constraints with a cmask set to zero. In this case for_each_event_constraint() will not work properly. Now weight is used instead of the cmask for loop exit detection. Weight is always a value other than zero since the default contains the HWEIGHT from the counter mask and in other cases a value of zero does not fit either. This is in preparation for IBS event constraints that won't have a cmask. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <1271190201-25705-7-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4e218d7ac852..4a3f1f2b9b91 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -170,7 +170,7 @@ struct cpu_hw_events { EVENT_CONSTRAINT(0, 0, 0) #define for_each_event_constraint(e, c) \ - for ((e) = (c); (e)->cmask; (e)++) + for ((e) = (c); (e)->weight; (e)++) union perf_capabilities { struct { -- cgit v1.2.1 From ab608344bcbde4f55ec4cd911b686b0ce3eae076 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 8 Apr 2010 23:03:20 +0200 Subject: perf, x86: Improve the PEBS ABI Rename perf_event_attr::precise to perf_event_attr::precise_ip and widen it to 2 bits. This new field describes the required precision of the PERF_SAMPLE_IP field: 0 - SAMPLE_IP can have arbitrary skid 1 - SAMPLE_IP must have constant skid 2 - SAMPLE_IP requested to have 0 skid 3 - SAMPLE_IP must have 0 skid And modify the Intel PEBS code accordingly. The PEBS implementation now supports up to precise_ip == 2, where we perform the IP fixup. Also s/PERF_RECORD_MISC_EXACT/&_IP/ to clarify its meaning: this bit should be set for each PERF_SAMPLE_IP field known to match the actual instruction triggering the event. This new scheme allows for a PEBS mode that uses the buffer for more than a single event.
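Since precise_ip is the user-visible part of this change, a minimal user-space sketch of how a precise sample is requested may help. The event choice and period are arbitrary; per the hw_config check in the diff below, a precision level the PMU cannot honour is rejected with EOPNOTSUPP:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP;
	attr.precise_ip = 2;	/* request zero skid; 1 = constant skid, 3 = must be exact */

	/* there is no glibc wrapper for perf_event_open, so call it directly */
	fd = syscall(__NR_perf_event_open, &attr, 0 /* pid */, -1 /* cpu */,
		     -1 /* group_fd */, 0 /* flags */);
	if (fd < 0) {
		perror("perf_event_open");	/* EOPNOTSUPP if precise_ip exceeds PMU support */
		return 1;
	}

	/* samples with PERF_RECORD_MISC_EXACT_IP in header.misc carry an exact IP */
	close(fd);
	return 0;
}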
Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Stephane Eranian LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4a3f1f2b9b91..27fa9eeed024 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -488,6 +488,21 @@ static int x86_setup_perfctr(struct perf_event *event) static int x86_pmu_hw_config(struct perf_event *event) { + if (event->attr.precise_ip) { + int precise = 0; + + /* Support for constant skid */ + if (x86_pmu.pebs) + precise++; + + /* Support for IP fixup */ + if (x86_pmu.lbr_nr) + precise++; + + if (event->attr.precise_ip > precise) + return -EOPNOTSUPP; + } + /* * Generate PMC IRQs: * (keep 'enabled' bit clear for now) @@ -1780,7 +1795,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs) } if (regs->flags & PERF_EFLAGS_EXACT) - misc |= PERF_RECORD_MISC_EXACT; + misc |= PERF_RECORD_MISC_EXACT_IP; return misc; } -- cgit v1.2.1 From 4d1c52b02d977d884abb21d0bbaba6b5d6bc8374 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Fri, 23 Apr 2010 13:56:12 +0800 Subject: perf, x86: implement group scheduling transactional APIs Convert to the transactional PMU API and remove the duplication of group_sched_in(). Reviewed-by: Stephane Eranian Signed-off-by: Lin Ming Signed-off-by: Peter Zijlstra Cc: David Miller Cc: Paul Mackerras LKML-Reference: <1272002172.5707.61.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 180 +++++++++++++++------------------------ 1 file changed, 67 insertions(+), 113 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 27fa9eeed024..fd4db0db3708 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -110,6 +110,8 @@ struct cpu_hw_events { u64 tags[X86_PMC_IDX_MAX]; struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ + unsigned int group_flag; + /* * Intel DebugStore bits */ @@ -961,6 +963,14 @@ static int x86_pmu_enable(struct perf_event *event) if (n < 0) return n; + /* + * If group events scheduling transaction was started, + * skip the schedulability test here, it will be peformed + * at commit time(->commit_txn) as a whole + */ + if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) + goto out; + ret = x86_pmu.schedule_events(cpuc, n, assign); if (ret) return ret; @@ -970,6 +980,7 @@ static int x86_pmu_enable(struct perf_event *event) */ memcpy(cpuc->assign, assign, n*sizeof(int)); +out: cpuc->n_events = n; cpuc->n_added += n - n0; @@ -1227,119 +1238,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) return &unconstrained; } -static int x86_event_sched_in(struct perf_event *event, - struct perf_cpu_context *cpuctx) -{ - int ret = 0; - - event->state = PERF_EVENT_STATE_ACTIVE; - event->oncpu = smp_processor_id(); - event->tstamp_running += event->ctx->time - event->tstamp_stopped; - - if (!is_x86_event(event)) - ret = event->pmu->enable(event); - - if (!ret && !is_software_event(event)) - cpuctx->active_oncpu++; - - if (!ret && event->attr.exclusive) - cpuctx->exclusive = 1; - - return ret; -} - -static void x86_event_sched_out(struct perf_event *event, - struct perf_cpu_context *cpuctx) -{ - event->state = PERF_EVENT_STATE_INACTIVE; - event->oncpu = -1; - - if 
(!is_x86_event(event)) - event->pmu->disable(event); - - event->tstamp_running -= event->ctx->time - event->tstamp_stopped; - - if (!is_software_event(event)) - cpuctx->active_oncpu--; - - if (event->attr.exclusive || !cpuctx->active_oncpu) - cpuctx->exclusive = 0; -} - -/* - * Called to enable a whole group of events. - * Returns 1 if the group was enabled, or -EAGAIN if it could not be. - * Assumes the caller has disabled interrupts and has - * frozen the PMU with hw_perf_save_disable. - * - * called with PMU disabled. If successful and return value 1, - * then guaranteed to call perf_enable() and hw_perf_enable() - */ -int hw_perf_group_sched_in(struct perf_event *leader, - struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct perf_event *sub; - int assign[X86_PMC_IDX_MAX]; - int n0, n1, ret; - - if (!x86_pmu_initialized()) - return 0; - - /* n0 = total number of events */ - n0 = collect_events(cpuc, leader, true); - if (n0 < 0) - return n0; - - ret = x86_pmu.schedule_events(cpuc, n0, assign); - if (ret) - return ret; - - ret = x86_event_sched_in(leader, cpuctx); - if (ret) - return ret; - - n1 = 1; - list_for_each_entry(sub, &leader->sibling_list, group_entry) { - if (sub->state > PERF_EVENT_STATE_OFF) { - ret = x86_event_sched_in(sub, cpuctx); - if (ret) - goto undo; - ++n1; - } - } - /* - * copy new assignment, now we know it is possible - * will be used by hw_perf_enable() - */ - memcpy(cpuc->assign, assign, n0*sizeof(int)); - - cpuc->n_events = n0; - cpuc->n_added += n1; - ctx->nr_active += n1; - - /* - * 1 means successful and events are active - * This is not quite true because we defer - * actual activation until hw_perf_enable() but - * this way we* ensure caller won't try to enable - * individual events - */ - return 1; -undo: - x86_event_sched_out(leader, cpuctx); - n0 = 1; - list_for_each_entry(sub, &leader->sibling_list, group_entry) { - if (sub->state == PERF_EVENT_STATE_ACTIVE) { - x86_event_sched_out(sub, cpuctx); - if (++n0 == n1) - break; - } - } - return ret; -} - #include "perf_event_amd.c" #include "perf_event_p6.c" #include "perf_event_p4.c" @@ -1471,6 +1369,59 @@ static inline void x86_pmu_read(struct perf_event *event) x86_perf_event_update(event); } +/* + * Start group events scheduling transaction + * Set the flag to make pmu::enable() not perform the + * schedulability test, it will be performed at commit time + */ +static void x86_pmu_start_txn(const struct pmu *pmu) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + cpuc->group_flag |= PERF_EVENT_TXN_STARTED; +} + +/* + * Stop group events scheduling transaction + * Clear the flag and pmu::enable() will perform the + * schedulability test. 
+ */ +static void x86_pmu_cancel_txn(const struct pmu *pmu) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; +} + +/* + * Commit group events scheduling transaction + * Perform the group schedulability test as a whole + * Return 0 if success + */ +static int x86_pmu_commit_txn(const struct pmu *pmu) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int assign[X86_PMC_IDX_MAX]; + int n, ret; + + n = cpuc->n_events; + + if (!x86_pmu_initialized()) + return -EAGAIN; + + ret = x86_pmu.schedule_events(cpuc, n, assign); + if (ret) + return ret; + + /* + * copy new assignment, now we know it is possible + * will be used by hw_perf_enable() + */ + memcpy(cpuc->assign, assign, n*sizeof(int)); + + return 0; +} + static const struct pmu pmu = { .enable = x86_pmu_enable, .disable = x86_pmu_disable, @@ -1478,6 +1429,9 @@ static const struct pmu pmu = { .stop = x86_pmu_stop, .read = x86_pmu_read, .unthrottle = x86_pmu_unthrottle, + .start_txn = x86_pmu_start_txn, + .cancel_txn = x86_pmu_cancel_txn, + .commit_txn = x86_pmu_commit_txn, }; /* -- cgit v1.2.1 From 87f44bbc246c5244c76a701f8eefba7788bce64a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 May 2010 11:02:55 +0200 Subject: perf, trace: Fix !x86 build bug Patch b7e2ecef92 (perf, trace: Optimize tracepoints by removing IRQ-disable from perf/tracepoint interaction) made the unfortunate mistake of assuming the world is x86 only, correct this. The problem was that perf_fetch_caller_regs() did local_save_flags() into regs->flags, and I re-used that to remove another local_save_flags(), forgetting !x86 doesn't have regs->flags. Do the reverse, remove the local_save_flags() from perf_fetch_caller_regs() and let the ftrace site do the local_save_flags() instead. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Cc: acme@redhat.com Cc: efault@gmx.de Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org LKML-Reference: <1274778175.5882.623.camel@twins> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index fd4db0db3708..c77586061bcb 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1717,7 +1717,11 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski */ regs->bp = rewind_frame_pointer(skip + 1); regs->cs = __KERNEL_CS; - local_save_flags(regs->flags); + /* + * We abuse bit 3 to pass exact information, see perf_misc_flags + * and the comment with PERF_EFLAGS_EXACT. + */ + regs->flags = 0; } unsigned long perf_instruction_pointer(struct pt_regs *regs) -- cgit v1.2.1
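As a closing illustration of the transactional API added above (commit 4d1c52b0), this is roughly how the generic code is expected to drive the three callbacks when scheduling an event group. It is a sketch of the calling convention only, not the actual generic perf code; the schedulability of the whole group is tested once at commit time, and a failed commit or a failed enable tears the transaction down:

	/* sketch: add a whole group under one transaction */
	static int group_sched_in_sketch(const struct pmu *pmu,
					 struct perf_event *leader)
	{
		struct perf_event *sub;

		pmu->start_txn(pmu);		/* defer the schedulability test */

		if (pmu->enable(leader))
			goto fail;

		list_for_each_entry(sub, &leader->sibling_list, group_entry) {
			if (pmu->enable(sub))
				goto fail;
		}

		if (!pmu->commit_txn(pmu))	/* test the whole group at once */
			return 0;
	fail:
		pmu->cancel_txn(pmu);
		return -EAGAIN;
	}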