Diffstat (limited to 'arch/x86/events/intel')
-rw-r--r--   arch/x86/events/intel/bts.c           | 126
-rw-r--r--   arch/x86/events/intel/core.c          |  19
-rw-r--r--   arch/x86/events/intel/cqm.c           |  58
-rw-r--r--   arch/x86/events/intel/cstate.c        |  51
-rw-r--r--   arch/x86/events/intel/ds.c            |  19
-rw-r--r--   arch/x86/events/intel/pt.c            |  18
-rw-r--r--   arch/x86/events/intel/rapl.c          |  84
-rw-r--r--   arch/x86/events/intel/uncore.c        | 135
-rw-r--r--   arch/x86/events/intel/uncore.h        |   1
-rw-r--r--   arch/x86/events/intel/uncore_snb.c    |  14
-rw-r--r--   arch/x86/events/intel/uncore_snbep.c  |  10
11 files changed, 264 insertions, 271 deletions
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 0a6e393a2e62..982c9e31daca 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -31,7 +31,17 @@ struct bts_ctx {
 	struct perf_output_handle	handle;
 	struct debug_store		ds_back;
-	int				started;
+	int				state;
+};
+
+/* BTS context states: */
+enum {
+	/* no ongoing AUX transactions */
+	BTS_STATE_STOPPED = 0,
+	/* AUX transaction is on, BTS tracing is disabled */
+	BTS_STATE_INACTIVE,
+	/* AUX transaction is on, BTS tracing is running */
+	BTS_STATE_ACTIVE,
 };
 
 static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
@@ -204,6 +214,15 @@ static void bts_update(struct bts_ctx *bts)
 static int
 bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
 
+/*
+ * Ordering PMU callbacks wrt themselves and the PMI is done by means
+ * of bts::state, which:
+ *  - is set when bts::handle::event is valid, that is, between
+ *    perf_aux_output_begin() and perf_aux_output_end();
+ *  - is zero otherwise;
+ *  - is ordered against bts::handle::event with a compiler barrier.
+ */
+
 static void __bts_event_start(struct perf_event *event)
 {
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
@@ -221,10 +240,13 @@ static void __bts_event_start(struct perf_event *event)
 
 	/*
 	 * local barrier to make sure that ds configuration made it
-	 * before we enable BTS
+	 * before we enable BTS and bts::state goes ACTIVE
 	 */
 	wmb();
 
+	/* INACTIVE/STOPPED -> ACTIVE */
+	WRITE_ONCE(bts->state, BTS_STATE_ACTIVE);
+
 	intel_pmu_enable_bts(config);
 }
 
@@ -251,9 +273,6 @@ static void bts_event_start(struct perf_event *event, int flags)
 
 	__bts_event_start(event);
 
-	/* PMI handler: this counter is running and likely generating PMIs */
-	ACCESS_ONCE(bts->started) = 1;
-
 	return;
 
 fail_end_stop:
@@ -263,30 +282,34 @@ fail_stop:
 	event->hw.state = PERF_HES_STOPPED;
 }
 
-static void __bts_event_stop(struct perf_event *event)
+static void __bts_event_stop(struct perf_event *event, int state)
 {
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+	/* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */
+	WRITE_ONCE(bts->state, state);
+
 	/*
 	 * No extra synchronization is mandated by the documentation to have
 	 * BTS data stores globally visible.
 	 */
 	intel_pmu_disable_bts();
-
-	if (event->hw.state & PERF_HES_STOPPED)
-		return;
-
-	ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
 }
 
 static void bts_event_stop(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-	struct bts_buffer *buf = perf_get_aux(&bts->handle);
+	struct bts_buffer *buf = NULL;
+	int state = READ_ONCE(bts->state);
 
-	/* PMI handler: don't restart this counter */
-	ACCESS_ONCE(bts->started) = 0;
+	if (state == BTS_STATE_ACTIVE)
+		__bts_event_stop(event, BTS_STATE_STOPPED);
 
-	__bts_event_stop(event);
+	if (state != BTS_STATE_STOPPED)
+		buf = perf_get_aux(&bts->handle);
+
+	event->hw.state |= PERF_HES_STOPPED;
 
 	if (flags & PERF_EF_UPDATE) {
 		bts_update(bts);
@@ -296,6 +319,7 @@ static void bts_event_stop(struct perf_event *event, int flags)
 				bts->handle.head =
 					local_xchg(&buf->data_size,
 						   buf->nr_pages << PAGE_SHIFT);
+
 			perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
 					    !!local_xchg(&buf->lost, 0));
 		}
@@ -310,8 +334,20 @@ static void bts_event_stop(struct perf_event *event, int flags)
 void intel_bts_enable_local(void)
 {
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	int state = READ_ONCE(bts->state);
+
+	/*
+	 * Here we transition from INACTIVE to ACTIVE;
+	 * if we instead are STOPPED from the interrupt handler,
+	 * stay that way. Can't be ACTIVE here though.
+	 */
+	if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE))
+		return;
+
+	if (state == BTS_STATE_STOPPED)
+		return;
 
-	if (bts->handle.event && bts->started)
+	if (bts->handle.event)
 		__bts_event_start(bts->handle.event);
 }
 
@@ -319,8 +355,15 @@ void intel_bts_disable_local(void)
 {
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
 
+	/*
+	 * Here we transition from ACTIVE to INACTIVE;
+	 * do nothing for STOPPED or INACTIVE.
+	 */
+	if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE)
+		return;
+
 	if (bts->handle.event)
-		__bts_event_stop(bts->handle.event);
+		__bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE);
 }
 
 static int
@@ -335,8 +378,6 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
 		return 0;
 
 	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
-	if (WARN_ON_ONCE(head != local_read(&buf->head)))
-		return -EINVAL;
 
 	phys = &buf->buf[buf->cur_buf];
 	space = phys->offset + phys->displacement + phys->size - head;
@@ -403,22 +444,37 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
 
 int intel_bts_interrupt(void)
 {
+	struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds;
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
 	struct perf_event *event = bts->handle.event;
 	struct bts_buffer *buf;
 	s64 old_head;
-	int err;
+	int err = -ENOSPC, handled = 0;
 
-	if (!event || !bts->started)
-		return 0;
+	/*
+	 * The only surefire way of knowing if this NMI is ours is by checking
+	 * the write ptr against the PMI threshold.
+	 */
+	if (ds && (ds->bts_index >= ds->bts_interrupt_threshold))
+		handled = 1;
+
+	/*
+	 * this is wrapped in intel_bts_enable_local/intel_bts_disable_local,
+	 * so we can only be INACTIVE or STOPPED
+	 */
+	if (READ_ONCE(bts->state) == BTS_STATE_STOPPED)
+		return handled;
 
 	buf = perf_get_aux(&bts->handle);
+	if (!buf)
+		return handled;
+
 	/*
 	 * Skip snapshot counters: they don't use the interrupt, but
 	 * there's no other way of telling, because the pointer will
 	 * keep moving
 	 */
-	if (!buf || buf->snapshot)
+	if (buf->snapshot)
 		return 0;
 
 	old_head = local_read(&buf->head);
@@ -426,18 +482,27 @@ int intel_bts_interrupt(void)
 
 	/* no new data */
 	if (old_head == local_read(&buf->head))
-		return 0;
+		return handled;
 
 	perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
 			    !!local_xchg(&buf->lost, 0));
 
 	buf = perf_aux_output_begin(&bts->handle, event);
-	if (!buf)
-		return 1;
+	if (buf)
+		err = bts_buffer_reset(buf, &bts->handle);
+
+	if (err) {
+		WRITE_ONCE(bts->state, BTS_STATE_STOPPED);
 
-	err = bts_buffer_reset(buf, &bts->handle);
-	if (err)
-		perf_aux_output_end(&bts->handle, 0, false);
+		if (buf) {
+			/*
+			 * BTS_STATE_STOPPED should be visible before
+			 * cleared handle::event
+			 */
+			barrier();
+			perf_aux_output_end(&bts->handle, 0, false);
+		}
+	}
 
 	return 1;
 }
@@ -519,7 +584,8 @@ static __init int bts_init(void)
 	if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
 		return -ENODEV;
 
-	bts_pmu.capabilities	= PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
+	bts_pmu.capabilities	= PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
+				  PERF_PMU_CAP_EXCLUSIVE;
 	bts_pmu.task_ctx_nr	= perf_sw_context;
 	bts_pmu.event_init	= bts_event_init;
 	bts_pmu.add		= bts_event_add;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 0974ba11e954..4c9a79b9cd69 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1730,9 +1730,11 @@ static __initconst const u64 knl_hw_cache_extra_regs
  * disabled state if called consecutively.
  *
  * During consecutive calls, the same disable value will be written to related
- * registers, so the PMU state remains unchanged. hw.state in
- * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive
- * calls.
+ * registers, so the PMU state remains unchanged.
+ *
+ * intel_bts events don't coexist with intel PMU's BTS events because of
+ * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them
+ * disabled around intel PMU's event batching etc, only inside the PMI handler.
  */
 static void __intel_pmu_disable_all(void)
 {
@@ -1742,8 +1744,6 @@ static void __intel_pmu_disable_all(void)
 
 	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
 		intel_pmu_disable_bts();
-	else
-		intel_bts_disable_local();
 
 	intel_pmu_pebs_disable_all();
 }
@@ -1771,8 +1771,7 @@ static void __intel_pmu_enable_all(int added, bool pmi)
 			return;
 
 		intel_pmu_enable_bts(event->hw.config);
-	} else
-		intel_bts_enable_local();
+	}
 }
 
 static void intel_pmu_enable_all(int added)
 {
@@ -2073,6 +2072,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	 */
 	if (!x86_pmu.late_ack)
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
+	intel_bts_disable_local();
 	__intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	handled += intel_bts_interrupt();
@@ -2172,6 +2172,7 @@ done:
 	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
 	if (cpuc->enabled)
 		__intel_pmu_enable_all(0, true);
+	intel_bts_enable_local();
 
 	/*
 	 * Only unmask the NMI after the overflow counters
@@ -3109,7 +3110,7 @@ static int intel_pmu_cpu_prepare(int cpu)
 		cpuc->excl_thread_id = 0;
 	}
 
-	return NOTIFY_OK;
+	return 0;
 
 err_constraint_list:
 	kfree(cpuc->constraint_list);
@@ -3120,7 +3121,7 @@ err_shared_regs:
 	cpuc->shared_regs = NULL;
 
 err:
-	return NOTIFY_BAD;
+	return -ENOMEM;
 }
 
 static void intel_pmu_cpu_starting(int cpu)
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 7b5fd811ef45..8f82b02934fa 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -458,6 +458,11 @@ static void __intel_cqm_event_count(void *info);
 static void init_mbm_sample(u32 rmid, u32 evt_type);
 static void __intel_mbm_event_count(void *info);
 
+static bool is_cqm_event(int e)
+{
+	return (e == QOS_L3_OCCUP_EVENT_ID);
+}
+
 static bool is_mbm_event(int e)
 {
 	return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
 }
@@ -1366,6 +1371,10 @@ static int intel_cqm_event_init(struct perf_event *event)
 	    (event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
 		return -EINVAL;
 
+	if ((is_cqm_event(event->attr.config) && !cqm_enabled) ||
+	    (is_mbm_event(event->attr.config) && !mbm_enabled))
+		return -EINVAL;
+
 	/* unsupported modes and filters */
 	if (event->attr.exclude_user   ||
 	    event->attr.exclude_kernel ||
@@ -1577,7 +1586,7 @@ static inline void cqm_pick_event_reader(int cpu)
 		cpumask_set_cpu(cpu, &cqm_cpumask);
 }
 
-static void intel_cqm_cpu_starting(unsigned int cpu)
+static int intel_cqm_cpu_starting(unsigned int cpu)
 {
 	struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -1588,39 +1597,26 @@ static void intel_cqm_cpu_starting(unsigned int cpu)
 
 	WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
 	WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
+
+	cqm_pick_event_reader(cpu);
+	return 0;
 }
 
-static void intel_cqm_cpu_exit(unsigned int cpu)
+static int intel_cqm_cpu_exit(unsigned int cpu)
 {
 	int target;
 
 	/* Is @cpu the current cqm reader for this package ? */
 	if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
-		return;
+		return 0;
 
 	/* Find another online reader in this package */
 	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
 
 	if (target < nr_cpu_ids)
 		cpumask_set_cpu(target, &cqm_cpumask);
-}
 
-static int intel_cqm_cpu_notifier(struct notifier_block *nb,
-				  unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DOWN_PREPARE:
-		intel_cqm_cpu_exit(cpu);
-		break;
-	case CPU_STARTING:
-		intel_cqm_cpu_starting(cpu);
-		cqm_pick_event_reader(cpu);
-		break;
-	}
-
-	return NOTIFY_OK;
+	return 0;
 }
 
 static const struct x86_cpu_id intel_cqm_match[] = {
@@ -1682,7 +1678,7 @@ out:
 static int __init intel_cqm_init(void)
 {
 	char *str = NULL, scale[20];
-	int i, cpu, ret;
+	int cpu, ret;
 
 	if (x86_match_cpu(intel_cqm_match))
 		cqm_enabled = true;
@@ -1705,8 +1701,7 @@ static int __init intel_cqm_init(void)
 	 *
 	 * Also, check that the scales match on all cpus.
 	 */
-	cpu_notifier_register_begin();
-
+	get_online_cpus();
 	for_each_online_cpu(cpu) {
 		struct cpuinfo_x86 *c = &cpu_data(cpu);
 
@@ -1743,11 +1738,6 @@ static int __init intel_cqm_init(void)
 	if (ret)
 		goto out;
 
-	for_each_online_cpu(i) {
-		intel_cqm_cpu_starting(i);
-		cqm_pick_event_reader(i);
-	}
-
 	if (mbm_enabled)
 		ret = intel_mbm_init();
 	if (ret && !cqm_enabled)
@@ -1772,12 +1762,18 @@ static int __init intel_cqm_init(void)
 		pr_info("Intel MBM enabled\n");
 
 	/*
-	 * Register the hot cpu notifier once we are sure cqm
+	 * Setup the hot cpu notifier once we are sure cqm
 	 * is enabled to avoid notifier leak.
 	 */
-	__perf_cpu_notifier(intel_cqm_cpu_notifier);
+	cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_STARTING,
+			  "AP_PERF_X86_CQM_STARTING",
+			  intel_cqm_cpu_starting, NULL);
+	cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_ONLINE, "AP_PERF_X86_CQM_ONLINE",
+			  NULL, intel_cqm_cpu_exit);
+
 out:
-	cpu_notifier_register_done();
+	put_online_cpus();
+
 	if (ret) {
 		kfree(str);
 		cqm_cleanup();
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 4c7638b91fa5..3ca87b5a8677 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -366,7 +366,7 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode)
  * Check if exiting cpu is the designated reader. If so migrate the
  * events when there is a valid target available
  */
-static void cstate_cpu_exit(int cpu)
+static int cstate_cpu_exit(unsigned int cpu)
 {
 	unsigned int target;
 
@@ -391,9 +391,10 @@ static void cstate_cpu_exit(int cpu)
 			perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
 		}
 	}
+	return 0;
 }
 
-static void cstate_cpu_init(int cpu)
+static int cstate_cpu_init(unsigned int cpu)
 {
 	unsigned int target;
 
@@ -415,31 +416,10 @@ static void cstate_cpu_init(int cpu)
 				 topology_core_cpumask(cpu));
 	if (has_cstate_pkg && target >= nr_cpu_ids)
 		cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
-}
 
-static int cstate_cpu_notifier(struct notifier_block *self,
-			       unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_STARTING:
-		cstate_cpu_init(cpu);
-		break;
-	case CPU_DOWN_PREPARE:
-		cstate_cpu_exit(cpu);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block cstate_cpu_nb = {
-	.notifier_call	= cstate_cpu_notifier,
-	.priority       = CPU_PRI_PERF + 1,
-};
-
 static struct pmu cstate_core_pmu = {
 	.attr_groups	= core_attr_groups,
 	.name		= "cstate_core",
@@ -600,18 +580,20 @@ static inline void cstate_cleanup(void)
 
 static int __init cstate_init(void)
 {
-	int cpu, err;
+	int err;
 
-	cpu_notifier_register_begin();
-	for_each_online_cpu(cpu)
-		cstate_cpu_init(cpu);
+	cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
+			  "AP_PERF_X86_CSTATE_STARTING", cstate_cpu_init,
+			  NULL);
+	cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
+			  "AP_PERF_X86_CSTATE_ONLINE", NULL, cstate_cpu_exit);
 
 	if (has_cstate_core) {
 		err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
 		if (err) {
 			has_cstate_core = false;
 			pr_info("Failed to register cstate core pmu\n");
-			goto out;
+			return err;
 		}
 	}
 
@@ -621,12 +603,10 @@ static int __init cstate_init(void)
 			has_cstate_pkg = false;
 			pr_info("Failed to register cstate pkg pmu\n");
 			cstate_cleanup();
-			goto out;
+			return err;
 		}
 	}
 
-	__register_cpu_notifier(&cstate_cpu_nb);
-out:
-	cpu_notifier_register_done();
+
 	return err;
 }
 
@@ -652,9 +632,8 @@ module_init(cstate_pmu_init);
 
 static void __exit cstate_pmu_exit(void)
 {
-	cpu_notifier_register_begin();
-	__unregister_cpu_notifier(&cstate_cpu_nb);
+	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
+	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);
 	cstate_cleanup();
-	cpu_notifier_register_done();
 }
 module_exit(cstate_pmu_exit);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 7ce9f3f669e6..9b983a474253 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1274,18 +1274,18 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 		struct pebs_record_nhm *p = at;
 		u64 pebs_status;
 
-		/* PEBS v3 has accurate status bits */
+		pebs_status = p->status & cpuc->pebs_enabled;
+		pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+
+		/* PEBS v3 has more accurate status bits */
 		if (x86_pmu.intel_cap.pebs_format >= 3) {
-			for_each_set_bit(bit, (unsigned long *)&p->status,
-					 MAX_PEBS_EVENTS)
+			for_each_set_bit(bit, (unsigned long *)&pebs_status,
+					 x86_pmu.max_pebs_events)
 				counts[bit]++;
 
 			continue;
 		}
 
-		pebs_status = p->status & cpuc->pebs_enabled;
-		pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
-
 		/*
 		 * On some CPUs the PEBS status can be zero when PEBS is
 		 * racing with clearing of GLOBAL_STATUS.
@@ -1333,8 +1333,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			continue;
 
 		event = cpuc->events[bit];
-		WARN_ON_ONCE(!event);
-		WARN_ON_ONCE(!event->attr.precise_ip);
+		if (WARN_ON_ONCE(!event))
+			continue;
+
+		if (WARN_ON_ONCE(!event->attr.precise_ip))
+			continue;
 
 		/* log dropped samples number */
 		if (error[bit])
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 04bb5fb5a8d7..861a7d9cb60f 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1074,6 +1074,11 @@ static void pt_addr_filters_fini(struct perf_event *event)
 	event->hw.addr_filters = NULL;
 }
 
+static inline bool valid_kernel_ip(unsigned long ip)
+{
+	return virt_addr_valid(ip) && kernel_ip(ip);
+}
+
 static int pt_event_addr_filters_validate(struct list_head *filters)
 {
 	struct perf_addr_filter *filter;
@@ -1081,11 +1086,16 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
 
 	list_for_each_entry(filter, filters, entry) {
 		/* PT doesn't support single address triggers */
-		if (!filter->range)
+		if (!filter->range || !filter->size)
 			return -EOPNOTSUPP;
 
-		if (!filter->inode && !kernel_ip(filter->offset))
-			return -EINVAL;
+		if (!filter->inode) {
+			if (!valid_kernel_ip(filter->offset))
+				return -EINVAL;
+
+			if (!valid_kernel_ip(filter->offset + filter->size))
+				return -EINVAL;
+		}
 
 		if (++range > pt_cap_get(PT_CAP_num_address_ranges))
 			return -EOPNOTSUPP;
@@ -1111,7 +1121,7 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
 		} else {
 			/* apply the offset */
 			msr_a = filter->offset + offs[range];
-			msr_b = filter->size + msr_a;
+			msr_b = filter->size + msr_a - 1;
 		}
 
 		filters->filter[range].msr_a = msr_a;
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index d0c58b35155f..28865938aadf 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -556,14 +556,14 @@ const struct attribute_group *rapl_attr_groups[] = {
 	NULL,
 };
 
-static void rapl_cpu_exit(int cpu)
+static int rapl_cpu_offline(unsigned int cpu)
 {
 	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
 	int target;
 
 	/* Check if exiting cpu is used for collecting rapl events */
 	if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
-		return;
+		return 0;
 
 	pmu->cpu = -1;
 	/* Find a new cpu to collect rapl events */
@@ -575,9 +575,10 @@ static void rapl_cpu_exit(int cpu)
 		pmu->cpu = target;
 		perf_pmu_migrate_context(pmu->pmu, cpu, target);
 	}
+	return 0;
 }
 
-static void rapl_cpu_init(int cpu)
+static int rapl_cpu_online(unsigned int cpu)
 {
 	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
 	int target;
@@ -588,13 +589,14 @@ static void rapl_cpu_init(int cpu)
 	 */
 	target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
 	if (target < nr_cpu_ids)
-		return;
+		return 0;
 
 	cpumask_set_cpu(cpu, &rapl_cpu_mask);
 	pmu->cpu = cpu;
+	return 0;
 }
 
-static int rapl_cpu_prepare(int cpu)
+static int rapl_cpu_prepare(unsigned int cpu)
 {
 	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
 
@@ -615,33 +617,6 @@ static int rapl_cpu_prepare(int cpu)
 	return 0;
 }
 
-static int rapl_cpu_notifier(struct notifier_block *self,
-			     unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_UP_PREPARE:
-		rapl_cpu_prepare(cpu);
-		break;
-
-	case CPU_DOWN_FAILED:
-	case CPU_ONLINE:
-		rapl_cpu_init(cpu);
-		break;
-
-	case CPU_DOWN_PREPARE:
-		rapl_cpu_exit(cpu);
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block rapl_cpu_nb = {
-	.notifier_call	= rapl_cpu_notifier,
-	.priority       = CPU_PRI_PERF + 1,
-};
-
 static int rapl_check_hw_unit(bool apply_quirk)
 {
 	u64 msr_rapl_power_unit_bits;
@@ -692,24 +667,6 @@ static void __init rapl_advertise(void)
 	}
 }
 
-static int __init rapl_prepare_cpus(void)
-{
-	unsigned int cpu, pkg;
-	int ret;
-
-	for_each_online_cpu(cpu) {
-		pkg = topology_logical_package_id(cpu);
-		if (rapl_pmus->pmus[pkg])
-			continue;
-
-		ret = rapl_cpu_prepare(cpu);
-		if (ret)
-			return ret;
-		rapl_cpu_init(cpu);
-	}
-	return 0;
-}
-
 static void cleanup_rapl_pmus(void)
 {
 	int i;
@@ -837,35 +794,44 @@ static int __init rapl_pmu_init(void)
 	if (ret)
 		return ret;
 
-	cpu_notifier_register_begin();
+	/*
+	 * Install callbacks. Core will call them for each online cpu.
+	 */
 
-	ret = rapl_prepare_cpus();
+	ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "PERF_X86_RAPL_PREP",
+				rapl_cpu_prepare, NULL);
 	if (ret)
 		goto out;
 
+	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
+				"AP_PERF_X86_RAPL_ONLINE",
+				rapl_cpu_online, rapl_cpu_offline);
+	if (ret)
+		goto out1;
+
 	ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
 	if (ret)
-		goto out;
+		goto out2;
 
-	__register_cpu_notifier(&rapl_cpu_nb);
-	cpu_notifier_register_done();
 	rapl_advertise();
 	return 0;
 
+out2:
+	cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+out1:
+	cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP);
 out:
 	pr_warn("Initialization failed (%d), disabled\n", ret);
 	cleanup_rapl_pmus();
-	cpu_notifier_register_done();
 	return ret;
 }
 module_init(rapl_pmu_init);
 
 static void __exit intel_rapl_exit(void)
 {
-	cpu_notifier_register_begin();
-	__unregister_cpu_notifier(&rapl_cpu_nb);
+	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+	cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP);
 	perf_pmu_unregister(&rapl_pmus->pmu);
 	cleanup_rapl_pmus();
-	cpu_notifier_register_done();
 }
 module_exit(intel_rapl_exit);
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 59b4974c697f..463dc7a5a6c3 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,3 +1,5 @@
+#include <linux/module.h>
+
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 #include "uncore.h"
@@ -1052,7 +1054,7 @@ static void uncore_pci_exit(void)
 	}
 }
 
-static void uncore_cpu_dying(int cpu)
+static int uncore_cpu_dying(unsigned int cpu)
 {
 	struct intel_uncore_type *type, **types = uncore_msr_uncores;
 	struct intel_uncore_pmu *pmu;
@@ -1069,16 +1071,19 @@ static void uncore_cpu_dying(int cpu)
 				uncore_box_exit(box);
 		}
 	}
+	return 0;
 }
 
-static void uncore_cpu_starting(int cpu, bool init)
+static int first_init;
+
+static int uncore_cpu_starting(unsigned int cpu)
 {
 	struct intel_uncore_type *type, **types = uncore_msr_uncores;
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
 	int i, pkg, ncpus = 1;
 
-	if (init) {
+	if (first_init) {
 		/*
 		 * On init we get the number of online cpus in the package
 		 * and set refcount for all of them.
@@ -1099,9 +1104,11 @@ static void uncore_cpu_starting(int cpu, bool init)
 				uncore_box_init(box);
 		}
 	}
+
+	return 0;
 }
 
-static int uncore_cpu_prepare(int cpu)
+static int uncore_cpu_prepare(unsigned int cpu)
 {
 	struct intel_uncore_type *type, **types = uncore_msr_uncores;
 	struct intel_uncore_pmu *pmu;
@@ -1164,13 +1171,13 @@ static void uncore_change_context(struct intel_uncore_type **uncores,
 		uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
 }
 
-static void uncore_event_exit_cpu(int cpu)
+static int uncore_event_cpu_offline(unsigned int cpu)
 {
 	int target;
 
 	/* Check if exiting cpu is used for collecting uncore events */
 	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
-		return;
+		return 0;
 
 	/* Find a new cpu to collect uncore events */
 	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
@@ -1183,9 +1190,10 @@ static void uncore_event_exit_cpu(int cpu)
 
 	uncore_change_context(uncore_msr_uncores, cpu, target);
 	uncore_change_context(uncore_pci_uncores, cpu, target);
+	return 0;
 }
 
-static void uncore_event_init_cpu(int cpu)
+static int uncore_event_cpu_online(unsigned int cpu)
 {
 	int target;
 
@@ -1195,50 +1203,15 @@ static void uncore_event_init_cpu(int cpu)
 	 */
 	target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
 	if (target < nr_cpu_ids)
-		return;
+		return 0;
 
 	cpumask_set_cpu(cpu, &uncore_cpu_mask);
 
 	uncore_change_context(uncore_msr_uncores, -1, cpu);
 	uncore_change_context(uncore_pci_uncores, -1, cpu);
+	return 0;
 }
 
-static int uncore_cpu_notifier(struct notifier_block *self,
-			       unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_UP_PREPARE:
-		return notifier_from_errno(uncore_cpu_prepare(cpu));
-
-	case CPU_STARTING:
-		uncore_cpu_starting(cpu, false);
-	case CPU_DOWN_FAILED:
-		uncore_event_init_cpu(cpu);
-		break;
-
-	case CPU_UP_CANCELED:
-	case CPU_DYING:
-		uncore_cpu_dying(cpu);
-		break;
-
-	case CPU_DOWN_PREPARE:
-		uncore_event_exit_cpu(cpu);
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block uncore_cpu_nb = {
-	.notifier_call	= uncore_cpu_notifier,
-	/*
-	 * to migrate uncore events, our notifier should be executed
-	 * before perf core's notifier.
-	 */
-	.priority	= CPU_PRI_PERF + 1,
-};
-
 static int __init type_pmu_register(struct intel_uncore_type *type)
 {
 	int i, ret;
@@ -1282,41 +1255,6 @@ err:
 	return ret;
 }
 
-static void __init uncore_cpu_setup(void *dummy)
-{
-	uncore_cpu_starting(smp_processor_id(), true);
-}
-
-/* Lazy to avoid allocation of a few bytes for the normal case */
-static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
-
-static int __init uncore_cpumask_init(bool msr)
-{
-	unsigned int cpu;
-
-	for_each_online_cpu(cpu) {
-		unsigned int pkg = topology_logical_package_id(cpu);
-		int ret;
-
-		if (test_and_set_bit(pkg, packages))
-			continue;
-		/*
-		 * The first online cpu of each package allocates and takes
-		 * the refcounts for all other online cpus in that package.
-		 * If msrs are not enabled no allocation is required.
-		 */
-		if (msr) {
-			ret = uncore_cpu_prepare(cpu);
-			if (ret)
-				return ret;
-		}
-		uncore_event_init_cpu(cpu);
-		smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
-	}
-	__register_cpu_notifier(&uncore_cpu_nb);
-	return 0;
-}
-
 #define X86_UNCORE_MODEL_MATCH(model, init) \
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
@@ -1440,11 +1378,33 @@ static int __init intel_uncore_init(void)
 	if (cret && pret)
 		return -ENODEV;
 
-	cpu_notifier_register_begin();
-	ret = uncore_cpumask_init(!cret);
-	if (ret)
-		goto err;
-	cpu_notifier_register_done();
+	/*
+	 * Install callbacks. Core will call them for each online cpu.
+	 *
+	 * The first online cpu of each package allocates and takes
+	 * the refcounts for all other online cpus in that package.
+	 * If msrs are not enabled no allocation is required and
+	 * uncore_cpu_prepare() is not called for each online cpu.
+	 */
+	if (!cret) {
+		ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP,
+					"PERF_X86_UNCORE_PREP",
+					uncore_cpu_prepare, NULL);
+		if (ret)
+			goto err;
+	} else {
+		cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP,
+					  "PERF_X86_UNCORE_PREP",
+					  uncore_cpu_prepare, NULL);
+	}
+	first_init = 1;
+	cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING,
+			  "AP_PERF_X86_UNCORE_STARTING",
+			  uncore_cpu_starting, uncore_cpu_dying);
+	first_init = 0;
+	cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
+			  "AP_PERF_X86_UNCORE_ONLINE",
+			  uncore_event_cpu_online, uncore_event_cpu_offline);
 	return 0;
 
 err:
@@ -1452,17 +1412,16 @@ err:
 	on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
 	uncore_types_exit(uncore_msr_uncores);
 	uncore_pci_exit();
-	cpu_notifier_register_done();
 	return ret;
 }
 module_init(intel_uncore_init);
 
 static void __exit intel_uncore_exit(void)
 {
-	cpu_notifier_register_begin();
-	__unregister_cpu_notifier(&uncore_cpu_nb);
+	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
+	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING);
+	cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP);
 	uncore_types_exit(uncore_msr_uncores);
 	uncore_pci_exit();
-	cpu_notifier_register_done();
 }
 module_exit(intel_uncore_exit);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index d6063e438158..78b9c23e2d8d 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -1,4 +1,3 @@
-#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
 #include <asm/apicdef.h>
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 97a69dbba649..9d35ec0cb8fc 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -100,6 +100,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
 	}
 }
 
+static void snb_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+	wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+		SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
 static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
 {
 	if (box->pmu->pmu_idx == 0)
@@ -127,6 +133,7 @@ static struct attribute_group snb_uncore_format_group = {
 
 static struct intel_uncore_ops snb_uncore_msr_ops = {
 	.init_box	= snb_uncore_msr_init_box,
+	.enable_box	= snb_uncore_msr_enable_box,
 	.exit_box	= snb_uncore_msr_exit_box,
 	.disable_event	= snb_uncore_msr_disable_event,
 	.enable_event	= snb_uncore_msr_enable_event,
@@ -192,6 +199,12 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
 	}
 }
 
+static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+	wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+		SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
 static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
 {
 	if (box->pmu->pmu_idx == 0)
@@ -200,6 +213,7 @@ static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
 
 static struct intel_uncore_ops skl_uncore_msr_ops = {
 	.init_box	= skl_uncore_msr_init_box,
+	.enable_box	= skl_uncore_msr_enable_box,
 	.exit_box	= skl_uncore_msr_exit_box,
 	.disable_event	= snb_uncore_msr_disable_event,
 	.enable_event	= snb_uncore_msr_enable_event,
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 824e54086e07..8aee83bcf71f 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2626,7 +2626,7 @@ void hswep_uncore_cpu_init(void)
 
 static struct intel_uncore_type hswep_uncore_ha = {
 	.name		= "ha",
-	.num_counters   = 5,
+	.num_counters   = 4,
 	.num_boxes	= 2,
 	.perf_ctr_bits	= 48,
 	SNBEP_UNCORE_PCI_COMMON_INIT(),
@@ -2645,7 +2645,7 @@ static struct uncore_event_desc hswep_uncore_imc_events[] = {
 
 static struct intel_uncore_type hswep_uncore_imc = {
 	.name		= "imc",
-	.num_counters   = 5,
+	.num_counters   = 4,
 	.num_boxes	= 8,
 	.perf_ctr_bits	= 48,
 	.fixed_ctr_bits	= 48,
@@ -2691,7 +2691,7 @@ static struct intel_uncore_type hswep_uncore_irp = {
 
 static struct intel_uncore_type hswep_uncore_qpi = {
 	.name		= "qpi",
-	.num_counters   = 5,
+	.num_counters   = 4,
 	.num_boxes	= 3,
 	.perf_ctr_bits	= 48,
 	.perf_ctr	= SNBEP_PCI_PMON_CTR0,
@@ -2773,7 +2773,7 @@ static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
 
 static struct intel_uncore_type hswep_uncore_r3qpi = {
 	.name		= "r3qpi",
-	.num_counters   = 4,
+	.num_counters   = 3,
 	.num_boxes	= 3,
 	.perf_ctr_bits	= 44,
 	.constraints	= hswep_uncore_r3qpi_constraints,
@@ -2972,7 +2972,7 @@ static struct intel_uncore_type bdx_uncore_ha = {
 
 static struct intel_uncore_type bdx_uncore_imc = {
 	.name		= "imc",
-	.num_counters   = 5,
+	.num_counters   = 4,
 	.num_boxes	= 8,
 	.perf_ctr_bits	= 48,
 	.fixed_ctr_bits	= 48,
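The recurring pattern in the cqm, cstate, rapl and uncore hunks above is the replacement of open-coded CPU notifiers with the CPU hotplug state machine (cpuhp_setup_state() and friends). A minimal sketch of that calling convention follows; it uses the generic dynamic state CPUHP_AP_ONLINE_DYN and hypothetical callback/module names rather than the dedicated CPUHP_* states these patches register, so treat it as an illustration of the API, not as part of the series.

/*
 * Sketch only: the callback names, the "x86/demo:online" label and the use
 * of CPUHP_AP_ONLINE_DYN are hypothetical; the series above adds its own
 * fixed CPUHP_* states instead of a dynamic one.
 */
#include <linux/cpuhotplug.h>
#include <linux/module.h>

static int demo_hp_state;

static int demo_cpu_online(unsigned int cpu)
{
	/* Called for each CPU coming online, and once per already-online
	 * CPU when the state is installed. */
	pr_info("demo: cpu %u online\n", cpu);
	return 0;
}

static int demo_cpu_offline(unsigned int cpu)
{
	/* Teardown counterpart, invoked before the CPU goes away. */
	pr_info("demo: cpu %u going offline\n", cpu);
	return 0;
}

static int __init demo_init(void)
{
	/* With CPUHP_AP_ONLINE_DYN, cpuhp_setup_state() returns the
	 * dynamically allocated state number (> 0) or a negative error. */
	demo_hp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/demo:online",
					  demo_cpu_online, demo_cpu_offline);
	return demo_hp_state < 0 ? demo_hp_state : 0;
}

static void __exit demo_exit(void)
{
	/* Remove the state without invoking the teardown callbacks,
	 * mirroring the *_remove_state_nocalls() calls in the exit paths above. */
	cpuhp_remove_state_nocalls(demo_hp_state);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Once a driver registers its callbacks this way, the hotplug core serializes them against CPU up/down transitions, which is what lets the converted drivers drop their cpu_notifier_register_begin()/done() bracketing.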