diff options
Diffstat (limited to 'arch/x86/events')
-rw-r--r-- | arch/x86/events/amd/core.c | 8 | ||||
-rw-r--r-- | arch/x86/events/core.c | 12 | ||||
-rw-r--r-- | arch/x86/events/intel/core.c | 15 | ||||
-rw-r--r-- | arch/x86/events/intel/cstate.c | 31 | ||||
-rw-r--r-- | arch/x86/events/intel/ds.c | 35 | ||||
-rw-r--r-- | arch/x86/events/intel/lbr.c | 4 | ||||
-rw-r--r-- | arch/x86/events/intel/rapl.c | 1 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.c | 9 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snb.c | 44 | ||||
-rw-r--r-- | arch/x86/events/perf_event.h | 2 |
10 files changed, 107 insertions, 54 deletions
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index f5f4b3fbbbc2..afb222b63cae 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -662,7 +662,13 @@ static int __init amd_core_pmu_init(void) pr_cont("Fam15h "); x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; break; - + case 0x17: + pr_cont("Fam17h "); + /* + * In family 17h, there are no event constraints in the PMC hardware. + * We fallback to using default amd_get_event_constraints. + */ + break; default: pr_err("core perfctr but no constraints; unknown hardware!\n"); return -ENODEV; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index d31735f37ed7..6e395c996900 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -69,7 +69,7 @@ u64 x86_perf_event_update(struct perf_event *event) int shift = 64 - x86_pmu.cntval_bits; u64 prev_raw_count, new_raw_count; int idx = hwc->idx; - s64 delta; + u64 delta; if (idx == INTEL_PMC_IDX_FIXED_BTS) return 0; @@ -2352,7 +2352,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent frame.next_frame = 0; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, 8)) + if (!valid_user_frame(fp, sizeof(frame))) break; bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4); @@ -2362,9 +2362,6 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent if (bytes != 0) break; - if (!valid_user_frame(fp, sizeof(frame))) - break; - perf_callchain_store(entry, cs_base + frame.return_address); fp = compat_ptr(ss_base + frame.next_frame); } @@ -2413,7 +2410,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs frame.next_frame = NULL; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2)) + if (!valid_user_frame(fp, sizeof(frame))) break; bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp)); @@ -2423,9 +2420,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs if (bytes != 0) break; - if (!valid_user_frame(fp, sizeof(frame))) - break; - perf_callchain_store(entry, frame.return_address); fp = (void __user *)frame.next_frame; } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a3a9eb84b5cf..cb8522290e6a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3607,10 +3607,14 @@ __init int intel_pmu_init(void) /* * Quirk: v2 perfmon does not report fixed-purpose events, so - * assume at least 3 events: + * assume at least 3 events, when not running in a hypervisor: */ - if (version > 1) - x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); + if (version > 1) { + int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR); + + x86_pmu.num_counters_fixed = + max((int)edx.split.num_counters_fixed, assume); + } if (boot_cpu_has(X86_FEATURE_PDCM)) { u64 capabilities; @@ -3898,6 +3902,7 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_XEON_PHI_KNL: + case INTEL_FAM6_XEON_PHI_KNM: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, @@ -3912,7 +3917,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - pr_cont("Knights Landing events, "); + pr_cont("Knights Landing/Mill events, "); break; case INTEL_FAM6_SKYLAKE_MOBILE: @@ -4029,7 +4034,7 @@ __init int intel_pmu_init(void) /* Support full width counters using alternative MSR range */ if (x86_pmu.intel_cap.full_width_write) { - x86_pmu.max_period = x86_pmu.cntval_mask; + x86_pmu.max_period = x86_pmu.cntval_mask >> 1; x86_pmu.perfctr = MSR_IA32_PMC0; pr_cont("full-width counters, "); } diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 3ca87b5a8677..da51e5a3e2ff 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -48,7 +48,8 @@ * Scope: Core * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW + * SKL,KNL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 @@ -56,15 +57,16 @@ * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 - * Available model: SNB,IVB,HSW,BDW,SKL + * Available model: SNB,IVB,HSW,BDW,SKL,KNL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 - * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW + * SKL,KNL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 @@ -118,6 +120,7 @@ struct cstate_model { /* Quirk flags */ #define SLM_PKG_C6_USE_C7_MSR (1UL << 0) +#define KNL_CORE_C6_MSR (1UL << 1) struct perf_cstate_msr { u64 msr; @@ -488,6 +491,18 @@ static const struct cstate_model slm_cstates __initconst = { .quirks = SLM_PKG_C6_USE_C7_MSR, }; + +static const struct cstate_model knl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES), + .quirks = KNL_CORE_C6_MSR, +}; + + + #define X86_CSTATES_MODEL(model, states) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } @@ -523,6 +538,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), + + X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); @@ -558,6 +576,11 @@ static int __init cstate_probe(const struct cstate_model *cm) if (cm->quirks & SLM_PKG_C6_USE_C7_MSR) pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY; + /* KNL has different MSR for CORE C6 */ + if (cm->quirks & KNL_CORE_C6_MSR) + pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY; + + has_cstate_core = cstate_probe_msr(cm->core_events, PERF_CSTATE_CORE_EVENT_MAX, core_msr, core_events_attrs); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 0319311dbdbb..be202390bbd3 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1108,20 +1108,20 @@ static void setup_pebs_sample_data(struct perf_event *event, } /* - * We use the interrupt regs as a base because the PEBS record - * does not contain a full regs set, specifically it seems to - * lack segment descriptors, which get used by things like - * user_mode(). + * We use the interrupt regs as a base because the PEBS record does not + * contain a full regs set, specifically it seems to lack segment + * descriptors, which get used by things like user_mode(). * - * In the simple case fix up only the IP and BP,SP regs, for - * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. - * A possible PERF_SAMPLE_REGS will have to transfer all regs. + * In the simple case fix up only the IP for PERF_SAMPLE_IP. + * + * We must however always use BP,SP from iregs for the unwinder to stay + * sane; the record BP,SP can point into thin air when the record is + * from a previous PMI context or an (I)RET happend between the record + * and PMI. */ *regs = *iregs; regs->flags = pebs->flags; set_linear_ip(regs, pebs->ip); - regs->bp = pebs->bp; - regs->sp = pebs->sp; if (sample_type & PERF_SAMPLE_REGS_INTR) { regs->ax = pebs->ax; @@ -1130,10 +1130,21 @@ static void setup_pebs_sample_data(struct perf_event *event, regs->dx = pebs->dx; regs->si = pebs->si; regs->di = pebs->di; - regs->bp = pebs->bp; - regs->sp = pebs->sp; - regs->flags = pebs->flags; + /* + * Per the above; only set BP,SP if we don't need callchains. + * + * XXX: does this make sense? + */ + if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { + regs->bp = pebs->bp; + regs->sp = pebs->sp; + } + + /* + * Preserve PERF_EFLAGS_VM from set_linear_ip(). + */ + regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM); #ifndef CONFIG_X86_32 regs->r8 = pebs->r8; regs->r9 = pebs->r9; diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index fc6cf21c535e..81b321ace8e0 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -458,8 +458,8 @@ void intel_pmu_lbr_del(struct perf_event *event) if (!x86_pmu.lbr_nr) return; - if (branch_user_callstack(cpuc->br_sel) && event->ctx && - event->ctx->task_ctx_data) { + if (branch_user_callstack(cpuc->br_sel) && + event->ctx->task_ctx_data) { task_ctx = event->ctx->task_ctx_data; task_ctx->lbr_callstack_users--; } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index b0f0e835a770..0a535cea8ff3 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -763,6 +763,7 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index d9844cc74486..dbaaf7dc8373 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -319,9 +319,9 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, */ static int uncore_pmu_event_init(struct perf_event *event); -static bool is_uncore_event(struct perf_event *event) +static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event) { - return event->pmu->event_init == uncore_pmu_event_init; + return &box->pmu->pmu == event->pmu; } static int @@ -340,7 +340,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, n = box->n_events; - if (is_uncore_event(leader)) { + if (is_box_event(box, leader)) { box->event_list[n] = leader; n++; } @@ -349,7 +349,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, return n; list_for_each_entry(event, &leader->sibling_list, group_entry) { - if (!is_uncore_event(event) || + if (!is_box_event(box, event) || event->state <= PERF_EVENT_STATE_OFF) continue; @@ -1349,6 +1349,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init), diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 5f845eef9a4d..a3dcc12bef4a 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -8,8 +8,12 @@ #define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 -#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f -#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 +#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 +#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 +#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f +#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -486,24 +490,12 @@ static int snb_uncore_imc_event_add(struct perf_event *event, int flags) snb_uncore_imc_event_start(event, 0); - box->n_events++; - return 0; } static void snb_uncore_imc_event_del(struct perf_event *event, int flags) { - struct intel_uncore_box *box = uncore_event_to_box(event); - int i; - snb_uncore_imc_event_stop(event, PERF_EF_UPDATE); - - for (i = 0; i < box->n_events; i++) { - if (event == box->event_list[i]) { - --box->n_events; - break; - } - } } int snb_pci2phy_map_init(int devid) @@ -616,13 +608,29 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = { static const struct pci_device_id skl_uncore_pci_ids[] = { { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_Y_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -666,8 +674,12 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ - IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */ + IMC_DEV(SKL_Y_IMC, &skl_uncore_pci_driver), /* 6th Gen Core Y */ IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */ + IMC_DEV(SKL_HD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Dual Core */ + IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ + IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ + IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ { /* end marker */ } }; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 5874d8de1f8d..a77ee026643d 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -113,7 +113,7 @@ struct debug_store { * Per register state. */ struct er_account { - raw_spinlock_t lock; /* per-core: protect structure */ + raw_spinlock_t lock; /* per-core: protect structure */ u64 config; /* extra MSR config */ u64 reg; /* extra MSR number */ atomic_t ref; /* reference count */ |