summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c1
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c8
-rw-r--r--arch/x86/kernel/cpu/perf_event.c69
-rw-r--r--arch/x86/kernel/cpu/perf_event.h24
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c34
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_iommu.c504
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_iommu.h40
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c138
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c178
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c69
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c16
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h4
-rw-r--r--arch/x86/kernel/head_64.S6
-rw-r--r--arch/x86/kernel/i387.c14
-rw-r--r--arch/x86/kernel/kprobes/core.c14
-rw-r--r--arch/x86/kernel/kvmclock.c1
-rw-r--r--arch/x86/kernel/nmi.c36
-rw-r--r--arch/x86/kernel/process.c12
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S2
-rw-r--r--arch/x86/kernel/signal.c16
-rw-r--r--arch/x86/kernel/smpboot.c8
22 files changed, 1050 insertions, 148 deletions
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 31cb9ae992b7..a698d7165c96 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -9,6 +9,7 @@
*
*/
#include <asm/apic.h>
+#include <asm/nmi.h>
#include <linux/cpumask.h>
#include <linux/kdebug.h>
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index b0684e4a73aa..47b56a7e99cb 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,11 +31,15 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
ifdef CONFIG_PERF_EVENTS
obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
+endif
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
endif
+
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 35ffda5d0727..5f90b85ff22e 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -714,15 +714,15 @@ int __init mtrr_cleanup(unsigned address_bits)
if (mtrr_tom2)
x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base;
- nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size);
/*
* [0, 1M) should always be covered by var mtrr with WB
* and fixed mtrrs should take effect before var mtrr for it:
*/
- nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0,
+ nr_range = add_range_with_merge(range, RANGE_NUM, 0, 0,
1ULL<<(20 - PAGE_SHIFT));
- /* Sort the ranges: */
- sort_range(range, nr_range);
+ /* add from var mtrr at last */
+ nr_range = x86_get_mtrr_mem_range(range, nr_range,
+ x_remove_base, x_remove_size);
range_sums = sum_ranges(range, nr_range);
printk(KERN_INFO "total RAM covered: %ldM\n",
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1025f3c99d20..9e581c5cf6d0 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -403,7 +403,8 @@ int x86_pmu_hw_config(struct perf_event *event)
* check that PEBS LBR correction does not conflict with
* whatever the user is asking with attr->branch_sample_type
*/
- if (event->attr.precise_ip > 1) {
+ if (event->attr.precise_ip > 1 &&
+ x86_pmu.intel_cap.pebs_format < 2) {
u64 *br_type = &event->attr.branch_sample_type;
if (has_branch_stack(event)) {
@@ -568,7 +569,7 @@ struct sched_state {
struct perf_sched {
int max_weight;
int max_events;
- struct event_constraint **constraints;
+ struct perf_event **events;
struct sched_state state;
int saved_states;
struct sched_state saved[SCHED_STATES_MAX];
@@ -577,7 +578,7 @@ struct perf_sched {
/*
* Initialize interator that runs through all events and counters.
*/
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
int num, int wmin, int wmax)
{
int idx;
@@ -585,10 +586,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
memset(sched, 0, sizeof(*sched));
sched->max_events = num;
sched->max_weight = wmax;
- sched->constraints = c;
+ sched->events = events;
for (idx = 0; idx < num; idx++) {
- if (c[idx]->weight == wmin)
+ if (events[idx]->hw.constraint->weight == wmin)
break;
}
@@ -635,8 +636,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
if (sched->state.event >= sched->max_events)
return false;
- c = sched->constraints[sched->state.event];
-
+ c = sched->events[sched->state.event]->hw.constraint;
/* Prefer fixed purpose counters */
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
idx = INTEL_PMC_IDX_FIXED;
@@ -694,7 +694,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
if (sched->state.weight > sched->max_weight)
return false;
}
- c = sched->constraints[sched->state.event];
+ c = sched->events[sched->state.event]->hw.constraint;
} while (c->weight != sched->state.weight);
sched->state.counter = 0; /* start with first counter */
@@ -705,12 +705,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
/*
* Assign a counter for each event.
*/
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
int wmin, int wmax, int *assign)
{
struct perf_sched sched;
- perf_sched_init(&sched, constraints, n, wmin, wmax);
+ perf_sched_init(&sched, events, n, wmin, wmax);
do {
if (!perf_sched_find_counter(&sched))
@@ -724,16 +724,19 @@ int perf_assign_events(struct event_constraint **constraints, int n,
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
- struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
+ struct event_constraint *c;
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ struct perf_event *e;
int i, wmin, wmax, num = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+ hwc = &cpuc->event_list[i]->hw;
c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
- constraints[i] = c;
+ hwc->constraint = c;
+
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
}
@@ -743,7 +746,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
*/
for (i = 0; i < n; i++) {
hwc = &cpuc->event_list[i]->hw;
- c = constraints[i];
+ c = hwc->constraint;
/* never assigned */
if (hwc->idx == -1)
@@ -764,16 +767,35 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
/* slow path */
if (i != n)
- num = perf_assign_events(constraints, n, wmin, wmax, assign);
+ num = perf_assign_events(cpuc->event_list, n, wmin,
+ wmax, assign);
/*
+ * Mark the event as committed, so we do not put_constraint()
+ * in case new events are added and fail scheduling.
+ */
+ if (!num && assign) {
+ for (i = 0; i < n; i++) {
+ e = cpuc->event_list[i];
+ e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+ }
+ }
+ /*
* scheduling failed or is just a simulation,
* free resources if necessary
*/
if (!assign || num) {
for (i = 0; i < n; i++) {
+ e = cpuc->event_list[i];
+ /*
+ * do not put_constraint() on comitted events,
+ * because they are good to go
+ */
+ if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
+ continue;
+
if (x86_pmu.put_event_constraints)
- x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+ x86_pmu.put_event_constraints(cpuc, e);
}
}
return num ? -EINVAL : 0;
@@ -1153,6 +1175,11 @@ static void x86_pmu_del(struct perf_event *event, int flags)
int i;
/*
+ * event is descheduled
+ */
+ event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
+
+ /*
* If we're called during a txn, we don't need to do anything.
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
@@ -1249,10 +1276,20 @@ void perf_events_lapic_init(void)
static int __kprobes
perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
+ int ret;
+ u64 start_clock;
+ u64 finish_clock;
+
if (!atomic_read(&active_events))
return NMI_DONE;
- return x86_pmu.handle_irq(regs);
+ start_clock = local_clock();
+ ret = x86_pmu.handle_irq(regs);
+ finish_clock = local_clock();
+
+ perf_sample_event_took(finish_clock - start_clock);
+
+ return ret;
}
struct event_constraint emptyconstraint;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ba9aadfa683b..97e557bc4c91 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -63,10 +63,12 @@ struct event_constraint {
int flags;
};
/*
- * struct event_constraint flags
+ * struct hw_perf_event.flags flags
*/
#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style st data sampling */
+#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
struct amd_nb {
int nb_id; /* NorthBridge id */
@@ -227,11 +229,14 @@ struct cpu_hw_events {
* - inv
* - edge
* - cnt-mask
+ * - in_tx
+ * - in_tx_checkpointed
* The other filters are supported by fixed counters.
* The any-thread option is supported starting with v3.
*/
+#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
#define FIXED_EVENT_CONSTRAINT(c, n) \
- EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+ EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
/*
* Constraint on the Event code + UMask
@@ -247,6 +252,11 @@ struct cpu_hw_events {
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+/* DataLA version of store sampling without extra enable bit. */
+#define INTEL_PST_HSW_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
#define EVENT_CONSTRAINT_END \
EVENT_CONSTRAINT(0, 0, 0)
@@ -301,6 +311,11 @@ union perf_capabilities {
u64 pebs_arch_reg:1;
u64 pebs_format:4;
u64 smm_freeze:1;
+ /*
+ * PMU supports separate counter range for writing
+ * values > 32bit.
+ */
+ u64 full_width_write:1;
};
u64 capabilities;
};
@@ -375,6 +390,7 @@ struct x86_pmu {
struct event_constraint *event_constraints;
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
+ bool late_ack;
/*
* sysfs attrs
@@ -528,7 +544,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
void x86_pmu_enable_all(int added);
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
int wmin, int wmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
@@ -633,6 +649,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[];
extern struct event_constraint intel_ivb_pebs_event_constraints[];
+extern struct event_constraint intel_hsw_pebs_event_constraints[];
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_enable(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 7e28d9467bb4..4cbe03287b08 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -648,48 +648,48 @@ static __initconst const struct x86_pmu amd_pmu = {
.cpu_dead = amd_pmu_cpu_dead,
};
-static int setup_event_constraints(void)
+static int __init amd_core_pmu_init(void)
{
- if (boot_cpu_data.x86 == 0x15)
+ if (!cpu_has_perfctr_core)
+ return 0;
+
+ switch (boot_cpu_data.x86) {
+ case 0x15:
+ pr_cont("Fam15h ");
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
- return 0;
-}
+ break;
-static int setup_perfctr_core(void)
-{
- if (!cpu_has_perfctr_core) {
- WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h,
- KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!");
+ default:
+ pr_err("core perfctr but no constraints; unknown hardware!\n");
return -ENODEV;
}
- WARN(x86_pmu.get_event_constraints == amd_get_event_constraints,
- KERN_ERR "hw perf events core counters need constraints handler!");
-
/*
* If core performance counter extensions exists, we must use
* MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
- * x86_pmu_addr_offset().
+ * amd_pmu_addr_offset().
*/
x86_pmu.eventsel = MSR_F15H_PERF_CTL;
x86_pmu.perfctr = MSR_F15H_PERF_CTR;
x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
- printk(KERN_INFO "perf: AMD core performance counters detected\n");
-
+ pr_cont("core perfctr, ");
return 0;
}
__init int amd_pmu_init(void)
{
+ int ret;
+
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
return -ENODEV;
x86_pmu = amd_pmu;
- setup_event_constraints();
- setup_perfctr_core();
+ ret = amd_core_pmu_init();
+ if (ret)
+ return ret;
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
new file mode 100644
index 000000000000..0db655ef3918
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
+
+#include "perf_event.h"
+#include "perf_event_amd_iommu.h"
+
+#define COUNTER_SHIFT 16
+
+#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8))
+#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg))
+
+/* iommu pmu config masks */
+#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL))
+#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL)
+#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL)
+#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL)
+#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL)
+#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
+#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
+
+static struct perf_amd_iommu __perf_iommu;
+
+struct perf_amd_iommu {
+ struct pmu pmu;
+ u8 max_banks;
+ u8 max_counters;
+ u64 cntr_assign_mask;
+ raw_spinlock_t lock;
+ const struct attribute_group *attr_groups[4];
+};
+
+#define format_group attr_groups[0]
+#define cpumask_group attr_groups[1]
+#define events_group attr_groups[2]
+#define null_group attr_groups[3]
+
+/*---------------------------------------------
+ * sysfs format attributes
+ *---------------------------------------------*/
+PMU_FORMAT_ATTR(csource, "config:0-7");
+PMU_FORMAT_ATTR(devid, "config:8-23");
+PMU_FORMAT_ATTR(pasid, "config:24-39");
+PMU_FORMAT_ATTR(domid, "config:40-55");
+PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
+PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
+PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
+
+static struct attribute *iommu_format_attrs[] = {
+ &format_attr_csource.attr,
+ &format_attr_devid.attr,
+ &format_attr_pasid.attr,
+ &format_attr_domid.attr,
+ &format_attr_devid_mask.attr,
+ &format_attr_pasid_mask.attr,
+ &format_attr_domid_mask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_iommu_format_group = {
+ .name = "format",
+ .attrs = iommu_format_attrs,
+};
+
+/*---------------------------------------------
+ * sysfs events attributes
+ *---------------------------------------------*/
+struct amd_iommu_event_desc {
+ struct kobj_attribute attr;
+ const char *event;
+};
+
+static ssize_t _iommu_event_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amd_iommu_event_desc *event =
+ container_of(attr, struct amd_iommu_event_desc, attr);
+ return sprintf(buf, "%s\n", event->event);
+}
+
+#define AMD_IOMMU_EVENT_DESC(_name, _event) \
+{ \
+ .attr = __ATTR(_name, 0444, _iommu_event_show, NULL), \
+ .event = _event, \
+}
+
+static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
+ AMD_IOMMU_EVENT_DESC(mem_pass_untrans, "csource=0x01"),
+ AMD_IOMMU_EVENT_DESC(mem_pass_pretrans, "csource=0x02"),
+ AMD_IOMMU_EVENT_DESC(mem_pass_excl, "csource=0x03"),
+ AMD_IOMMU_EVENT_DESC(mem_target_abort, "csource=0x04"),
+ AMD_IOMMU_EVENT_DESC(mem_trans_total, "csource=0x05"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"),
+ AMD_IOMMU_EVENT_DESC(mem_dte_hit, "csource=0x0a"),
+ AMD_IOMMU_EVENT_DESC(mem_dte_mis, "csource=0x0b"),
+ AMD_IOMMU_EVENT_DESC(page_tbl_read_tot, "csource=0x0c"),
+ AMD_IOMMU_EVENT_DESC(page_tbl_read_nst, "csource=0x0d"),
+ AMD_IOMMU_EVENT_DESC(page_tbl_read_gst, "csource=0x0e"),
+ AMD_IOMMU_EVENT_DESC(int_dte_hit, "csource=0x0f"),
+ AMD_IOMMU_EVENT_DESC(int_dte_mis, "csource=0x10"),
+ AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"),
+ AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"),
+ AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"),
+ { /* end: all zeroes */ },
+};
+
+/*---------------------------------------------
+ * sysfs cpumask attributes
+ *---------------------------------------------*/
+static cpumask_t iommu_cpumask;
+
+static ssize_t _iommu_cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &iommu_cpumask);
+ buf[n++] = '\n';
+ buf[n] = '\0';
+ return n;
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
+
+static struct attribute *iommu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_iommu_cpumask_group = {
+ .attrs = iommu_cpumask_attrs,
+};
+
+/*---------------------------------------------*/
+
+static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
+{
+ unsigned long flags;
+ int shift, bank, cntr, retval;
+ int max_banks = perf_iommu->max_banks;
+ int max_cntrs = perf_iommu->max_counters;
+
+ raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+
+ for (bank = 0, shift = 0; bank < max_banks; bank++) {
+ for (cntr = 0; cntr < max_cntrs; cntr++) {
+ shift = bank + (bank*3) + cntr;
+ if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
+ continue;
+ } else {
+ perf_iommu->cntr_assign_mask |= (1ULL<<shift);
+ retval = ((u16)((u16)bank<<8) | (u8)(cntr));
+ goto out;
+ }
+ }
+ }
+ retval = -ENOSPC;
+out:
+ raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+ return retval;
+}
+
+static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
+ u8 bank, u8 cntr)
+{
+ unsigned long flags;
+ int max_banks, max_cntrs;
+ int shift = 0;
+
+ max_banks = perf_iommu->max_banks;
+ max_cntrs = perf_iommu->max_counters;
+
+ if ((bank > max_banks) || (cntr > max_cntrs))
+ return -EINVAL;
+
+ shift = bank + cntr + (bank*3);
+
+ raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+ perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
+ raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+
+ return 0;
+}
+
+static int perf_iommu_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_amd_iommu *perf_iommu;
+ u64 config, config1;
+
+ /* test the event attr type check for PMU enumeration */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /*
+ * IOMMU counters are shared across all cores.
+ * Therefore, it does not support per-process mode.
+ * Also, it does not support event sampling mode.
+ */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ /* IOMMU counters do not have usr/os/guest/host bits */
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ perf_iommu = &__perf_iommu;
+
+ if (event->pmu != &perf_iommu->pmu)
+ return -ENOENT;
+
+ if (perf_iommu) {
+ config = event->attr.config;
+ config1 = event->attr.config1;
+ } else {
+ return -EINVAL;
+ }
+
+ /* integrate with iommu base devid (0000), assume one iommu */
+ perf_iommu->max_banks =
+ amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
+ perf_iommu->max_counters =
+ amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
+ if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
+ return -EINVAL;
+
+ /* update the hw_perf_event struct with the iommu config data */
+ hwc->config = config;
+ hwc->extra_reg.config = config1;
+
+ return 0;
+}
+
+static void perf_iommu_enable_event(struct perf_event *ev)
+{
+ u8 csource = _GET_CSOURCE(ev);
+ u16 devid = _GET_DEVID(ev);
+ u64 reg = 0ULL;
+
+ reg = csource;
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+
+ reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
+ if (reg)
+ reg |= (1UL << 31);
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_DEVID_MATCH_REG, &reg, true);
+
+ reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
+ if (reg)
+ reg |= (1UL << 31);
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_PASID_MATCH_REG, &reg, true);
+
+ reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
+ if (reg)
+ reg |= (1UL << 31);
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_DOMID_MATCH_REG, &reg, true);
+}
+
+static void perf_iommu_disable_event(struct perf_event *event)
+{
+ u64 reg = 0ULL;
+
+ amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+ _GET_BANK(event), _GET_CNTR(event),
+ IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+}
+
+static void perf_iommu_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ pr_debug("perf: amd_iommu:perf_iommu_start\n");
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ if (flags & PERF_EF_RELOAD) {
+ u64 prev_raw_count = local64_read(&hwc->prev_count);
+ amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+ _GET_BANK(event), _GET_CNTR(event),
+ IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
+ }
+
+ perf_iommu_enable_event(event);
+ perf_event_update_userpage(event);
+
+}
+
+static void perf_iommu_read(struct perf_event *event)
+{
+ u64 count = 0ULL;
+ u64 prev_raw_count = 0ULL;
+ u64 delta = 0ULL;
+ struct hw_perf_event *hwc = &event->hw;
+ pr_debug("perf: amd_iommu:perf_iommu_read\n");
+
+ amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+ _GET_BANK(event), _GET_CNTR(event),
+ IOMMU_PC_COUNTER_REG, &count, false);
+
+ /* IOMMU pc counter register is only 48 bits */
+ count &= 0xFFFFFFFFFFFFULL;
+
+ prev_raw_count = local64_read(&hwc->prev_count);
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ count) != prev_raw_count)
+ return;
+
+ /* Handling 48-bit counter overflowing */
+ delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
+ delta >>= COUNTER_SHIFT;
+ local64_add(delta, &event->count);
+
+}
+
+static void perf_iommu_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 config;
+
+ pr_debug("perf: amd_iommu:perf_iommu_stop\n");
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ perf_iommu_disable_event(event);
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ config = hwc->config;
+ perf_iommu_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int perf_iommu_add(struct perf_event *event, int flags)
+{
+ int retval;
+ struct perf_amd_iommu *perf_iommu =
+ container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+ pr_debug("perf: amd_iommu:perf_iommu_add\n");
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ /* request an iommu bank/counter */
+ retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
+ if (retval != -ENOSPC)
+ event->hw.extra_reg.reg = (u16)retval;
+ else
+ return retval;
+
+ if (flags & PERF_EF_START)
+ perf_iommu_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+static void perf_iommu_del(struct perf_event *event, int flags)
+{
+ struct perf_amd_iommu *perf_iommu =
+ container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+ pr_debug("perf: amd_iommu:perf_iommu_del\n");
+ perf_iommu_stop(event, PERF_EF_UPDATE);
+
+ /* clear the assigned iommu bank/counter */
+ clear_avail_iommu_bnk_cntr(perf_iommu,
+ _GET_BANK(event),
+ _GET_CNTR(event));
+
+ perf_event_update_userpage(event);
+}
+
+static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
+{
+ struct attribute **attrs;
+ struct attribute_group *attr_group;
+ int i = 0, j;
+
+ while (amd_iommu_v2_event_descs[i].attr.attr.name)
+ i++;
+
+ attr_group = kzalloc(sizeof(struct attribute *)
+ * (i + 1) + sizeof(*attr_group), GFP_KERNEL);
+ if (!attr_group)
+ return -ENOMEM;
+
+ attrs = (struct attribute **)(attr_group + 1);
+ for (j = 0; j < i; j++)
+ attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
+
+ attr_group->name = "events";
+ attr_group->attrs = attrs;
+ perf_iommu->events_group = attr_group;
+
+ return 0;
+}
+
+static __init void amd_iommu_pc_exit(void)
+{
+ if (__perf_iommu.events_group != NULL) {
+ kfree(__perf_iommu.events_group);
+ __perf_iommu.events_group = NULL;
+ }
+}
+
+static __init int _init_perf_amd_iommu(
+ struct perf_amd_iommu *perf_iommu, char *name)
+{
+ int ret;
+
+ raw_spin_lock_init(&perf_iommu->lock);
+
+ /* Init format attributes */
+ perf_iommu->format_group = &amd_iommu_format_group;
+
+ /* Init cpumask attributes to only core 0 */
+ cpumask_set_cpu(0, &iommu_cpumask);
+ perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
+
+ /* Init events attributes */
+ if (_init_events_attrs(perf_iommu) != 0)
+ pr_err("perf: amd_iommu: Only support raw events.\n");
+
+ /* Init null attributes */
+ perf_iommu->null_group = NULL;
+ perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
+
+ ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
+ if (ret) {
+ pr_err("perf: amd_iommu: Failed to initialized.\n");
+ amd_iommu_pc_exit();
+ } else {
+ pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
+ amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
+ amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
+ }
+
+ return ret;
+}
+
+static struct perf_amd_iommu __perf_iommu = {
+ .pmu = {
+ .event_init = perf_iommu_event_init,
+ .add = perf_iommu_add,
+ .del = perf_iommu_del,
+ .start = perf_iommu_start,
+ .stop = perf_iommu_stop,
+ .read = perf_iommu_read,
+ },
+ .max_banks = 0x00,
+ .max_counters = 0x00,
+ .cntr_assign_mask = 0ULL,
+ .format_group = NULL,
+ .cpumask_group = NULL,
+ .events_group = NULL,
+ .null_group = NULL,
+};
+
+static __init int amd_iommu_pc_init(void)
+{
+ /* Make sure the IOMMU PC resource is available */
+ if (!amd_iommu_pc_supported()) {
+ pr_err("perf: amd_iommu PMU not installed. No support!\n");
+ return -ENODEV;
+ }
+
+ _init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
+
+ return 0;
+}
+
+device_initcall(amd_iommu_pc_init);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
new file mode 100644
index 000000000000..845d173278e3
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _PERF_EVENT_AMD_IOMMU_H_
+#define _PERF_EVENT_AMD_IOMMU_H_
+
+/* iommu pc mmio region register indexes */
+#define IOMMU_PC_COUNTER_REG 0x00
+#define IOMMU_PC_COUNTER_SRC_REG 0x08
+#define IOMMU_PC_PASID_MATCH_REG 0x10
+#define IOMMU_PC_DOMID_MATCH_REG 0x18
+#define IOMMU_PC_DEVID_MATCH_REG 0x20
+#define IOMMU_PC_COUNTER_REPORT_REG 0x28
+
+/* maximun specified bank/counters */
+#define PC_MAX_SPEC_BNKS 64
+#define PC_MAX_SPEC_CNTRS 16
+
+/* iommu pc reg masks*/
+#define IOMMU_BASE_DEVID 0x0000
+
+/* amd_iommu_init.c external support functions */
+extern bool amd_iommu_pc_supported(void);
+
+extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+
+extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+
+extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
+ u8 fxn, u64 *value, bool is_write);
+
+#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index f60d41ff9a97..fbc9210b45bc 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/export.h>
+#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/apic.h>
@@ -165,13 +166,13 @@ static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
- INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
@@ -190,6 +191,22 @@ struct attribute *snb_events_attrs[] = {
NULL,
};
+static struct event_constraint intel_hsw_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
+ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+ INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
+ /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+ INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
+ /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+ INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
+ EVENT_CONSTRAINT_END
+};
+
static u64 intel_pmu_event_map(int hw_event)
{
return intel_perfmon_event_map[hw_event];
@@ -872,7 +889,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
return true;
/* implicit branch sampling to correct PEBS skid */
- if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+ if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
+ x86_pmu.intel_cap.pebs_format < 2)
return true;
return false;
@@ -1167,15 +1185,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
cpuc = &__get_cpu_var(cpu_hw_events);
/*
- * Some chipsets need to unmask the LVTPC in a particular spot
- * inside the nmi handler. As a result, the unmasking was pushed
- * into all the nmi handlers.
- *
- * This handler doesn't seem to have any issues with the unmasking
- * so it was left at the top.
+ * No known reason to not always do late ACK,
+ * but just in case do it opt-in.
*/
- apic_write(APIC_LVTPC, APIC_DM_NMI);
-
+ if (!x86_pmu.late_ack)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
intel_pmu_disable_all();
handled = intel_pmu_drain_bts_buffer();
status = intel_pmu_get_status();
@@ -1188,8 +1202,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
again:
intel_pmu_ack_status(status);
if (++loops > 100) {
- WARN_ONCE(1, "perfevents: irq loop stuck!\n");
- perf_event_print_debug();
+ static bool warned = false;
+ if (!warned) {
+ WARN(1, "perfevents: irq loop stuck!\n");
+ perf_event_print_debug();
+ warned = true;
+ }
intel_pmu_reset();
goto done;
}
@@ -1235,6 +1253,13 @@ again:
done:
intel_pmu_enable_all(0);
+ /*
+ * Only unmask the NMI after the overflow counters
+ * have been reset. This avoids spurious NMIs on
+ * Haswell CPUs.
+ */
+ if (x86_pmu.late_ack)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
return handled;
}
@@ -1425,7 +1450,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) {
if ((event->hw.config & c->cmask) == c->code) {
- /* hw.flags zeroed at initialization */
event->hw.flags |= c->flags;
return c;
}
@@ -1473,7 +1497,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
- event->hw.flags = 0;
intel_put_shared_regs_event_constraints(cpuc, event);
}
@@ -1646,6 +1669,47 @@ static void core_pmu_enable_all(int added)
}
}
+static int hsw_hw_config(struct perf_event *event)
+{
+ int ret = intel_pmu_hw_config(event);
+
+ if (ret)
+ return ret;
+ if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
+ return 0;
+ event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
+
+ /*
+ * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
+ * PEBS or in ANY thread mode. Since the results are non-sensical forbid
+ * this combination.
+ */
+ if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
+ ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
+ event->attr.precise_ip > 0))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static struct event_constraint counter2_constraint =
+ EVENT_CONSTRAINT(0, 0x4, 0);
+
+static struct event_constraint *
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ struct event_constraint *c = intel_get_event_constraints(cpuc, event);
+
+ /* Handle special quirk on in_tx_checkpointed only in counter 2 */
+ if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
+ if (c->idxmsk64 & (1U << 2))
+ return &counter2_constraint;
+ return &emptyconstraint;
+ }
+
+ return c;
+}
+
PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -1653,6 +1717,8 @@ PMU_FORMAT_ATTR(pc, "config:19" );
PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
PMU_FORMAT_ATTR(inv, "config:23" );
PMU_FORMAT_ATTR(cmask, "config:24-31" );
+PMU_FORMAT_ATTR(in_tx, "config:32");
+PMU_FORMAT_ATTR(in_tx_cp, "config:33");
static struct attribute *intel_arch_formats_attr[] = {
&format_attr_event.attr,
@@ -1807,6 +1873,8 @@ static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_any.attr,
&format_attr_inv.attr,
&format_attr_cmask.attr,
+ &format_attr_in_tx.attr,
+ &format_attr_in_tx_cp.attr,
&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
&format_attr_ldlat.attr, /* PEBS load latency */
@@ -1966,6 +2034,15 @@ static __init void intel_nehalem_quirk(void)
}
}
+EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")
+
+static struct attribute *hsw_events_attrs[] = {
+ EVENT_PTR(mem_ld_hsw),
+ EVENT_PTR(mem_st_hsw),
+ NULL
+};
+
__init int intel_pmu_init(void)
{
union cpuid10_edx edx;
@@ -2189,6 +2266,30 @@ __init int intel_pmu_init(void)
break;
+ case 60: /* Haswell Client */
+ case 70:
+ case 71:
+ case 63:
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_snb();
+
+ x86_pmu.event_constraints = intel_hsw_event_constraints;
+ x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_snb_extra_regs;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.er_flags |= ERF_HAS_RSP_1;
+ x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+ x86_pmu.hw_config = hsw_hw_config;
+ x86_pmu.get_event_constraints = hsw_get_event_constraints;
+ x86_pmu.cpu_events = hsw_events_attrs;
+ pr_cont("Haswell events, ");
+ break;
+
default:
switch (x86_pmu.version) {
case 1:
@@ -2227,7 +2328,7 @@ __init int intel_pmu_init(void)
* counter, so do not extend mask to generic counters
*/
for_each_event_constraint(c, x86_pmu.event_constraints) {
- if (c->cmask != X86_RAW_EVENT_MASK
+ if (c->cmask != FIXED_EVENT_FLAGS
|| c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
continue;
}
@@ -2237,5 +2338,12 @@ __init int intel_pmu_init(void)
}
}
+ /* Support full width counters using alternative MSR range */
+ if (x86_pmu.intel_cap.full_width_write) {
+ x86_pmu.max_period = x86_pmu.cntval_mask;
+ x86_pmu.perfctr = MSR_IA32_PMC0;
+ pr_cont("full-width counters, ");
+ }
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 60250f687052..3065c57a63c1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status)
return val;
}
+static u64 precise_store_data_hsw(u64 status)
+{
+ union perf_mem_data_src dse;
+
+ dse.val = 0;
+ dse.mem_op = PERF_MEM_OP_STORE;
+ dse.mem_lvl = PERF_MEM_LVL_NA;
+ if (status & 1)
+ dse.mem_lvl = PERF_MEM_LVL_L1;
+ /* Nothing else supported. Sorry. */
+ return dse.val;
+}
+
static u64 load_latency_data(u64 status)
{
union intel_x86_pebs_dse dse;
@@ -165,6 +178,22 @@ struct pebs_record_nhm {
u64 status, dla, dse, lat;
};
+/*
+ * Same as pebs_record_nhm, with two additional fields.
+ */
+struct pebs_record_hsw {
+ struct pebs_record_nhm nhm;
+ /*
+ * Real IP of the event. In the Intel documentation this
+ * is called eventingrip.
+ */
+ u64 real_ip;
+ /*
+ * TSX tuning information field: abort cycles and abort flags.
+ */
+ u64 tsx_tuning;
+};
+
void init_debug_store_on_cpu(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -548,6 +577,42 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_hsw_pebs_event_constraints[] = {
+ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+ INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+ INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
+ INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
+ INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
+ INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
+ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */
+ /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+ INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
+ /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+ INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+ INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+ /* MEM_UOPS_RETIRED.SPLIT_STORES */
+ INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+ INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+ INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
+ INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
+ INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
+ /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
+ INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
+ /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
+ INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
+ /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
+ INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
+ /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
+ INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
+ INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
+
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *c;
@@ -588,6 +653,12 @@ void intel_pmu_pebs_disable(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+
+ if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
+ cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
+ else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
+ cpuc->pebs_enabled &= ~(1ULL << 63);
+
if (cpuc->enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -697,6 +768,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
*/
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct pebs_record_nhm *pebs = __pebs;
+ struct pebs_record_hsw *pebs_hsw = __pebs;
struct perf_sample_data data;
struct pt_regs regs;
u64 sample_type;
@@ -706,7 +778,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
return;
fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
- fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
+ fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
+ PERF_X86_EVENT_PEBS_ST_HSW);
perf_sample_data_init(&data, 0, event->hw.last_period);
@@ -717,9 +790,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* if PEBS-LL or PreciseStore
*/
if (fll || fst) {
- if (sample_type & PERF_SAMPLE_ADDR)
- data.addr = pebs->dla;
-
/*
* Use latency for weight (only avail with PEBS-LL)
*/
@@ -732,6 +802,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
if (sample_type & PERF_SAMPLE_DATA_SRC) {
if (fll)
data.data_src.val = load_latency_data(pebs->dse);
+ else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+ data.data_src.val =
+ precise_store_data_hsw(pebs->dse);
else
data.data_src.val = precise_store_data(pebs->dse);
}
@@ -753,11 +826,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.bp = pebs->bp;
regs.sp = pebs->sp;
- if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
+ if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
+ regs.ip = pebs_hsw->real_ip;
+ regs.flags |= PERF_EFLAGS_EXACT;
+ } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
regs.flags |= PERF_EFLAGS_EXACT;
else
regs.flags &= ~PERF_EFLAGS_EXACT;
+ if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
+ x86_pmu.intel_cap.pebs_format >= 1)
+ data.addr = pebs->dla;
+
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
@@ -806,35 +886,22 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
__intel_pmu_pebs_event(event, iregs, at);
}
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
+ void *top)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct debug_store *ds = cpuc->ds;
- struct pebs_record_nhm *at, *top;
struct perf_event *event = NULL;
u64 status = 0;
- int bit, n;
-
- if (!x86_pmu.pebs_active)
- return;
-
- at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
- top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+ int bit;
ds->pebs_index = ds->pebs_buffer_base;
- n = top - at;
- if (n <= 0)
- return;
-
- /*
- * Should not happen, we program the threshold at 1 and do not
- * set a reset value.
- */
- WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
+ for (; at < top; at += x86_pmu.pebs_record_size) {
+ struct pebs_record_nhm *p = at;
- for ( ; at < top; at++) {
- for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
+ for_each_set_bit(bit, (unsigned long *)&p->status,
+ x86_pmu.max_pebs_events) {
event = cpuc->events[bit];
if (!test_bit(bit, cpuc->active_mask))
continue;
@@ -857,6 +924,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
}
}
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct debug_store *ds = cpuc->ds;
+ struct pebs_record_nhm *at, *top;
+ int n;
+
+ if (!x86_pmu.pebs_active)
+ return;
+
+ at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+ top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
+ ds->pebs_index = ds->pebs_buffer_base;
+
+ n = top - at;
+ if (n <= 0)
+ return;
+
+ /*
+ * Should not happen, we program the threshold at 1 and do not
+ * set a reset value.
+ */
+ WARN_ONCE(n > x86_pmu.max_pebs_events,
+ "Unexpected number of pebs records %d\n", n);
+
+ return __intel_pmu_drain_pebs_nhm(iregs, at, top);
+}
+
+static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct debug_store *ds = cpuc->ds;
+ struct pebs_record_hsw *at, *top;
+ int n;
+
+ if (!x86_pmu.pebs_active)
+ return;
+
+ at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
+ top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
+
+ n = top - at;
+ if (n <= 0)
+ return;
+ /*
+ * Should not happen, we program the threshold at 1 and do not
+ * set a reset value.
+ */
+ WARN_ONCE(n > x86_pmu.max_pebs_events,
+ "Unexpected number of pebs records %d\n", n);
+
+ return __intel_pmu_drain_pebs_nhm(iregs, at, top);
+}
+
/*
* BTS, PEBS probe and setup
*/
@@ -888,6 +1010,12 @@ void intel_ds_init(void)
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
break;
+ case 2:
+ pr_cont("PEBS fmt2%c, ", pebs_type);
+ x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
+ x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+ break;
+
default:
printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
x86_pmu.pebs = 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d978353c939b..d5be06a5005e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -12,6 +12,16 @@ enum {
LBR_FORMAT_LIP = 0x01,
LBR_FORMAT_EIP = 0x02,
LBR_FORMAT_EIP_FLAGS = 0x03,
+ LBR_FORMAT_EIP_FLAGS2 = 0x04,
+ LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_EIP_FLAGS2,
+};
+
+static enum {
+ LBR_EIP_FLAGS = 1,
+ LBR_TSX = 2,
+} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
+ [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
+ [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
};
/*
@@ -56,6 +66,8 @@ enum {
LBR_FAR)
#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
+#define LBR_FROM_FLAG_IN_TX (1ULL << 62)
+#define LBR_FROM_FLAG_ABORT (1ULL << 61)
#define for_each_branch_sample_type(x) \
for ((x) = PERF_SAMPLE_BRANCH_USER; \
@@ -81,9 +93,13 @@ enum {
X86_BR_JMP = 1 << 9, /* jump */
X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+ X86_BR_ABORT = 1 << 12,/* transaction abort */
+ X86_BR_IN_TX = 1 << 13,/* in transaction */
+ X86_BR_NO_TX = 1 << 14,/* not in transaction */
};
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
#define X86_BR_ANY \
(X86_BR_CALL |\
@@ -95,6 +111,7 @@ enum {
X86_BR_JCC |\
X86_BR_JMP |\
X86_BR_IRQ |\
+ X86_BR_ABORT |\
X86_BR_IND_CALL)
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -270,21 +287,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
unsigned long lbr_idx = (tos - i) & mask;
- u64 from, to, mis = 0, pred = 0;
+ u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
+ int skip = 0;
+ int lbr_flags = lbr_desc[lbr_format];
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
- if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
+ if (lbr_flags & LBR_EIP_FLAGS) {
mis = !!(from & LBR_FROM_FLAG_MISPRED);
pred = !mis;
- from = (u64)((((s64)from) << 1) >> 1);
+ skip = 1;
+ }
+ if (lbr_flags & LBR_TSX) {
+ in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+ abort = !!(from & LBR_FROM_FLAG_ABORT);
+ skip = 3;
}
+ from = (u64)((((s64)from) << skip) >> skip);
cpuc->lbr_entries[i].from = from;
cpuc->lbr_entries[i].to = to;
cpuc->lbr_entries[i].mispred = mis;
cpuc->lbr_entries[i].predicted = pred;
+ cpuc->lbr_entries[i].in_tx = in_tx;
+ cpuc->lbr_entries[i].abort = abort;
cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
@@ -310,7 +337,7 @@ void intel_pmu_lbr_read(void)
* - in case there is no HW filter
* - in case the HW filter has errata or limitations
*/
-static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
u64 br_type = event->attr.branch_sample_type;
int mask = 0;
@@ -318,11 +345,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
if (br_type & PERF_SAMPLE_BRANCH_USER)
mask |= X86_BR_USER;
- if (br_type & PERF_SAMPLE_BRANCH_KERNEL) {
- if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
- return -EACCES;
+ if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
mask |= X86_BR_KERNEL;
- }
/* we ignore BRANCH_HV here */
@@ -337,13 +361,21 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
mask |= X86_BR_IND_CALL;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
+ mask |= X86_BR_ABORT;
+
+ if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
+ mask |= X86_BR_IN_TX;
+
+ if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
+ mask |= X86_BR_NO_TX;
+
/*
* stash actual user request into reg, it may
* be used by fixup code for some CPU
*/
event->hw.branch_reg.reg = mask;
-
- return 0;
}
/*
@@ -391,9 +423,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
/*
* setup SW LBR filter
*/
- ret = intel_pmu_setup_sw_lbr_filter(event);
- if (ret)
- return ret;
+ intel_pmu_setup_sw_lbr_filter(event);
/*
* setup HW LBR filter, if any
@@ -415,7 +445,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
* decoded (e.g., text page not present), then X86_BR_NONE is
* returned.
*/
-static int branch_type(unsigned long from, unsigned long to)
+static int branch_type(unsigned long from, unsigned long to, int abort)
{
struct insn insn;
void *addr;
@@ -435,6 +465,9 @@ static int branch_type(unsigned long from, unsigned long to)
if (from == 0 || to == 0)
return X86_BR_NONE;
+ if (abort)
+ return X86_BR_ABORT | to_plm;
+
if (from_plm == X86_BR_USER) {
/*
* can happen if measuring at the user level only
@@ -581,7 +614,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
from = cpuc->lbr_entries[i].from;
to = cpuc->lbr_entries[i].to;
- type = branch_type(from, to);
+ type = branch_type(from, to, cpuc->lbr_entries[i].abort);
+ if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
+ if (cpuc->lbr_entries[i].in_tx)
+ type |= X86_BR_IN_TX;
+ else
+ type |= X86_BR_NO_TX;
+ }
/* if type does not correspond, then discard */
if (type == X86_BR_NONE || (br_sel & type) != type) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 52441a2af538..9dd99751ccf9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -536,7 +536,7 @@ __snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *eve
if (!uncore_box_is_fake(box))
reg1->alloc |= alloc;
- return 0;
+ return NULL;
fail:
for (; i >= 0; i--) {
if (alloc & (0x1 << i))
@@ -644,7 +644,7 @@ snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
(!uncore_box_is_fake(box) && reg1->alloc))
return NULL;
again:
- mask = 0xff << (idx * 8);
+ mask = 0xffULL << (idx * 8);
raw_spin_lock_irqsave(&er->lock, flags);
if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
!((config1 ^ er->config) & mask)) {
@@ -1923,7 +1923,7 @@ static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modif
{
struct hw_perf_event *hwc = &event->hw;
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
+ u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
u64 config = reg1->config;
/* get the non-shared control bits and shift them */
@@ -2723,15 +2723,16 @@ static void uncore_put_event_constraint(struct intel_uncore_box *box, struct per
static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
- struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
+ struct event_constraint *c;
int i, wmin, wmax, ret = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+ hwc = &box->event_list[i]->hw;
c = uncore_get_event_constraint(box, box->event_list[i]);
- constraints[i] = c;
+ hwc->constraint = c;
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
}
@@ -2739,7 +2740,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
/* fastpath, try to reuse previous register */
for (i = 0; i < n; i++) {
hwc = &box->event_list[i]->hw;
- c = constraints[i];
+ c = hwc->constraint;
/* never assigned */
if (hwc->idx == -1)
@@ -2759,7 +2760,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
}
/* slow path */
if (i != n)
- ret = perf_assign_events(constraints, n, wmin, wmax, assign);
+ ret = perf_assign_events(box->event_list, n,
+ wmin, wmax, assign);
if (!assign || ret) {
for (i = 0; i < n; i++)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index f9528917f6e8..47b3d00c9d89 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -337,10 +337,10 @@
NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23))
-#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n)))
+#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24))
-#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (12 + 3 * (n)))
+#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
/*
* use the 9~13 bits to select event If the 7th bit is not set,
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 08f7e8039099..321d65ebaffe 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -115,8 +115,10 @@ startup_64:
movq %rdi, %rax
shrq $PUD_SHIFT, %rax
andl $(PTRS_PER_PUD-1), %eax
- movq %rdx, (4096+0)(%rbx,%rax,8)
- movq %rdx, (4096+8)(%rbx,%rax,8)
+ movq %rdx, 4096(%rbx,%rax,8)
+ incl %eax
+ andl $(PTRS_PER_PUD-1), %eax
+ movq %rdx, 4096(%rbx,%rax,8)
addq $8192, %rbx
movq %rdi, %rax
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 245a71db401a..cb339097b9ea 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -22,23 +22,19 @@
/*
* Were we in an interrupt that interrupted kernel mode?
*
- * For now, with eagerfpu we will return interrupted kernel FPU
- * state as not-idle. TBD: Ideally we can change the return value
- * to something like __thread_has_fpu(current). But we need to
- * be careful of doing __thread_clear_has_fpu() before saving
- * the FPU etc for supporting nested uses etc. For now, take
- * the simple route!
- *
* On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
* pair does nothing at all: the thread must not have fpu (so
* that we don't try to save the FPU state), and TS must
* be set (so that the clts/stts pair does nothing that is
* visible in the interrupted kernel thread).
+ *
+ * Except for the eagerfpu case when we return 1 unless we've already
+ * been eager and saved the state in kernel_fpu_begin().
*/
static inline bool interrupted_kernel_fpu_idle(void)
{
if (use_eager_fpu())
- return 0;
+ return __thread_has_fpu(current);
return !__thread_has_fpu(current) &&
(read_cr0() & X86_CR0_TS);
@@ -78,8 +74,8 @@ void __kernel_fpu_begin(void)
struct task_struct *me = current;
if (__thread_has_fpu(me)) {
- __save_init_fpu(me);
__thread_clear_has_fpu(me);
+ __save_init_fpu(me);
/* We do 'stts()' in __kernel_fpu_end() */
} else if (!use_eager_fpu()) {
this_cpu_write(fpu_owner_task, NULL);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 9895a9a41380..211bce445522 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -365,10 +365,14 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src)
return insn.length;
}
-static void __kprobes arch_copy_kprobe(struct kprobe *p)
+static int __kprobes arch_copy_kprobe(struct kprobe *p)
{
+ int ret;
+
/* Copy an instruction with recovering if other optprobe modifies it.*/
- __copy_instruction(p->ainsn.insn, p->addr);
+ ret = __copy_instruction(p->ainsn.insn, p->addr);
+ if (!ret)
+ return -EINVAL;
/*
* __copy_instruction can modify the displacement of the instruction,
@@ -384,6 +388,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
/* Also, displacement change doesn't affect the first byte */
p->opcode = p->ainsn.insn[0];
+
+ return 0;
}
int __kprobes arch_prepare_kprobe(struct kprobe *p)
@@ -397,8 +403,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
return -ENOMEM;
- arch_copy_kprobe(p);
- return 0;
+
+ return arch_copy_kprobe(p);
}
void __kprobes arch_arm_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d2c381280e3c..3dd37ebd591b 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -242,6 +242,7 @@ void __init kvmclock_init(void)
if (!mem)
return;
hv_clock = __va(mem);
+ memset(hv_clock, 0, size);
if (kvm_register_clock("boot clock")) {
hv_clock = NULL;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 60308053fdb2..0920212e6159 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -14,6 +14,7 @@
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/nmi.h>
+#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/slab.h>
@@ -29,6 +30,9 @@
#include <asm/nmi.h>
#include <asm/x86_init.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/nmi.h>
+
struct nmi_desc {
spinlock_t lock;
struct list_head head;
@@ -82,6 +86,15 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
#define nmi_to_desc(type) (&nmi_desc[type])
+static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;
+static int __init nmi_warning_debugfs(void)
+{
+ debugfs_create_u64("nmi_longest_ns", 0644,
+ arch_debugfs_dir, &nmi_longest_ns);
+ return 0;
+}
+fs_initcall(nmi_warning_debugfs);
+
static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
{
struct nmi_desc *desc = nmi_to_desc(type);
@@ -96,8 +109,27 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
* can be latched at any given time. Walk the whole list
* to handle those situations.
*/
- list_for_each_entry_rcu(a, &desc->head, list)
- handled += a->handler(type, regs);
+ list_for_each_entry_rcu(a, &desc->head, list) {
+ u64 before, delta, whole_msecs;
+ int decimal_msecs, thishandled;
+
+ before = local_clock();
+ thishandled = a->handler(type, regs);
+ handled += thishandled;
+ delta = local_clock() - before;
+ trace_nmi_handler(a->handler, (int)delta, thishandled);
+
+ if (delta < nmi_longest_ns)
+ continue;
+
+ nmi_longest_ns = delta;
+ whole_msecs = do_div(delta, (1000 * 1000));
+ decimal_msecs = do_div(delta, 1000) % 1000;
+ printk_ratelimited(KERN_INFO
+ "INFO: NMI handler (%ps) took too long to run: "
+ "%lld.%03d msecs\n", a->handler, whole_msecs,
+ decimal_msecs);
+ }
rcu_read_unlock();
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4e7a37ff03ab..81a5f5e8f142 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -277,18 +277,6 @@ void exit_idle(void)
}
#endif
-void arch_cpu_idle_prepare(void)
-{
- /*
- * If we're the non-boot CPU, nothing set the stack canary up
- * for us. CPU0 already has it initialized but no harm in
- * doing it again. This is a good place for updating it, as
- * we wont ever return from this function (so the invalid
- * canaries already on the stack wont ever trigger).
- */
- boot_init_stack_canary();
-}
-
void arch_cpu_idle_enter(void)
{
local_touch_nmi();
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 7a6f3b3be3cf..f2bb9c96720a 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -160,7 +160,7 @@ identity_mapped:
xorq %rbp, %rbp
xorq %r8, %r8
xorq %r9, %r9
- xorq %r10, %r9
+ xorq %r10, %r10
xorq %r11, %r11
xorq %r12, %r12
xorq %r13, %r13
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 69562992e457..cf913587d4dd 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -43,12 +43,6 @@
#include <asm/sigframe.h>
-#ifdef CONFIG_X86_32
-# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF)
-#else
-# define FIX_EFLAGS __FIX_EFLAGS
-#endif
-
#define COPY(x) do { \
get_user_ex(regs->x, &sc->x); \
} while (0)
@@ -668,15 +662,17 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
if (!failed) {
/*
* Clear the direction flag as per the ABI for function entry.
- */
- regs->flags &= ~X86_EFLAGS_DF;
- /*
+ *
+ * Clear RF when entering the signal handler, because
+ * it might disable possible debug exception from the
+ * signal handler.
+ *
* Clear TF when entering the signal handler, but
* notify any tracer that was single-stepping it.
* The tracer may want to single-step inside the
* handler too.
*/
- regs->flags &= ~X86_EFLAGS_TF;
+ regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF);
}
signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9c73b51817e4..bfd348e99369 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -372,15 +372,15 @@ static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
void __cpuinit set_cpu_sibling_map(int cpu)
{
- bool has_mc = boot_cpu_data.x86_max_cores > 1;
bool has_smt = smp_num_siblings > 1;
+ bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cpuinfo_x86 *o;
int i;
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
- if (!has_smt && !has_mc) {
+ if (!has_mp) {
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
cpumask_set_cpu(cpu, cpu_core_mask(cpu));
@@ -394,7 +394,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
if ((i == cpu) || (has_smt && match_smt(c, o)))
link_mask(sibling, cpu, i);
- if ((i == cpu) || (has_mc && match_llc(c, o)))
+ if ((i == cpu) || (has_mp && match_llc(c, o)))
link_mask(llc_shared, cpu, i);
}
@@ -406,7 +406,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
o = &cpu_data(i);
- if ((i == cpu) || (has_mc && match_mc(c, o))) {
+ if ((i == cpu) || (has_mp && match_mc(c, o))) {
link_mask(core, cpu, i);
/*
OpenPOWER on IntegriCloud