summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorHerbert Xu <herbert@gondor.apana.org.au>2015-06-19 22:07:07 +0800
committerHerbert Xu <herbert@gondor.apana.org.au>2015-06-19 22:07:07 +0800
commitc0b59fafe31bf91f589736be304d739b13952fdd (patch)
tree0088a41c6b68132739294643be06734e3af67677 /arch/x86/kernel
parent28bceeaaf81140d69647acd0eb7dc9312f27844a (diff)
parentbfa1ce5f38938cc9e6c7f2d1011f88eba2b9e2b2 (diff)
downloadblackbird-obmc-linux-c0b59fafe31bf91f589736be304d739b13952fdd.tar.gz
blackbird-obmc-linux-c0b59fafe31bf91f589736be304d739b13952fdd.zip
Merge branch 'mvebu/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
Merge the mvebu/drivers branch of the arm-soc tree which contains just a single patch bfa1ce5f38938cc9e6c7f2d1011f88eba2b9e2b2 ("bus: mvebu-mbus: add mv_mbus_dram_info_nooverlap()") that happens to be a prerequisite of the new marvell/cesa crypto driver.
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/boot.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c4
-rw-r--r--arch/x86/kernel/cpu/amd.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event.h18
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c8
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_pt.c33
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c94
-rw-r--r--arch/x86/kernel/e820.c26
-rw-r--r--arch/x86/kernel/early_printk.c6
-rw-r--r--arch/x86/kernel/entry_64.S9
-rw-r--r--arch/x86/kernel/i387.c8
-rw-r--r--arch/x86/kernel/pmem.c53
-rw-r--r--arch/x86/kernel/process_64.c28
-rw-r--r--arch/x86/kernel/signal.c22
16 files changed, 240 insertions, 77 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index c887cd944f0c..9bcd0b56ca17 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -95,6 +95,7 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
+obj-$(CONFIG_X86_PMEM_LEGACY) += pmem.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 803b684676ff..dbe76a14c3c9 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -757,7 +757,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
}
/* wrapper to silence section mismatch warning */
-int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu)
+int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
{
return _acpi_map_lsapic(handle, physid, pcpu);
}
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index d9d0bd2faaf4..ab3219b3fbda 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -171,8 +171,8 @@ update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
for_each_online_cpu(cpu) {
if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
continue;
- __cpu_clear(this_cpu, per_cpu(cpus_in_cluster, cpu));
- __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu));
+ cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+ cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
}
free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
free_cpumask_var(per_cpu(ipi_mask, this_cpu));
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index fd470ebf924e..e4cf63301ff4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -720,6 +720,9 @@ static void init_amd(struct cpuinfo_x86 *c)
if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
+
+ /* AMD CPUs don't reset SS attributes on SYSRET */
+ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 329f0356ad4a..6ac5cb7a9e14 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -65,15 +65,15 @@ struct event_constraint {
/*
* struct hw_perf_event.flags flags
*/
-#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
-#define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL 0x40 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC 0x80 /* dynamic alloc'd constraint */
-#define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */
+#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
+#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */
+#define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
+#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
struct amd_nb {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9da2400c2ec3..219d3fb423a1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -3275,7 +3275,7 @@ __init int intel_pmu_init(void)
hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
- intel_pmu_lbr_init_snb();
+ intel_pmu_lbr_init_hsw();
x86_pmu.event_constraints = intel_bdw_event_constraints;
x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index ca69ea56c712..813f75d71175 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -558,6 +558,8 @@ struct event_constraint intel_core2_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
EVENT_CONSTRAINT_END
};
@@ -565,6 +567,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
EVENT_CONSTRAINT_END
};
@@ -588,6 +592,8 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
EVENT_CONSTRAINT_END
};
@@ -603,6 +609,8 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
EVENT_CONSTRAINT_END
};
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index f2770641c0fd..ffe666c2c6b5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -988,39 +988,36 @@ static int pt_event_add(struct perf_event *event, int mode)
int ret = -EBUSY;
if (pt->handle.event)
- goto out;
+ goto fail;
buf = perf_aux_output_begin(&pt->handle, event);
- if (!buf) {
- ret = -EINVAL;
- goto out;
- }
+ ret = -EINVAL;
+ if (!buf)
+ goto fail_stop;
pt_buffer_reset_offsets(buf, pt->handle.head);
if (!buf->snapshot) {
ret = pt_buffer_reset_markers(buf, &pt->handle);
- if (ret) {
- perf_aux_output_end(&pt->handle, 0, true);
- goto out;
- }
+ if (ret)
+ goto fail_end_stop;
}
if (mode & PERF_EF_START) {
pt_event_start(event, 0);
- if (hwc->state == PERF_HES_STOPPED) {
- pt_event_del(event, 0);
- ret = -EBUSY;
- }
+ ret = -EBUSY;
+ if (hwc->state == PERF_HES_STOPPED)
+ goto fail_end_stop;
} else {
hwc->state = PERF_HES_STOPPED;
}
- ret = 0;
-out:
-
- if (ret)
- hwc->state = PERF_HES_STOPPED;
+ return 0;
+fail_end_stop:
+ perf_aux_output_end(&pt->handle, 0, true);
+fail_stop:
+ hwc->state = PERF_HES_STOPPED;
+fail:
return ret;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index c4bb8b8e5017..999289b94025 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -62,6 +62,14 @@
#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
+#define NR_RAPL_DOMAINS 0x4
+static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+ "pp0-core",
+ "package",
+ "dram",
+ "pp1-gpu",
+};
+
/* Clients have PP0, PKG */
#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
1<<RAPL_IDX_PKG_NRG_STAT|\
@@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v = { \
struct rapl_pmu {
spinlock_t lock;
- int hw_unit; /* 1/2^hw_unit Joule */
int n_active; /* number of active events */
struct list_head active_list;
struct pmu *pmu; /* pointer to rapl_pmu_class */
@@ -120,6 +127,7 @@ struct rapl_pmu {
struct hrtimer hrtimer;
};
+static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* 1/2^hw_unit Joule */
static struct pmu rapl_pmu_class;
static cpumask_t rapl_cpu_mask;
static int rapl_cntr_mask;
@@ -127,6 +135,7 @@ static int rapl_cntr_mask;
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
+static struct x86_pmu_quirk *rapl_quirks;
static inline u64 rapl_read_counter(struct perf_event *event)
{
u64 raw;
@@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct perf_event *event)
return raw;
}
-static inline u64 rapl_scale(u64 v)
+#define rapl_add_quirk(func_) \
+do { \
+ static struct x86_pmu_quirk __quirk __initdata = { \
+ .func = func_, \
+ }; \
+ __quirk.next = rapl_quirks; \
+ rapl_quirks = &__quirk; \
+} while (0)
+
+static inline u64 rapl_scale(u64 v, int cfg)
{
+ if (cfg > NR_RAPL_DOMAINS) {
+ pr_warn("invalid domain %d, failed to scale data\n", cfg);
+ return v;
+ }
/*
* scale delta to smallest unit (1/2^32)
* users must then scale back: count * 1/(1e9*2^32) to get Joules
* or use ldexp(count, -32).
* Watts = Joules/Time delta
*/
- return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit);
+ return v << (32 - rapl_hw_unit[cfg - 1]);
}
static u64 rapl_event_update(struct perf_event *event)
@@ -173,7 +195,7 @@ again:
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
- sdelta = rapl_scale(delta);
+ sdelta = rapl_scale(delta, event->hw.config);
local64_add(sdelta, &event->count);
@@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu)
cpumask_set_cpu(cpu, &rapl_cpu_mask);
}
+static __init void rapl_hsw_server_quirk(void)
+{
+ /*
+ * DRAM domain on HSW server has fixed energy unit which can be
+ * different than the unit from power unit MSR.
+ * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+ * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+ */
+ rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+}
+
static int rapl_cpu_prepare(int cpu)
{
struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
int phys_id = topology_physical_package_id(cpu);
u64 ms;
- u64 msr_rapl_power_unit_bits;
if (pmu)
return 0;
@@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu)
if (phys_id < 0)
return -1;
- /* protect rdmsrl() to handle virtualization */
- if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
- return -1;
-
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
if (!pmu)
return -1;
-
spin_lock_init(&pmu->lock);
INIT_LIST_HEAD(&pmu->active_list);
- /*
- * grab power unit as: 1/2^unit Joules
- *
- * we cache in local PMU instance
- */
- pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
pmu->pmu = &rapl_pmu_class;
/*
@@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu)
* divide interval by 2 to avoid lockstep (2 * 100)
* if hw unit is 32, then we use 2 ms 1/200/2
*/
- if (pmu->hw_unit < 32)
- ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1));
+ if (rapl_hw_unit[0] < 32)
+ ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
else
ms = 2;
@@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct notifier_block *self,
return NOTIFY_OK;
}
+static int rapl_check_hw_unit(void)
+{
+ u64 msr_rapl_power_unit_bits;
+ int i;
+
+ /* protect rdmsrl() to handle virtualization */
+ if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+ return -1;
+ for (i = 0; i < NR_RAPL_DOMAINS; i++)
+ rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+
+ return 0;
+}
+
static const struct x86_cpu_id rapl_cpu_match[] = {
[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
[1] = {},
@@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void)
{
struct rapl_pmu *pmu;
int cpu, ret;
+ struct x86_pmu_quirk *quirk;
+ int i;
/*
* check for Intel processor family 6
@@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void)
rapl_cntr_mask = RAPL_IDX_CLN;
rapl_pmu_events_group.attrs = rapl_events_cln_attr;
break;
+ case 63: /* Haswell-Server */
+ rapl_add_quirk(rapl_hsw_server_quirk);
+ rapl_cntr_mask = RAPL_IDX_SRV;
+ rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+ break;
case 60: /* Haswell */
case 69: /* Haswell-Celeron */
rapl_cntr_mask = RAPL_IDX_HSW;
@@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void)
/* unsupported */
return 0;
}
+ ret = rapl_check_hw_unit();
+ if (ret)
+ return ret;
+ /* run cpu model quirks */
+ for (quirk = rapl_quirks; quirk; quirk = quirk->next)
+ quirk->func();
cpu_notifier_register_begin();
for_each_online_cpu(cpu) {
@@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void)
pmu = __this_cpu_read(rapl_pmu);
- pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
+ pr_info("RAPL PMU detected,"
" API unit is 2^-32 Joules,"
" %d fixed counters"
" %llu ms ovfl timer\n",
- pmu->hw_unit,
hweight32(rapl_cntr_mask),
ktime_to_ms(pmu->timer_interval));
-
+ for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+ if (rapl_cntr_mask & (1 << i)) {
+ pr_info("hw unit of domain %s 2^-%d Joules\n",
+ rapl_domain_names[i], rapl_hw_unit[i]);
+ }
+ }
out:
cpu_notifier_register_done();
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7d46bb260334..e2ce85db2283 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -149,6 +149,9 @@ static void __init e820_print_type(u32 type)
case E820_UNUSABLE:
printk(KERN_CONT "unusable");
break;
+ case E820_PRAM:
+ printk(KERN_CONT "persistent (type %u)", type);
+ break;
default:
printk(KERN_CONT "type %u", type);
break;
@@ -343,7 +346,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
* continue building up new bios map based on this
* information
*/
- if (current_type != last_type) {
+ if (current_type != last_type || current_type == E820_PRAM) {
if (last_type != 0) {
new_bios[new_bios_entry].size =
change_point[chgidx]->addr - last_addr;
@@ -688,6 +691,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
register_nosave_region(pfn, PFN_UP(ei->addr));
pfn = PFN_DOWN(ei->addr + ei->size);
+
if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
register_nosave_region(PFN_UP(ei->addr), pfn);
@@ -748,7 +752,7 @@ u64 __init early_reserve_e820(u64 size, u64 align)
/*
* Find the highest page frame number we have available
*/
-static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
+static unsigned long __init e820_end_pfn(unsigned long limit_pfn)
{
int i;
unsigned long last_pfn = 0;
@@ -759,7 +763,11 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
unsigned long start_pfn;
unsigned long end_pfn;
- if (ei->type != type)
+ /*
+ * Persistent memory is accounted as ram for purposes of
+ * establishing max_pfn and mem_map.
+ */
+ if (ei->type != E820_RAM && ei->type != E820_PRAM)
continue;
start_pfn = ei->addr >> PAGE_SHIFT;
@@ -784,12 +792,12 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
}
unsigned long __init e820_end_of_ram_pfn(void)
{
- return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
+ return e820_end_pfn(MAX_ARCH_PFN);
}
unsigned long __init e820_end_of_low_ram_pfn(void)
{
- return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
+ return e820_end_pfn(1UL << (32-PAGE_SHIFT));
}
static void early_panic(char *msg)
@@ -866,6 +874,9 @@ static int __init parse_memmap_one(char *p)
} else if (*p == '$') {
start_at = memparse(p+1, &p);
e820_add_region(start_at, mem_size, E820_RESERVED);
+ } else if (*p == '!') {
+ start_at = memparse(p+1, &p);
+ e820_add_region(start_at, mem_size, E820_PRAM);
} else
e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
@@ -907,6 +918,7 @@ static inline const char *e820_type_to_string(int e820_type)
case E820_ACPI: return "ACPI Tables";
case E820_NVS: return "ACPI Non-volatile Storage";
case E820_UNUSABLE: return "Unusable memory";
+ case E820_PRAM: return "Persistent RAM";
default: return "reserved";
}
}
@@ -940,7 +952,9 @@ void __init e820_reserve_resources(void)
* pci device BAR resource and insert them later in
* pcibios_resource_survey()
*/
- if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
+ if (((e820.map[i].type != E820_RESERVED) &&
+ (e820.map[i].type != E820_PRAM)) ||
+ res->start < (1ULL<<20)) {
res->flags |= IORESOURCE_BUSY;
insert_resource(&iomem_resource, res);
}
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 49ff55ef9b26..89427d8d4fc5 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -375,12 +375,6 @@ static int __init setup_early_printk(char *buf)
if (!strncmp(buf, "xen", 3))
early_console_register(&xenboot_console, keep);
#endif
-#ifdef CONFIG_EARLY_PRINTK_INTEL_MID
- if (!strncmp(buf, "hsu", 3)) {
- hsu_early_console_init(buf + 3);
- early_console_register(&early_hsu_console, keep);
- }
-#endif
#ifdef CONFIG_EARLY_PRINTK_EFI
if (!strncmp(buf, "efi", 3))
early_console_register(&early_efi_console, keep);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c7b238494b31..02c2eff7478d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -295,6 +295,15 @@ system_call_fastpath:
* rflags from r11 (but RF and VM bits are forced to 0),
* cs and ss are loaded from MSRs.
* Restoration of rflags re-enables interrupts.
+ *
+ * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
+ * descriptor is not reinitialized. This means that we should
+ * avoid SYSRET with SS == NULL, which could happen if we schedule,
+ * exit the kernel, and re-enter using an interrupt vector. (All
+ * interrupt entries on x86_64 set SS to NULL.) We prevent that
+ * from happening by reloading SS in __switch_to. (Actually
+ * detecting the failure in 64-bit userspace is tricky but can be
+ * done.)
*/
USERGS_SYSRET64
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 367f39d35e9c..009183276bb7 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -341,7 +341,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
+ struct xsave_struct *xsave;
int ret;
if (!cpu_has_xsave)
@@ -351,6 +351,8 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
+ xsave = &target->thread.fpu.state->xsave;
+
/*
* Copy the 48bytes defined by the software first into the xstate
* memory layout in the thread struct, so that we can copy the entire
@@ -369,7 +371,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
+ struct xsave_struct *xsave;
int ret;
if (!cpu_has_xsave)
@@ -379,6 +381,8 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
+ xsave = &target->thread.fpu.state->xsave;
+
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
/*
* mxcsr reserved bits must be masked to zero for security reasons.
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
new file mode 100644
index 000000000000..3420c874ddc5
--- /dev/null
+++ b/arch/x86/kernel/pmem.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2015, Christoph Hellwig.
+ */
+#include <linux/memblock.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <asm/e820.h>
+#include <asm/page_types.h>
+#include <asm/setup.h>
+
+static __init void register_pmem_device(struct resource *res)
+{
+ struct platform_device *pdev;
+ int error;
+
+ pdev = platform_device_alloc("pmem", PLATFORM_DEVID_AUTO);
+ if (!pdev)
+ return;
+
+ error = platform_device_add_resources(pdev, res, 1);
+ if (error)
+ goto out_put_pdev;
+
+ error = platform_device_add(pdev);
+ if (error)
+ goto out_put_pdev;
+ return;
+
+out_put_pdev:
+ dev_warn(&pdev->dev, "failed to add 'pmem' (persistent memory) device!\n");
+ platform_device_put(pdev);
+}
+
+static __init int register_pmem_devices(void)
+{
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ if (ei->type == E820_PRAM) {
+ struct resource res = {
+ .flags = IORESOURCE_MEM,
+ .start = ei->addr,
+ .end = ei->addr + ei->size - 1,
+ };
+ register_pmem_device(&res);
+ }
+ }
+
+ return 0;
+}
+device_initcall(register_pmem_devices);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4baaa972f52a..ddfdbf74f174 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -419,6 +419,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
+ if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
+ /*
+ * AMD CPUs have a misfeature: SYSRET sets the SS selector but
+ * does not update the cached descriptor. As a result, if we
+ * do SYSRET while SS is NULL, we'll end up in user mode with
+ * SS apparently equal to __USER_DS but actually unusable.
+ *
+ * The straightforward workaround would be to fix it up just
+ * before SYSRET, but that would slow down the system call
+ * fast paths. Instead, we ensure that SS is never NULL in
+ * system call context. We do this by replacing NULL SS
+ * selectors at every context switch. SYSCALL sets up a valid
+ * SS, so the only way to get NULL is to re-enter the kernel
+ * from CPL 3 through an interrupt. Since that can't happen
+ * in the same task as a running syscall, we are guaranteed to
+ * context switch between every interrupt vector entry and a
+ * subsequent SYSRET.
+ *
+ * We read SS first because SS reads are much faster than
+ * writes. Out of caution, we force SS to __KERNEL_DS even if
+ * it previously had a different non-NULL value.
+ */
+ unsigned short ss_sel;
+ savesegment(ss, ss_sel);
+ if (ss_sel != __KERNEL_DS)
+ loadsegment(ss, __KERNEL_DS);
+ }
+
return prev_p;
}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index f9804080ccb3..1ea14fd53933 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -616,7 +616,8 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
static void
handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
- bool failed;
+ bool stepping, failed;
+
/* Are we from a system call? */
if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
@@ -640,12 +641,13 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
}
/*
- * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
- * flag so that register information in the sigcontext is correct.
+ * If TF is set due to a debugger (TIF_FORCED_TF), clear TF now
+ * so that register information in the sigcontext is correct and
+ * then notify the tracer before entering the signal handler.
*/
- if (unlikely(regs->flags & X86_EFLAGS_TF) &&
- likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
- regs->flags &= ~X86_EFLAGS_TF;
+ stepping = test_thread_flag(TIF_SINGLESTEP);
+ if (stepping)
+ user_disable_single_step(current);
failed = (setup_rt_frame(ksig, regs) < 0);
if (!failed) {
@@ -656,10 +658,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
* it might disable possible debug exception from the
* signal handler.
*
- * Clear TF when entering the signal handler, but
- * notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
+ * Clear TF for the case when it wasn't set by debugger to
+ * avoid the recursive send_sigtrap() in SIGTRAP handler.
*/
regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF);
/*
@@ -668,7 +668,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
if (used_math())
fpu_reset_state(current);
}
- signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
+ signal_setup_done(failed, ksig, stepping);
}
#ifdef CONFIG_X86_32
OpenPOWER on IntegriCloud