summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig12
-rw-r--r--arch/x86/boot/compressed/kaslr.c5
-rw-r--r--arch/x86/entry/entry_64.S14
-rw-r--r--arch/x86/events/intel/core.c35
-rw-r--r--arch/x86/events/intel/uncore.c4
-rw-r--r--arch/x86/events/intel/uncore.h2
-rw-r--r--arch/x86/events/intel/uncore_snbep.c10
-rw-r--r--arch/x86/include/asm/elf.h1
-rw-r--r--arch/x86/include/asm/hw_irq.h8
-rw-r--r--arch/x86/include/asm/hypertransport.h46
-rw-r--r--arch/x86/include/asm/insn-eval.h2
-rw-r--r--arch/x86/include/asm/io.h4
-rw-r--r--arch/x86/include/asm/irqdomain.h6
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/kernel/acpi/boot.c61
-rw-r--r--arch/x86/kernel/apic/Makefile1
-rw-r--r--arch/x86/kernel/apic/htirq.c198
-rw-r--r--arch/x86/kernel/apic/vector.c5
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/mpparse.c6
-rw-r--r--arch/x86/kernel/smpboot.c128
-rw-r--r--arch/x86/kernel/sys_x86_64.c10
-rw-r--r--arch/x86/kernel/umip.c88
-rw-r--r--arch/x86/kvm/svm.c7
-rw-r--r--arch/x86/kvm/vmx.c161
-rw-r--r--arch/x86/lib/insn-eval.c4
-rw-r--r--arch/x86/lib/x86-opcode-map.txt2
-rw-r--r--arch/x86/mm/hugetlbpage.c11
-rw-r--r--arch/x86/mm/mmap.c62
29 files changed, 427 insertions, 469 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index df3276d6bfe3..8eed3f94bfc7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1804,14 +1804,20 @@ config X86_SMAP
If unsure, say Y.
config X86_INTEL_UMIP
- def_bool n
+ def_bool y
depends on CPU_SUP_INTEL
prompt "Intel User Mode Instruction Prevention" if EXPERT
---help---
The User Mode Instruction Prevention (UMIP) is a security
feature in newer Intel processors. If enabled, a general
- protection fault is issued if the instructions SGDT, SLDT,
- SIDT, SMSW and STR are executed in user mode.
+ protection fault is issued if the SGDT, SLDT, SIDT, SMSW
+ or STR instructions are executed in user mode. These instructions
+ unnecessarily expose information about the hardware state.
+
+ The vast majority of applications do not use these instructions.
+ For the very few that do, software emulation is provided in
+ specific cases in protected and virtual-8086 modes. Emulated
+ results are dummy.
config X86_INTEL_MPX
prompt "Intel MPX (Memory Protection Extensions)"
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a63fbc25ce84..8199a6187251 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -171,7 +171,6 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
static void mem_avoid_memmap(char *str)
{
static int i;
- int rc;
if (i >= MAX_MEMMAP_REGIONS)
return;
@@ -219,7 +218,7 @@ static int handle_mem_memmap(void)
return 0;
tmp_cmdline = malloc(len + 1);
- if (!tmp_cmdline )
+ if (!tmp_cmdline)
error("Failed to allocate space for tmp_cmdline");
memcpy(tmp_cmdline, args, len);
@@ -363,7 +362,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
cmd_line |= boot_params->hdr.cmd_line_ptr;
/* Calculate size of cmd_line. */
ptr = (char *)(unsigned long)cmd_line;
- for (cmd_line_size = 0; ptr[cmd_line_size++]; )
+ for (cmd_line_size = 0; ptr[cmd_line_size++];)
;
mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a2b30ec69497..f81d50d7ceac 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -51,15 +51,19 @@ ENTRY(native_usergs_sysret64)
END(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
-.macro TRACE_IRQS_IRETQ
+.macro TRACE_IRQS_FLAGS flags:req
#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9, EFLAGS(%rsp) /* interrupts off? */
+ bt $9, \flags /* interrupts off? */
jnc 1f
TRACE_IRQS_ON
1:
#endif
.endm
+.macro TRACE_IRQS_IRETQ
+ TRACE_IRQS_FLAGS EFLAGS(%rsp)
+.endm
+
/*
* When dynamic function tracer is enabled it will add a breakpoint
* to all locations that it is about to modify, sync CPUs, update
@@ -148,8 +152,6 @@ ENTRY(entry_SYSCALL_64)
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- TRACE_IRQS_OFF
-
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
@@ -170,6 +172,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
UNWIND_HINT_REGS extra=0
+ TRACE_IRQS_OFF
+
/*
* If we need to do entry work or if we guess we'll need to do
* exit work, go straight to the slow path.
@@ -943,11 +947,13 @@ ENTRY(native_load_gs_index)
FRAME_BEGIN
pushfq
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
+ TRACE_IRQS_OFF
SWAPGS
.Lgs_change:
movl %edi, %gs
2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
SWAPGS
+ TRACE_IRQS_FLAGS (%rsp)
popfq
FRAME_END
ret
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 43445da30cea..09c26a4f139c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3734,6 +3734,19 @@ EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1");
EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1");
static struct attribute *hsw_events_attrs[] = {
+ EVENT_PTR(mem_ld_hsw),
+ EVENT_PTR(mem_st_hsw),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
+ NULL
+};
+
+static struct attribute *hsw_tsx_events_attrs[] = {
EVENT_PTR(tx_start),
EVENT_PTR(tx_commit),
EVENT_PTR(tx_abort),
@@ -3746,18 +3759,16 @@ static struct attribute *hsw_events_attrs[] = {
EVENT_PTR(el_conflict),
EVENT_PTR(cycles_t),
EVENT_PTR(cycles_ct),
- EVENT_PTR(mem_ld_hsw),
- EVENT_PTR(mem_st_hsw),
- EVENT_PTR(td_slots_issued),
- EVENT_PTR(td_slots_retired),
- EVENT_PTR(td_fetch_bubbles),
- EVENT_PTR(td_total_slots),
- EVENT_PTR(td_total_slots_scale),
- EVENT_PTR(td_recovery_bubbles),
- EVENT_PTR(td_recovery_bubbles_scale),
NULL
};
+static __init struct attribute **get_hsw_events_attrs(void)
+{
+ return boot_cpu_has(X86_FEATURE_RTM) ?
+ merge_attr(hsw_events_attrs, hsw_tsx_events_attrs) :
+ hsw_events_attrs;
+}
+
static ssize_t freeze_on_smi_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
@@ -4186,7 +4197,7 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.cpu_events = get_hsw_events_attrs();
x86_pmu.lbr_double_abort = true;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
@@ -4225,7 +4236,7 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.cpu_events = get_hsw_events_attrs();
x86_pmu.limit_period = bdw_limit_period;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
@@ -4283,7 +4294,7 @@ __init int intel_pmu_init(void)
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
extra_attr = merge_attr(extra_attr, skl_format_attr);
- x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.cpu_events = get_hsw_events_attrs();
intel_pmu_pebs_data_source_skl(
boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
pr_cont("Skylake events, ");
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index d45e06346f14..7874c980d569 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -975,10 +975,10 @@ static void uncore_pci_remove(struct pci_dev *pdev)
int i, phys_id, pkg;
phys_id = uncore_pcibus_to_physid(pdev->bus);
- pkg = topology_phys_to_logical_pkg(phys_id);
box = pci_get_drvdata(pdev);
if (!box) {
+ pkg = topology_phys_to_logical_pkg(phys_id);
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
uncore_extra_pci_dev[pkg].dev[i] = NULL;
@@ -994,7 +994,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
return;
pci_set_drvdata(pdev, NULL);
- pmu->boxes[pkg] = NULL;
+ pmu->boxes[box->pkgid] = NULL;
if (atomic_dec_return(&pmu->activeboxes) == 0)
uncore_pmu_unregister(pmu);
uncore_box_exit(box);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 4364191e7c6b..414dc7e7c950 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -100,7 +100,7 @@ struct intel_uncore_extra_reg {
struct intel_uncore_box {
int pci_phys_id;
- int pkgid;
+ int pkgid; /* Logical package ID */
int n_active; /* number of active events */
int n_events;
int cpu; /* cpu to collect events */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 95cb19f4e06f..6d8044ab1060 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1057,7 +1057,7 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve
if (reg1->idx != EXTRA_REG_NONE) {
int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
- int pkg = topology_phys_to_logical_pkg(box->pci_phys_id);
+ int pkg = box->pkgid;
struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
if (filter_pdev) {
@@ -3035,11 +3035,19 @@ static struct intel_uncore_type *bdx_msr_uncores[] = {
NULL,
};
+/* Bit 7 'Use Occupancy' is not available for counter 0 on BDX */
+static struct event_constraint bdx_uncore_pcu_constraints[] = {
+ EVENT_CONSTRAINT(0x80, 0xe, 0x80),
+ EVENT_CONSTRAINT_END
+};
+
void bdx_uncore_cpu_init(void)
{
if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
uncore_msr_uncores = bdx_msr_uncores;
+
+ hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints;
}
static struct intel_uncore_type bdx_uncore_ha = {
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 3a091cea36c5..0d157d2a1e2a 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -309,6 +309,7 @@ static inline int mmap_is_ia32(void)
extern unsigned long task_size_32bit(void);
extern unsigned long task_size_64bit(int full_addr_space);
extern unsigned long get_mmap_base(int is_legacy);
+extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b80e46733909..2851077b6051 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -99,14 +99,6 @@ struct irq_alloc_info {
void *dmar_data;
};
#endif
-#ifdef CONFIG_HT_IRQ
- struct {
- int ht_pos;
- int ht_idx;
- struct pci_dev *ht_dev;
- void *ht_update;
- };
-#endif
#ifdef CONFIG_X86_UV
struct {
int uv_limit;
diff --git a/arch/x86/include/asm/hypertransport.h b/arch/x86/include/asm/hypertransport.h
deleted file mode 100644
index 5d55df352879..000000000000
--- a/arch/x86/include/asm/hypertransport.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_HYPERTRANSPORT_H
-#define _ASM_X86_HYPERTRANSPORT_H
-
-/*
- * Constants for x86 Hypertransport Interrupts.
- */
-
-#define HT_IRQ_LOW_BASE 0xf8000000
-
-#define HT_IRQ_LOW_VECTOR_SHIFT 16
-#define HT_IRQ_LOW_VECTOR_MASK 0x00ff0000
-#define HT_IRQ_LOW_VECTOR(v) \
- (((v) << HT_IRQ_LOW_VECTOR_SHIFT) & HT_IRQ_LOW_VECTOR_MASK)
-
-#define HT_IRQ_LOW_DEST_ID_SHIFT 8
-#define HT_IRQ_LOW_DEST_ID_MASK 0x0000ff00
-#define HT_IRQ_LOW_DEST_ID(v) \
- (((v) << HT_IRQ_LOW_DEST_ID_SHIFT) & HT_IRQ_LOW_DEST_ID_MASK)
-
-#define HT_IRQ_LOW_DM_PHYSICAL 0x0000000
-#define HT_IRQ_LOW_DM_LOGICAL 0x0000040
-
-#define HT_IRQ_LOW_RQEOI_EDGE 0x0000000
-#define HT_IRQ_LOW_RQEOI_LEVEL 0x0000020
-
-
-#define HT_IRQ_LOW_MT_FIXED 0x0000000
-#define HT_IRQ_LOW_MT_ARBITRATED 0x0000004
-#define HT_IRQ_LOW_MT_SMI 0x0000008
-#define HT_IRQ_LOW_MT_NMI 0x000000c
-#define HT_IRQ_LOW_MT_INIT 0x0000010
-#define HT_IRQ_LOW_MT_STARTUP 0x0000014
-#define HT_IRQ_LOW_MT_EXTINT 0x0000018
-#define HT_IRQ_LOW_MT_LINT1 0x000008c
-#define HT_IRQ_LOW_MT_LINT0 0x0000098
-
-#define HT_IRQ_LOW_IRQ_MASKED 0x0000001
-
-
-#define HT_IRQ_HIGH_DEST_ID_SHIFT 0
-#define HT_IRQ_HIGH_DEST_ID_MASK 0x00ffffff
-#define HT_IRQ_HIGH_DEST_ID(v) \
- ((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK)
-
-#endif /* _ASM_X86_HYPERTRANSPORT_H */
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index e1d3b4ce8a92..2b6ccf2c49f1 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -18,6 +18,6 @@
void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs);
int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
-char insn_get_code_seg_params(struct pt_regs *regs);
+int insn_get_code_seg_params(struct pt_regs *regs);
#endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 93ae8aee1780..95e948627fd0 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -111,6 +111,10 @@ build_mmio_write(__writeq, "q", unsigned long, "r", )
#endif
+#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
+extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
+extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
+
/**
* virt_to_phys - map virtual addresses to physical
* @address: address to remap
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index f695cc6b8e1f..139feef467f7 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -56,10 +56,4 @@ extern void arch_init_msi_domain(struct irq_domain *domain);
static inline void arch_init_msi_domain(struct irq_domain *domain) { }
#endif
-#ifdef CONFIG_HT_IRQ
-extern void arch_init_htirq_domain(struct irq_domain *domain);
-#else
-static inline void arch_init_htirq_domain(struct irq_domain *domain) { }
-#endif
-
#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2db7cf720b04..cc16fa882e3e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -132,6 +132,7 @@ struct cpuinfo_x86 {
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
+ unsigned initialized : 1;
} __randomize_layout;
struct cpuid_regs {
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ef9e02e614d0..f4c463df8b08 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -342,13 +342,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+ u8 trigger, u32 gsi);
+
static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
u32 gsi)
{
- int ioapic;
- int pin;
- struct mpc_intsrc mp_irq;
-
/*
* Check bus_irq boundary.
*/
@@ -358,14 +357,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
}
/*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return;
- pin = mp_find_ioapic_pin(ioapic, gsi);
-
- /*
* TBD: This check is for faulty timer entries, where the override
* erroneously sets the trigger to level, resulting in a HUGE
* increase of timer interrupts!
@@ -373,16 +364,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
if ((bus_irq == 0) && (trigger == 3))
trigger = 1;
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = (trigger << 2) | polarity;
- mp_irq.srcbus = MP_ISA_BUS;
- mp_irq.srcbusirq = bus_irq; /* IRQ */
- mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
- mp_irq.dstirq = pin; /* INTIN# */
-
- mp_save_irq(&mp_irq);
-
+ if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
+ return;
/*
* Reset default identity mapping if gsi is also an legacy IRQ,
* otherwise there will be more than one entry with the same GSI
@@ -429,6 +412,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
return 0;
}
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+ u8 trigger, u32 gsi)
+{
+ struct mpc_intsrc mp_irq;
+ int ioapic, pin;
+
+ /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0) {
+ pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
+ return ioapic;
+ }
+
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (trigger << 2) | polarity;
+ mp_irq.srcbus = MP_ISA_BUS;
+ mp_irq.srcbusirq = bus_irq;
+ mp_irq.dstapic = mpc_ioapic_id(ioapic);
+ mp_irq.dstirq = pin;
+
+ mp_save_irq(&mp_irq);
+
+ return 0;
+}
+
static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{
@@ -473,7 +484,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
- mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+ if (bus_irq < NR_IRQS_LEGACY)
+ mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+ else
+ mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
+
acpi_penalize_sci_irq(bus_irq, trigger, polarity);
/*
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index a9e08924927e..a6fcaf16cdbf 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -12,7 +12,6 @@ obj-y += hw_nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_PCI_MSI) += msi.o
-obj-$(CONFIG_HT_IRQ) += htirq.o
obj-$(CONFIG_SMP) += ipi.o
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
deleted file mode 100644
index b07075dce8b7..000000000000
--- a/arch/x86/kernel/apic/htirq.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Support Hypertransport IRQ
- *
- * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
- * Moved from arch/x86/kernel/apic/io_apic.c.
- * Jiang Liu <jiang.liu@linux.intel.com>
- * Add support of hierarchical irqdomain
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/pci.h>
-#include <linux/htirq.h>
-#include <asm/irqdomain.h>
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/hypertransport.h>
-
-static struct irq_domain *htirq_domain;
-
-/*
- * Hypertransport interrupt support
- */
-static int
-ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
- struct irq_data *parent = data->parent_data;
- int ret;
-
- ret = parent->chip->irq_set_affinity(parent, mask, force);
- if (ret >= 0) {
- struct ht_irq_msg msg;
- struct irq_cfg *cfg = irqd_cfg(data);
-
- fetch_ht_irq_msg(data->irq, &msg);
- msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK |
- HT_IRQ_LOW_DEST_ID_MASK);
- msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) |
- HT_IRQ_LOW_DEST_ID(cfg->dest_apicid);
- msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
- msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
- write_ht_irq_msg(data->irq, &msg);
- }
-
- return ret;
-}
-
-static struct irq_chip ht_irq_chip = {
- .name = "PCI-HT",
- .irq_mask = mask_ht_irq,
- .irq_unmask = unmask_ht_irq,
- .irq_ack = irq_chip_ack_parent,
- .irq_set_affinity = ht_set_affinity,
- .irq_retrigger = irq_chip_retrigger_hierarchy,
- .flags = IRQCHIP_SKIP_SET_WAKE,
-};
-
-static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq,
- unsigned int nr_irqs, void *arg)
-{
- struct ht_irq_cfg *ht_cfg;
- struct irq_alloc_info *info = arg;
- struct pci_dev *dev;
- irq_hw_number_t hwirq;
- int ret;
-
- if (nr_irqs > 1 || !info)
- return -EINVAL;
-
- dev = info->ht_dev;
- hwirq = (info->ht_idx & 0xFF) |
- PCI_DEVID(dev->bus->number, dev->devfn) << 8 |
- (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24;
- if (irq_find_mapping(domain, hwirq) > 0)
- return -EEXIST;
-
- ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL);
- if (!ht_cfg)
- return -ENOMEM;
-
- ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
- if (ret < 0) {
- kfree(ht_cfg);
- return ret;
- }
-
- /* Initialize msg to a value that will never match the first write. */
- ht_cfg->msg.address_lo = 0xffffffff;
- ht_cfg->msg.address_hi = 0xffffffff;
- ht_cfg->dev = info->ht_dev;
- ht_cfg->update = info->ht_update;
- ht_cfg->pos = info->ht_pos;
- ht_cfg->idx = 0x10 + (info->ht_idx * 2);
- irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg,
- handle_edge_irq, ht_cfg, "edge");
-
- return 0;
-}
-
-static void htirq_domain_free(struct irq_domain *domain, unsigned int virq,
- unsigned int nr_irqs)
-{
- struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
-
- BUG_ON(nr_irqs != 1);
- kfree(irq_data->chip_data);
- irq_domain_free_irqs_top(domain, virq, nr_irqs);
-}
-
-static int htirq_domain_activate(struct irq_domain *domain,
- struct irq_data *irq_data, bool early)
-{
- struct ht_irq_msg msg;
- struct irq_cfg *cfg = irqd_cfg(irq_data);
-
- msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
- msg.address_lo =
- HT_IRQ_LOW_BASE |
- HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) |
- HT_IRQ_LOW_VECTOR(cfg->vector) |
- ((apic->irq_dest_mode == 0) ?
- HT_IRQ_LOW_DM_PHYSICAL :
- HT_IRQ_LOW_DM_LOGICAL) |
- HT_IRQ_LOW_RQEOI_EDGE |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- HT_IRQ_LOW_MT_FIXED :
- HT_IRQ_LOW_MT_ARBITRATED) |
- HT_IRQ_LOW_IRQ_MASKED;
- write_ht_irq_msg(irq_data->irq, &msg);
- return 0;
-}
-
-static void htirq_domain_deactivate(struct irq_domain *domain,
- struct irq_data *irq_data)
-{
- struct ht_irq_msg msg;
-
- memset(&msg, 0, sizeof(msg));
- write_ht_irq_msg(irq_data->irq, &msg);
-}
-
-static const struct irq_domain_ops htirq_domain_ops = {
- .alloc = htirq_domain_alloc,
- .free = htirq_domain_free,
- .activate = htirq_domain_activate,
- .deactivate = htirq_domain_deactivate,
-};
-
-void __init arch_init_htirq_domain(struct irq_domain *parent)
-{
- struct fwnode_handle *fn;
-
- if (disable_apic)
- return;
-
- fn = irq_domain_alloc_named_fwnode("PCI-HT");
- if (!fn)
- goto warn;
-
- htirq_domain = irq_domain_create_tree(fn, &htirq_domain_ops, NULL);
- irq_domain_free_fwnode(fn);
- if (!htirq_domain)
- goto warn;
-
- htirq_domain->parent = parent;
- return;
-
-warn:
- pr_warn("Failed to initialize irqdomain for HTIRQ.\n");
-}
-
-int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
- ht_irq_update_t *update)
-{
- struct irq_alloc_info info;
-
- if (!htirq_domain)
- return -ENOSYS;
-
- init_irq_alloc_info(&info, NULL);
- info.ht_idx = idx;
- info.ht_pos = pos;
- info.ht_dev = dev;
- info.ht_update = update;
-
- return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev),
- &info);
-}
-
-void arch_teardown_ht_irq(unsigned int irq)
-{
- irq_domain_free_irqs(irq, 1);
-}
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 05c85e693a5d..6a823a25eaff 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -1,5 +1,5 @@
/*
- * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc.
+ * Local APIC related interfaces to support IOAPIC, MSI, etc.
*
* Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
* Moved from arch/x86/kernel/apic/io_apic.c.
@@ -601,7 +601,7 @@ int __init arch_probe_nr_irqs(void)
nr_irqs = NR_VECTORS * nr_cpu_ids;
nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
-#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+#if defined(CONFIG_PCI_MSI)
/*
* for MSI and HT dyn irq
*/
@@ -663,7 +663,6 @@ int __init arch_early_irq_init(void)
irq_set_default_host(x86_vector_domain);
arch_init_msi_domain(x86_vector_domain);
- arch_init_htirq_domain(x86_vector_domain);
BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 13ae9e5eec2f..fa998ca8aa5a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -341,6 +341,8 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
cr4_set_bits(X86_CR4_UMIP);
+ pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n");
+
return;
out:
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 410c5dadcee3..3a4b12809ab5 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -431,6 +431,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
}
static unsigned long mpf_base;
+static bool mpf_found;
static unsigned long __init get_mpc_size(unsigned long physptr)
{
@@ -504,7 +505,7 @@ void __init default_get_smp_config(unsigned int early)
if (!smp_found_config)
return;
- if (!mpf_base)
+ if (!mpf_found)
return;
if (acpi_lapic && early)
@@ -593,6 +594,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
smp_found_config = 1;
#endif
mpf_base = base;
+ mpf_found = true;
pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n",
base, base + sizeof(*mpf) - 1, mpf);
@@ -858,7 +860,7 @@ static int __init update_mp_table(void)
if (!enable_update_mptable)
return 0;
- if (!mpf_base)
+ if (!mpf_found)
return 0;
mpf = early_memremap(mpf_base, sizeof(*mpf));
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5f59e6bee123..3d01df7d7cf6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -101,9 +101,6 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
/* Logical package management. We might want to allocate that dynamically */
-static int *physical_to_logical_pkg __read_mostly;
-static unsigned long *physical_package_map __read_mostly;;
-static unsigned int max_physical_pkg_id __read_mostly;
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
@@ -281,108 +278,48 @@ static void notrace start_secondary(void *unused)
}
/**
+ * topology_phys_to_logical_pkg - Map a physical package id to a logical
+ *
+ * Returns logical package id or -1 if not found
+ */
+int topology_phys_to_logical_pkg(unsigned int phys_pkg)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->initialized && c->phys_proc_id == phys_pkg)
+ return c->logical_proc_id;
+ }
+ return -1;
+}
+EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
+/**
* topology_update_package_map - Update the physical to logical package map
* @pkg: The physical package id as retrieved via CPUID
* @cpu: The cpu for which this is updated
*/
int topology_update_package_map(unsigned int pkg, unsigned int cpu)
{
- unsigned int new;
+ int new;
- /* Called from early boot ? */
- if (!physical_package_map)
- return 0;
-
- if (pkg >= max_physical_pkg_id)
- return -EINVAL;
-
- /* Set the logical package id */
- if (test_and_set_bit(pkg, physical_package_map))
+ /* Already available somewhere? */
+ new = topology_phys_to_logical_pkg(pkg);
+ if (new >= 0)
goto found;
- if (logical_packages >= __max_logical_packages) {
- pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n",
- logical_packages, cpu, __max_logical_packages);
- return -ENOSPC;
- }
-
new = logical_packages++;
if (new != pkg) {
pr_info("CPU %u Converting physical %u to logical package %u\n",
cpu, pkg, new);
}
- physical_to_logical_pkg[pkg] = new;
-
found:
- cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
+ cpu_data(cpu).logical_proc_id = new;
return 0;
}
-/**
- * topology_phys_to_logical_pkg - Map a physical package id to a logical
- *
- * Returns logical package id or -1 if not found
- */
-int topology_phys_to_logical_pkg(unsigned int phys_pkg)
-{
- if (phys_pkg >= max_physical_pkg_id)
- return -1;
- return physical_to_logical_pkg[phys_pkg];
-}
-EXPORT_SYMBOL(topology_phys_to_logical_pkg);
-
-static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu)
-{
- unsigned int ncpus;
- size_t size;
-
- /*
- * Today neither Intel nor AMD support heterogenous systems. That
- * might change in the future....
- *
- * While ideally we'd want '* smp_num_siblings' in the below @ncpus
- * computation, this won't actually work since some Intel BIOSes
- * report inconsistent HT data when they disable HT.
- *
- * In particular, they reduce the APIC-IDs to only include the cores,
- * but leave the CPUID topology to say there are (2) siblings.
- * This means we don't know how many threads there will be until
- * after the APIC enumeration.
- *
- * By not including this we'll sometimes over-estimate the number of
- * logical packages by the amount of !present siblings, but this is
- * still better than MAX_LOCAL_APIC.
- *
- * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited
- * on the command line leading to a similar issue as the HT disable
- * problem because the hyperthreads are usually enumerated after the
- * primary cores.
- */
- ncpus = boot_cpu_data.x86_max_cores;
- if (!ncpus) {
- pr_warn("x86_max_cores == zero !?!?");
- ncpus = 1;
- }
-
- __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
- logical_packages = 0;
-
- /*
- * Possibly larger than what we need as the number of apic ids per
- * package can be smaller than the actual used apic ids.
- */
- max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
- size = max_physical_pkg_id * sizeof(unsigned int);
- physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
- memset(physical_to_logical_pkg, 0xff, size);
- size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
- physical_package_map = kzalloc(size, GFP_KERNEL);
-
- pr_info("Max logical packages: %u\n", __max_logical_packages);
-
- topology_update_package_map(c->phys_proc_id, cpu);
-}
-
void __init smp_store_boot_cpu_info(void)
{
int id = 0; /* CPU 0 */
@@ -390,7 +327,8 @@ void __init smp_store_boot_cpu_info(void)
*c = boot_cpu_data;
c->cpu_index = id;
- smp_init_package_map(c, id);
+ topology_update_package_map(c->phys_proc_id, id);
+ c->initialized = true;
}
/*
@@ -401,13 +339,16 @@ void smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
- *c = boot_cpu_data;
+ /* Copy boot_cpu_data only on the first bringup */
+ if (!c->initialized)
+ *c = boot_cpu_data;
c->cpu_index = id;
/*
* During boot time, CPU0 has this setup already. Save the info when
* bringing up AP or offlined CPU0.
*/
identify_secondary_cpu(c);
+ c->initialized = true;
}
static bool
@@ -1356,7 +1297,16 @@ void __init native_smp_prepare_boot_cpu(void)
void __init native_smp_cpus_done(unsigned int max_cpus)
{
+ int ncpus;
+
pr_debug("Boot done\n");
+ /*
+ * Today neither Intel nor AMD support heterogenous systems so
+ * extrapolate the boot cpu's data to all packages.
+ */
+ ncpus = cpu_data(0).booted_cores * smp_num_siblings;
+ __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+ pr_info("Max logical packages: %u\n", __max_logical_packages);
if (x86_has_numa_in_package)
set_sched_topology(x86_numa_in_package_topology);
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index a63fe77b3217..676774b9bb8d 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -188,6 +188,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
if (len > TASK_SIZE)
return -ENOMEM;
+ /* No address checking. See comment at mmap_address_hint_valid() */
if (flags & MAP_FIXED)
return addr;
@@ -197,12 +198,15 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
/* requesting a specific address */
if (addr) {
- addr = PAGE_ALIGN(addr);
+ addr &= PAGE_MASK;
+ if (!mmap_address_hint_valid(addr, len))
+ goto get_unmapped_area;
+
vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
+ if (!vma || addr + len <= vm_start_gap(vma))
return addr;
}
+get_unmapped_area:
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 6ba82be68cff..f44ce0fb3583 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -78,7 +78,60 @@
#define UMIP_INST_SGDT 0 /* 0F 01 /0 */
#define UMIP_INST_SIDT 1 /* 0F 01 /1 */
-#define UMIP_INST_SMSW 3 /* 0F 01 /4 */
+#define UMIP_INST_SMSW 2 /* 0F 01 /4 */
+#define UMIP_INST_SLDT 3 /* 0F 00 /0 */
+#define UMIP_INST_STR 4 /* 0F 00 /1 */
+
+const char * const umip_insns[5] = {
+ [UMIP_INST_SGDT] = "SGDT",
+ [UMIP_INST_SIDT] = "SIDT",
+ [UMIP_INST_SMSW] = "SMSW",
+ [UMIP_INST_SLDT] = "SLDT",
+ [UMIP_INST_STR] = "STR",
+};
+
+#define umip_pr_err(regs, fmt, ...) \
+ umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
+#define umip_pr_warning(regs, fmt, ...) \
+ umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__)
+
+/**
+ * umip_printk() - Print a rate-limited message
+ * @regs: Register set with the context in which the warning is printed
+ * @log_level: Kernel log level to print the message
+ * @fmt: The text string to print
+ *
+ * Print the text contained in @fmt. The print rate is limited to bursts of 5
+ * messages every two minutes. The purpose of this customized version of
+ * printk() is to print messages when user space processes use any of the
+ * UMIP-protected instructions. Thus, the printed text is prepended with the
+ * task name and process ID number of the current task as well as the
+ * instruction and stack pointers in @regs as seen when entering kernel mode.
+ *
+ * Returns:
+ *
+ * None.
+ */
+static __printf(3, 4)
+void umip_printk(const struct pt_regs *regs, const char *log_level,
+ const char *fmt, ...)
+{
+ /* Bursts of 5 messages every two minutes */
+ static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5);
+ struct task_struct *tsk = current;
+ struct va_format vaf;
+ va_list args;
+
+ if (!__ratelimit(&ratelimit))
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm,
+ task_pid_nr(tsk), regs->ip, regs->sp, &vaf);
+ va_end(args);
+}
/**
* identify_insn() - Identify a UMIP-protected instruction
@@ -118,10 +171,16 @@ static int identify_insn(struct insn *insn)
default:
return -EINVAL;
}
+ } else if (insn->opcode.bytes[1] == 0x0) {
+ if (X86_MODRM_REG(insn->modrm.value) == 0)
+ return UMIP_INST_SLDT;
+ else if (X86_MODRM_REG(insn->modrm.value) == 1)
+ return UMIP_INST_STR;
+ else
+ return -EINVAL;
+ } else {
+ return -EINVAL;
}
-
- /* SLDT AND STR are not emulated */
- return -EINVAL;
}
/**
@@ -228,10 +287,8 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
return;
- pr_err_ratelimited("%s[%d] umip emulation segfault ip:%lx sp:%lx error:%x in %lx\n",
- tsk->comm, task_pid_nr(tsk), regs->ip,
- regs->sp, X86_PF_USER | X86_PF_WRITE,
- regs->ip);
+ umip_pr_err(regs, "segfault in emulation. error%x\n",
+ X86_PF_USER | X86_PF_WRITE);
}
/**
@@ -262,15 +319,11 @@ bool fixup_umip_exception(struct pt_regs *regs)
unsigned char buf[MAX_INSN_SIZE];
void __user *uaddr;
struct insn insn;
- char seg_defs;
+ int seg_defs;
if (!regs)
return false;
- /* Do not emulate 64-bit processes. */
- if (user_64bit_mode(regs))
- return false;
-
/*
* If not in user-space long mode, a custom code segment could be in
* use. This is true in protected mode (if the process defined a local
@@ -322,6 +375,15 @@ bool fixup_umip_exception(struct pt_regs *regs)
if (umip_inst < 0)
return false;
+ umip_pr_warning(regs, "%s instruction cannot be used by applications.\n",
+ umip_insns[umip_inst]);
+
+ /* Do not emulate SLDT, STR or user long mode processes. */
+ if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs))
+ return false;
+
+ umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n");
+
if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
return false;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b71daed3cca2..59e13a79c2e3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3671,6 +3671,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u32 ecx = msr->index;
u64 data = msr->data;
switch (ecx) {
+ case MSR_IA32_CR_PAT:
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ return 1;
+ vcpu->arch.pat = data;
+ svm->vmcb->save.g_pat = data;
+ mark_dirty(svm->vmcb, VMCB_NPT);
+ break;
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr);
break;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7c3522a989d0..714a0673ec3c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -70,6 +70,9 @@ MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
+static bool __read_mostly enable_vnmi = 1;
+module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
+
static bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
@@ -202,6 +205,10 @@ struct loaded_vmcs {
bool nmi_known_unmasked;
unsigned long vmcs_host_cr3; /* May not match real cr3 */
unsigned long vmcs_host_cr4; /* May not match real cr4 */
+ /* Support for vnmi-less CPUs */
+ int soft_vnmi_blocked;
+ ktime_t entry_time;
+ s64 vnmi_blocked_time;
struct list_head loaded_vmcss_on_cpu_link;
};
@@ -1291,6 +1298,11 @@ static inline bool cpu_has_vmx_invpcid(void)
SECONDARY_EXEC_ENABLE_INVPCID;
}
+static inline bool cpu_has_virtual_nmis(void)
+{
+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
+}
+
static inline bool cpu_has_vmx_wbinvd_exit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1348,11 +1360,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
(vmcs12->secondary_vm_exec_control & bit);
}
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
-{
- return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
-}
-
static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
{
return vmcs12->pin_based_vm_exec_control &
@@ -3712,9 +3719,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_vmexit_control) < 0)
return -EIO;
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
- PIN_BASED_VIRTUAL_NMIS;
- opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
+ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
+ PIN_BASED_VMX_PREEMPTION_TIMER;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
@@ -5232,6 +5239,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
if (!kvm_vcpu_apicv_active(&vmx->vcpu))
pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+
+ if (!enable_vnmi)
+ pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
+
/* Enable the preemption timer dynamically */
pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
return pin_based_exec_ctrl;
@@ -5666,7 +5677,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
- if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+ if (!enable_vnmi ||
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
enable_irq_window(vcpu);
return;
}
@@ -5706,6 +5718,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ if (!enable_vnmi) {
+ /*
+ * Tracking the NMI-blocked state in software is built upon
+ * finding the next open IRQ window. This, in turn, depends on
+ * well-behaving guests: They have to keep IRQs disabled at
+ * least as long as the NMI handler runs. Otherwise we may
+ * cause NMI nesting, maybe breaking the guest. But as this is
+ * highly unlikely, we can live with the residual risk.
+ */
+ vmx->loaded_vmcs->soft_vnmi_blocked = 1;
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
+ }
+
++vcpu->stat.nmi_injections;
vmx->loaded_vmcs->nmi_known_unmasked = false;
@@ -5724,6 +5749,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool masked;
+ if (!enable_vnmi)
+ return vmx->loaded_vmcs->soft_vnmi_blocked;
if (vmx->loaded_vmcs->nmi_known_unmasked)
return false;
masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@ -5735,13 +5762,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmx->loaded_vmcs->nmi_known_unmasked = !masked;
- if (masked)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
+ if (!enable_vnmi) {
+ if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
+ vmx->loaded_vmcs->soft_vnmi_blocked = masked;
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
+ }
+ } else {
+ vmx->loaded_vmcs->nmi_known_unmasked = !masked;
+ if (masked)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ }
}
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5749,6 +5783,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
if (to_vmx(vcpu)->nested.nested_run_pending)
return 0;
+ if (!enable_vnmi &&
+ to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
+ return 0;
+
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI));
@@ -6476,6 +6514,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* AAK134, BY25.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
@@ -6535,6 +6574,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
static int handle_nmi_window(struct kvm_vcpu *vcpu)
{
+ WARN_ON_ONCE(!enable_vnmi);
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_VIRTUAL_NMI_PENDING);
++vcpu->stat.nmi_window_exits;
@@ -6758,6 +6798,9 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_flexpriority())
flexpriority_enabled = 0;
+ if (!cpu_has_virtual_nmis())
+ enable_vnmi = 0;
+
/*
* set_apic_access_page_addr() is used to reload apic access
* page upon invalidation. No need to do anything if not
@@ -6962,7 +7005,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
}
/* Create a new VMCS */
- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+ item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
if (!item)
return NULL;
item->vmcs02.vmcs = alloc_vmcs();
@@ -7979,6 +8022,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
* "blocked by NMI" bit has to be set before next VM entry.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
@@ -8823,6 +8867,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0;
}
+ if (unlikely(!enable_vnmi &&
+ vmx->loaded_vmcs->soft_vnmi_blocked)) {
+ if (vmx_interrupt_allowed(vcpu)) {
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+ } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
+ vcpu->arch.nmi_pending) {
+ /*
+ * This CPU don't support us in finding the end of an
+ * NMI-blocked window if the guest runs with IRQs
+ * disabled. So we pull the trigger after 1 s of
+ * futile waiting, but inform the user about this.
+ */
+ printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+ "state on VCPU %d after 1 s timeout\n",
+ __func__, vcpu->vcpu_id);
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+ }
+ }
+
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
@@ -9105,33 +9168,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
- if (vmx->loaded_vmcs->nmi_known_unmasked)
- return;
- /*
- * Can't use vmx->exit_intr_info since we're not sure what
- * the exit reason is.
- */
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
- vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
- /*
- * SDM 3: 27.7.1.2 (September 2008)
- * Re-set bit "block by NMI" before VM entry if vmexit caused by
- * a guest IRET fault.
- * SDM 3: 23.2.2 (September 2008)
- * Bit 12 is undefined in any of the following cases:
- * If the VM exit sets the valid bit in the IDT-vectoring
- * information field.
- * If the VM exit is due to a double fault.
- */
- if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
- vector != DF_VECTOR && !idtv_info_valid)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmx->loaded_vmcs->nmi_known_unmasked =
- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
- & GUEST_INTR_STATE_NMI);
+ if (enable_vnmi) {
+ if (vmx->loaded_vmcs->nmi_known_unmasked)
+ return;
+ /*
+ * Can't use vmx->exit_intr_info since we're not sure what
+ * the exit reason is.
+ */
+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+ vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+ /*
+ * SDM 3: 27.7.1.2 (September 2008)
+ * Re-set bit "block by NMI" before VM entry if vmexit caused by
+ * a guest IRET fault.
+ * SDM 3: 23.2.2 (September 2008)
+ * Bit 12 is undefined in any of the following cases:
+ * If the VM exit sets the valid bit in the IDT-vectoring
+ * information field.
+ * If the VM exit is due to a double fault.
+ */
+ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+ vector != DF_VECTOR && !idtv_info_valid)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmx->loaded_vmcs->nmi_known_unmasked =
+ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+ & GUEST_INTR_STATE_NMI);
+ } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
+ vmx->loaded_vmcs->vnmi_blocked_time +=
+ ktime_to_ns(ktime_sub(ktime_get(),
+ vmx->loaded_vmcs->entry_time));
}
static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@ -9248,6 +9316,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr, cr3, cr4;
+ /* Record the guest's net vcpu time for enforced NMI injections. */
+ if (unlikely(!enable_vnmi &&
+ vmx->loaded_vmcs->soft_vnmi_blocked))
+ vmx->loaded_vmcs->entry_time = ktime_get();
+
/* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */
if (vmx->emulation_required)
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 35625d279458..9119d8e41f1f 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -733,11 +733,11 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
*
* Returns:
*
- * A signed 8-bit value containing the default parameters on success.
+ * An int containing ORed-in default parameters on success.
*
* -EINVAL on error.
*/
-char insn_get_code_seg_params(struct pt_regs *regs)
+int insn_get_code_seg_params(struct pt_regs *regs)
{
struct desc_struct *desc;
short sel;
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 12e377184ee4..c4d55919fac1 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -896,7 +896,7 @@ EndTable
GrpTable: Grp3_1
0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
2: NOT Eb
3: NEG Eb
4: MUL AL,Eb
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8ae0000cbdb3..00b296617ca4 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -158,6 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (len > TASK_SIZE)
return -ENOMEM;
+ /* No address checking. See comment at mmap_address_hint_valid() */
if (flags & MAP_FIXED) {
if (prepare_hugepage_range(file, addr, len))
return -EINVAL;
@@ -165,12 +166,16 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
}
if (addr) {
- addr = ALIGN(addr, huge_page_size(h));
+ addr &= huge_page_mask(h);
+ if (!mmap_address_hint_valid(addr, len))
+ goto get_unmapped_area;
+
vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
+ if (!vma || addr + len <= vm_start_gap(vma))
return addr;
}
+
+get_unmapped_area:
if (mm->get_unmapped_area == arch_get_unmapped_area)
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index a99679826846..155ecbac9e28 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -33,6 +33,8 @@
#include <linux/compat.h>
#include <asm/elf.h>
+#include "physaddr.h"
+
struct va_alignment __read_mostly va_align = {
.flags = -1,
};
@@ -174,3 +176,63 @@ const char *arch_vma_name(struct vm_area_struct *vma)
return "[mpx]";
return NULL;
}
+
+/**
+ * mmap_address_hint_valid - Validate the address hint of mmap
+ * @addr: Address hint
+ * @len: Mapping length
+ *
+ * Check whether @addr and @addr + @len result in a valid mapping.
+ *
+ * On 32bit this only checks whether @addr + @len is <= TASK_SIZE.
+ *
+ * On 64bit with 5-level page tables another sanity check is required
+ * because mappings requested by mmap(@addr, 0) which cross the 47-bit
+ * virtual address boundary can cause the following theoretical issue:
+ *
+ * An application calls mmap(addr, 0), i.e. without MAP_FIXED, where @addr
+ * is below the border of the 47-bit address space and @addr + @len is
+ * above the border.
+ *
+ * With 4-level paging this request succeeds, but the resulting mapping
+ * address will always be within the 47-bit virtual address space, because
+ * the hint address does not result in a valid mapping and is
+ * ignored. Hence applications which are not prepared to handle virtual
+ * addresses above 47-bit work correctly.
+ *
+ * With 5-level paging this request would be granted and result in a
+ * mapping which crosses the border of the 47-bit virtual address
+ * space. If the application cannot handle addresses above 47-bit this
+ * will lead to misbehaviour and hard to diagnose failures.
+ *
+ * Therefore ignore address hints which would result in a mapping crossing
+ * the 47-bit virtual address boundary.
+ *
+ * Note, that in the same scenario with MAP_FIXED the behaviour is
+ * different. The request with @addr < 47-bit and @addr + @len > 47-bit
+ * fails on a 4-level paging machine but succeeds on a 5-level paging
+ * machine. It is reasonable to expect that an application does not rely on
+ * the failure of such a fixed mapping request, so the restriction is not
+ * applied.
+ */
+bool mmap_address_hint_valid(unsigned long addr, unsigned long len)
+{
+ if (TASK_SIZE - len < addr)
+ return false;
+
+ return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW);
+}
+
+/* Can we access it for direct reading/writing? Must be RAM: */
+int valid_phys_addr_range(phys_addr_t addr, size_t count)
+{
+ return addr + count <= __pa(high_memory);
+}
+
+/* Can we access it through mmap? Must be a valid physical address: */
+int valid_mmap_phys_addr_range(unsigned long pfn, size_t count)
+{
+ phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT;
+
+ return phys_addr_valid(addr + count - 1);
+}
OpenPOWER on IntegriCloud