summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig12
-rw-r--r--arch/x86/boot/compressed/kaslr.c5
-rw-r--r--arch/x86/entry/entry_64.S14
-rw-r--r--arch/x86/events/intel/core.c35
-rw-r--r--arch/x86/events/intel/uncore.c4
-rw-r--r--arch/x86/events/intel/uncore.h2
-rw-r--r--arch/x86/events/intel/uncore_snbep.c10
-rw-r--r--arch/x86/include/asm/elf.h1
-rw-r--r--arch/x86/include/asm/hw_irq.h8
-rw-r--r--arch/x86/include/asm/hypertransport.h46
-rw-r--r--arch/x86/include/asm/insn-eval.h2
-rw-r--r--arch/x86/include/asm/io.h4
-rw-r--r--arch/x86/include/asm/irqdomain.h6
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/pgtable.h8
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/kernel/acpi/boot.c61
-rw-r--r--arch/x86/kernel/apic/Makefile1
-rw-r--r--arch/x86/kernel/apic/htirq.c198
-rw-r--r--arch/x86/kernel/apic/vector.c5
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/mpparse.c6
-rw-r--r--arch/x86/kernel/smpboot.c128
-rw-r--r--arch/x86/kernel/sys_x86_64.c10
-rw-r--r--arch/x86/kernel/umip.c88
-rw-r--r--arch/x86/kvm/cpuid.h2
-rw-r--r--arch/x86/kvm/emulate.c39
-rw-r--r--arch/x86/kvm/ioapic.c34
-rw-r--r--arch/x86/kvm/lapic.c12
-rw-r--r--arch/x86/kvm/svm.c18
-rw-r--r--arch/x86/kvm/vmx.c234
-rw-r--r--arch/x86/kvm/x86.c42
-rw-r--r--arch/x86/lib/insn-eval.c4
-rw-r--r--arch/x86/lib/x86-opcode-map.txt2
-rw-r--r--arch/x86/mm/hugetlbpage.c11
-rw-r--r--arch/x86/mm/mmap.c62
36 files changed, 588 insertions, 532 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index df3276d6bfe3..8eed3f94bfc7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1804,14 +1804,20 @@ config X86_SMAP
If unsure, say Y.
config X86_INTEL_UMIP
- def_bool n
+ def_bool y
depends on CPU_SUP_INTEL
prompt "Intel User Mode Instruction Prevention" if EXPERT
---help---
The User Mode Instruction Prevention (UMIP) is a security
feature in newer Intel processors. If enabled, a general
- protection fault is issued if the instructions SGDT, SLDT,
- SIDT, SMSW and STR are executed in user mode.
+ protection fault is issued if the SGDT, SLDT, SIDT, SMSW
+ or STR instructions are executed in user mode. These instructions
+ unnecessarily expose information about the hardware state.
+
+ The vast majority of applications do not use these instructions.
+ For the very few that do, software emulation is provided in
+ specific cases in protected and virtual-8086 modes. Emulated
+ results are dummy.
config X86_INTEL_MPX
prompt "Intel MPX (Memory Protection Extensions)"
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a63fbc25ce84..8199a6187251 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -171,7 +171,6 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
static void mem_avoid_memmap(char *str)
{
static int i;
- int rc;
if (i >= MAX_MEMMAP_REGIONS)
return;
@@ -219,7 +218,7 @@ static int handle_mem_memmap(void)
return 0;
tmp_cmdline = malloc(len + 1);
- if (!tmp_cmdline )
+ if (!tmp_cmdline)
error("Failed to allocate space for tmp_cmdline");
memcpy(tmp_cmdline, args, len);
@@ -363,7 +362,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
cmd_line |= boot_params->hdr.cmd_line_ptr;
/* Calculate size of cmd_line. */
ptr = (char *)(unsigned long)cmd_line;
- for (cmd_line_size = 0; ptr[cmd_line_size++]; )
+ for (cmd_line_size = 0; ptr[cmd_line_size++];)
;
mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a2b30ec69497..f81d50d7ceac 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -51,15 +51,19 @@ ENTRY(native_usergs_sysret64)
END(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
-.macro TRACE_IRQS_IRETQ
+.macro TRACE_IRQS_FLAGS flags:req
#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9, EFLAGS(%rsp) /* interrupts off? */
+ bt $9, \flags /* interrupts off? */
jnc 1f
TRACE_IRQS_ON
1:
#endif
.endm
+.macro TRACE_IRQS_IRETQ
+ TRACE_IRQS_FLAGS EFLAGS(%rsp)
+.endm
+
/*
* When dynamic function tracer is enabled it will add a breakpoint
* to all locations that it is about to modify, sync CPUs, update
@@ -148,8 +152,6 @@ ENTRY(entry_SYSCALL_64)
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- TRACE_IRQS_OFF
-
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
@@ -170,6 +172,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
UNWIND_HINT_REGS extra=0
+ TRACE_IRQS_OFF
+
/*
* If we need to do entry work or if we guess we'll need to do
* exit work, go straight to the slow path.
@@ -943,11 +947,13 @@ ENTRY(native_load_gs_index)
FRAME_BEGIN
pushfq
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
+ TRACE_IRQS_OFF
SWAPGS
.Lgs_change:
movl %edi, %gs
2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
SWAPGS
+ TRACE_IRQS_FLAGS (%rsp)
popfq
FRAME_END
ret
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 43445da30cea..09c26a4f139c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3734,6 +3734,19 @@ EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1");
EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1");
static struct attribute *hsw_events_attrs[] = {
+ EVENT_PTR(mem_ld_hsw),
+ EVENT_PTR(mem_st_hsw),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
+ NULL
+};
+
+static struct attribute *hsw_tsx_events_attrs[] = {
EVENT_PTR(tx_start),
EVENT_PTR(tx_commit),
EVENT_PTR(tx_abort),
@@ -3746,18 +3759,16 @@ static struct attribute *hsw_events_attrs[] = {
EVENT_PTR(el_conflict),
EVENT_PTR(cycles_t),
EVENT_PTR(cycles_ct),
- EVENT_PTR(mem_ld_hsw),
- EVENT_PTR(mem_st_hsw),
- EVENT_PTR(td_slots_issued),
- EVENT_PTR(td_slots_retired),
- EVENT_PTR(td_fetch_bubbles),
- EVENT_PTR(td_total_slots),
- EVENT_PTR(td_total_slots_scale),
- EVENT_PTR(td_recovery_bubbles),
- EVENT_PTR(td_recovery_bubbles_scale),
NULL
};
+static __init struct attribute **get_hsw_events_attrs(void)
+{
+ return boot_cpu_has(X86_FEATURE_RTM) ?
+ merge_attr(hsw_events_attrs, hsw_tsx_events_attrs) :
+ hsw_events_attrs;
+}
+
static ssize_t freeze_on_smi_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
@@ -4186,7 +4197,7 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.cpu_events = get_hsw_events_attrs();
x86_pmu.lbr_double_abort = true;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
@@ -4225,7 +4236,7 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.cpu_events = get_hsw_events_attrs();
x86_pmu.limit_period = bdw_limit_period;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
@@ -4283,7 +4294,7 @@ __init int intel_pmu_init(void)
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
extra_attr = merge_attr(extra_attr, skl_format_attr);
- x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.cpu_events = get_hsw_events_attrs();
intel_pmu_pebs_data_source_skl(
boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
pr_cont("Skylake events, ");
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index d45e06346f14..7874c980d569 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -975,10 +975,10 @@ static void uncore_pci_remove(struct pci_dev *pdev)
int i, phys_id, pkg;
phys_id = uncore_pcibus_to_physid(pdev->bus);
- pkg = topology_phys_to_logical_pkg(phys_id);
box = pci_get_drvdata(pdev);
if (!box) {
+ pkg = topology_phys_to_logical_pkg(phys_id);
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
uncore_extra_pci_dev[pkg].dev[i] = NULL;
@@ -994,7 +994,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
return;
pci_set_drvdata(pdev, NULL);
- pmu->boxes[pkg] = NULL;
+ pmu->boxes[box->pkgid] = NULL;
if (atomic_dec_return(&pmu->activeboxes) == 0)
uncore_pmu_unregister(pmu);
uncore_box_exit(box);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 4364191e7c6b..414dc7e7c950 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -100,7 +100,7 @@ struct intel_uncore_extra_reg {
struct intel_uncore_box {
int pci_phys_id;
- int pkgid;
+ int pkgid; /* Logical package ID */
int n_active; /* number of active events */
int n_events;
int cpu; /* cpu to collect events */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 95cb19f4e06f..6d8044ab1060 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1057,7 +1057,7 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve
if (reg1->idx != EXTRA_REG_NONE) {
int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
- int pkg = topology_phys_to_logical_pkg(box->pci_phys_id);
+ int pkg = box->pkgid;
struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
if (filter_pdev) {
@@ -3035,11 +3035,19 @@ static struct intel_uncore_type *bdx_msr_uncores[] = {
NULL,
};
+/* Bit 7 'Use Occupancy' is not available for counter 0 on BDX */
+static struct event_constraint bdx_uncore_pcu_constraints[] = {
+ EVENT_CONSTRAINT(0x80, 0xe, 0x80),
+ EVENT_CONSTRAINT_END
+};
+
void bdx_uncore_cpu_init(void)
{
if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
uncore_msr_uncores = bdx_msr_uncores;
+
+ hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints;
}
static struct intel_uncore_type bdx_uncore_ha = {
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 3a091cea36c5..0d157d2a1e2a 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -309,6 +309,7 @@ static inline int mmap_is_ia32(void)
extern unsigned long task_size_32bit(void);
extern unsigned long task_size_64bit(int full_addr_space);
extern unsigned long get_mmap_base(int is_legacy);
+extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b80e46733909..2851077b6051 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -99,14 +99,6 @@ struct irq_alloc_info {
void *dmar_data;
};
#endif
-#ifdef CONFIG_HT_IRQ
- struct {
- int ht_pos;
- int ht_idx;
- struct pci_dev *ht_dev;
- void *ht_update;
- };
-#endif
#ifdef CONFIG_X86_UV
struct {
int uv_limit;
diff --git a/arch/x86/include/asm/hypertransport.h b/arch/x86/include/asm/hypertransport.h
deleted file mode 100644
index 5d55df352879..000000000000
--- a/arch/x86/include/asm/hypertransport.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_HYPERTRANSPORT_H
-#define _ASM_X86_HYPERTRANSPORT_H
-
-/*
- * Constants for x86 Hypertransport Interrupts.
- */
-
-#define HT_IRQ_LOW_BASE 0xf8000000
-
-#define HT_IRQ_LOW_VECTOR_SHIFT 16
-#define HT_IRQ_LOW_VECTOR_MASK 0x00ff0000
-#define HT_IRQ_LOW_VECTOR(v) \
- (((v) << HT_IRQ_LOW_VECTOR_SHIFT) & HT_IRQ_LOW_VECTOR_MASK)
-
-#define HT_IRQ_LOW_DEST_ID_SHIFT 8
-#define HT_IRQ_LOW_DEST_ID_MASK 0x0000ff00
-#define HT_IRQ_LOW_DEST_ID(v) \
- (((v) << HT_IRQ_LOW_DEST_ID_SHIFT) & HT_IRQ_LOW_DEST_ID_MASK)
-
-#define HT_IRQ_LOW_DM_PHYSICAL 0x0000000
-#define HT_IRQ_LOW_DM_LOGICAL 0x0000040
-
-#define HT_IRQ_LOW_RQEOI_EDGE 0x0000000
-#define HT_IRQ_LOW_RQEOI_LEVEL 0x0000020
-
-
-#define HT_IRQ_LOW_MT_FIXED 0x0000000
-#define HT_IRQ_LOW_MT_ARBITRATED 0x0000004
-#define HT_IRQ_LOW_MT_SMI 0x0000008
-#define HT_IRQ_LOW_MT_NMI 0x000000c
-#define HT_IRQ_LOW_MT_INIT 0x0000010
-#define HT_IRQ_LOW_MT_STARTUP 0x0000014
-#define HT_IRQ_LOW_MT_EXTINT 0x0000018
-#define HT_IRQ_LOW_MT_LINT1 0x000008c
-#define HT_IRQ_LOW_MT_LINT0 0x0000098
-
-#define HT_IRQ_LOW_IRQ_MASKED 0x0000001
-
-
-#define HT_IRQ_HIGH_DEST_ID_SHIFT 0
-#define HT_IRQ_HIGH_DEST_ID_MASK 0x00ffffff
-#define HT_IRQ_HIGH_DEST_ID(v) \
- ((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK)
-
-#endif /* _ASM_X86_HYPERTRANSPORT_H */
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index e1d3b4ce8a92..2b6ccf2c49f1 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -18,6 +18,6 @@
void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs);
int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
-char insn_get_code_seg_params(struct pt_regs *regs);
+int insn_get_code_seg_params(struct pt_regs *regs);
#endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 93ae8aee1780..95e948627fd0 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -111,6 +111,10 @@ build_mmio_write(__writeq, "q", unsigned long, "r", )
#endif
+#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
+extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
+extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
+
/**
* virt_to_phys - map virtual addresses to physical
* @address: address to remap
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index f695cc6b8e1f..139feef467f7 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -56,10 +56,4 @@ extern void arch_init_msi_domain(struct irq_domain *domain);
static inline void arch_init_msi_domain(struct irq_domain *domain) { }
#endif
-#ifdef CONFIG_HT_IRQ
-extern void arch_init_htirq_domain(struct irq_domain *domain);
-#else
-static inline void arch_init_htirq_domain(struct irq_domain *domain) { }
-#endif
-
#endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1bfb99770c34..977de5fb968b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1161,7 +1161,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
static inline int emulate_instruction(struct kvm_vcpu *vcpu,
int emulation_type)
{
- return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
+ return x86_emulate_instruction(vcpu, 0,
+ emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
}
void kvm_enable_efer_bits(u64);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 09f9e1e00e3b..95e2dfd75521 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1061,7 +1061,7 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
static inline int pmd_write(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_RW;
@@ -1088,6 +1088,12 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
}
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_RW;
+}
+
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2db7cf720b04..cc16fa882e3e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -132,6 +132,7 @@ struct cpuinfo_x86 {
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
+ unsigned initialized : 1;
} __randomize_layout;
struct cpuid_regs {
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ef9e02e614d0..f4c463df8b08 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -342,13 +342,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+ u8 trigger, u32 gsi);
+
static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
u32 gsi)
{
- int ioapic;
- int pin;
- struct mpc_intsrc mp_irq;
-
/*
* Check bus_irq boundary.
*/
@@ -358,14 +357,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
}
/*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return;
- pin = mp_find_ioapic_pin(ioapic, gsi);
-
- /*
* TBD: This check is for faulty timer entries, where the override
* erroneously sets the trigger to level, resulting in a HUGE
* increase of timer interrupts!
@@ -373,16 +364,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
if ((bus_irq == 0) && (trigger == 3))
trigger = 1;
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = (trigger << 2) | polarity;
- mp_irq.srcbus = MP_ISA_BUS;
- mp_irq.srcbusirq = bus_irq; /* IRQ */
- mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
- mp_irq.dstirq = pin; /* INTIN# */
-
- mp_save_irq(&mp_irq);
-
+ if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
+ return;
/*
* Reset default identity mapping if gsi is also an legacy IRQ,
* otherwise there will be more than one entry with the same GSI
@@ -429,6 +412,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
return 0;
}
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+ u8 trigger, u32 gsi)
+{
+ struct mpc_intsrc mp_irq;
+ int ioapic, pin;
+
+ /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0) {
+ pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
+ return ioapic;
+ }
+
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (trigger << 2) | polarity;
+ mp_irq.srcbus = MP_ISA_BUS;
+ mp_irq.srcbusirq = bus_irq;
+ mp_irq.dstapic = mpc_ioapic_id(ioapic);
+ mp_irq.dstirq = pin;
+
+ mp_save_irq(&mp_irq);
+
+ return 0;
+}
+
static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{
@@ -473,7 +484,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
- mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+ if (bus_irq < NR_IRQS_LEGACY)
+ mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+ else
+ mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
+
acpi_penalize_sci_irq(bus_irq, trigger, polarity);
/*
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index a9e08924927e..a6fcaf16cdbf 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -12,7 +12,6 @@ obj-y += hw_nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_PCI_MSI) += msi.o
-obj-$(CONFIG_HT_IRQ) += htirq.o
obj-$(CONFIG_SMP) += ipi.o
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
deleted file mode 100644
index b07075dce8b7..000000000000
--- a/arch/x86/kernel/apic/htirq.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Support Hypertransport IRQ
- *
- * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
- * Moved from arch/x86/kernel/apic/io_apic.c.
- * Jiang Liu <jiang.liu@linux.intel.com>
- * Add support of hierarchical irqdomain
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/pci.h>
-#include <linux/htirq.h>
-#include <asm/irqdomain.h>
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/hypertransport.h>
-
-static struct irq_domain *htirq_domain;
-
-/*
- * Hypertransport interrupt support
- */
-static int
-ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
- struct irq_data *parent = data->parent_data;
- int ret;
-
- ret = parent->chip->irq_set_affinity(parent, mask, force);
- if (ret >= 0) {
- struct ht_irq_msg msg;
- struct irq_cfg *cfg = irqd_cfg(data);
-
- fetch_ht_irq_msg(data->irq, &msg);
- msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK |
- HT_IRQ_LOW_DEST_ID_MASK);
- msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) |
- HT_IRQ_LOW_DEST_ID(cfg->dest_apicid);
- msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
- msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
- write_ht_irq_msg(data->irq, &msg);
- }
-
- return ret;
-}
-
-static struct irq_chip ht_irq_chip = {
- .name = "PCI-HT",
- .irq_mask = mask_ht_irq,
- .irq_unmask = unmask_ht_irq,
- .irq_ack = irq_chip_ack_parent,
- .irq_set_affinity = ht_set_affinity,
- .irq_retrigger = irq_chip_retrigger_hierarchy,
- .flags = IRQCHIP_SKIP_SET_WAKE,
-};
-
-static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq,
- unsigned int nr_irqs, void *arg)
-{
- struct ht_irq_cfg *ht_cfg;
- struct irq_alloc_info *info = arg;
- struct pci_dev *dev;
- irq_hw_number_t hwirq;
- int ret;
-
- if (nr_irqs > 1 || !info)
- return -EINVAL;
-
- dev = info->ht_dev;
- hwirq = (info->ht_idx & 0xFF) |
- PCI_DEVID(dev->bus->number, dev->devfn) << 8 |
- (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24;
- if (irq_find_mapping(domain, hwirq) > 0)
- return -EEXIST;
-
- ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL);
- if (!ht_cfg)
- return -ENOMEM;
-
- ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
- if (ret < 0) {
- kfree(ht_cfg);
- return ret;
- }
-
- /* Initialize msg to a value that will never match the first write. */
- ht_cfg->msg.address_lo = 0xffffffff;
- ht_cfg->msg.address_hi = 0xffffffff;
- ht_cfg->dev = info->ht_dev;
- ht_cfg->update = info->ht_update;
- ht_cfg->pos = info->ht_pos;
- ht_cfg->idx = 0x10 + (info->ht_idx * 2);
- irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg,
- handle_edge_irq, ht_cfg, "edge");
-
- return 0;
-}
-
-static void htirq_domain_free(struct irq_domain *domain, unsigned int virq,
- unsigned int nr_irqs)
-{
- struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
-
- BUG_ON(nr_irqs != 1);
- kfree(irq_data->chip_data);
- irq_domain_free_irqs_top(domain, virq, nr_irqs);
-}
-
-static int htirq_domain_activate(struct irq_domain *domain,
- struct irq_data *irq_data, bool early)
-{
- struct ht_irq_msg msg;
- struct irq_cfg *cfg = irqd_cfg(irq_data);
-
- msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
- msg.address_lo =
- HT_IRQ_LOW_BASE |
- HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) |
- HT_IRQ_LOW_VECTOR(cfg->vector) |
- ((apic->irq_dest_mode == 0) ?
- HT_IRQ_LOW_DM_PHYSICAL :
- HT_IRQ_LOW_DM_LOGICAL) |
- HT_IRQ_LOW_RQEOI_EDGE |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- HT_IRQ_LOW_MT_FIXED :
- HT_IRQ_LOW_MT_ARBITRATED) |
- HT_IRQ_LOW_IRQ_MASKED;
- write_ht_irq_msg(irq_data->irq, &msg);
- return 0;
-}
-
-static void htirq_domain_deactivate(struct irq_domain *domain,
- struct irq_data *irq_data)
-{
- struct ht_irq_msg msg;
-
- memset(&msg, 0, sizeof(msg));
- write_ht_irq_msg(irq_data->irq, &msg);
-}
-
-static const struct irq_domain_ops htirq_domain_ops = {
- .alloc = htirq_domain_alloc,
- .free = htirq_domain_free,
- .activate = htirq_domain_activate,
- .deactivate = htirq_domain_deactivate,
-};
-
-void __init arch_init_htirq_domain(struct irq_domain *parent)
-{
- struct fwnode_handle *fn;
-
- if (disable_apic)
- return;
-
- fn = irq_domain_alloc_named_fwnode("PCI-HT");
- if (!fn)
- goto warn;
-
- htirq_domain = irq_domain_create_tree(fn, &htirq_domain_ops, NULL);
- irq_domain_free_fwnode(fn);
- if (!htirq_domain)
- goto warn;
-
- htirq_domain->parent = parent;
- return;
-
-warn:
- pr_warn("Failed to initialize irqdomain for HTIRQ.\n");
-}
-
-int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
- ht_irq_update_t *update)
-{
- struct irq_alloc_info info;
-
- if (!htirq_domain)
- return -ENOSYS;
-
- init_irq_alloc_info(&info, NULL);
- info.ht_idx = idx;
- info.ht_pos = pos;
- info.ht_dev = dev;
- info.ht_update = update;
-
- return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev),
- &info);
-}
-
-void arch_teardown_ht_irq(unsigned int irq)
-{
- irq_domain_free_irqs(irq, 1);
-}
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 05c85e693a5d..6a823a25eaff 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -1,5 +1,5 @@
/*
- * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc.
+ * Local APIC related interfaces to support IOAPIC, MSI, etc.
*
* Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
* Moved from arch/x86/kernel/apic/io_apic.c.
@@ -601,7 +601,7 @@ int __init arch_probe_nr_irqs(void)
nr_irqs = NR_VECTORS * nr_cpu_ids;
nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
-#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+#if defined(CONFIG_PCI_MSI)
/*
* for MSI and HT dyn irq
*/
@@ -663,7 +663,6 @@ int __init arch_early_irq_init(void)
irq_set_default_host(x86_vector_domain);
arch_init_msi_domain(x86_vector_domain);
- arch_init_htirq_domain(x86_vector_domain);
BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 13ae9e5eec2f..fa998ca8aa5a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -341,6 +341,8 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
cr4_set_bits(X86_CR4_UMIP);
+ pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n");
+
return;
out:
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 410c5dadcee3..3a4b12809ab5 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -431,6 +431,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
}
static unsigned long mpf_base;
+static bool mpf_found;
static unsigned long __init get_mpc_size(unsigned long physptr)
{
@@ -504,7 +505,7 @@ void __init default_get_smp_config(unsigned int early)
if (!smp_found_config)
return;
- if (!mpf_base)
+ if (!mpf_found)
return;
if (acpi_lapic && early)
@@ -593,6 +594,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
smp_found_config = 1;
#endif
mpf_base = base;
+ mpf_found = true;
pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n",
base, base + sizeof(*mpf) - 1, mpf);
@@ -858,7 +860,7 @@ static int __init update_mp_table(void)
if (!enable_update_mptable)
return 0;
- if (!mpf_base)
+ if (!mpf_found)
return 0;
mpf = early_memremap(mpf_base, sizeof(*mpf));
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5f59e6bee123..3d01df7d7cf6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -101,9 +101,6 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
/* Logical package management. We might want to allocate that dynamically */
-static int *physical_to_logical_pkg __read_mostly;
-static unsigned long *physical_package_map __read_mostly;;
-static unsigned int max_physical_pkg_id __read_mostly;
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
@@ -281,108 +278,48 @@ static void notrace start_secondary(void *unused)
}
/**
+ * topology_phys_to_logical_pkg - Map a physical package id to a logical
+ *
+ * Returns logical package id or -1 if not found
+ */
+int topology_phys_to_logical_pkg(unsigned int phys_pkg)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->initialized && c->phys_proc_id == phys_pkg)
+ return c->logical_proc_id;
+ }
+ return -1;
+}
+EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
+/**
* topology_update_package_map - Update the physical to logical package map
* @pkg: The physical package id as retrieved via CPUID
* @cpu: The cpu for which this is updated
*/
int topology_update_package_map(unsigned int pkg, unsigned int cpu)
{
- unsigned int new;
+ int new;
- /* Called from early boot ? */
- if (!physical_package_map)
- return 0;
-
- if (pkg >= max_physical_pkg_id)
- return -EINVAL;
-
- /* Set the logical package id */
- if (test_and_set_bit(pkg, physical_package_map))
+ /* Already available somewhere? */
+ new = topology_phys_to_logical_pkg(pkg);
+ if (new >= 0)
goto found;
- if (logical_packages >= __max_logical_packages) {
- pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n",
- logical_packages, cpu, __max_logical_packages);
- return -ENOSPC;
- }
-
new = logical_packages++;
if (new != pkg) {
pr_info("CPU %u Converting physical %u to logical package %u\n",
cpu, pkg, new);
}
- physical_to_logical_pkg[pkg] = new;
-
found:
- cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
+ cpu_data(cpu).logical_proc_id = new;
return 0;
}
-/**
- * topology_phys_to_logical_pkg - Map a physical package id to a logical
- *
- * Returns logical package id or -1 if not found
- */
-int topology_phys_to_logical_pkg(unsigned int phys_pkg)
-{
- if (phys_pkg >= max_physical_pkg_id)
- return -1;
- return physical_to_logical_pkg[phys_pkg];
-}
-EXPORT_SYMBOL(topology_phys_to_logical_pkg);
-
-static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu)
-{
- unsigned int ncpus;
- size_t size;
-
- /*
- * Today neither Intel nor AMD support heterogenous systems. That
- * might change in the future....
- *
- * While ideally we'd want '* smp_num_siblings' in the below @ncpus
- * computation, this won't actually work since some Intel BIOSes
- * report inconsistent HT data when they disable HT.
- *
- * In particular, they reduce the APIC-IDs to only include the cores,
- * but leave the CPUID topology to say there are (2) siblings.
- * This means we don't know how many threads there will be until
- * after the APIC enumeration.
- *
- * By not including this we'll sometimes over-estimate the number of
- * logical packages by the amount of !present siblings, but this is
- * still better than MAX_LOCAL_APIC.
- *
- * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited
- * on the command line leading to a similar issue as the HT disable
- * problem because the hyperthreads are usually enumerated after the
- * primary cores.
- */
- ncpus = boot_cpu_data.x86_max_cores;
- if (!ncpus) {
- pr_warn("x86_max_cores == zero !?!?");
- ncpus = 1;
- }
-
- __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
- logical_packages = 0;
-
- /*
- * Possibly larger than what we need as the number of apic ids per
- * package can be smaller than the actual used apic ids.
- */
- max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
- size = max_physical_pkg_id * sizeof(unsigned int);
- physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
- memset(physical_to_logical_pkg, 0xff, size);
- size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
- physical_package_map = kzalloc(size, GFP_KERNEL);
-
- pr_info("Max logical packages: %u\n", __max_logical_packages);
-
- topology_update_package_map(c->phys_proc_id, cpu);
-}
-
void __init smp_store_boot_cpu_info(void)
{
int id = 0; /* CPU 0 */
@@ -390,7 +327,8 @@ void __init smp_store_boot_cpu_info(void)
*c = boot_cpu_data;
c->cpu_index = id;
- smp_init_package_map(c, id);
+ topology_update_package_map(c->phys_proc_id, id);
+ c->initialized = true;
}
/*
@@ -401,13 +339,16 @@ void smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
- *c = boot_cpu_data;
+ /* Copy boot_cpu_data only on the first bringup */
+ if (!c->initialized)
+ *c = boot_cpu_data;
c->cpu_index = id;
/*
* During boot time, CPU0 has this setup already. Save the info when
* bringing up AP or offlined CPU0.
*/
identify_secondary_cpu(c);
+ c->initialized = true;
}
static bool
@@ -1356,7 +1297,16 @@ void __init native_smp_prepare_boot_cpu(void)
void __init native_smp_cpus_done(unsigned int max_cpus)
{
+ int ncpus;
+
pr_debug("Boot done\n");
+ /*
+ * Today neither Intel nor AMD support heterogenous systems so
+ * extrapolate the boot cpu's data to all packages.
+ */
+ ncpus = cpu_data(0).booted_cores * smp_num_siblings;
+ __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+ pr_info("Max logical packages: %u\n", __max_logical_packages);
if (x86_has_numa_in_package)
set_sched_topology(x86_numa_in_package_topology);
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index a63fe77b3217..676774b9bb8d 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -188,6 +188,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
if (len > TASK_SIZE)
return -ENOMEM;
+ /* No address checking. See comment at mmap_address_hint_valid() */
if (flags & MAP_FIXED)
return addr;
@@ -197,12 +198,15 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
/* requesting a specific address */
if (addr) {
- addr = PAGE_ALIGN(addr);
+ addr &= PAGE_MASK;
+ if (!mmap_address_hint_valid(addr, len))
+ goto get_unmapped_area;
+
vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
+ if (!vma || addr + len <= vm_start_gap(vma))
return addr;
}
+get_unmapped_area:
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 6ba82be68cff..f44ce0fb3583 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -78,7 +78,60 @@
#define UMIP_INST_SGDT 0 /* 0F 01 /0 */
#define UMIP_INST_SIDT 1 /* 0F 01 /1 */
-#define UMIP_INST_SMSW 3 /* 0F 01 /4 */
+#define UMIP_INST_SMSW 2 /* 0F 01 /4 */
+#define UMIP_INST_SLDT 3 /* 0F 00 /0 */
+#define UMIP_INST_STR 4 /* 0F 00 /1 */
+
+const char * const umip_insns[5] = {
+ [UMIP_INST_SGDT] = "SGDT",
+ [UMIP_INST_SIDT] = "SIDT",
+ [UMIP_INST_SMSW] = "SMSW",
+ [UMIP_INST_SLDT] = "SLDT",
+ [UMIP_INST_STR] = "STR",
+};
+
+#define umip_pr_err(regs, fmt, ...) \
+ umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
+#define umip_pr_warning(regs, fmt, ...) \
+ umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__)
+
+/**
+ * umip_printk() - Print a rate-limited message
+ * @regs: Register set with the context in which the warning is printed
+ * @log_level: Kernel log level to print the message
+ * @fmt: The text string to print
+ *
+ * Print the text contained in @fmt. The print rate is limited to bursts of 5
+ * messages every two minutes. The purpose of this customized version of
+ * printk() is to print messages when user space processes use any of the
+ * UMIP-protected instructions. Thus, the printed text is prepended with the
+ * task name and process ID number of the current task as well as the
+ * instruction and stack pointers in @regs as seen when entering kernel mode.
+ *
+ * Returns:
+ *
+ * None.
+ */
+static __printf(3, 4)
+void umip_printk(const struct pt_regs *regs, const char *log_level,
+ const char *fmt, ...)
+{
+ /* Bursts of 5 messages every two minutes */
+ static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5);
+ struct task_struct *tsk = current;
+ struct va_format vaf;
+ va_list args;
+
+ if (!__ratelimit(&ratelimit))
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm,
+ task_pid_nr(tsk), regs->ip, regs->sp, &vaf);
+ va_end(args);
+}
/**
* identify_insn() - Identify a UMIP-protected instruction
@@ -118,10 +171,16 @@ static int identify_insn(struct insn *insn)
default:
return -EINVAL;
}
+ } else if (insn->opcode.bytes[1] == 0x0) {
+ if (X86_MODRM_REG(insn->modrm.value) == 0)
+ return UMIP_INST_SLDT;
+ else if (X86_MODRM_REG(insn->modrm.value) == 1)
+ return UMIP_INST_STR;
+ else
+ return -EINVAL;
+ } else {
+ return -EINVAL;
}
-
- /* SLDT AND STR are not emulated */
- return -EINVAL;
}
/**
@@ -228,10 +287,8 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
return;
- pr_err_ratelimited("%s[%d] umip emulation segfault ip:%lx sp:%lx error:%x in %lx\n",
- tsk->comm, task_pid_nr(tsk), regs->ip,
- regs->sp, X86_PF_USER | X86_PF_WRITE,
- regs->ip);
+ umip_pr_err(regs, "segfault in emulation. error%x\n",
+ X86_PF_USER | X86_PF_WRITE);
}
/**
@@ -262,15 +319,11 @@ bool fixup_umip_exception(struct pt_regs *regs)
unsigned char buf[MAX_INSN_SIZE];
void __user *uaddr;
struct insn insn;
- char seg_defs;
+ int seg_defs;
if (!regs)
return false;
- /* Do not emulate 64-bit processes. */
- if (user_64bit_mode(regs))
- return false;
-
/*
* If not in user-space long mode, a custom code segment could be in
* use. This is true in protected mode (if the process defined a local
@@ -322,6 +375,15 @@ bool fixup_umip_exception(struct pt_regs *regs)
if (umip_inst < 0)
return false;
+ umip_pr_warning(regs, "%s instruction cannot be used by applications.\n",
+ umip_insns[umip_inst]);
+
+ /* Do not emulate SLDT, STR or user long mode processes. */
+ if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs))
+ return false;
+
+ umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n");
+
if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
return false;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index cdc70a3a6583..c2cea6651279 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -44,7 +44,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX},
[CPUID_1_ECX] = { 1, 0, CPUID_ECX},
[CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX},
- [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX},
+ [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX},
[CPUID_7_0_EBX] = { 7, 0, CPUID_EBX},
[CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX},
[CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX},
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8079d141792a..e7d04d0c8008 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4014,6 +4014,26 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
fxstate_size(ctxt));
}
+/*
+ * FXRSTOR might restore XMM registers not provided by the guest. Fill
+ * in the host registers (via FXSAVE) instead, so they won't be modified.
+ * (preemption has to stay disabled until FXRSTOR).
+ *
+ * Use noinline to keep the stack for other functions called by callers small.
+ */
+static noinline int fxregs_fixup(struct fxregs_state *fx_state,
+ const size_t used_size)
+{
+ struct fxregs_state fx_tmp;
+ int rc;
+
+ rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
+ memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
+ __fxstate_size(16) - used_size);
+
+ return rc;
+}
+
static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
{
struct fxregs_state fx_state;
@@ -4024,19 +4044,19 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
+ size = fxstate_size(ctxt);
+ rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
ctxt->ops->get_fpu(ctxt);
- size = fxstate_size(ctxt);
if (size < __fxstate_size(16)) {
- rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
+ rc = fxregs_fixup(&fx_state, size);
if (rc != X86EMUL_CONTINUE)
goto out;
}
- rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
- if (rc != X86EMUL_CONTINUE)
- goto out;
-
if (fx_state.mxcsr >> 16) {
rc = emulate_gp(ctxt, 0);
goto out;
@@ -5000,6 +5020,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
bool op_prefix = false;
bool has_seg_override = false;
struct opcode opcode;
+ u16 dummy;
+ struct desc_struct desc;
ctxt->memop.type = OP_NONE;
ctxt->memopp = NULL;
@@ -5018,6 +5040,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
switch (mode) {
case X86EMUL_MODE_REAL:
case X86EMUL_MODE_VM86:
+ def_op_bytes = def_ad_bytes = 2;
+ ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
+ if (desc.d)
+ def_op_bytes = def_ad_bytes = 4;
+ break;
case X86EMUL_MODE_PROT16:
def_op_bytes = def_ad_bytes = 2;
break;
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index bdff437acbcb..4e822ad363f3 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -209,12 +209,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
old_irr = ioapic->irr;
ioapic->irr |= mask;
- if (edge)
+ if (edge) {
ioapic->irr_delivered &= ~mask;
- if ((edge && old_irr == ioapic->irr) ||
- (!edge && entry.fields.remote_irr)) {
- ret = 0;
- goto out;
+ if (old_irr == ioapic->irr) {
+ ret = 0;
+ goto out;
+ }
}
ret = ioapic_service(ioapic, irq, line_status);
@@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
index == RTC_GSI) {
if (kvm_apic_match_dest(vcpu, NULL, 0,
e->fields.dest_id, e->fields.dest_mode) ||
- (e->fields.trig_mode == IOAPIC_EDGE_TRIG &&
- kvm_apic_pending_eoi(vcpu, e->fields.vector)))
+ kvm_apic_pending_eoi(vcpu, e->fields.vector))
__set_bit(e->fields.vector,
ioapic_handled_vectors);
}
@@ -277,6 +276,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
unsigned index;
bool mask_before, mask_after;
+ int old_remote_irr, old_delivery_status;
union kvm_ioapic_redirect_entry *e;
switch (ioapic->ioregsel) {
@@ -299,14 +299,28 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
return;
e = &ioapic->redirtbl[index];
mask_before = e->fields.mask;
+ /* Preserve read-only fields */
+ old_remote_irr = e->fields.remote_irr;
+ old_delivery_status = e->fields.delivery_status;
if (ioapic->ioregsel & 1) {
e->bits &= 0xffffffff;
e->bits |= (u64) val << 32;
} else {
e->bits &= ~0xffffffffULL;
e->bits |= (u32) val;
- e->fields.remote_irr = 0;
}
+ e->fields.remote_irr = old_remote_irr;
+ e->fields.delivery_status = old_delivery_status;
+
+ /*
+ * Some OSes (Linux, Xen) assume that Remote IRR bit will
+ * be cleared by IOAPIC hardware when the entry is configured
+ * as edge-triggered. This behavior is used to simulate an
+ * explicit EOI on IOAPICs that don't have the EOI register.
+ */
+ if (e->fields.trig_mode == IOAPIC_EDGE_TRIG)
+ e->fields.remote_irr = 0;
+
mask_after = e->fields.mask;
if (mask_before != mask_after)
kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
@@ -324,7 +338,9 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
struct kvm_lapic_irq irqe;
int ret;
- if (entry->fields.mask)
+ if (entry->fields.mask ||
+ (entry->fields.trig_mode == IOAPIC_LEVEL_TRIG &&
+ entry->fields.remote_irr))
return -1;
ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 943acbf00c69..e2c1fb8d35ce 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -266,9 +266,14 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
recalculate_apic_map(apic->vcpu->kvm);
}
+static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
+{
+ return ((id >> 4) << 16) | (1 << (id & 0xf));
+}
+
static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
{
- u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
+ u32 ldr = kvm_apic_calc_x2apic_ldr(id);
WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
@@ -2245,6 +2250,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
{
if (apic_x2apic_mode(vcpu->arch.apic)) {
u32 *id = (u32 *)(s->regs + APIC_ID);
+ u32 *ldr = (u32 *)(s->regs + APIC_LDR);
if (vcpu->kvm->arch.x2apic_format) {
if (*id != vcpu->vcpu_id)
@@ -2255,6 +2261,10 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
else
*id <<= 24;
}
+
+ /* In x2APIC mode, the LDR is fixed and based on the id */
+ if (set)
+ *ldr = kvm_apic_calc_x2apic_ldr(*id);
}
return 0;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b71daed3cca2..eb714f1cdf7e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -361,6 +361,7 @@ static void recalc_intercepts(struct vcpu_svm *svm)
{
struct vmcb_control_area *c, *h;
struct nested_state *g;
+ u32 h_intercept_exceptions;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
@@ -371,9 +372,14 @@ static void recalc_intercepts(struct vcpu_svm *svm)
h = &svm->nested.hsave->control;
g = &svm->nested;
+ /* No need to intercept #UD if L1 doesn't intercept it */
+ h_intercept_exceptions =
+ h->intercept_exceptions & ~(1U << UD_VECTOR);
+
c->intercept_cr = h->intercept_cr | g->intercept_cr;
c->intercept_dr = h->intercept_dr | g->intercept_dr;
- c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
+ c->intercept_exceptions =
+ h_intercept_exceptions | g->intercept_exceptions;
c->intercept = h->intercept | g->intercept;
}
@@ -2196,7 +2202,10 @@ static int ud_interception(struct vcpu_svm *svm)
{
int er;
+ WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
if (er != EMULATE_DONE)
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
@@ -3671,6 +3680,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u32 ecx = msr->index;
u64 data = msr->data;
switch (ecx) {
+ case MSR_IA32_CR_PAT:
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ return 1;
+ vcpu->arch.pat = data;
+ svm->vmcb->save.g_pat = data;
+ mark_dirty(svm->vmcb, VMCB_NPT);
+ break;
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr);
break;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7c3522a989d0..4704aaf6d19e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -70,6 +70,9 @@ MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
+static bool __read_mostly enable_vnmi = 1;
+module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
+
static bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
@@ -202,6 +205,10 @@ struct loaded_vmcs {
bool nmi_known_unmasked;
unsigned long vmcs_host_cr3; /* May not match real cr3 */
unsigned long vmcs_host_cr4; /* May not match real cr4 */
+ /* Support for vnmi-less CPUs */
+ int soft_vnmi_blocked;
+ ktime_t entry_time;
+ s64 vnmi_blocked_time;
struct list_head loaded_vmcss_on_cpu_link;
};
@@ -1291,6 +1298,11 @@ static inline bool cpu_has_vmx_invpcid(void)
SECONDARY_EXEC_ENABLE_INVPCID;
}
+static inline bool cpu_has_virtual_nmis(void)
+{
+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
+}
+
static inline bool cpu_has_vmx_wbinvd_exit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1348,11 +1360,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
(vmcs12->secondary_vm_exec_control & bit);
}
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
-{
- return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
-}
-
static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
{
return vmcs12->pin_based_vm_exec_control &
@@ -1880,7 +1887,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
u32 eb;
- eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
+ eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
(1u << DB_VECTOR) | (1u << AC_VECTOR);
if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
@@ -1898,6 +1905,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
*/
if (is_guest_mode(vcpu))
eb |= get_vmcs12(vcpu)->exception_bitmap;
+ else
+ eb |= 1u << UD_VECTOR;
vmcs_write32(EXCEPTION_BITMAP, eb);
}
@@ -3712,9 +3721,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_vmexit_control) < 0)
return -EIO;
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
- PIN_BASED_VIRTUAL_NMIS;
- opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
+ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
+ PIN_BASED_VMX_PREEMPTION_TIMER;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
@@ -5232,6 +5241,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
if (!kvm_vcpu_apicv_active(&vmx->vcpu))
pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+
+ if (!enable_vnmi)
+ pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
+
/* Enable the preemption timer dynamically */
pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
return pin_based_exec_ctrl;
@@ -5589,7 +5602,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
}
- vmcs_writel(GUEST_RFLAGS, 0x02);
+ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0xfff0);
vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -5666,7 +5679,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
- if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+ if (!enable_vnmi ||
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
enable_irq_window(vcpu);
return;
}
@@ -5706,6 +5720,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ if (!enable_vnmi) {
+ /*
+ * Tracking the NMI-blocked state in software is built upon
+ * finding the next open IRQ window. This, in turn, depends on
+ * well-behaving guests: They have to keep IRQs disabled at
+ * least as long as the NMI handler runs. Otherwise we may
+ * cause NMI nesting, maybe breaking the guest. But as this is
+ * highly unlikely, we can live with the residual risk.
+ */
+ vmx->loaded_vmcs->soft_vnmi_blocked = 1;
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
+ }
+
++vcpu->stat.nmi_injections;
vmx->loaded_vmcs->nmi_known_unmasked = false;
@@ -5724,6 +5751,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool masked;
+ if (!enable_vnmi)
+ return vmx->loaded_vmcs->soft_vnmi_blocked;
if (vmx->loaded_vmcs->nmi_known_unmasked)
return false;
masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@ -5735,13 +5764,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmx->loaded_vmcs->nmi_known_unmasked = !masked;
- if (masked)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
+ if (!enable_vnmi) {
+ if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
+ vmx->loaded_vmcs->soft_vnmi_blocked = masked;
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
+ }
+ } else {
+ vmx->loaded_vmcs->nmi_known_unmasked = !masked;
+ if (masked)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ }
}
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5749,6 +5785,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
if (to_vmx(vcpu)->nested.nested_run_pending)
return 0;
+ if (!enable_vnmi &&
+ to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
+ return 0;
+
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI));
@@ -5877,11 +5917,10 @@ static int handle_exception(struct kvm_vcpu *vcpu)
return 1; /* already handled by vmx_vcpu_run() */
if (is_invalid_opcode(intr_info)) {
- if (is_guest_mode(vcpu)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
+ WARN_ON_ONCE(is_guest_mode(vcpu));
er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
if (er != EMULATE_DONE)
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
@@ -6476,6 +6515,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* AAK134, BY25.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
@@ -6535,6 +6575,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
static int handle_nmi_window(struct kvm_vcpu *vcpu)
{
+ WARN_ON_ONCE(!enable_vnmi);
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_VIRTUAL_NMI_PENDING);
++vcpu->stat.nmi_window_exits;
@@ -6562,7 +6603,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_EVENT, vcpu))
return 1;
- err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
+ err = emulate_instruction(vcpu, 0);
if (err == EMULATE_USER_EXIT) {
++vcpu->stat.mmio_exits;
@@ -6758,6 +6799,9 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_flexpriority())
flexpriority_enabled = 0;
+ if (!cpu_has_virtual_nmis())
+ enable_vnmi = 0;
+
/*
* set_apic_access_page_addr() is used to reload apic access
* page upon invalidation. No need to do anything if not
@@ -6962,7 +7006,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
}
/* Create a new VMCS */
- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+ item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
if (!item)
return NULL;
item->vmcs02.vmcs = alloc_vmcs();
@@ -7371,10 +7415,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
*/
static void free_nested(struct vcpu_vmx *vmx)
{
- if (!vmx->nested.vmxon)
+ if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
return;
vmx->nested.vmxon = false;
+ vmx->nested.smm.vmxon = false;
free_vpid(vmx->nested.vpid02);
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
@@ -7979,6 +8024,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
* "blocked by NMI" bit has to be set before next VM entry.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
@@ -8823,6 +8869,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0;
}
+ if (unlikely(!enable_vnmi &&
+ vmx->loaded_vmcs->soft_vnmi_blocked)) {
+ if (vmx_interrupt_allowed(vcpu)) {
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+ } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
+ vcpu->arch.nmi_pending) {
+ /*
+ * This CPU don't support us in finding the end of an
+ * NMI-blocked window if the guest runs with IRQs
+ * disabled. So we pull the trigger after 1 s of
+ * futile waiting, but inform the user about this.
+ */
+ printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+ "state on VCPU %d after 1 s timeout\n",
+ __func__, vcpu->vcpu_id);
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+ }
+ }
+
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
@@ -9105,33 +9170,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
- if (vmx->loaded_vmcs->nmi_known_unmasked)
- return;
- /*
- * Can't use vmx->exit_intr_info since we're not sure what
- * the exit reason is.
- */
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
- vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
- /*
- * SDM 3: 27.7.1.2 (September 2008)
- * Re-set bit "block by NMI" before VM entry if vmexit caused by
- * a guest IRET fault.
- * SDM 3: 23.2.2 (September 2008)
- * Bit 12 is undefined in any of the following cases:
- * If the VM exit sets the valid bit in the IDT-vectoring
- * information field.
- * If the VM exit is due to a double fault.
- */
- if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
- vector != DF_VECTOR && !idtv_info_valid)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmx->loaded_vmcs->nmi_known_unmasked =
- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
- & GUEST_INTR_STATE_NMI);
+ if (enable_vnmi) {
+ if (vmx->loaded_vmcs->nmi_known_unmasked)
+ return;
+ /*
+ * Can't use vmx->exit_intr_info since we're not sure what
+ * the exit reason is.
+ */
+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+ vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+ /*
+ * SDM 3: 27.7.1.2 (September 2008)
+ * Re-set bit "block by NMI" before VM entry if vmexit caused by
+ * a guest IRET fault.
+ * SDM 3: 23.2.2 (September 2008)
+ * Bit 12 is undefined in any of the following cases:
+ * If the VM exit sets the valid bit in the IDT-vectoring
+ * information field.
+ * If the VM exit is due to a double fault.
+ */
+ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+ vector != DF_VECTOR && !idtv_info_valid)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmx->loaded_vmcs->nmi_known_unmasked =
+ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+ & GUEST_INTR_STATE_NMI);
+ } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
+ vmx->loaded_vmcs->vnmi_blocked_time +=
+ ktime_to_ns(ktime_sub(ktime_get(),
+ vmx->loaded_vmcs->entry_time));
}
static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@ -9248,6 +9318,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr, cr3, cr4;
+ /* Record the guest's net vcpu time for enforced NMI injections. */
+ if (unlikely(!enable_vnmi &&
+ vmx->loaded_vmcs->soft_vnmi_blocked))
+ vmx->loaded_vmcs->entry_time = ktime_get();
+
/* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */
if (vmx->emulation_required)
@@ -9727,8 +9802,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
- /* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */
- cr4_fixed1_update(bit(11), ecx, bit(2));
+ cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
#undef cr4_fixed1_update
}
@@ -10802,6 +10876,11 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
return 1;
}
+ if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
+ (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
+ (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
+ return 1;
+
return 0;
}
@@ -11026,13 +11105,12 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qual;
-
- if (kvm_event_needs_reinjection(vcpu))
- return -EBUSY;
+ bool block_nested_events =
+ vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
if (vcpu->arch.exception.pending &&
nested_vmx_check_exception(vcpu, &exit_qual)) {
- if (vmx->nested.nested_run_pending)
+ if (block_nested_events)
return -EBUSY;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
vcpu->arch.exception.pending = false;
@@ -11041,14 +11119,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
vmx->nested.preemption_timer_expired) {
- if (vmx->nested.nested_run_pending)
+ if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
return 0;
}
if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
- if (vmx->nested.nested_run_pending)
+ if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
NMI_VECTOR | INTR_TYPE_NMI_INTR |
@@ -11064,7 +11142,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
nested_exit_on_intr(vcpu)) {
- if (vmx->nested.nested_run_pending)
+ if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
return 0;
@@ -11251,6 +11329,24 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
kvm_clear_interrupt_queue(vcpu);
}
+static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ u32 entry_failure_code;
+
+ nested_ept_uninit_mmu_context(vcpu);
+
+ /*
+ * Only PDPTE load can fail as the value of cr3 was checked on entry and
+ * couldn't have changed.
+ */
+ if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
+ nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
+
+ if (!enable_ept)
+ vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+}
+
/*
* A part of what we need to when the nested L2 guest exits and we want to
* run its L1 parent, is to reset L1's guest state to the host state specified
@@ -11264,7 +11360,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
struct kvm_segment seg;
- u32 entry_failure_code;
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
vcpu->arch.efer = vmcs12->host_ia32_efer;
@@ -11291,17 +11386,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
vmx_set_cr4(vcpu, vmcs12->host_cr4);
- nested_ept_uninit_mmu_context(vcpu);
-
- /*
- * Only PDPTE load can fail as the value of cr3 was checked on entry and
- * couldn't have changed.
- */
- if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
- nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
-
- if (!enable_ept)
- vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+ load_vmcs12_mmu_host_state(vcpu, vmcs12);
if (enable_vpid) {
/*
@@ -11531,6 +11616,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
* accordingly.
*/
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+
+ load_vmcs12_mmu_host_state(vcpu, vmcs12);
+
/*
* The emulated instruction was already skipped in
* nested_vmx_run, but the updated RIP was never
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c85aa2e2d1..eee8e7faf1af 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -107,6 +107,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
static bool __read_mostly ignore_msrs = 0;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
+static bool __read_mostly report_ignored_msrs = true;
+module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
+
unsigned int min_timer_period_us = 500;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
@@ -1795,10 +1798,13 @@ u64 get_kvmclock_ns(struct kvm *kvm)
/* both __this_cpu_read() and rdtsc() should be on the same cpu */
get_cpu();
- kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
- &hv_clock.tsc_shift,
- &hv_clock.tsc_to_system_mul);
- ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+ if (__this_cpu_read(cpu_tsc_khz)) {
+ kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+ &hv_clock.tsc_shift,
+ &hv_clock.tsc_to_system_mul);
+ ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+ } else
+ ret = ktime_get_boot_ns() + ka->kvmclock_offset;
put_cpu();
@@ -1830,6 +1836,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
*/
BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+ if (guest_hv_clock.version & 1)
+ ++guest_hv_clock.version; /* first time write, random junk */
+
vcpu->hv_clock.version = guest_hv_clock.version + 1;
kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
&vcpu->hv_clock,
@@ -2322,7 +2331,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* Drop writes to this legacy MSR -- see rdmsr
* counterpart for further detail.
*/
- vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
+ if (report_ignored_msrs)
+ vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
+ msr, data);
break;
case MSR_AMD64_OSVW_ID_LENGTH:
if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
@@ -2359,8 +2370,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr, data);
return 1;
} else {
- vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
- msr, data);
+ if (report_ignored_msrs)
+ vcpu_unimpl(vcpu,
+ "ignored wrmsr: 0x%x data 0x%llx\n",
+ msr, data);
break;
}
}
@@ -2578,7 +2591,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->index);
return 1;
} else {
- vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
+ if (report_ignored_msrs)
+ vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
+ msr_info->index);
msr_info->data = 0;
}
break;
@@ -5430,7 +5445,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
- r = EMULATE_FAIL;
+ r = EMULATE_USER_EXIT;
}
kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5722,6 +5737,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
emulation_type))
return EMULATE_DONE;
+ if (ctxt->have_exception && inject_emulated_exception(vcpu))
+ return EMULATE_DONE;
if (emulation_type & EMULTYPE_SKIP)
return EMULATE_FAIL;
return handle_emulation_failure(vcpu);
@@ -7250,12 +7267,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct fpu *fpu = &current->thread.fpu;
int r;
- sigset_t sigsaved;
fpu__initialize(fpu);
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+ kvm_sigset_activate(vcpu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
if (kvm_run->immediate_exit) {
@@ -7298,8 +7313,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
out:
post_kvm_run_save(vcpu);
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ kvm_sigset_deactivate(vcpu);
return r;
}
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 35625d279458..9119d8e41f1f 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -733,11 +733,11 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
*
* Returns:
*
- * A signed 8-bit value containing the default parameters on success.
+ * An int containing ORed-in default parameters on success.
*
* -EINVAL on error.
*/
-char insn_get_code_seg_params(struct pt_regs *regs)
+int insn_get_code_seg_params(struct pt_regs *regs)
{
struct desc_struct *desc;
short sel;
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 12e377184ee4..c4d55919fac1 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -896,7 +896,7 @@ EndTable
GrpTable: Grp3_1
0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
2: NOT Eb
3: NEG Eb
4: MUL AL,Eb
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8ae0000cbdb3..00b296617ca4 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -158,6 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (len > TASK_SIZE)
return -ENOMEM;
+ /* No address checking. See comment at mmap_address_hint_valid() */
if (flags & MAP_FIXED) {
if (prepare_hugepage_range(file, addr, len))
return -EINVAL;
@@ -165,12 +166,16 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
}
if (addr) {
- addr = ALIGN(addr, huge_page_size(h));
+ addr &= huge_page_mask(h);
+ if (!mmap_address_hint_valid(addr, len))
+ goto get_unmapped_area;
+
vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
+ if (!vma || addr + len <= vm_start_gap(vma))
return addr;
}
+
+get_unmapped_area:
if (mm->get_unmapped_area == arch_get_unmapped_area)
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index a99679826846..155ecbac9e28 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -33,6 +33,8 @@
#include <linux/compat.h>
#include <asm/elf.h>
+#include "physaddr.h"
+
struct va_alignment __read_mostly va_align = {
.flags = -1,
};
@@ -174,3 +176,63 @@ const char *arch_vma_name(struct vm_area_struct *vma)
return "[mpx]";
return NULL;
}
+
+/**
+ * mmap_address_hint_valid - Validate the address hint of mmap
+ * @addr: Address hint
+ * @len: Mapping length
+ *
+ * Check whether @addr and @addr + @len result in a valid mapping.
+ *
+ * On 32bit this only checks whether @addr + @len is <= TASK_SIZE.
+ *
+ * On 64bit with 5-level page tables another sanity check is required
+ * because mappings requested by mmap(@addr, 0) which cross the 47-bit
+ * virtual address boundary can cause the following theoretical issue:
+ *
+ * An application calls mmap(addr, 0), i.e. without MAP_FIXED, where @addr
+ * is below the border of the 47-bit address space and @addr + @len is
+ * above the border.
+ *
+ * With 4-level paging this request succeeds, but the resulting mapping
+ * address will always be within the 47-bit virtual address space, because
+ * the hint address does not result in a valid mapping and is
+ * ignored. Hence applications which are not prepared to handle virtual
+ * addresses above 47-bit work correctly.
+ *
+ * With 5-level paging this request would be granted and result in a
+ * mapping which crosses the border of the 47-bit virtual address
+ * space. If the application cannot handle addresses above 47-bit this
+ * will lead to misbehaviour and hard to diagnose failures.
+ *
+ * Therefore ignore address hints which would result in a mapping crossing
+ * the 47-bit virtual address boundary.
+ *
+ * Note, that in the same scenario with MAP_FIXED the behaviour is
+ * different. The request with @addr < 47-bit and @addr + @len > 47-bit
+ * fails on a 4-level paging machine but succeeds on a 5-level paging
+ * machine. It is reasonable to expect that an application does not rely on
+ * the failure of such a fixed mapping request, so the restriction is not
+ * applied.
+ */
+bool mmap_address_hint_valid(unsigned long addr, unsigned long len)
+{
+ if (TASK_SIZE - len < addr)
+ return false;
+
+ return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW);
+}
+
+/* Can we access it for direct reading/writing? Must be RAM: */
+int valid_phys_addr_range(phys_addr_t addr, size_t count)
+{
+ return addr + count <= __pa(high_memory);
+}
+
+/* Can we access it through mmap? Must be a valid physical address: */
+int valid_mmap_phys_addr_range(unsigned long pfn, size_t count)
+{
+ phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT;
+
+ return phys_addr_valid(addr + count - 1);
+}
OpenPOWER on IntegriCloud