summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/termios.h8
-rw-r--r--arch/alpha/include/uapi/asm/ioctls.h5
-rw-r--r--arch/alpha/include/uapi/asm/termbits.h17
-rw-r--r--arch/arm/include/asm/cputype.h1
-rw-r--r--arch/arm/include/asm/pgtable-2level.h2
-rw-r--r--arch/arm/include/asm/proc-fns.h61
-rw-r--r--arch/arm/kernel/bugs.c4
-rw-r--r--arch/arm/kernel/ftrace.c17
-rw-r--r--arch/arm/kernel/head-common.S6
-rw-r--r--arch/arm/kernel/setup.c40
-rw-r--r--arch/arm/kernel/smp.c31
-rw-r--r--arch/arm/mach-omap2/display.c111
-rw-r--r--arch/arm/mm/proc-v7-bugs.c17
-rw-r--r--arch/arm/mm/proc-v7.S2
-rw-r--r--arch/arm/vfp/vfpmodule.c2
-rw-r--r--arch/arm64/Kconfig25
-rw-r--r--arch/arm64/include/asm/ftrace.h13
-rw-r--r--arch/arm64/include/asm/processor.h8
-rw-r--r--arch/arm64/include/asm/sysreg.h4
-rw-r--r--arch/arm64/include/asm/tlbflush.h4
-rw-r--r--arch/arm64/kernel/cpu_errata.c20
-rw-r--r--arch/arm64/kernel/cpufeature.c2
-rw-r--r--arch/arm64/kernel/ftrace.c15
-rw-r--r--arch/arm64/kernel/setup.c1
-rw-r--r--arch/arm64/mm/init.c2
-rw-r--r--arch/arm64/mm/mmu.c2
-rw-r--r--arch/arm64/net/bpf_jit_comp.c26
-rw-r--r--arch/ia64/include/asm/numa.h4
-rw-r--r--arch/ia64/kernel/acpi.c6
-rw-r--r--arch/ia64/mm/numa.c6
-rw-r--r--arch/m68k/include/asm/pgtable_mm.h4
-rw-r--r--arch/microblaze/include/asm/pgtable.h2
-rw-r--r--arch/microblaze/kernel/ftrace.c15
-rw-r--r--arch/mips/cavium-octeon/executive/cvmx-helper.c2
-rw-r--r--arch/mips/configs/cavium_octeon_defconfig1
-rw-r--r--arch/mips/include/asm/syscall.h2
-rw-r--r--arch/mips/kernel/ftrace.c14
-rw-r--r--arch/mips/kernel/setup.c1
-rw-r--r--arch/mips/kernel/traps.c3
-rw-r--r--arch/mips/loongson64/loongson-3/numa.c12
-rw-r--r--arch/mips/mm/dma-noncoherent.c2
-rw-r--r--arch/mips/ralink/mt7620.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-memory.c11
-rw-r--r--arch/nds32/include/asm/pgtable.h2
-rw-r--r--arch/nds32/kernel/ftrace.c18
-rw-r--r--arch/parisc/include/asm/pgtable.h2
-rw-r--r--arch/parisc/include/asm/spinlock.h4
-rw-r--r--arch/parisc/kernel/ftrace.c17
-rw-r--r--arch/parisc/kernel/syscall.S12
-rw-r--r--arch/powerpc/include/asm/io.h20
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h2
-rw-r--r--arch/powerpc/include/asm/ptrace.h1
-rw-r--r--arch/powerpc/kernel/setup_64.c2
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c15
-rw-r--r--arch/powerpc/kvm/book3s_hv.c1
-rw-r--r--arch/powerpc/kvm/trace.h8
-rw-r--r--arch/powerpc/kvm/trace_booke.h9
-rw-r--r--arch/powerpc/kvm/trace_hv.h9
-rw-r--r--arch/powerpc/kvm/trace_pr.h9
-rw-r--r--arch/powerpc/mm/numa.c2
-rw-r--r--arch/powerpc/mm/slb.c35
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c57
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c64
-rw-r--r--arch/riscv/Makefile19
-rw-r--r--arch/riscv/boot/.gitignore2
-rw-r--r--arch/riscv/boot/Makefile33
-rw-r--r--arch/riscv/boot/install.sh60
-rw-r--r--arch/riscv/configs/defconfig1
-rw-r--r--arch/riscv/include/asm/module.h1
-rw-r--r--arch/riscv/include/asm/ptrace.h4
-rw-r--r--arch/riscv/include/asm/uaccess.h4
-rw-r--r--arch/riscv/include/asm/unistd.h5
-rw-r--r--arch/riscv/include/uapi/asm/unistd.h (renamed from arch/riscv/include/uapi/asm/syscalls.h)26
-rw-r--r--arch/riscv/kernel/cpu.c9
-rw-r--r--arch/riscv/kernel/ftrace.c14
-rw-r--r--arch/riscv/kernel/head.S10
-rw-r--r--arch/riscv/kernel/module.c12
-rw-r--r--arch/riscv/kernel/vmlinux.lds.S2
-rw-r--r--arch/riscv/lib/Makefile2
-rw-r--r--arch/s390/Makefile2
-rw-r--r--arch/s390/boot/compressed/Makefile16
-rw-r--r--arch/s390/configs/debug_defconfig14
-rw-r--r--arch/s390/configs/performance_defconfig13
-rw-r--r--arch/s390/defconfig79
-rw-r--r--arch/s390/include/asm/mmu_context.h5
-rw-r--r--arch/s390/include/asm/pgalloc.h6
-rw-r--r--arch/s390/include/asm/pgtable.h18
-rw-r--r--arch/s390/include/asm/processor.h4
-rw-r--r--arch/s390/include/asm/thread_info.h2
-rw-r--r--arch/s390/include/asm/tlb.h6
-rw-r--r--arch/s390/kernel/entry.S6
-rw-r--r--arch/s390/kernel/ftrace.c13
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c4
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c33
-rw-r--r--arch/s390/kernel/vdso32/Makefile6
-rw-r--r--arch/s390/kernel/vdso64/Makefile6
-rw-r--r--arch/s390/kernel/vmlinux.lds.S4
-rw-r--r--arch/s390/mm/pgalloc.c2
-rw-r--r--arch/s390/numa/numa.c1
-rw-r--r--arch/sh/kernel/ftrace.c16
-rw-r--r--arch/sparc/kernel/ftrace.c11
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c97
-rw-r--r--arch/um/drivers/ubd_kern.c12
-rw-r--r--arch/x86/Kconfig13
-rw-r--r--arch/x86/Makefile9
-rw-r--r--arch/x86/boot/header.S6
-rw-r--r--arch/x86/events/core.c20
-rw-r--r--arch/x86/events/intel/core.c68
-rw-r--r--arch/x86/events/intel/uncore.h33
-rw-r--r--arch/x86/events/intel/uncore_snb.c121
-rw-r--r--arch/x86/events/perf_event.h13
-rw-r--r--arch/x86/include/asm/fpu/internal.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/mshyperv.h2
-rw-r--r--arch/x86/include/asm/msr-index.h5
-rw-r--r--arch/x86/include/asm/nospec-branch.h26
-rw-r--r--arch/x86/include/asm/page_64_types.h12
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h4
-rw-r--r--arch/x86/include/asm/qspinlock.h13
-rw-r--r--arch/x86/include/asm/spec-ctrl.h20
-rw-r--r--arch/x86/include/asm/switch_to.h3
-rw-r--r--arch/x86/include/asm/thread_info.h20
-rw-r--r--arch/x86/include/asm/tlbflush.h8
-rw-r--r--arch/x86/include/asm/x86_init.h2
-rw-r--r--arch/x86/include/asm/xen/page.h35
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h7
-rw-r--r--arch/x86/kernel/acpi/boot.c2
-rw-r--r--arch/x86/kernel/cpu/bugs.c525
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c19
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c11
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
-rw-r--r--arch/x86/kernel/fpu/signal.c4
-rw-r--r--arch/x86/kernel/ftrace.c15
-rw-r--r--arch/x86/kernel/head32.c1
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/ldt.c59
-rw-r--r--arch/x86/kernel/process.c101
-rw-r--r--arch/x86/kernel/process.h39
-rw-r--r--arch/x86/kernel/process_32.c10
-rw-r--r--arch/x86/kernel/process_64.c10
-rw-r--r--arch/x86/kernel/setup.c17
-rw-r--r--arch/x86/kernel/vsmp_64.c84
-rw-r--r--arch/x86/kvm/lapic.c7
-rw-r--r--arch/x86/kvm/mmu.c27
-rw-r--r--arch/x86/kvm/svm.c44
-rw-r--r--arch/x86/kvm/vmx.c98
-rw-r--r--arch/x86/kvm/x86.c10
-rw-r--r--arch/x86/mm/tlb.c115
-rw-r--r--arch/x86/xen/enlighten.c78
-rw-r--r--arch/x86/xen/mmu_pv.c6
-rw-r--r--arch/x86/xen/multicalls.c35
-rw-r--r--arch/x86/xen/p2m.c3
-rw-r--r--arch/x86/xen/setup.c6
-rw-r--r--arch/x86/xen/spinlock.c21
-rw-r--r--arch/xtensa/include/asm/processor.h6
-rw-r--r--arch/xtensa/kernel/asm-offsets.c16
-rw-r--r--arch/xtensa/kernel/head.S7
-rw-r--r--arch/xtensa/kernel/process.c5
-rw-r--r--arch/xtensa/kernel/ptrace.c42
161 files changed, 2034 insertions, 1248 deletions
diff --git a/arch/alpha/include/asm/termios.h b/arch/alpha/include/asm/termios.h
index 6a8c53dec57e..b7c77bb1bfd2 100644
--- a/arch/alpha/include/asm/termios.h
+++ b/arch/alpha/include/asm/termios.h
@@ -73,9 +73,15 @@
})
#define user_termios_to_kernel_termios(k, u) \
- copy_from_user(k, u, sizeof(struct termios))
+ copy_from_user(k, u, sizeof(struct termios2))
#define kernel_termios_to_user_termios(u, k) \
+ copy_to_user(u, k, sizeof(struct termios2))
+
+#define user_termios_to_kernel_termios_1(k, u) \
+ copy_from_user(k, u, sizeof(struct termios))
+
+#define kernel_termios_to_user_termios_1(u, k) \
copy_to_user(u, k, sizeof(struct termios))
#endif /* _ALPHA_TERMIOS_H */
diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h
index 1e9121c9b3c7..971311605288 100644
--- a/arch/alpha/include/uapi/asm/ioctls.h
+++ b/arch/alpha/include/uapi/asm/ioctls.h
@@ -32,6 +32,11 @@
#define TCXONC _IO('t', 30)
#define TCFLSH _IO('t', 31)
+#define TCGETS2 _IOR('T', 42, struct termios2)
+#define TCSETS2 _IOW('T', 43, struct termios2)
+#define TCSETSW2 _IOW('T', 44, struct termios2)
+#define TCSETSF2 _IOW('T', 45, struct termios2)
+
#define TIOCSWINSZ _IOW('t', 103, struct winsize)
#define TIOCGWINSZ _IOR('t', 104, struct winsize)
#define TIOCSTART _IO('t', 110) /* start output, like ^Q */
diff --git a/arch/alpha/include/uapi/asm/termbits.h b/arch/alpha/include/uapi/asm/termbits.h
index de6c8360fbe3..4575ba34a0ea 100644
--- a/arch/alpha/include/uapi/asm/termbits.h
+++ b/arch/alpha/include/uapi/asm/termbits.h
@@ -26,6 +26,19 @@ struct termios {
speed_t c_ospeed; /* output speed */
};
+/* Alpha has identical termios and termios2 */
+
+struct termios2 {
+ tcflag_t c_iflag; /* input mode flags */
+ tcflag_t c_oflag; /* output mode flags */
+ tcflag_t c_cflag; /* control mode flags */
+ tcflag_t c_lflag; /* local mode flags */
+ cc_t c_cc[NCCS]; /* control characters */
+ cc_t c_line; /* line discipline (== c_cc[19]) */
+ speed_t c_ispeed; /* input speed */
+ speed_t c_ospeed; /* output speed */
+};
+
/* Alpha has matching termios and ktermios */
struct ktermios {
@@ -152,6 +165,7 @@ struct ktermios {
#define B3000000 00034
#define B3500000 00035
#define B4000000 00036
+#define BOTHER 00037
#define CSIZE 00001400
#define CS5 00000000
@@ -169,6 +183,9 @@ struct ktermios {
#define CMSPAR 010000000000 /* mark or space (stick) parity */
#define CRTSCTS 020000000000 /* flow control */
+#define CIBAUD 07600000
+#define IBSHIFT 16
+
/* c_lflag bits */
#define ISIG 0x00000080
#define ICANON 0x00000100
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 0d289240b6ca..775cac3c02bb 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -111,6 +111,7 @@
#include <linux/kernel.h>
extern unsigned int processor_id;
+struct proc_info_list *lookup_processor(u32 midr);
#ifdef CONFIG_CPU_CP15
#define read_cpuid(reg) \
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index 92fd2c8a9af0..12659ce5c1f3 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -10,7 +10,7 @@
#ifndef _ASM_PGTABLE_2LEVEL_H
#define _ASM_PGTABLE_2LEVEL_H
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
/*
* Hardware-wise, we have a two level page table structure, where the first
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index e25f4392e1b2..e1b6f280ab08 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -23,7 +23,7 @@ struct mm_struct;
/*
* Don't change this structure - ASM code relies on it.
*/
-extern struct processor {
+struct processor {
/* MISC
* get data abort address/flags
*/
@@ -79,9 +79,13 @@ extern struct processor {
unsigned int suspend_size;
void (*do_suspend)(void *);
void (*do_resume)(void *);
-} processor;
+};
#ifndef MULTI_CPU
+static inline void init_proc_vtable(const struct processor *p)
+{
+}
+
extern void cpu_proc_init(void);
extern void cpu_proc_fin(void);
extern int cpu_do_idle(void);
@@ -98,17 +102,50 @@ extern void cpu_reset(unsigned long addr, bool hvc) __attribute__((noreturn));
extern void cpu_do_suspend(void *);
extern void cpu_do_resume(void *);
#else
-#define cpu_proc_init processor._proc_init
-#define cpu_proc_fin processor._proc_fin
-#define cpu_reset processor.reset
-#define cpu_do_idle processor._do_idle
-#define cpu_dcache_clean_area processor.dcache_clean_area
-#define cpu_set_pte_ext processor.set_pte_ext
-#define cpu_do_switch_mm processor.switch_mm
-/* These three are private to arch/arm/kernel/suspend.c */
-#define cpu_do_suspend processor.do_suspend
-#define cpu_do_resume processor.do_resume
+extern struct processor processor;
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+#include <linux/smp.h>
+/*
+ * This can't be a per-cpu variable because we need to access it before
+ * per-cpu has been initialised. We have a couple of functions that are
+ * called in a pre-emptible context, and so can't use smp_processor_id()
+ * there, hence PROC_TABLE(). We insist in init_proc_vtable() that the
+ * function pointers for these are identical across all CPUs.
+ */
+extern struct processor *cpu_vtable[];
+#define PROC_VTABLE(f) cpu_vtable[smp_processor_id()]->f
+#define PROC_TABLE(f) cpu_vtable[0]->f
+static inline void init_proc_vtable(const struct processor *p)
+{
+ unsigned int cpu = smp_processor_id();
+ *cpu_vtable[cpu] = *p;
+ WARN_ON_ONCE(cpu_vtable[cpu]->dcache_clean_area !=
+ cpu_vtable[0]->dcache_clean_area);
+ WARN_ON_ONCE(cpu_vtable[cpu]->set_pte_ext !=
+ cpu_vtable[0]->set_pte_ext);
+}
+#else
+#define PROC_VTABLE(f) processor.f
+#define PROC_TABLE(f) processor.f
+static inline void init_proc_vtable(const struct processor *p)
+{
+ processor = *p;
+}
+#endif
+
+#define cpu_proc_init PROC_VTABLE(_proc_init)
+#define cpu_check_bugs PROC_VTABLE(check_bugs)
+#define cpu_proc_fin PROC_VTABLE(_proc_fin)
+#define cpu_reset PROC_VTABLE(reset)
+#define cpu_do_idle PROC_VTABLE(_do_idle)
+#define cpu_dcache_clean_area PROC_TABLE(dcache_clean_area)
+#define cpu_set_pte_ext PROC_TABLE(set_pte_ext)
+#define cpu_do_switch_mm PROC_VTABLE(switch_mm)
+
+/* These two are private to arch/arm/kernel/suspend.c */
+#define cpu_do_suspend PROC_VTABLE(do_suspend)
+#define cpu_do_resume PROC_VTABLE(do_resume)
#endif
extern void cpu_resume(void);
diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c
index 7be511310191..d41d3598e5e5 100644
--- a/arch/arm/kernel/bugs.c
+++ b/arch/arm/kernel/bugs.c
@@ -6,8 +6,8 @@
void check_other_bugs(void)
{
#ifdef MULTI_CPU
- if (processor.check_bugs)
- processor.check_bugs();
+ if (cpu_check_bugs)
+ cpu_check_bugs();
#endif
}
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 0142fcfcc3d3..bda949fd84e8 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -183,9 +183,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
unsigned long frame_pointer)
{
unsigned long return_hooker = (unsigned long) &return_to_handler;
- struct ftrace_graph_ent trace;
unsigned long old;
- int err;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
@@ -193,21 +191,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
old = *parent;
*parent = return_hooker;
- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace)) {
+ if (function_graph_enter(old, self_addr, frame_pointer, NULL))
*parent = old;
- return;
- }
-
- err = ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer, NULL);
- if (err == -EBUSY) {
- *parent = old;
- return;
- }
}
#ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 6e0375e7db05..997b02302c31 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -145,6 +145,9 @@ __mmap_switched_data:
#endif
.size __mmap_switched_data, . - __mmap_switched_data
+ __FINIT
+ .text
+
/*
* This provides a C-API version of __lookup_processor_type
*/
@@ -156,9 +159,6 @@ ENTRY(lookup_processor_type)
ldmfd sp!, {r4 - r6, r9, pc}
ENDPROC(lookup_processor_type)
- __FINIT
- .text
-
/*
* Read processor ID register (CP#15, CR0), and look up in the linker-built
* supported processor list. Note that we can't use the absolute addresses
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index ac7e08886863..375b13f7e780 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -114,6 +114,11 @@ EXPORT_SYMBOL(elf_hwcap2);
#ifdef MULTI_CPU
struct processor processor __ro_after_init;
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+struct processor *cpu_vtable[NR_CPUS] = {
+ [0] = &processor,
+};
+#endif
#endif
#ifdef MULTI_TLB
struct cpu_tlb_fns cpu_tlb __ro_after_init;
@@ -666,28 +671,33 @@ static void __init smp_build_mpidr_hash(void)
}
#endif
-static void __init setup_processor(void)
+/*
+ * locate processor in the list of supported processor types. The linker
+ * builds this table for us from the entries in arch/arm/mm/proc-*.S
+ */
+struct proc_info_list *lookup_processor(u32 midr)
{
- struct proc_info_list *list;
+ struct proc_info_list *list = lookup_processor_type(midr);
- /*
- * locate processor in the list of supported processor
- * types. The linker builds this table for us from the
- * entries in arch/arm/mm/proc-*.S
- */
- list = lookup_processor_type(read_cpuid_id());
if (!list) {
- pr_err("CPU configuration botched (ID %08x), unable to continue.\n",
- read_cpuid_id());
- while (1);
+ pr_err("CPU%u: configuration botched (ID %08x), CPU halted\n",
+ smp_processor_id(), midr);
+ while (1)
+ /* can't use cpu_relax() here as it may require MMU setup */;
}
+ return list;
+}
+
+static void __init setup_processor(void)
+{
+ unsigned int midr = read_cpuid_id();
+ struct proc_info_list *list = lookup_processor(midr);
+
cpu_name = list->cpu_name;
__cpu_architecture = __get_cpu_architecture();
-#ifdef MULTI_CPU
- processor = *list->proc;
-#endif
+ init_proc_vtable(list->proc);
#ifdef MULTI_TLB
cpu_tlb = *list->tlb;
#endif
@@ -699,7 +709,7 @@ static void __init setup_processor(void)
#endif
pr_info("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n",
- cpu_name, read_cpuid_id(), read_cpuid_id() & 15,
+ list->cpu_name, midr, midr & 15,
proc_arch[cpu_architecture()], get_cr());
snprintf(init_utsname()->machine, __NEW_UTS_LEN + 1, "%s%c",
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 0978282d5fc2..12a6172263c0 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -42,6 +42,7 @@
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
+#include <asm/procinfo.h>
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -102,6 +103,30 @@ static unsigned long get_arch_pgd(pgd_t *pgd)
#endif
}
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+static int secondary_biglittle_prepare(unsigned int cpu)
+{
+ if (!cpu_vtable[cpu])
+ cpu_vtable[cpu] = kzalloc(sizeof(*cpu_vtable[cpu]), GFP_KERNEL);
+
+ return cpu_vtable[cpu] ? 0 : -ENOMEM;
+}
+
+static void secondary_biglittle_init(void)
+{
+ init_proc_vtable(lookup_processor(read_cpuid_id())->proc);
+}
+#else
+static int secondary_biglittle_prepare(unsigned int cpu)
+{
+ return 0;
+}
+
+static void secondary_biglittle_init(void)
+{
+}
+#endif
+
int __cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
@@ -109,6 +134,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
if (!smp_ops.smp_boot_secondary)
return -ENOSYS;
+ ret = secondary_biglittle_prepare(cpu);
+ if (ret)
+ return ret;
+
/*
* We need to tell the secondary core where to find
* its stack and the page tables.
@@ -359,6 +388,8 @@ asmlinkage void secondary_start_kernel(void)
struct mm_struct *mm = &init_mm;
unsigned int cpu;
+ secondary_biglittle_init();
+
/*
* The identity mapping is uncached (strongly ordered), so
* switch away from it before attempting any exclusive accesses.
diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index 9500b6e27380..f86b72d1d59e 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -209,11 +209,61 @@ static int __init omapdss_init_fbdev(void)
return 0;
}
-#else
-static inline int omapdss_init_fbdev(void)
+
+static const char * const omapdss_compat_names[] __initconst = {
+ "ti,omap2-dss",
+ "ti,omap3-dss",
+ "ti,omap4-dss",
+ "ti,omap5-dss",
+ "ti,dra7-dss",
+};
+
+static struct device_node * __init omapdss_find_dss_of_node(void)
{
- return 0;
+ struct device_node *node;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) {
+ node = of_find_compatible_node(NULL, NULL,
+ omapdss_compat_names[i]);
+ if (node)
+ return node;
+ }
+
+ return NULL;
}
+
+static int __init omapdss_init_of(void)
+{
+ int r;
+ struct device_node *node;
+ struct platform_device *pdev;
+
+ /* only create dss helper devices if dss is enabled in the .dts */
+
+ node = omapdss_find_dss_of_node();
+ if (!node)
+ return 0;
+
+ if (!of_device_is_available(node))
+ return 0;
+
+ pdev = of_find_device_by_node(node);
+
+ if (!pdev) {
+ pr_err("Unable to find DSS platform device\n");
+ return -ENODEV;
+ }
+
+ r = of_platform_populate(node, NULL, NULL, &pdev->dev);
+ if (r) {
+ pr_err("Unable to populate DSS submodule devices\n");
+ return r;
+ }
+
+ return omapdss_init_fbdev();
+}
+omap_device_initcall(omapdss_init_of);
#endif /* CONFIG_FB_OMAP2 */
static void dispc_disable_outputs(void)
@@ -361,58 +411,3 @@ int omap_dss_reset(struct omap_hwmod *oh)
return r;
}
-
-static const char * const omapdss_compat_names[] __initconst = {
- "ti,omap2-dss",
- "ti,omap3-dss",
- "ti,omap4-dss",
- "ti,omap5-dss",
- "ti,dra7-dss",
-};
-
-static struct device_node * __init omapdss_find_dss_of_node(void)
-{
- struct device_node *node;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) {
- node = of_find_compatible_node(NULL, NULL,
- omapdss_compat_names[i]);
- if (node)
- return node;
- }
-
- return NULL;
-}
-
-static int __init omapdss_init_of(void)
-{
- int r;
- struct device_node *node;
- struct platform_device *pdev;
-
- /* only create dss helper devices if dss is enabled in the .dts */
-
- node = omapdss_find_dss_of_node();
- if (!node)
- return 0;
-
- if (!of_device_is_available(node))
- return 0;
-
- pdev = of_find_device_by_node(node);
-
- if (!pdev) {
- pr_err("Unable to find DSS platform device\n");
- return -ENODEV;
- }
-
- r = of_platform_populate(node, NULL, NULL, &pdev->dev);
- if (r) {
- pr_err("Unable to populate DSS submodule devices\n");
- return r;
- }
-
- return omapdss_init_fbdev();
-}
-omap_device_initcall(omapdss_init_of);
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index 5544b82a2e7a..9a07916af8dd 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -52,8 +52,6 @@ static void cpu_v7_spectre_init(void)
case ARM_CPU_PART_CORTEX_A17:
case ARM_CPU_PART_CORTEX_A73:
case ARM_CPU_PART_CORTEX_A75:
- if (processor.switch_mm != cpu_v7_bpiall_switch_mm)
- goto bl_error;
per_cpu(harden_branch_predictor_fn, cpu) =
harden_branch_predictor_bpiall;
spectre_v2_method = "BPIALL";
@@ -61,8 +59,6 @@ static void cpu_v7_spectre_init(void)
case ARM_CPU_PART_CORTEX_A15:
case ARM_CPU_PART_BRAHMA_B15:
- if (processor.switch_mm != cpu_v7_iciallu_switch_mm)
- goto bl_error;
per_cpu(harden_branch_predictor_fn, cpu) =
harden_branch_predictor_iciallu;
spectre_v2_method = "ICIALLU";
@@ -88,11 +84,9 @@ static void cpu_v7_spectre_init(void)
ARM_SMCCC_ARCH_WORKAROUND_1, &res);
if ((int)res.a0 != 0)
break;
- if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu)
- goto bl_error;
per_cpu(harden_branch_predictor_fn, cpu) =
call_hvc_arch_workaround_1;
- processor.switch_mm = cpu_v7_hvc_switch_mm;
+ cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
spectre_v2_method = "hypervisor";
break;
@@ -101,11 +95,9 @@ static void cpu_v7_spectre_init(void)
ARM_SMCCC_ARCH_WORKAROUND_1, &res);
if ((int)res.a0 != 0)
break;
- if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu)
- goto bl_error;
per_cpu(harden_branch_predictor_fn, cpu) =
call_smc_arch_workaround_1;
- processor.switch_mm = cpu_v7_smc_switch_mm;
+ cpu_do_switch_mm = cpu_v7_smc_switch_mm;
spectre_v2_method = "firmware";
break;
@@ -119,11 +111,6 @@ static void cpu_v7_spectre_init(void)
if (spectre_v2_method)
pr_info("CPU%u: Spectre v2: using %s workaround\n",
smp_processor_id(), spectre_v2_method);
- return;
-
-bl_error:
- pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n",
- cpu);
}
#else
static void cpu_v7_spectre_init(void)
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 6fe52819e014..339eb17c9808 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -112,7 +112,7 @@ ENTRY(cpu_v7_hvc_switch_mm)
hvc #0
ldmfd sp!, {r0 - r3}
b cpu_v7_switch_mm
-ENDPROC(cpu_v7_smc_switch_mm)
+ENDPROC(cpu_v7_hvc_switch_mm)
#endif
ENTRY(cpu_v7_iciallu_switch_mm)
mov r3, #0
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index aff6e6eadc70..ee7b07938dd5 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -573,7 +573,7 @@ int vfp_preserve_user_clear_hwstate(struct user_vfp *ufp,
*/
ufp_exc->fpexc = hwstate->fpexc;
ufp_exc->fpinst = hwstate->fpinst;
- ufp_exc->fpinst2 = ufp_exc->fpinst2;
+ ufp_exc->fpinst2 = hwstate->fpinst2;
/* Ensure that VFP is disabled. */
vfp_flush_hwstate(thread);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 787d7850e064..ea2ab0330e3a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -497,6 +497,24 @@ config ARM64_ERRATUM_1188873
If unsure, say Y.
+config ARM64_ERRATUM_1286807
+ bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
+ default y
+ select ARM64_WORKAROUND_REPEAT_TLBI
+ help
+ This option adds workaround for ARM Cortex-A76 erratum 1286807
+
+ On the affected Cortex-A76 cores (r0p0 to r3p0), if a virtual
+ address for a cacheable mapping of a location is being
+ accessed by a core while another core is remapping the virtual
+ address to a new physical page using the recommended
+ break-before-make sequence, then under very rare circumstances
+ TLBI+DSB completes before a read using the translation being
+ invalidated has been observed by other observers. The
+ workaround repeats the TLBI+DSB operation.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
@@ -566,9 +584,16 @@ config QCOM_FALKOR_ERRATUM_1003
is unchanged. Work around the erratum by invalidating the walk cache
entries for the trampoline before entering the kernel proper.
+config ARM64_WORKAROUND_REPEAT_TLBI
+ bool
+ help
+ Enable the repeat TLBI workaround for Falkor erratum 1009 and
+ Cortex-A76 erratum 1286807.
+
config QCOM_FALKOR_ERRATUM_1009
bool "Falkor E1009: Prematurely complete a DSB after a TLBI"
default y
+ select ARM64_WORKAROUND_REPEAT_TLBI
help
On Falkor v1, the CPU may prematurely complete a DSB following a
TLBI xxIS invalidate maintenance operation. Repeat the TLBI operation
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index caa955f10e19..fac54fb050d0 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -56,6 +56,19 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
{
return is_compat_task();
}
+
+#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+
+static inline bool arch_syscall_match_sym_name(const char *sym,
+ const char *name)
+{
+ /*
+ * Since all syscall functions have __arm64_ prefix, we must skip it.
+ * However, as we described above, we decided to ignore compat
+ * syscalls, so we don't care about __arm64_compat_ prefix here.
+ */
+ return !strcmp(sym + 8, name);
+}
#endif /* ifndef __ASSEMBLY__ */
#endif /* __ASM_FTRACE_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 3e2091708b8e..6b0d4dff5012 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -24,6 +24,14 @@
#define KERNEL_DS UL(-1)
#define USER_DS (TASK_SIZE_64 - 1)
+/*
+ * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
+ * no point in shifting all network buffers by 2 bytes just to make some IP
+ * header fields appear aligned in memory, potentially sacrificing some DMA
+ * performance on some platforms.
+ */
+#define NET_IP_ALIGN 0
+
#ifndef __ASSEMBLY__
#ifdef __KERNEL__
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 0c909c4a932f..842fb9572661 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -468,7 +468,7 @@
SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \
SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0)
-#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff
+#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffffUL
#error "Inconsistent SCTLR_EL2 set/clear bits"
#endif
@@ -509,7 +509,7 @@
SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\
SCTLR_ELx_DSSBS | SCTLR_EL1_NTWI | SCTLR_EL1_RES0)
-#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff
+#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffffUL
#error "Inconsistent SCTLR_EL1 set/clear bits"
#endif
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index c3c0387aee18..5dfd23897dea 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -41,14 +41,14 @@
ALTERNATIVE("nop\n nop", \
"dsb ish\n tlbi " #op, \
ARM64_WORKAROUND_REPEAT_TLBI, \
- CONFIG_QCOM_FALKOR_ERRATUM_1009) \
+ CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
: : )
#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n" \
ALTERNATIVE("nop\n nop", \
"dsb ish\n tlbi " #op ", %0", \
ARM64_WORKAROUND_REPEAT_TLBI, \
- CONFIG_QCOM_FALKOR_ERRATUM_1009) \
+ CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
: : "r" (arg))
#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index a509e35132d2..6ad715d67df8 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -570,6 +570,20 @@ static const struct midr_range arm64_harden_el2_vectors[] = {
#endif
+#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
+
+static const struct midr_range arm64_repeat_tlbi_cpus[] = {
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
+ MIDR_RANGE(MIDR_QCOM_FALKOR_V1, 0, 0, 0, 0),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1286807
+ MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0),
+#endif
+ {},
+};
+
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#if defined(CONFIG_ARM64_ERRATUM_826319) || \
defined(CONFIG_ARM64_ERRATUM_827319) || \
@@ -695,11 +709,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.matches = is_kryo_midr,
},
#endif
-#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
+#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
{
- .desc = "Qualcomm Technologies Falkor erratum 1009",
+ .desc = "Qualcomm erratum 1009, ARM erratum 1286807",
.capability = ARM64_WORKAROUND_REPEAT_TLBI,
- ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0),
+ ERRATA_MIDR_RANGE_LIST(arm64_repeat_tlbi_cpus),
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_858921
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index af50064dea51..aec5ecb85737 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1333,7 +1333,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_enable_hw_dbm,
},
#endif
-#ifdef CONFIG_ARM64_SSBD
{
.desc = "CRC32 instructions",
.capability = ARM64_HAS_CRC32,
@@ -1343,6 +1342,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.field_pos = ID_AA64ISAR0_CRC32_SHIFT,
.min_field_value = 1,
},
+#ifdef CONFIG_ARM64_SSBD
{
.desc = "Speculative Store Bypassing Safe (SSBS)",
.capability = ARM64_SSBS,
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 50986e388d2b..57e962290df3 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -216,8 +216,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
{
unsigned long return_hooker = (unsigned long)&return_to_handler;
unsigned long old;
- struct ftrace_graph_ent trace;
- int err;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
@@ -229,18 +227,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
*/
old = *parent;
- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace))
- return;
-
- err = ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer, NULL);
- if (err == -EBUSY)
- return;
- else
+ if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
*parent = return_hooker;
}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 953e316521fc..f4fc1e0544b7 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -313,6 +313,7 @@ void __init setup_arch(char **cmdline_p)
arm64_memblock_init();
paging_init();
+ efi_apply_persistent_mem_reservations();
acpi_table_upgrade();
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9d9582cac6c4..9b432d9fcada 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -483,8 +483,6 @@ void __init arm64_memblock_init(void)
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
dma_contiguous_reserve(arm64_dma_phys_limit);
-
- memblock_allow_resize();
}
void __init bootmem_init(void)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 394b8d554def..d1d6601b385d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -659,6 +659,8 @@ void __init paging_init(void)
memblock_free(__pa_symbol(init_pg_dir),
__pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
+
+ memblock_allow_resize();
}
/*
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index a6fdaea07c63..89198017e8e6 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -351,7 +351,8 @@ static void build_epilogue(struct jit_ctx *ctx)
* >0 - successfully JITed a 16-byte eBPF instruction.
* <0 - failed to JIT.
*/
-static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
+static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
+ bool extra_pass)
{
const u8 code = insn->code;
const u8 dst = bpf2a64[insn->dst_reg];
@@ -625,12 +626,19 @@ emit_cond_jmp:
case BPF_JMP | BPF_CALL:
{
const u8 r0 = bpf2a64[BPF_REG_0];
- const u64 func = (u64)__bpf_call_base + imm;
+ bool func_addr_fixed;
+ u64 func_addr;
+ int ret;
- if (ctx->prog->is_func)
- emit_addr_mov_i64(tmp, func, ctx);
+ ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
+ &func_addr, &func_addr_fixed);
+ if (ret < 0)
+ return ret;
+ if (func_addr_fixed)
+ /* We can use optimized emission here. */
+ emit_a64_mov_i64(tmp, func_addr, ctx);
else
- emit_a64_mov_i64(tmp, func, ctx);
+ emit_addr_mov_i64(tmp, func_addr, ctx);
emit(A64_BLR(tmp), ctx);
emit(A64_MOV(1, r0, A64_R(0)), ctx);
break;
@@ -753,7 +761,7 @@ emit_cond_jmp:
return 0;
}
-static int build_body(struct jit_ctx *ctx)
+static int build_body(struct jit_ctx *ctx, bool extra_pass)
{
const struct bpf_prog *prog = ctx->prog;
int i;
@@ -762,7 +770,7 @@ static int build_body(struct jit_ctx *ctx)
const struct bpf_insn *insn = &prog->insnsi[i];
int ret;
- ret = build_insn(insn, ctx);
+ ret = build_insn(insn, ctx, extra_pass);
if (ret > 0) {
i++;
if (ctx->image == NULL)
@@ -858,7 +866,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
/* 1. Initial fake pass to compute ctx->idx. */
/* Fake pass to fill in ctx->offset. */
- if (build_body(&ctx)) {
+ if (build_body(&ctx, extra_pass)) {
prog = orig_prog;
goto out_off;
}
@@ -888,7 +896,7 @@ skip_init_ctx:
build_prologue(&ctx, was_classic);
- if (build_body(&ctx)) {
+ if (build_body(&ctx, extra_pass)) {
bpf_jit_binary_free(header);
prog = orig_prog;
goto out_off;
diff --git a/arch/ia64/include/asm/numa.h b/arch/ia64/include/asm/numa.h
index ebef7f40aabb..c5c253cb9bd6 100644
--- a/arch/ia64/include/asm/numa.h
+++ b/arch/ia64/include/asm/numa.h
@@ -59,7 +59,9 @@ extern struct node_cpuid_s node_cpuid[NR_CPUS];
*/
extern u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES];
-#define node_distance(from,to) (numa_slit[(from) * MAX_NUMNODES + (to)])
+#define slit_distance(from,to) (numa_slit[(from) * MAX_NUMNODES + (to)])
+extern int __node_distance(int from, int to);
+#define node_distance(from,to) __node_distance(from, to)
extern int paddr_to_nid(unsigned long paddr);
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 1dacbf5e9e09..41eb281709da 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -578,8 +578,8 @@ void __init acpi_numa_fixup(void)
if (!slit_table) {
for (i = 0; i < MAX_NUMNODES; i++)
for (j = 0; j < MAX_NUMNODES; j++)
- node_distance(i, j) = i == j ? LOCAL_DISTANCE :
- REMOTE_DISTANCE;
+ slit_distance(i, j) = i == j ?
+ LOCAL_DISTANCE : REMOTE_DISTANCE;
return;
}
@@ -592,7 +592,7 @@ void __init acpi_numa_fixup(void)
if (!pxm_bit_test(j))
continue;
node_to = pxm_to_node(j);
- node_distance(node_from, node_to) =
+ slit_distance(node_from, node_to) =
slit_table->entry[i * slit_table->locality_count + j];
}
}
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index 3861d6e32d5f..a03803506b0c 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -36,6 +36,12 @@ struct node_cpuid_s node_cpuid[NR_CPUS] =
*/
u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES];
+int __node_distance(int from, int to)
+{
+ return slit_distance(from, to);
+}
+EXPORT_SYMBOL(__node_distance);
+
/* Identify which cnode a physical address resides on */
int
paddr_to_nid(unsigned long paddr)
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index 6181e4134483..fe3ddd73a0cc 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -55,12 +55,12 @@
*/
#ifdef CONFIG_SUN3
#define PTRS_PER_PTE 16
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
#define PTRS_PER_PMD 1
#define PTRS_PER_PGD 2048
#elif defined(CONFIG_COLDFIRE)
#define PTRS_PER_PTE 512
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
#define PTRS_PER_PMD 1
#define PTRS_PER_PGD 1024
#else
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index f64ebb9c9a41..e14b6621c933 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -63,7 +63,7 @@ extern int mem_init_done;
#include <asm-generic/4level-fixup.h>
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c
index d57563c58a26..224eea40e1ee 100644
--- a/arch/microblaze/kernel/ftrace.c
+++ b/arch/microblaze/kernel/ftrace.c
@@ -22,8 +22,7 @@
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
{
unsigned long old;
- int faulted, err;
- struct ftrace_graph_ent trace;
+ int faulted;
unsigned long return_hooker = (unsigned long)
&return_to_handler;
@@ -63,18 +62,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}
- err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
- if (err == -EBUSY) {
+ if (function_graph_enter(old, self_addr, 0, NULL))
*parent = old;
- return;
- }
-
- trace.func = self_addr;
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace)) {
- current->curr_ret_stack--;
- *parent = old;
- }
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper.c b/arch/mips/cavium-octeon/executive/cvmx-helper.c
index 75108ec669eb..6c79e8a16a26 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper.c
@@ -67,7 +67,7 @@ void (*cvmx_override_pko_queue_priority) (int pko_port,
void (*cvmx_override_ipd_port_setup) (int ipd_port);
/* Port count per interface */
-static int interface_port_count[5];
+static int interface_port_count[9];
/**
* Return the number of interfaces the chip has. Each interface
diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig
index 490b12af103c..c52d0efacd14 100644
--- a/arch/mips/configs/cavium_octeon_defconfig
+++ b/arch/mips/configs/cavium_octeon_defconfig
@@ -140,6 +140,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_STAGING=y
CONFIG_OCTEON_ETHERNET=y
+CONFIG_OCTEON_USB=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_RAS=y
CONFIG_EXT4_FS=y
diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
index 0170602a1e4e..6cf8ffb5367e 100644
--- a/arch/mips/include/asm/syscall.h
+++ b/arch/mips/include/asm/syscall.h
@@ -73,7 +73,7 @@ static inline unsigned long mips_get_syscall_arg(unsigned long *arg,
#ifdef CONFIG_64BIT
case 4: case 5: case 6: case 7:
#ifdef CONFIG_MIPS32_O32
- if (test_thread_flag(TIF_32BIT_REGS))
+ if (test_tsk_thread_flag(task, TIF_32BIT_REGS))
return get_user(*arg, (int *)usp + n);
else
#endif
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 7f3dfdbc3657..b122cbb4aad1 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -322,7 +322,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
unsigned long fp)
{
unsigned long old_parent_ra;
- struct ftrace_graph_ent trace;
unsigned long return_hooker = (unsigned long)
&return_to_handler;
int faulted, insns;
@@ -369,12 +368,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
if (unlikely(faulted))
goto out;
- if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp,
- NULL) == -EBUSY) {
- *parent_ra_addr = old_parent_ra;
- return;
- }
-
/*
* Get the recorded ip of the current mcount calling site in the
* __mcount_loc section, which will be used to filter the function
@@ -382,13 +375,10 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
*/
insns = core_kernel_text(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
- trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
+ self_ra -= (MCOUNT_INSN_SIZE * insns);
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace)) {
- current->curr_ret_stack--;
+ if (function_graph_enter(old_parent_ra, self_ra, fp, NULL))
*parent_ra_addr = old_parent_ra;
- }
return;
out:
ftrace_graph_stop();
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index ea09ed6a80a9..8c6c48ed786a 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -794,6 +794,7 @@ static void __init arch_mem_init(char **cmdline_p)
/* call board setup routine */
plat_mem_setup();
+ memblock_set_bottom_up(true);
/*
* Make sure all kernel memory is in the maps. The "UP" and
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 0f852e1b5891..15e103c6d799 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2260,10 +2260,8 @@ void __init trap_init(void)
unsigned long size = 0x200 + VECTORSPACING*64;
phys_addr_t ebase_pa;
- memblock_set_bottom_up(true);
ebase = (unsigned long)
memblock_alloc_from(size, 1 << fls(size), 0);
- memblock_set_bottom_up(false);
/*
* Try to ensure ebase resides in KSeg0 if possible.
@@ -2307,6 +2305,7 @@ void __init trap_init(void)
if (board_ebase_setup)
board_ebase_setup();
per_cpu_trap_init(true);
+ memblock_set_bottom_up(false);
/*
* Copy the generic exception handlers to their final destination.
diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c
index 622761878cd1..60bf0a1cb757 100644
--- a/arch/mips/loongson64/loongson-3/numa.c
+++ b/arch/mips/loongson64/loongson-3/numa.c
@@ -231,6 +231,8 @@ static __init void prom_meminit(void)
cpumask_clear(&__node_data[(node)]->cpumask);
}
}
+ max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());
+
for (cpu = 0; cpu < loongson_sysconf.nr_cpus; cpu++) {
node = cpu / loongson_sysconf.cores_per_node;
if (node >= num_online_nodes())
@@ -248,19 +250,9 @@ static __init void prom_meminit(void)
void __init paging_init(void)
{
- unsigned node;
unsigned long zones_size[MAX_NR_ZONES] = {0, };
pagetable_init();
-
- for_each_online_node(node) {
- unsigned long start_pfn, end_pfn;
-
- get_pfn_range_for_nid(node, &start_pfn, &end_pfn);
-
- if (end_pfn > max_low_pfn)
- max_low_pfn = end_pfn;
- }
#ifdef CONFIG_ZONE_DMA32
zones_size[ZONE_DMA32] = MAX_DMA32_PFN;
#endif
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index e6c9485cadcf..cb38461391cb 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -50,7 +50,7 @@ void *arch_dma_alloc(struct device *dev, size_t size,
void *ret;
ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
- if (!ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
+ if (ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
dma_cache_wback_inv((unsigned long) ret, size);
ret = (void *)UNCAC_ADDR(ret);
}
diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c
index 41b71c4352c2..c1ce6f43642b 100644
--- a/arch/mips/ralink/mt7620.c
+++ b/arch/mips/ralink/mt7620.c
@@ -84,7 +84,7 @@ static struct rt2880_pmx_func pcie_rst_grp[] = {
};
static struct rt2880_pmx_func nd_sd_grp[] = {
FUNC("nand", MT7620_GPIO_MODE_NAND, 45, 15),
- FUNC("sd", MT7620_GPIO_MODE_SD, 45, 15)
+ FUNC("sd", MT7620_GPIO_MODE_SD, 47, 13)
};
static struct rt2880_pmx_group mt7620a_pinmux_data[] = {
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index d8b8444d6795..813d13f92957 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -435,6 +435,7 @@ void __init prom_meminit(void)
mlreset();
szmem();
+ max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());
for (node = 0; node < MAX_COMPACT_NODES; node++) {
if (node_online(node)) {
@@ -455,18 +456,8 @@ extern void setup_zero_pages(void);
void __init paging_init(void)
{
unsigned long zones_size[MAX_NR_ZONES] = {0, };
- unsigned node;
pagetable_init();
-
- for_each_online_node(node) {
- unsigned long start_pfn, end_pfn;
-
- get_pfn_range_for_nid(node, &start_pfn, &end_pfn);
-
- if (end_pfn > max_low_pfn)
- max_low_pfn = end_pfn;
- }
zones_size[ZONE_NORMAL] = max_low_pfn;
free_area_init_nodes(zones_size);
}
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index d3e19a55cf53..9f52db930c00 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -4,7 +4,7 @@
#ifndef _ASMNDS32_PGTABLE_H
#define _ASMNDS32_PGTABLE_H
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
#include <asm-generic/4level-fixup.h>
#include <asm-generic/sizes.h>
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
index a0a9679ad5de..8a41372551ff 100644
--- a/arch/nds32/kernel/ftrace.c
+++ b/arch/nds32/kernel/ftrace.c
@@ -211,29 +211,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
unsigned long frame_pointer)
{
unsigned long return_hooker = (unsigned long)&return_to_handler;
- struct ftrace_graph_ent trace;
unsigned long old;
- int err;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
old = *parent;
- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace))
- return;
-
- err = ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer, NULL);
-
- if (err == -EBUSY)
- return;
-
- *parent = return_hooker;
+ if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
+ *parent = return_hooker;
}
noinline void ftrace_graph_caller(void)
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index b941ac7d4e70..c7bb74e22436 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -111,7 +111,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#if CONFIG_PGTABLE_LEVELS == 3
#define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY)
#else
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
#define BITS_PER_PMD 0
#endif
#define PTRS_PER_PMD (1UL << BITS_PER_PMD)
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 16aec9ba2580..8a63515f03bf 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -37,8 +37,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *x)
volatile unsigned int *a;
a = __ldcw_align(x);
- /* Release with ordered store. */
- __asm__ __volatile__("stw,ma %0,0(%1)" : : "r"(1), "r"(a) : "memory");
+ mb();
+ *a = 1;
}
static inline int arch_spin_trylock(arch_spinlock_t *x)
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 6fa8535d3cce..e46a4157a894 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -30,7 +30,6 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
unsigned long self_addr)
{
unsigned long old;
- struct ftrace_graph_ent trace;
extern int parisc_return_to_handler;
if (unlikely(ftrace_graph_is_dead()))
@@ -41,19 +40,9 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
old = *parent;
- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace))
- return;
-
- if (ftrace_push_return_trace(old, self_addr, &trace.depth,
- 0, NULL) == -EBUSY)
- return;
-
- /* activate parisc_return_to_handler() as return point */
- *parent = (unsigned long) &parisc_return_to_handler;
+ if (!function_graph_enter(old, self_addr, 0, NULL))
+ /* activate parisc_return_to_handler() as return point */
+ *parent = (unsigned long) &parisc_return_to_handler;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 9505c317818d..a9bc90dc4ae7 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -640,7 +640,8 @@ cas_action:
sub,<> %r28, %r25, %r0
2: stw %r24, 0(%r26)
/* Free lock */
- stw,ma %r20, 0(%sr2,%r20)
+ sync
+ stw %r20, 0(%sr2,%r20)
#if ENABLE_LWS_DEBUG
/* Clear thread register indicator */
stw %r0, 4(%sr2,%r20)
@@ -654,7 +655,8 @@ cas_action:
3:
/* Error occurred on load or store */
/* Free lock */
- stw,ma %r20, 0(%sr2,%r20)
+ sync
+ stw %r20, 0(%sr2,%r20)
#if ENABLE_LWS_DEBUG
stw %r0, 4(%sr2,%r20)
#endif
@@ -855,7 +857,8 @@ cas2_action:
cas2_end:
/* Free lock */
- stw,ma %r20, 0(%sr2,%r20)
+ sync
+ stw %r20, 0(%sr2,%r20)
/* Enable interrupts */
ssm PSW_SM_I, %r0
/* Return to userspace, set no error */
@@ -865,7 +868,8 @@ cas2_end:
22:
/* Error occurred on load or store */
/* Free lock */
- stw,ma %r20, 0(%sr2,%r20)
+ sync
+ stw %r20, 0(%sr2,%r20)
ssm PSW_SM_I, %r0
ldo 1(%r0),%r28
b lws_exit
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 3ef40b703c4a..e746becd9d6f 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -268,19 +268,13 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src,
* their hooks, a bitfield is reserved for use by the platform near the
* top of MMIO addresses (not PIO, those have to cope the hard way).
*
- * This bit field is 12 bits and is at the top of the IO virtual
- * addresses PCI_IO_INDIRECT_TOKEN_MASK.
+ * The highest address in the kernel virtual space are:
*
- * The kernel virtual space is thus:
+ * d0003fffffffffff # with Hash MMU
+ * c00fffffffffffff # with Radix MMU
*
- * 0xD000000000000000 : vmalloc
- * 0xD000080000000000 : PCI PHB IO space
- * 0xD000080080000000 : ioremap
- * 0xD0000fffffffffff : end of ioremap region
- *
- * Since the top 4 bits are reserved as the region ID, we use thus
- * the next 12 bits and keep 4 bits available for the future if the
- * virtual address space is ever to be extended.
+ * The top 4 bits are reserved as the region ID on hash, leaving us 8 bits
+ * that can be used for the field.
*
* The direct IO mapping operations will then mask off those bits
* before doing the actual access, though that only happen when
@@ -292,8 +286,8 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src,
*/
#ifdef CONFIG_PPC_INDIRECT_MMIO
-#define PCI_IO_IND_TOKEN_MASK 0x0fff000000000000ul
-#define PCI_IO_IND_TOKEN_SHIFT 48
+#define PCI_IO_IND_TOKEN_SHIFT 52
+#define PCI_IO_IND_TOKEN_MASK (0xfful << PCI_IO_IND_TOKEN_SHIFT)
#define PCI_FIX_ADDR(addr) \
((PCI_IO_ADDR)(((unsigned long)(addr)) & ~PCI_IO_IND_TOKEN_MASK))
#define PCI_GET_ADDR_TOKEN(addr) \
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 6093bc8f74e5..a6e9e314c707 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -493,6 +493,8 @@
__PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b))
#define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \
__PPC_RT(t) | __PPC_RB(b))
+#define __PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \
+ ___PPC_RT(t) | ___PPC_RB(b))
#define PPC_ICBT(c,a,b) stringify_in_c(.long PPC_INST_ICBT | \
__PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b))
/* PASemi instructions */
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index f73886a1a7f5..0b8a735b6d85 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -54,6 +54,7 @@ struct pt_regs
#ifdef CONFIG_PPC64
unsigned long ppr;
+ unsigned long __pad; /* Maintain 16 byte interrupt stack alignment */
#endif
};
#endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 2a51e4cc8246..236c1151a3a7 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -636,6 +636,8 @@ static void *__init alloc_stack(unsigned long limit, int cpu)
{
unsigned long pa;
+ BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
+
pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
early_cpu_to_node(cpu), MEMBLOCK_NONE);
if (!pa) {
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 4bf051d3e21e..b65c8a34ad6e 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -950,7 +950,6 @@ int ftrace_disable_ftrace_graph_caller(void)
*/
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
{
- struct ftrace_graph_ent trace;
unsigned long return_hooker;
if (unlikely(ftrace_graph_is_dead()))
@@ -961,18 +960,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
return_hooker = ppc_function_entry(return_to_handler);
- trace.func = ip;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace))
- goto out;
-
- if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
- NULL) == -EBUSY)
- goto out;
-
- parent = return_hooker;
+ if (!function_graph_enter(parent, ip, 0, NULL))
+ parent = return_hooker;
out:
return parent;
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d65b961661fb..a56f8413758a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -983,6 +983,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
ret = kvmhv_enter_nested_guest(vcpu);
if (ret == H_INTERRUPT) {
kvmppc_set_gpr(vcpu, 3, 0);
+ vcpu->arch.hcall_needed = 0;
return -EINTR;
}
break;
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 491b0f715d6b..ea1d7c808319 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -6,8 +6,6 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace
/*
* Tracepoint for guest mode entry.
@@ -120,4 +118,10 @@ TRACE_EVENT(kvm_check_requests,
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+
#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h
index ac640e81fdc5..3837842986aa 100644
--- a/arch/powerpc/kvm/trace_booke.h
+++ b/arch/powerpc/kvm/trace_booke.h
@@ -6,8 +6,6 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm_booke
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace_booke
#define kvm_trace_symbol_exit \
{0, "CRITICAL"}, \
@@ -218,4 +216,11 @@ TRACE_EVENT(kvm_booke_queue_irqprio,
#endif
/* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_booke
+
#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index bcfe8a987f6a..8a1e3b0047f1 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -9,8 +9,6 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm_hv
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace_hv
#define kvm_trace_symbol_hcall \
{H_REMOVE, "H_REMOVE"}, \
@@ -497,4 +495,11 @@ TRACE_EVENT(kvmppc_run_vcpu_exit,
#endif /* _TRACE_KVM_HV_H */
/* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_hv
+
#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index 2f9a8829552b..46a46d328fbf 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -8,8 +8,6 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm_pr
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace_pr
TRACE_EVENT(kvm_book3s_reenter,
TP_PROTO(int r, struct kvm_vcpu *vcpu),
@@ -257,4 +255,11 @@ TRACE_EVENT(kvm_exit,
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_pr
+
#include <trace/define_trace.h>
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 3a048e98a132..ce28ae5ca080 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1178,7 +1178,7 @@ static long vphn_get_associativity(unsigned long cpu,
switch (rc) {
case H_FUNCTION:
- printk(KERN_INFO
+ printk_once(KERN_INFO
"VPHN is not supported. Disabling polling...\n");
stop_topology_update();
break;
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index c3fdf2969d9f..bc3914d54e26 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -19,6 +19,7 @@
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
+#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/smp.h>
@@ -58,27 +59,19 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags);
}
-static void assert_slb_exists(unsigned long ea)
+static void assert_slb_presence(bool present, unsigned long ea)
{
#ifdef CONFIG_DEBUG_VM
unsigned long tmp;
WARN_ON_ONCE(mfmsr() & MSR_EE);
- asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0");
- WARN_ON(tmp == 0);
-#endif
-}
-
-static void assert_slb_notexists(unsigned long ea)
-{
-#ifdef CONFIG_DEBUG_VM
- unsigned long tmp;
+ if (!cpu_has_feature(CPU_FTR_ARCH_206))
+ return;
- WARN_ON_ONCE(mfmsr() & MSR_EE);
+ asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");
- asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0");
- WARN_ON(tmp != 0);
+ WARN_ON(present == (tmp == 0));
#endif
}
@@ -114,7 +107,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize,
*/
slb_shadow_update(ea, ssize, flags, index);
- assert_slb_notexists(ea);
+ assert_slb_presence(false, ea);
asm volatile("slbmte %0,%1" :
: "r" (mk_vsid_data(ea, ssize, flags)),
"r" (mk_esid_data(ea, ssize, index))
@@ -137,7 +130,7 @@ void __slb_restore_bolted_realmode(void)
"r" (be64_to_cpu(p->save_area[index].esid)));
}
- assert_slb_exists(local_paca->kstack);
+ assert_slb_presence(true, local_paca->kstack);
}
/*
@@ -185,7 +178,7 @@ void slb_flush_and_restore_bolted(void)
:: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)),
"r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid))
: "memory");
- assert_slb_exists(get_paca()->kstack);
+ assert_slb_presence(true, get_paca()->kstack);
get_paca()->slb_cache_ptr = 0;
@@ -443,9 +436,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
ea = (unsigned long)
get_paca()->slb_cache[i] << SID_SHIFT;
/*
- * Could assert_slb_exists here, but hypervisor
- * or machine check could have come in and
- * removed the entry at this point.
+ * Could assert_slb_presence(true) here, but
+ * hypervisor or machine check could have come
+ * in and removed the entry at this point.
*/
slbie_data = ea;
@@ -676,7 +669,7 @@ static long slb_insert_entry(unsigned long ea, unsigned long context,
* User preloads should add isync afterwards in case the kernel
* accesses user memory before it returns to userspace with rfid.
*/
- assert_slb_notexists(ea);
+ assert_slb_presence(false, ea);
asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
barrier();
@@ -715,7 +708,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
return -EFAULT;
if (ea < H_VMALLOC_END)
- flags = get_paca()->vmalloc_sllp;
+ flags = local_paca->vmalloc_sllp;
else
flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
} else {
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 50b129785aee..17482f5de3e2 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -166,7 +166,33 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
PPC_BLR();
}
-static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
+static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
+ u64 func)
+{
+#ifdef PPC64_ELF_ABI_v1
+ /* func points to the function descriptor */
+ PPC_LI64(b2p[TMP_REG_2], func);
+ /* Load actual entry point from function descriptor */
+ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
+ /* ... and move it to LR */
+ PPC_MTLR(b2p[TMP_REG_1]);
+ /*
+ * Load TOC from function descriptor at offset 8.
+ * We can clobber r2 since we get called through a
+ * function pointer (so caller will save/restore r2)
+ * and since we don't use a TOC ourself.
+ */
+ PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
+#else
+ /* We can clobber r12 */
+ PPC_FUNC_ADDR(12, func);
+ PPC_MTLR(12);
+#endif
+ PPC_BLRL();
+}
+
+static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
+ u64 func)
{
unsigned int i, ctx_idx = ctx->idx;
@@ -273,7 +299,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
{
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
- int i;
+ int i, ret;
/* Start of epilogue code - will only be valid 2nd pass onwards */
u32 exit_addr = addrs[flen];
@@ -284,8 +310,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
u32 src_reg = b2p[insn[i].src_reg];
s16 off = insn[i].off;
s32 imm = insn[i].imm;
+ bool func_addr_fixed;
+ u64 func_addr;
u64 imm64;
- u8 *func;
u32 true_cond;
u32 tmp_idx;
@@ -711,23 +738,15 @@ emit_clear:
case BPF_JMP | BPF_CALL:
ctx->seen |= SEEN_FUNC;
- /* bpf function call */
- if (insn[i].src_reg == BPF_PSEUDO_CALL)
- if (!extra_pass)
- func = NULL;
- else if (fp->aux->func && off < fp->aux->func_cnt)
- /* use the subprog id from the off
- * field to lookup the callee address
- */
- func = (u8 *) fp->aux->func[off]->bpf_func;
- else
- return -EINVAL;
- /* kernel helper call */
- else
- func = (u8 *) __bpf_call_base + imm;
-
- bpf_jit_emit_func_call(image, ctx, (u64)func);
+ ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+ &func_addr, &func_addr_fixed);
+ if (ret < 0)
+ return ret;
+ if (func_addr_fixed)
+ bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
+ else
+ bpf_jit_emit_func_call_rel(image, ctx, func_addr);
/* move return value from r3 to BPF_REG_0 */
PPC_MR(b2p[BPF_REG_0], 3);
break;
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 6f60e0931922..75b935252981 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -102,63 +102,6 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
}
EXPORT_SYMBOL(pnv_pci_get_npu_dev);
-#define NPU_DMA_OP_UNSUPPORTED() \
- dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \
- __func__)
-
-static void *dma_npu_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- NPU_DMA_OP_UNSUPPORTED();
- return NULL;
-}
-
-static void dma_npu_free(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- NPU_DMA_OP_UNSUPPORTED();
-}
-
-static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- NPU_DMA_OP_UNSUPPORTED();
- return 0;
-}
-
-static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction,
- unsigned long attrs)
-{
- NPU_DMA_OP_UNSUPPORTED();
- return 0;
-}
-
-static int dma_npu_dma_supported(struct device *dev, u64 mask)
-{
- NPU_DMA_OP_UNSUPPORTED();
- return 0;
-}
-
-static u64 dma_npu_get_required_mask(struct device *dev)
-{
- NPU_DMA_OP_UNSUPPORTED();
- return 0;
-}
-
-static const struct dma_map_ops dma_npu_ops = {
- .map_page = dma_npu_map_page,
- .map_sg = dma_npu_map_sg,
- .alloc = dma_npu_alloc,
- .free = dma_npu_free,
- .dma_supported = dma_npu_dma_supported,
- .get_required_mask = dma_npu_get_required_mask,
-};
-
/*
* Returns the PE assoicated with the PCI device of the given
* NPU. Returns the linked pci device if pci_dev != NULL.
@@ -270,10 +213,11 @@ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]);
/*
- * We don't initialise npu_pe->tce32_table as we always use
- * dma_npu_ops which are nops.
+ * NVLink devices use the same TCE table configuration as
+ * their parent device so drivers shouldn't be doing DMA
+ * operations directly on these devices.
*/
- set_dma_ops(&npe->pdev->dev, &dma_npu_ops);
+ set_dma_ops(&npe->pdev->dev, NULL);
}
/*
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index d10146197533..4b594f2e4f7e 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -71,10 +71,27 @@ KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
# arch specific predefines for sparse
CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS)
+# Default target when executing plain make
+boot := arch/riscv/boot
+KBUILD_IMAGE := $(boot)/Image.gz
+
head-y := arch/riscv/kernel/head.o
core-y += arch/riscv/kernel/ arch/riscv/mm/
libs-y += arch/riscv/lib/
-all: vmlinux
+PHONY += vdso_install
+vdso_install:
+ $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
+
+all: Image.gz
+
+Image: vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+Image.%: Image
+ $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+zinstall install:
+ $(Q)$(MAKE) $(build)=$(boot) $@
diff --git a/arch/riscv/boot/.gitignore b/arch/riscv/boot/.gitignore
new file mode 100644
index 000000000000..8dab0bb6ae66
--- /dev/null
+++ b/arch/riscv/boot/.gitignore
@@ -0,0 +1,2 @@
+Image
+Image.gz
diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile
new file mode 100644
index 000000000000..0990a9fdbe5d
--- /dev/null
+++ b/arch/riscv/boot/Makefile
@@ -0,0 +1,33 @@
+#
+# arch/riscv/boot/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2018, Anup Patel.
+# Author: Anup Patel <anup@brainfault.org>
+#
+# Based on the ia64 and arm64 boot/Makefile.
+#
+
+OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
+
+targets := Image
+
+$(obj)/Image: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/Image.gz: $(obj)/Image FORCE
+ $(call if_changed,gzip)
+
+install:
+ $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
+ $(obj)/Image System.map "$(INSTALL_PATH)"
+
+zinstall:
+ $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
+ $(obj)/Image.gz System.map "$(INSTALL_PATH)"
diff --git a/arch/riscv/boot/install.sh b/arch/riscv/boot/install.sh
new file mode 100644
index 000000000000..18c39159c0ff
--- /dev/null
+++ b/arch/riscv/boot/install.sh
@@ -0,0 +1,60 @@
+#!/bin/sh
+#
+# arch/riscv/boot/install.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+# Adapted from code in arch/i386/boot/install.sh by Russell King
+#
+# "make install" script for the RISC-V Linux port
+#
+# Arguments:
+# $1 - kernel version
+# $2 - kernel image file
+# $3 - kernel map file
+# $4 - default install path (blank if root directory)
+#
+
+verify () {
+ if [ ! -f "$1" ]; then
+ echo "" 1>&2
+ echo " *** Missing file: $1" 1>&2
+ echo ' *** You need to run "make" before "make install".' 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+}
+
+# Make sure the files actually exist
+verify "$2"
+verify "$3"
+
+# User may have a custom install script
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
+
+if [ "$(basename $2)" = "Image.gz" ]; then
+# Compressed install
+ echo "Installing compressed kernel"
+ base=vmlinuz
+else
+# Normal install
+ echo "Installing normal kernel"
+ base=vmlinux
+fi
+
+if [ -f $4/$base-$1 ]; then
+ mv $4/$base-$1 $4/$base-$1.old
+fi
+cat $2 > $4/$base-$1
+
+# Install system map file
+if [ -f $4/System.map-$1 ]; then
+ mv $4/System.map-$1 $4/System.map-$1.old
+fi
+cp $3 $4/System.map-$1
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 07fa9ea75fea..ef4f15df9adf 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -76,4 +76,5 @@ CONFIG_NFS_V4_1=y
CONFIG_NFS_V4_2=y
CONFIG_ROOT_NFS=y
CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_PRINTK_TIME=y
# CONFIG_RCU_TRACE is not set
diff --git a/arch/riscv/include/asm/module.h b/arch/riscv/include/asm/module.h
index 349df33808c4..cd2af4b013e3 100644
--- a/arch/riscv/include/asm/module.h
+++ b/arch/riscv/include/asm/module.h
@@ -8,6 +8,7 @@
#define MODULE_ARCH_VERMAGIC "riscv"
+struct module;
u64 module_emit_got_entry(struct module *mod, u64 val);
u64 module_emit_plt_entry(struct module *mod, u64 val);
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index 2c5df945d43c..bbe1862e8f80 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -56,8 +56,8 @@ struct pt_regs {
unsigned long sstatus;
unsigned long sbadaddr;
unsigned long scause;
- /* a0 value before the syscall */
- unsigned long orig_a0;
+ /* a0 value before the syscall */
+ unsigned long orig_a0;
};
#ifdef CONFIG_64BIT
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index 473cfc84e412..8c3e3e3c8be1 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -400,13 +400,13 @@ extern unsigned long __must_check __asm_copy_from_user(void *to,
static inline unsigned long
raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
- return __asm_copy_to_user(to, from, n);
+ return __asm_copy_from_user(to, from, n);
}
static inline unsigned long
raw_copy_to_user(void __user *to, const void *from, unsigned long n)
{
- return __asm_copy_from_user(to, from, n);
+ return __asm_copy_to_user(to, from, n);
}
extern long strncpy_from_user(char *dest, const char __user *src, long count);
diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h
index eff7aa9aa163..fef96f117b4d 100644
--- a/arch/riscv/include/asm/unistd.h
+++ b/arch/riscv/include/asm/unistd.h
@@ -13,10 +13,9 @@
/*
* There is explicitly no include guard here because this file is expected to
- * be included multiple times. See uapi/asm/syscalls.h for more info.
+ * be included multiple times.
*/
-#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_SYS_CLONE
+
#include <uapi/asm/unistd.h>
-#include <uapi/asm/syscalls.h>
diff --git a/arch/riscv/include/uapi/asm/syscalls.h b/arch/riscv/include/uapi/asm/unistd.h
index 206dc4b0f6ea..1f3bd3ebbb0d 100644
--- a/arch/riscv/include/uapi/asm/syscalls.h
+++ b/arch/riscv/include/uapi/asm/unistd.h
@@ -1,13 +1,25 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
- * Copyright (C) 2017-2018 SiFive
+ * Copyright (C) 2018 David Abdurachmanov <david.abdurachmanov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-/*
- * There is explicitly no include guard here because this file is expected to
- * be included multiple times in order to define the syscall macros via
- * __SYSCALL.
- */
+#ifdef __LP64__
+#define __ARCH_WANT_NEW_STAT
+#endif /* __LP64__ */
+
+#include <asm-generic/unistd.h>
/*
* Allows the instruction cache to be flushed from userspace. Despite RISC-V
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index 3a5a2ee31547..b4a7d4427fbb 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -64,7 +64,7 @@ int riscv_of_processor_hartid(struct device_node *node)
static void print_isa(struct seq_file *f, const char *orig_isa)
{
- static const char *ext = "mafdc";
+ static const char *ext = "mafdcsu";
const char *isa = orig_isa;
const char *e;
@@ -88,11 +88,14 @@ static void print_isa(struct seq_file *f, const char *orig_isa)
/*
* Check the rest of the ISA string for valid extensions, printing those
* we find. RISC-V ISA strings define an order, so we only print the
- * extension bits when they're in order.
+ * extension bits when they're in order. Hide the supervisor (S)
+ * extension from userspace as it's not accessible from there.
*/
for (e = ext; *e != '\0'; ++e) {
if (isa[0] == e[0]) {
- seq_write(f, isa, 1);
+ if (isa[0] != 's')
+ seq_write(f, isa, 1);
+
isa++;
}
}
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 1157b6b52d25..c433f6d3dd64 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -132,7 +132,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
{
unsigned long return_hooker = (unsigned long)&return_to_handler;
unsigned long old;
- struct ftrace_graph_ent trace;
int err;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
@@ -144,17 +143,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
*/
old = *parent;
- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- if (!ftrace_graph_entry(&trace))
- return;
-
- err = ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer, parent);
- if (err == -EBUSY)
- return;
- *parent = return_hooker;
+ if (function_graph_enter(old, self_addr, frame_pointer, parent))
+ *parent = return_hooker;
}
#ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 711190d473d4..fe884cd69abd 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -44,6 +44,16 @@ ENTRY(_start)
amoadd.w a3, a2, (a3)
bnez a3, .Lsecondary_start
+ /* Clear BSS for flat non-ELF images */
+ la a3, __bss_start
+ la a4, __bss_stop
+ ble a4, a3, clear_bss_done
+clear_bss:
+ REG_S zero, (a3)
+ add a3, a3, RISCV_SZPTR
+ blt a3, a4, clear_bss
+clear_bss_done:
+
/* Save hart ID and DTB physical address */
mv s0, a0
mv s1, a1
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 3303ed2cd419..7dd308129b40 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -21,7 +21,7 @@ static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v)
{
if (v != (u32)v) {
pr_err("%s: value %016llx out of range for 32-bit field\n",
- me->name, v);
+ me->name, (long long)v);
return -EINVAL;
}
*location = v;
@@ -102,7 +102,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location,
if (offset != (s32)offset) {
pr_err(
"%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
- me->name, v, location);
+ me->name, (long long)v, location);
return -EINVAL;
}
@@ -144,7 +144,7 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location,
if (IS_ENABLED(CMODEL_MEDLOW)) {
pr_err(
"%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
- me->name, v, location);
+ me->name, (long long)v, location);
return -EINVAL;
}
@@ -188,7 +188,7 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location,
} else {
pr_err(
"%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n",
- me->name, v, location);
+ me->name, (long long)v, location);
return -EINVAL;
}
@@ -212,7 +212,7 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
} else {
pr_err(
"%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
- me->name, v, location);
+ me->name, (long long)v, location);
return -EINVAL;
}
}
@@ -234,7 +234,7 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location,
if (offset != fill_v) {
pr_err(
"%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
- me->name, v, location);
+ me->name, (long long)v, location);
return -EINVAL;
}
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index ece84991609c..65df1dfdc303 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -74,7 +74,7 @@ SECTIONS
*(.sbss*)
}
- BSS_SECTION(0, 0, 0)
+ BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
EXCEPTION_TABLE(0x10)
NOTES
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 5739bd05d289..4e2e600f7d53 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -3,6 +3,6 @@ lib-y += memcpy.o
lib-y += memset.o
lib-y += uaccess.o
-lib-(CONFIG_64BIT) += tishift.o
+lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_32BIT) += udivdi3.o
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 0b33577932c3..e21053e5e0da 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -27,7 +27,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
UTS_MACHINE := s390x
-STACK_SIZE := $(if $(CONFIG_KASAN),32768,16384)
+STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384)
CHECKFLAGS += -D__s390__ -D__s390x__
export LD_BFD
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 593039620487..b1bdd15e3429 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -22,10 +22,10 @@ OBJCOPYFLAGS :=
OBJECTS := $(addprefix $(obj)/,$(obj-y))
LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS)
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE
$(call if_changed,ld)
-OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info
+OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load
$(obj)/info.bin: vmlinux FORCE
$(call if_changed,objcopy)
@@ -46,17 +46,17 @@ suffix-$(CONFIG_KERNEL_LZMA) := .lzma
suffix-$(CONFIG_KERNEL_LZO) := .lzo
suffix-$(CONFIG_KERNEL_XZ) := .xz
-$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
$(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
$(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lz4)
-$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lzma)
-$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lzo)
-$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
$(call if_changed,xzkern)
OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 259d1698ac50..c69cb04b7a59 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -64,6 +64,8 @@ CONFIG_NUMA=y
CONFIG_PREEMPT=y
CONFIG_HZ_100=y
CONFIG_KEXEC_FILE=y
+CONFIG_EXPOLINE=y
+CONFIG_EXPOLINE_AUTO=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
@@ -84,9 +86,11 @@ CONFIG_PCI_DEBUG=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_CHSC_SCH=y
+CONFIG_VFIO_AP=m
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -161,8 +165,6 @@ CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
CONFIG_NF_TABLES=m
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
CONFIG_NFT_CT=m
CONFIG_NFT_COUNTER=m
CONFIG_NFT_LOG=m
@@ -365,6 +367,8 @@ CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_DNS_RESOLVER=y
CONFIG_OPENVSWITCH=m
+CONFIG_VSOCKETS=m
+CONFIG_VIRTIO_VSOCKETS=m
CONFIG_NETLINK_DIAG=m
CONFIG_CGROUP_NET_PRIO=y
CONFIG_BPF_JIT=y
@@ -461,6 +465,7 @@ CONFIG_PPTP=m
CONFIG_PPPOL2TP=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
+CONFIG_ISM=m
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -486,9 +491,12 @@ CONFIG_MLX4_INFINIBAND=m
CONFIG_MLX5_INFINIBAND=m
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
+CONFIG_VFIO_MDEV=m
+CONFIG_VFIO_MDEV_DEVICE=m
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
+CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@@ -615,7 +623,6 @@ CONFIG_DEBUG_CREDENTIALS=y
CONFIG_RCU_TORTURE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=300
CONFIG_NOTIFIER_ERROR_INJECTION=m
-CONFIG_PM_NOTIFIER_ERROR_INJECT=m
CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
CONFIG_FAULT_INJECTION=y
CONFIG_FAILSLAB=y
@@ -727,3 +734,4 @@ CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_KVM_S390_UCONTROL=y
CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 37fd60c20e22..32f539dc9c19 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -65,6 +65,8 @@ CONFIG_NR_CPUS=512
CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_KEXEC_FILE=y
+CONFIG_EXPOLINE=y
+CONFIG_EXPOLINE_AUTO=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
@@ -82,9 +84,11 @@ CONFIG_PCI=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_CHSC_SCH=y
+CONFIG_VFIO_AP=m
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -159,8 +163,6 @@ CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
CONFIG_NF_TABLES=m
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
CONFIG_NFT_CT=m
CONFIG_NFT_COUNTER=m
CONFIG_NFT_LOG=m
@@ -362,6 +364,8 @@ CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_DNS_RESOLVER=y
CONFIG_OPENVSWITCH=m
+CONFIG_VSOCKETS=m
+CONFIG_VIRTIO_VSOCKETS=m
CONFIG_NETLINK_DIAG=m
CONFIG_CGROUP_NET_PRIO=y
CONFIG_BPF_JIT=y
@@ -458,6 +462,7 @@ CONFIG_PPTP=m
CONFIG_PPPOL2TP=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
+CONFIG_ISM=m
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -483,9 +488,12 @@ CONFIG_MLX4_INFINIBAND=m
CONFIG_MLX5_INFINIBAND=m
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
+CONFIG_VFIO_MDEV=m
+CONFIG_VFIO_MDEV_DEVICE=m
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
+CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@@ -666,3 +674,4 @@ CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_KVM_S390_UCONTROL=y
CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 7cb6a52f727d..4d58a92b5d97 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -26,14 +26,23 @@ CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_PERF=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
-CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
+CONFIG_LIVEPATCH=y
+CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
+CONFIG_HZ_100=y
+CONFIG_KEXEC_FILE=y
+CONFIG_CRASH_DUMP=y
+CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
+CONFIG_CMM=m
CONFIG_OPROFILE=y
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
@@ -44,11 +53,7 @@ CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_DEFAULT_DEADLINE=y
-CONFIG_LIVEPATCH=y
-CONFIG_NR_CPUS=256
-CONFIG_NUMA=y
-CONFIG_HZ_100=y
-CONFIG_KEXEC_FILE=y
+CONFIG_BINFMT_MISC=m
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
@@ -60,9 +65,6 @@ CONFIG_ZBUD=m
CONFIG_ZSMALLOC=m
CONFIG_ZSMALLOC_STAT=y
CONFIG_IDLE_PAGE_TRACKING=y
-CONFIG_CRASH_DUMP=y
-CONFIG_BINFMT_MISC=m
-CONFIG_HIBERNATION=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -98,6 +100,7 @@ CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_VIRTIO_BLK=y
CONFIG_SCSI=y
+# CONFIG_SCSI_MQ_DEFAULT is not set
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
CONFIG_BLK_DEV_SR=y
@@ -131,6 +134,7 @@ CONFIG_EQUALIZER=m
CONFIG_TUN=m
CONFIG_VIRTIO_NET=y
# CONFIG_NET_VENDOR_ALACRITECH is not set
+# CONFIG_NET_VENDOR_AURORA is not set
# CONFIG_NET_VENDOR_CORTINA is not set
# CONFIG_NET_VENDOR_SOLARFLARE is not set
# CONFIG_NET_VENDOR_SOCIONEXT is not set
@@ -157,33 +161,6 @@ CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_GDB_SCRIPTS=y
-CONFIG_UNUSED_SYMBOLS=y
-CONFIG_DEBUG_SECTION_MISMATCH=y
-CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_PAGEALLOC=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_PANIC_ON_OOPS=y
-CONFIG_PROVE_LOCKING=y
-CONFIG_LOCK_STAT=y
-CONFIG_DEBUG_LOCKDEP=y
-CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_LIST=y
-CONFIG_DEBUG_SG=y
-CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_RCU_CPU_STALL_TIMEOUT=60
-CONFIG_LATENCYTOP=y
-CONFIG_SCHED_TRACER=y
-CONFIG_FTRACE_SYSCALLS=y
-CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
-CONFIG_STACK_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_FUNCTION_PROFILER=y
-# CONFIG_RUNTIME_TESTING_MENU is not set
-CONFIG_S390_PTDUMP=y
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_AUTHENC=m
CONFIG_CRYPTO_TEST=m
@@ -193,6 +170,7 @@ CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_CMAC=m
@@ -231,7 +209,6 @@ CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_ZCRYPT=m
-CONFIG_ZCRYPT_MULTIDEVNODES=y
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_SHA1_S390=m
@@ -247,4 +224,30 @@ CONFIG_CRC7=m
# CONFIG_XZ_DEC_ARM is not set
# CONFIG_XZ_DEC_ARMTHUMB is not set
# CONFIG_XZ_DEC_SPARC is not set
-CONFIG_CMM=m
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCK_STAT=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_LATENCYTOP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_STACK_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_FUNCTION_PROFILER=y
+# CONFIG_RUNTIME_TESTING_MENU is not set
+CONFIG_S390_PTDUMP=y
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index dbd689d556ce..ccbb53e22024 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -46,8 +46,6 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.asce_limit = STACK_TOP_MAX;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION3;
- /* pgd_alloc() did not account this pud */
- mm_inc_nr_puds(mm);
break;
case -PAGE_SIZE:
/* forked 5-level task, set new asce with new_mm->pgd */
@@ -63,9 +61,6 @@ static inline int init_new_context(struct task_struct *tsk,
/* forked 2-level compat task, set new asce with new mm->pgd */
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
- /* pgd_alloc() did not account this pmd */
- mm_inc_nr_pmds(mm);
- mm_inc_nr_puds(mm);
}
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c03..5ee733720a57 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -36,11 +36,11 @@ static inline void crst_table_init(unsigned long *crst, unsigned long entry)
static inline unsigned long pgd_entry_type(struct mm_struct *mm)
{
- if (mm->context.asce_limit <= _REGION3_SIZE)
+ if (mm_pmd_folded(mm))
return _SEGMENT_ENTRY_EMPTY;
- if (mm->context.asce_limit <= _REGION2_SIZE)
+ if (mm_pud_folded(mm))
return _REGION3_ENTRY_EMPTY;
- if (mm->context.asce_limit <= _REGION1_SIZE)
+ if (mm_p4d_folded(mm))
return _REGION2_ENTRY_EMPTY;
return _REGION1_ENTRY_EMPTY;
}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 411d435e7a7d..063732414dfb 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -493,6 +493,24 @@ static inline int is_module_addr(void *addr)
_REGION_ENTRY_PROTECT | \
_REGION_ENTRY_NOEXEC)
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+ return mm->context.asce_limit <= _REGION1_SIZE;
+}
+#define mm_p4d_folded(mm) mm_p4d_folded(mm)
+
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+ return mm->context.asce_limit <= _REGION2_SIZE;
+}
+#define mm_pud_folded(mm) mm_pud_folded(mm)
+
+static inline bool mm_pmd_folded(struct mm_struct *mm)
+{
+ return mm->context.asce_limit <= _REGION3_SIZE;
+}
+#define mm_pmd_folded(mm) mm_pmd_folded(mm)
+
static inline int mm_has_pgste(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 302795c47c06..81038ab357ce 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -236,7 +236,7 @@ static inline unsigned long current_stack_pointer(void)
return sp;
}
-static __no_sanitize_address_or_inline unsigned short stap(void)
+static __no_kasan_or_inline unsigned short stap(void)
{
unsigned short cpu_address;
@@ -330,7 +330,7 @@ static inline void __load_psw(psw_t psw)
* Set PSW mask to specified value, while leaving the
* PSW addr pointing to the next instruction.
*/
-static __no_sanitize_address_or_inline void __load_psw_mask(unsigned long mask)
+static __no_kasan_or_inline void __load_psw_mask(unsigned long mask)
{
unsigned long addr;
psw_t psw;
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 27248f42a03c..ce4e17c9aad6 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -14,7 +14,7 @@
* General size of kernel stacks
*/
#ifdef CONFIG_KASAN
-#define THREAD_SIZE_ORDER 3
+#define THREAD_SIZE_ORDER 4
#else
#define THREAD_SIZE_ORDER 2
#endif
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 457b7ba0fbb6..b31c779cf581 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -136,7 +136,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
unsigned long address)
{
- if (tlb->mm->context.asce_limit <= _REGION3_SIZE)
+ if (mm_pmd_folded(tlb->mm))
return;
pgtable_pmd_page_dtor(virt_to_page(pmd));
tlb_remove_table(tlb, pmd);
@@ -152,7 +152,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long address)
{
- if (tlb->mm->context.asce_limit <= _REGION1_SIZE)
+ if (mm_p4d_folded(tlb->mm))
return;
tlb_remove_table(tlb, p4d);
}
@@ -167,7 +167,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
unsigned long address)
{
- if (tlb->mm->context.asce_limit <= _REGION2_SIZE)
+ if (mm_pud_folded(tlb->mm))
return;
tlb_remove_table(tlb, pud);
}
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 724fba4d09d2..39191a0feed1 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -236,10 +236,10 @@ ENTRY(__switch_to)
stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
lghi %r4,__TASK_stack
lghi %r1,__TASK_thread
- lg %r5,0(%r4,%r3) # start of kernel stack of next
+ llill %r5,STACK_INIT
stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev
- lgr %r15,%r5
- aghi %r15,STACK_INIT # end of kernel stack of next
+ lg %r15,0(%r4,%r3) # start of kernel stack of next
+ agr %r15,%r5 # end of kernel stack of next
stg %r3,__LC_CURRENT # store task struct of next
stg %r15,__LC_KERNEL_STACK # store end of kernel stack
lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 84be7f02d0c2..39b13d71a8fe 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -203,22 +203,13 @@ device_initcall(ftrace_plt_init);
*/
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
{
- struct ftrace_graph_ent trace;
-
if (unlikely(ftrace_graph_is_dead()))
goto out;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
goto out;
ip -= MCOUNT_INSN_SIZE;
- trace.func = ip;
- trace.depth = current->curr_ret_stack + 1;
- /* Only trace if the calling function expects to. */
- if (!ftrace_graph_entry(&trace))
- goto out;
- if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
- NULL) == -EBUSY)
- goto out;
- parent = (unsigned long) return_to_handler;
+ if (!function_graph_enter(parent, ip, 0, NULL))
+ parent = (unsigned long) return_to_handler;
out:
return parent;
}
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index cc085e2d2ce9..d5523adeddbf 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -346,6 +346,8 @@ static int __hw_perf_event_init(struct perf_event *event)
break;
case PERF_TYPE_HARDWARE:
+ if (is_sampling_event(event)) /* No sampling support */
+ return -ENOENT;
ev = attr->config;
/* Count user space (problem-state) only */
if (!attr->exclude_user && attr->exclude_kernel) {
@@ -373,7 +375,7 @@ static int __hw_perf_event_init(struct perf_event *event)
return -ENOENT;
if (ev > PERF_CPUM_CF_MAX_CTR)
- return -EINVAL;
+ return -ENOENT;
/* Obtain the counter set to which the specified counter belongs */
set = get_counter_set(ev);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 7bf604ff50a1..bfabeb1889cc 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1842,10 +1842,30 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags)
CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
-static struct attribute *cpumsf_pmu_events_attr[] = {
- CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
- NULL,
- NULL,
+/* Attribute list for CPU_SF.
+ *
+ * The availablitiy depends on the CPU_MF sampling facility authorization
+ * for basic + diagnositic samples. This is determined at initialization
+ * time by the sampling facility device driver.
+ * If the authorization for basic samples is turned off, it should be
+ * also turned off for diagnostic sampling.
+ *
+ * During initialization of the device driver, check the authorization
+ * level for diagnostic sampling and installs the attribute
+ * file for diagnostic sampling if necessary.
+ *
+ * For now install a placeholder to reference all possible attributes:
+ * SF_CYCLES_BASIC and SF_CYCLES_BASIC_DIAG.
+ * Add another entry for the final NULL pointer.
+ */
+enum {
+ SF_CYCLES_BASIC_ATTR_IDX = 0,
+ SF_CYCLES_BASIC_DIAG_ATTR_IDX,
+ SF_CYCLES_ATTR_MAX
+};
+
+static struct attribute *cpumsf_pmu_events_attr[SF_CYCLES_ATTR_MAX + 1] = {
+ [SF_CYCLES_BASIC_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC)
};
PMU_FORMAT_ATTR(event, "config:0-63");
@@ -2040,7 +2060,10 @@ static int __init init_cpum_sampling_pmu(void)
if (si.ad) {
sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
- cpumsf_pmu_events_attr[1] =
+ /* Sampling of diagnostic data authorized,
+ * install event into attribute list of PMU device.
+ */
+ cpumsf_pmu_events_attr[SF_CYCLES_BASIC_DIAG_ATTR_IDX] =
CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
}
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index eb8aebea3ea7..e76309fbbcb3 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -37,7 +37,7 @@ KASAN_SANITIZE := n
$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
$(call if_changed,vdso32ld)
# strip rule for the .so file
@@ -46,12 +46,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
# assembly rules for the .S files
-$(obj-vdso32): %.o: %.S
+$(obj-vdso32): %.o: %.S FORCE
$(call if_changed_dep,vdso32as)
# actual build commands
quiet_cmd_vdso32ld = VDSO32L $@
- cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+ cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
quiet_cmd_vdso32as = VDSO32A $@
cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index a22b2cf86eec..f849ac61c5da 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -37,7 +37,7 @@ KASAN_SANITIZE := n
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
$(call if_changed,vdso64ld)
# strip rule for the .so file
@@ -46,12 +46,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
# assembly rules for the .S files
-$(obj-vdso64): %.o: %.S
+$(obj-vdso64): %.o: %.S FORCE
$(call if_changed_dep,vdso64as)
# actual build commands
quiet_cmd_vdso64ld = VDSO64L $@
- cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+ cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
quiet_cmd_vdso64as = VDSO64A $@
cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 21eb7407d51b..8429ab079715 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -154,14 +154,14 @@ SECTIONS
* uncompressed image info used by the decompressor
* it should match struct vmlinux_info
*/
- .vmlinux.info 0 : {
+ .vmlinux.info 0 (INFO) : {
QUAD(_stext) /* default_lma */
QUAD(startup_continue) /* entry */
QUAD(__bss_start - _stext) /* image_size */
QUAD(__bss_stop - __bss_start) /* bss_size */
QUAD(__boot_data_start) /* bootdata_off */
QUAD(__boot_data_end - __boot_data_start) /* bootdata_size */
- }
+ } :NONE
/* Debugging sections. */
STABS_DEBUG
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 76d89ee8b428..6791562779ee 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -101,6 +101,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
mm->context.asce_limit = _REGION1_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+ mm_inc_nr_puds(mm);
} else {
crst_table_init(table, _REGION1_ENTRY_EMPTY);
pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
@@ -130,6 +131,7 @@ void crst_table_downgrade(struct mm_struct *mm)
}
pgd = mm->pgd;
+ mm_dec_nr_pmds(mm);
mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
mm->context.asce_limit = _REGION3_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index ae0d9e889534..d31bde0870d8 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -53,6 +53,7 @@ int __node_distance(int a, int b)
{
return mode->distance ? mode->distance(a, b) : 0;
}
+EXPORT_SYMBOL(__node_distance);
int numa_debug_enabled;
diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
index 96dd9f7da250..1b04270e5460 100644
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -321,8 +321,7 @@ int ftrace_disable_ftrace_graph_caller(void)
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
{
unsigned long old;
- int faulted, err;
- struct ftrace_graph_ent trace;
+ int faulted;
unsigned long return_hooker = (unsigned long)&return_to_handler;
if (unlikely(ftrace_graph_is_dead()))
@@ -365,18 +364,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}
- err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
- if (err == -EBUSY) {
+ if (function_graph_enter(old, self_addr, 0, NULL))
__raw_writel(old, parent);
- return;
- }
-
- trace.func = self_addr;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace)) {
- current->curr_ret_stack--;
- __raw_writel(old, parent);
- }
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
index 915dda4ae412..684b84ce397f 100644
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -126,20 +126,11 @@ unsigned long prepare_ftrace_return(unsigned long parent,
unsigned long frame_pointer)
{
unsigned long return_hooker = (unsigned long) &return_to_handler;
- struct ftrace_graph_ent trace;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return parent + 8UL;
- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace))
- return parent + 8UL;
-
- if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
- frame_pointer, NULL) == -EBUSY)
+ if (function_graph_enter(parent, self_addr, frame_pointer, NULL))
return parent + 8UL;
return return_hooker;
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 222785af550b..5fda4f7bf15d 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -791,7 +791,7 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
}
/* Just skip the save instruction and the ctx register move. */
-#define BPF_TAILCALL_PROLOGUE_SKIP 16
+#define BPF_TAILCALL_PROLOGUE_SKIP 32
#define BPF_TAILCALL_CNT_SP_OFF (STACK_BIAS + 128)
static void build_prologue(struct jit_ctx *ctx)
@@ -824,9 +824,15 @@ static void build_prologue(struct jit_ctx *ctx)
const u8 vfp = bpf2sparc[BPF_REG_FP];
emit(ADD | IMMED | RS1(FP) | S13(STACK_BIAS) | RD(vfp), ctx);
+ } else {
+ emit_nop(ctx);
}
emit_reg_move(I0, O0, ctx);
+ emit_reg_move(I1, O1, ctx);
+ emit_reg_move(I2, O2, ctx);
+ emit_reg_move(I3, O3, ctx);
+ emit_reg_move(I4, O4, ctx);
/* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */
}
@@ -1270,6 +1276,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
const u8 tmp2 = bpf2sparc[TMP_REG_2];
u32 opcode = 0, rs2;
+ if (insn->dst_reg == BPF_REG_FP)
+ ctx->saw_frame_pointer = true;
+
ctx->tmp_2_used = true;
emit_loadimm(imm, tmp2, ctx);
@@ -1308,6 +1317,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
const u8 tmp = bpf2sparc[TMP_REG_1];
u32 opcode = 0, rs2;
+ if (insn->dst_reg == BPF_REG_FP)
+ ctx->saw_frame_pointer = true;
+
switch (BPF_SIZE(code)) {
case BPF_W:
opcode = ST32;
@@ -1340,6 +1352,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
const u8 tmp2 = bpf2sparc[TMP_REG_2];
const u8 tmp3 = bpf2sparc[TMP_REG_3];
+ if (insn->dst_reg == BPF_REG_FP)
+ ctx->saw_frame_pointer = true;
+
ctx->tmp_1_used = true;
ctx->tmp_2_used = true;
ctx->tmp_3_used = true;
@@ -1360,6 +1375,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
const u8 tmp2 = bpf2sparc[TMP_REG_2];
const u8 tmp3 = bpf2sparc[TMP_REG_3];
+ if (insn->dst_reg == BPF_REG_FP)
+ ctx->saw_frame_pointer = true;
+
ctx->tmp_1_used = true;
ctx->tmp_2_used = true;
ctx->tmp_3_used = true;
@@ -1425,12 +1443,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
struct bpf_prog *tmp, *orig_prog = prog;
struct sparc64_jit_data *jit_data;
struct bpf_binary_header *header;
+ u32 prev_image_size, image_size;
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
- u32 image_size;
u8 *image_ptr;
- int pass;
+ int pass, i;
if (!prog->jit_requested)
return orig_prog;
@@ -1461,61 +1479,82 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
header = jit_data->header;
extra_pass = true;
image_size = sizeof(u32) * ctx.idx;
+ prev_image_size = image_size;
+ pass = 1;
goto skip_init_ctx;
}
memset(&ctx, 0, sizeof(ctx));
ctx.prog = prog;
- ctx.offset = kcalloc(prog->len, sizeof(unsigned int), GFP_KERNEL);
+ ctx.offset = kmalloc_array(prog->len, sizeof(unsigned int), GFP_KERNEL);
if (ctx.offset == NULL) {
prog = orig_prog;
goto out_off;
}
- /* Fake pass to detect features used, and get an accurate assessment
- * of what the final image size will be.
+ /* Longest sequence emitted is for bswap32, 12 instructions. Pre-cook
+ * the offset array so that we converge faster.
*/
- if (build_body(&ctx)) {
- prog = orig_prog;
- goto out_off;
- }
- build_prologue(&ctx);
- build_epilogue(&ctx);
-
- /* Now we know the actual image size. */
- image_size = sizeof(u32) * ctx.idx;
- header = bpf_jit_binary_alloc(image_size, &image_ptr,
- sizeof(u32), jit_fill_hole);
- if (header == NULL) {
- prog = orig_prog;
- goto out_off;
- }
+ for (i = 0; i < prog->len; i++)
+ ctx.offset[i] = i * (12 * 4);
- ctx.image = (u32 *)image_ptr;
-skip_init_ctx:
- for (pass = 1; pass < 3; pass++) {
+ prev_image_size = ~0U;
+ for (pass = 1; pass < 40; pass++) {
ctx.idx = 0;
build_prologue(&ctx);
-
if (build_body(&ctx)) {
- bpf_jit_binary_free(header);
prog = orig_prog;
goto out_off;
}
-
build_epilogue(&ctx);
if (bpf_jit_enable > 1)
- pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass,
- image_size - (ctx.idx * 4),
+ pr_info("Pass %d: size = %u, seen = [%c%c%c%c%c%c]\n", pass,
+ ctx.idx * 4,
ctx.tmp_1_used ? '1' : ' ',
ctx.tmp_2_used ? '2' : ' ',
ctx.tmp_3_used ? '3' : ' ',
ctx.saw_frame_pointer ? 'F' : ' ',
ctx.saw_call ? 'C' : ' ',
ctx.saw_tail_call ? 'T' : ' ');
+
+ if (ctx.idx * 4 == prev_image_size)
+ break;
+ prev_image_size = ctx.idx * 4;
+ cond_resched();
+ }
+
+ /* Now we know the actual image size. */
+ image_size = sizeof(u32) * ctx.idx;
+ header = bpf_jit_binary_alloc(image_size, &image_ptr,
+ sizeof(u32), jit_fill_hole);
+ if (header == NULL) {
+ prog = orig_prog;
+ goto out_off;
+ }
+
+ ctx.image = (u32 *)image_ptr;
+skip_init_ctx:
+ ctx.idx = 0;
+
+ build_prologue(&ctx);
+
+ if (build_body(&ctx)) {
+ bpf_jit_binary_free(header);
+ prog = orig_prog;
+ goto out_off;
+ }
+
+ build_epilogue(&ctx);
+
+ if (ctx.idx * 4 != prev_image_size) {
+ pr_err("bpf_jit: Failed to converge, prev_size=%u size=%d\n",
+ prev_image_size, ctx.idx * 4);
+ bpf_jit_binary_free(header);
+ prog = orig_prog;
+ goto out_off;
}
if (bpf_jit_enable > 1)
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 74c002ddc0ce..28c40624bcb6 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1305,6 +1305,7 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
io_req->fds[0] = dev->cow.fd;
else
io_req->fds[0] = dev->fd;
+ io_req->error = 0;
if (req_op(req) == REQ_OP_FLUSH) {
io_req->op = UBD_FLUSH;
@@ -1313,9 +1314,7 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
io_req->cow_offset = -1;
io_req->offset = off;
io_req->length = bvec->bv_len;
- io_req->error = 0;
io_req->sector_mask = 0;
-
io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE;
io_req->offsets[0] = 0;
io_req->offsets[1] = dev->cow.data_offset;
@@ -1341,11 +1340,14 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
+ struct ubd *ubd_dev = hctx->queue->queuedata;
struct request *req = bd->rq;
int ret = 0;
blk_mq_start_request(req);
+ spin_lock_irq(&ubd_dev->lock);
+
if (req_op(req) == REQ_OP_FLUSH) {
ret = ubd_queue_one_vec(hctx, req, 0, NULL);
} else {
@@ -1361,9 +1363,11 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
}
}
out:
- if (ret < 0) {
+ spin_unlock_irq(&ubd_dev->lock);
+
+ if (ret < 0)
blk_mq_requeue_request(req, true);
- }
+
return BLK_STS_OK;
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ba7e3464ee92..8689e794a43c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -444,10 +444,6 @@ config RETPOLINE
branches. Requires a compiler with -mindirect-branch=thunk-extern
support for full protection. The kernel may run slower.
- Without compiler support, at least indirect branches in assembler
- code are eliminated. Since this includes the syscall entry path,
- it is not entirely pointless.
-
config INTEL_RDT
bool "Intel Resource Director Technology support"
depends on X86 && CPU_SUP_INTEL
@@ -525,7 +521,6 @@ config X86_VSMP
bool "ScaleMP vSMP"
select HYPERVISOR_GUEST
select PARAVIRT
- select PARAVIRT_XXL
depends on X86_64 && PCI
depends on X86_EXTENDED_PLATFORM
depends on SMP
@@ -1005,13 +1000,7 @@ config NR_CPUS
to the kernel image.
config SCHED_SMT
- bool "SMT (Hyperthreading) scheduler support"
- depends on SMP
- ---help---
- SMT scheduler support improves the CPU scheduler's decision making
- when dealing with Intel Pentium 4 chips with HyperThreading at a
- cost of slightly increased overhead in some places. If unsure say
- N here.
+ def_bool y if SMP
config SCHED_MC
def_bool y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5b562e464009..f5d7f4134524 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -213,8 +213,6 @@ ifdef CONFIG_X86_64
KBUILD_LDFLAGS += $(call ld-option, -z max-page-size=0x200000)
endif
-# Speed up the build
-KBUILD_CFLAGS += -pipe
# Workaround for a gcc prelease that unfortunately was shipped in a suse release
KBUILD_CFLAGS += -Wno-sign-compare
#
@@ -222,9 +220,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
-ifneq ($(RETPOLINE_CFLAGS),)
- KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+ifeq ($(RETPOLINE_CFLAGS),)
+ $(error You are building kernel with non-retpoline compiler, please update your compiler.)
endif
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
endif
archscripts: scripts_basic
@@ -239,7 +238,7 @@ archheaders:
archmacros:
$(Q)$(MAKE) $(build)=arch/x86/kernel arch/x86/kernel/macros.s
-ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s -Wa,-
+ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s
export ASM_MACRO_FLAGS
KBUILD_CFLAGS += $(ASM_MACRO_FLAGS)
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 4c881c850125..850b8762e889 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -300,7 +300,7 @@ _start:
# Part 2 of the header, from the old setup.S
.ascii "HdrS" # header signature
- .word 0x020e # header version number (>= 0x0105)
+ .word 0x020d # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
@@ -558,10 +558,6 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
init_size: .long INIT_SIZE # kernel initialization size
handover_offset: .long 0 # Filled in by build.c
-acpi_rsdp_addr: .quad 0 # 64-bit physical pointer to the
- # ACPI RSDP table, added with
- # version 2.14
-
# End of setup header #####################################################
.section ".entrytext", "ax"
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 106911b603bd..374a19712e20 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event)
if (config == -1LL)
return -EINVAL;
- /*
- * Branch tracing:
- */
- if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
- !attr->freq && hwc->sample_period == 1) {
- /* BTS is not supported by this architecture. */
- if (!x86_pmu.bts_active)
- return -EOPNOTSUPP;
-
- /* BTS is currently only allowed for user-mode. */
- if (!attr->exclude_kernel)
- return -EOPNOTSUPP;
-
- /* disallow bts if conflicting events are present */
- if (x86_add_exclusive(x86_lbr_exclusive_lbr))
- return -EBUSY;
-
- event->destroy = hw_perf_lbr_event_destroy;
- }
-
hwc->config |= config;
return 0;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 273c62e81546..ecc3e34ca955 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2306,14 +2306,18 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
return handled;
}
-static bool disable_counter_freezing;
+static bool disable_counter_freezing = true;
static int __init intel_perf_counter_freezing_setup(char *s)
{
- disable_counter_freezing = true;
- pr_info("Intel PMU Counter freezing feature disabled\n");
+ bool res;
+
+ if (kstrtobool(s, &res))
+ return -EINVAL;
+
+ disable_counter_freezing = !res;
return 1;
}
-__setup("disable_counter_freezing", intel_perf_counter_freezing_setup);
+__setup("perf_v4_pmi=", intel_perf_counter_freezing_setup);
/*
* Simplified handler for Arch Perfmon v4:
@@ -2470,16 +2474,7 @@ done:
static struct event_constraint *
intel_bts_constraints(struct perf_event *event)
{
- struct hw_perf_event *hwc = &event->hw;
- unsigned int hw_event, bts_event;
-
- if (event->attr.freq)
- return NULL;
-
- hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
- bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
-
- if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
+ if (unlikely(intel_pmu_has_bts(event)))
return &bts_constraint;
return NULL;
@@ -3098,6 +3093,43 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
return flags;
}
+static int intel_pmu_bts_config(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+
+ if (unlikely(intel_pmu_has_bts(event))) {
+ /* BTS is not supported by this architecture. */
+ if (!x86_pmu.bts_active)
+ return -EOPNOTSUPP;
+
+ /* BTS is currently only allowed for user-mode. */
+ if (!attr->exclude_kernel)
+ return -EOPNOTSUPP;
+
+ /* BTS is not allowed for precise events. */
+ if (attr->precise_ip)
+ return -EOPNOTSUPP;
+
+ /* disallow bts if conflicting events are present */
+ if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+ return -EBUSY;
+
+ event->destroy = hw_perf_lbr_event_destroy;
+ }
+
+ return 0;
+}
+
+static int core_pmu_hw_config(struct perf_event *event)
+{
+ int ret = x86_pmu_hw_config(event);
+
+ if (ret)
+ return ret;
+
+ return intel_pmu_bts_config(event);
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -3105,6 +3137,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
+ ret = intel_pmu_bts_config(event);
+ if (ret)
+ return ret;
+
if (event->attr.precise_ip) {
if (!event->attr.freq) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
@@ -3127,7 +3163,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
/*
* BTS is set up earlier in this path, so don't account twice
*/
- if (!intel_pmu_has_bts(event)) {
+ if (!unlikely(intel_pmu_has_bts(event))) {
/* disallow lbr if conflicting events are present */
if (x86_add_exclusive(x86_lbr_exclusive_lbr))
return -EBUSY;
@@ -3596,7 +3632,7 @@ static __initconst const struct x86_pmu core_pmu = {
.enable_all = core_pmu_enable_all,
.enable = core_pmu_enable_event,
.disable = x86_pmu_disable_event,
- .hw_config = x86_pmu_hw_config,
+ .hw_config = core_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index e17ab885b1e9..cb46d602a6b8 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -129,8 +129,15 @@ struct intel_uncore_box {
struct intel_uncore_extra_reg shared_regs[0];
};
-#define UNCORE_BOX_FLAG_INITIATED 0
-#define UNCORE_BOX_FLAG_CTL_OFFS8 1 /* event config registers are 8-byte apart */
+/* CFL uncore 8th cbox MSRs */
+#define CFL_UNC_CBO_7_PERFEVTSEL0 0xf70
+#define CFL_UNC_CBO_7_PER_CTR0 0xf76
+
+#define UNCORE_BOX_FLAG_INITIATED 0
+/* event config registers are 8-byte apart */
+#define UNCORE_BOX_FLAG_CTL_OFFS8 1
+/* CFL 8th CBOX has different MSR space */
+#define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS 2
struct uncore_event_desc {
struct kobj_attribute attr;
@@ -297,17 +304,27 @@ unsigned int uncore_freerunning_counter(struct intel_uncore_box *box,
static inline
unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
{
- return box->pmu->type->event_ctl +
- (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
- uncore_msr_box_offset(box);
+ if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) {
+ return CFL_UNC_CBO_7_PERFEVTSEL0 +
+ (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx);
+ } else {
+ return box->pmu->type->event_ctl +
+ (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+ uncore_msr_box_offset(box);
+ }
}
static inline
unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
{
- return box->pmu->type->perf_ctr +
- (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
- uncore_msr_box_offset(box);
+ if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) {
+ return CFL_UNC_CBO_7_PER_CTR0 +
+ (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx);
+ } else {
+ return box->pmu->type->perf_ctr +
+ (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+ uncore_msr_box_offset(box);
+ }
}
static inline
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 8527c3e1038b..2593b0d7aeee 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -15,6 +15,25 @@
#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910
#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f
#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f
+#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c
+#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904
+#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914
+#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f
+#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f
+#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc
+#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0
+#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10
+#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4
+#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f
+#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f
+#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2
+#define PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC 0x3e30
+#define PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC 0x3e18
+#define PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC 0x3ec6
+#define PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC 0x3e31
+#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33
+#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca
+#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -202,6 +221,10 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
}
+
+ /* The 8th CBOX has different MSR space */
+ if (box->pmu->pmu_idx == 7)
+ __set_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags);
}
static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
@@ -228,7 +251,7 @@ static struct intel_uncore_ops skl_uncore_msr_ops = {
static struct intel_uncore_type skl_uncore_cbox = {
.name = "cbox",
.num_counters = 4,
- .num_boxes = 5,
+ .num_boxes = 8,
.perf_ctr_bits = 44,
.fixed_ctr_bits = 48,
.perf_ctr = SNB_UNC_CBO_0_PER_CTR0,
@@ -569,7 +592,82 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
-
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
{ /* end: all zeroes */ },
};
@@ -618,6 +716,25 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */
IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */
IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */
+ IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */
+ IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */
+ IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */
+ IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */
+ IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */
+ IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */
+ IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */
+ IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */
+ IMC_DEV(CFL_6H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 6 Cores */
+ IMC_DEV(CFL_2S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 2 Cores Desktop */
+ IMC_DEV(CFL_4S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Desktop */
+ IMC_DEV(CFL_6S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Desktop */
+ IMC_DEV(CFL_8S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Desktop */
+ IMC_DEV(CFL_4S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Work Station */
+ IMC_DEV(CFL_6S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Work Station */
+ IMC_DEV(CFL_8S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Work Station */
+ IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */
+ IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */
+ IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */
{ /* end marker */ }
};
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index adae087cecdd..78d7b7031bfc 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -859,11 +859,16 @@ static inline int amd_pmu_init(void)
static inline bool intel_pmu_has_bts(struct perf_event *event)
{
- if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
- !event->attr.freq && event->hw.sample_period == 1)
- return true;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int hw_event, bts_event;
+
+ if (event->attr.freq)
+ return false;
+
+ hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
+ bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
- return false;
+ return hw_event == bts_event && hwc->sample_period == 1;
}
int intel_pmu_save_and_restart(struct perf_event *event);
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 5f7290e6e954..69dcdf195b61 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
"3: movl $-2,%[err]\n\t" \
"jmp 2b\n\t" \
".popsection\n\t" \
- _ASM_EXTABLE_UA(1b, 3b) \
+ _ASM_EXTABLE(1b, 3b) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55e51ff7e421..fbda5a917c5b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1094,7 +1094,8 @@ struct kvm_x86_ops {
bool (*has_wbinvd_exit)(void);
u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
- void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+ /* Returns actual tsc_offset set in active VMCS */
+ u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4da9b1c58d28..c1a812bd5a27 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -221,6 +221,8 @@ static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_am
int mce_available(struct cpuinfo_x86 *c);
bool mce_is_memory_error(struct mce *m);
+bool mce_is_correctable(struct mce *m);
+int mce_usable_address(struct mce *m);
DECLARE_PER_CPU(unsigned, mce_exception_count);
DECLARE_PER_CPU(unsigned, mce_poll_count);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 0d6271cce198..1d0a7778e163 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -232,7 +232,7 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
: "cc");
}
#endif
- return hv_status;
+ return hv_status;
}
/*
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 80f4a4f38c79..c8f73efb4ece 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,9 +41,10 @@
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
-#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
+#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
+#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
-#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 80dc14422495..032b6009baab 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -3,6 +3,8 @@
#ifndef _ASM_X86_NOSPEC_BRANCH_H_
#define _ASM_X86_NOSPEC_BRANCH_H_
+#include <linux/static_key.h>
+
#include <asm/alternative.h>
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
@@ -162,11 +164,12 @@
_ASM_PTR " 999b\n\t" \
".popsection\n\t"
-#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_64
/*
- * Since the inline asm uses the %V modifier which is only in newer GCC,
- * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ * Inline asm uses the %V modifier which is only in newer GCC
+ * which is ensured when CONFIG_RETPOLINE is defined.
*/
# define CALL_NOSPEC \
ANNOTATE_NOSPEC_ALTERNATIVE \
@@ -181,7 +184,7 @@
X86_FEATURE_RETPOLINE_AMD)
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
-#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+#else /* CONFIG_X86_32 */
/*
* For i386 we use the original ret-equivalent retpoline, because
* otherwise we'll run out of registers. We don't care about CET
@@ -211,6 +214,7 @@
X86_FEATURE_RETPOLINE_AMD)
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
#else /* No retpoline for C / inline asm */
# define CALL_NOSPEC "call *%[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
@@ -219,13 +223,19 @@
/* The Spectre V2 mitigation variants */
enum spectre_v2_mitigation {
SPECTRE_V2_NONE,
- SPECTRE_V2_RETPOLINE_MINIMAL,
- SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
SPECTRE_V2_RETPOLINE_GENERIC,
SPECTRE_V2_RETPOLINE_AMD,
SPECTRE_V2_IBRS_ENHANCED,
};
+/* The indirect branch speculation control variants */
+enum spectre_v2_user_mitigation {
+ SPECTRE_V2_USER_NONE,
+ SPECTRE_V2_USER_STRICT,
+ SPECTRE_V2_USER_PRCTL,
+ SPECTRE_V2_USER_SECCOMP,
+};
+
/* The Speculative Store Bypass disable variants */
enum ssb_mitigation {
SPEC_STORE_BYPASS_NONE,
@@ -303,6 +313,10 @@ do { \
preempt_enable(); \
} while (0)
+DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
+DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
+DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
+
#endif /* __ASSEMBLY__ */
/*
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index cd0cf1c568b4..8f657286d599 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -33,12 +33,14 @@
/*
* Set __PAGE_OFFSET to the most negative possible address +
- * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a
- * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
- * what Xen requires.
+ * PGDIR_SIZE*17 (pgd slot 273).
+ *
+ * The gap is to allow a space for LDT remap for PTI (1 pgd slot) and space for
+ * a hypervisor (16 slots). Choosing 16 slots for a hypervisor is arbitrary,
+ * but it's what Xen requires.
*/
-#define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL)
-#define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL)
+#define __PAGE_OFFSET_BASE_L5 _AC(0xff11000000000000, UL)
+#define __PAGE_OFFSET_BASE_L4 _AC(0xffff888000000000, UL)
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
#define __PAGE_OFFSET page_offset_base
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 04edd2d58211..84bd9bdc1987 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -111,9 +111,7 @@ extern unsigned int ptrs_per_p4d;
*/
#define MAXMEM (1UL << MAX_PHYSMEM_BITS)
-#define LDT_PGD_ENTRY_L4 -3UL
-#define LDT_PGD_ENTRY_L5 -112UL
-#define LDT_PGD_ENTRY (pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
+#define LDT_PGD_ENTRY -240UL
#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#define LDT_END_ADDR (LDT_BASE_ADDR + PGDIR_SIZE)
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 87623c6b13db..bd5ac6cc37db 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -13,12 +13,15 @@
#define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire
static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
{
- u32 val = 0;
-
- if (GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c,
- "I", _Q_PENDING_OFFSET))
- val |= _Q_PENDING_VAL;
+ u32 val;
+ /*
+ * We can't use GEN_BINARY_RMWcc() inside an if() stmt because asm goto
+ * and CONFIG_PROFILE_ALL_BRANCHES=y results in a label inside a
+ * statement expression, which GCC doesn't like.
+ */
+ val = GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c,
+ "I", _Q_PENDING_OFFSET) * _Q_PENDING_VAL;
val |= atomic_read(&lock->val) & ~_Q_PENDING_MASK;
return val;
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index ae7c2c5cd7f0..5393babc0598 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}
+static inline u64 stibp_tif_to_spec_ctrl(u64 tifn)
+{
+ BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
+ return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
+}
+
static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
{
BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}
+static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl)
+{
+ BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
+ return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
+}
+
static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
{
return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
@@ -70,11 +82,7 @@ extern void speculative_store_bypass_ht_init(void);
static inline void speculative_store_bypass_ht_init(void) { }
#endif
-extern void speculative_store_bypass_update(unsigned long tif);
-
-static inline void speculative_store_bypass_update_current(void)
-{
- speculative_store_bypass_update(current_thread_info()->flags);
-}
+extern void speculation_ctrl_update(unsigned long tif);
+extern void speculation_ctrl_update_current(void);
#endif
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 36bd243843d6..7cf1a270d891 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
__visible struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next);
-struct tss_struct;
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
- struct tss_struct *tss);
/* This runs runs on the previous thread's stack. */
static inline void prepare_switch_to(struct task_struct *next)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 2ff2a30a264f..82b73b75d67c 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -79,10 +79,12 @@ struct thread_info {
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
-#define TIF_SSBD 5 /* Reduced data speculation */
+#define TIF_SSBD 5 /* Speculative store bypass disable */
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
+#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
+#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_PATCH_PENDING 13 /* pending live patching update */
@@ -110,6 +112,8 @@ struct thread_info {
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
+#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
@@ -145,8 +149,18 @@ struct thread_info {
_TIF_FSCHECK)
/* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW \
- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
+#define _TIF_WORK_CTXSW_BASE \
+ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \
+ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
+
+/*
+ * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
+ */
+#ifdef CONFIG_SMP
+# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB)
+#else
+# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE)
+#endif
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index d760611cfc35..f4204bf377fc 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -169,10 +169,14 @@ struct tlb_state {
#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+ /* Last user mm for optimizing IBPB */
+ union {
+ struct mm_struct *last_user_mm;
+ unsigned long last_user_mm_ibpb;
+ };
+
u16 loaded_mm_asid;
u16 next_asid;
- /* last user mm's ctx id */
- u64 last_ctx_id;
/*
* We can be in one of several states:
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 0f842104862c..b85a7c54c6a1 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -303,6 +303,4 @@ extern void x86_init_noop(void);
extern void x86_init_uint_noop(unsigned int unused);
extern bool x86_pnpbios_disabled(void);
-void x86_verify_bootdata_version(void);
-
#endif
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 123e669bf363..790ce08e41f2 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -9,7 +9,7 @@
#include <linux/mm.h>
#include <linux/device.h>
-#include <linux/uaccess.h>
+#include <asm/extable.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -93,12 +93,39 @@ clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
*/
static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
{
- return __put_user(val, (unsigned long __user *)addr);
+ int ret = 0;
+
+ asm volatile("1: mov %[val], %[ptr]\n"
+ "2:\n"
+ ".section .fixup, \"ax\"\n"
+ "3: sub $1, %[ret]\n"
+ " jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : [ret] "+r" (ret), [ptr] "=m" (*addr)
+ : [val] "r" (val));
+
+ return ret;
}
-static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val)
+static inline int xen_safe_read_ulong(const unsigned long *addr,
+ unsigned long *val)
{
- return __get_user(*val, (unsigned long __user *)addr);
+ int ret = 0;
+ unsigned long rval = ~0ul;
+
+ asm volatile("1: mov %[ptr], %[rval]\n"
+ "2:\n"
+ ".section .fixup, \"ax\"\n"
+ "3: sub $1, %[ret]\n"
+ " jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : [ret] "+r" (ret), [rval] "+r" (rval)
+ : [ptr] "m" (*addr));
+ *val = rval;
+
+ return ret;
}
#ifdef CONFIG_XEN_PV
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 22f89d040ddd..60733f137e9a 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -16,9 +16,6 @@
#define RAMDISK_PROMPT_FLAG 0x8000
#define RAMDISK_LOAD_FLAG 0x4000
-/* version flags */
-#define VERSION_WRITTEN 0x8000
-
/* loadflags */
#define LOADED_HIGH (1<<0)
#define KASLR_FLAG (1<<1)
@@ -89,7 +86,6 @@ struct setup_header {
__u64 pref_address;
__u32 init_size;
__u32 handover_offset;
- __u64 acpi_rsdp_addr;
} __attribute__((packed));
struct sys_desc_table {
@@ -159,7 +155,8 @@ struct boot_params {
__u8 _pad2[4]; /* 0x054 */
__u64 tboot_addr; /* 0x058 */
struct ist_info ist_info; /* 0x060 */
- __u8 _pad3[16]; /* 0x070 */
+ __u64 acpi_rsdp_addr; /* 0x070 */
+ __u8 _pad3[8]; /* 0x078 */
__u8 hd0_info[16]; /* obsolete! */ /* 0x080 */
__u8 hd1_info[16]; /* obsolete! */ /* 0x090 */
struct sys_desc_table sys_desc_table; /* obsolete! */ /* 0x0a0 */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 92c76bf97ad8..06635fbca81c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1776,5 +1776,5 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
u64 x86_default_get_root_pointer(void)
{
- return boot_params.hdr.acpi_rsdp_addr;
+ return boot_params.acpi_rsdp_addr;
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c37e66e493bf..500278f5308e 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/nospec.h>
#include <linux/prctl.h>
+#include <linux/sched/smt.h>
#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
@@ -53,6 +54,13 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
u64 __ro_after_init x86_amd_ls_cfg_base;
u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
+/* Control conditional STIPB in switch_to() */
+DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
+/* Control conditional IBPB in switch_mm() */
+DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
+/* Control unconditional IBPB in switch_mm() */
+DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
+
void __init check_bugs(void)
{
identify_boot_cpu();
@@ -123,31 +131,6 @@ void __init check_bugs(void)
#endif
}
-/* The kernel command line selection */
-enum spectre_v2_mitigation_cmd {
- SPECTRE_V2_CMD_NONE,
- SPECTRE_V2_CMD_AUTO,
- SPECTRE_V2_CMD_FORCE,
- SPECTRE_V2_CMD_RETPOLINE,
- SPECTRE_V2_CMD_RETPOLINE_GENERIC,
- SPECTRE_V2_CMD_RETPOLINE_AMD,
-};
-
-static const char *spectre_v2_strings[] = {
- [SPECTRE_V2_NONE] = "Vulnerable",
- [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
- [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
- [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
- [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
- [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
-};
-
-#undef pr_fmt
-#define pr_fmt(fmt) "Spectre V2 : " fmt
-
-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
- SPECTRE_V2_NONE;
-
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
@@ -169,6 +152,10 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
static_cpu_has(X86_FEATURE_AMD_SSBD))
hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
+ /* Conditional STIBP enabled? */
+ if (static_branch_unlikely(&switch_to_cond_stibp))
+ hostval |= stibp_tif_to_spec_ctrl(ti->flags);
+
if (hostval != guestval) {
msrval = setguest ? guestval : hostval;
wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
@@ -202,7 +189,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
ssbd_spec_ctrl_to_tif(hostval);
- speculative_store_bypass_update(tif);
+ speculation_ctrl_update(tif);
}
}
EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
@@ -217,6 +204,15 @@ static void x86_amd_ssb_disable(void)
wrmsrl(MSR_AMD64_LS_CFG, msrval);
}
+#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V2 : " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+ SPECTRE_V2_NONE;
+
+static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
+ SPECTRE_V2_USER_NONE;
+
#ifdef RETPOLINE
static bool spectre_v2_bad_module;
@@ -238,67 +234,217 @@ static inline const char *spectre_v2_module_string(void)
static inline const char *spectre_v2_module_string(void) { return ""; }
#endif
-static void __init spec2_print_if_insecure(const char *reason)
+static inline bool match_option(const char *arg, int arglen, const char *opt)
{
- if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
- pr_info("%s selected on command line.\n", reason);
+ int len = strlen(opt);
+
+ return len == arglen && !strncmp(arg, opt, len);
}
-static void __init spec2_print_if_secure(const char *reason)
+/* The kernel command line selection for spectre v2 */
+enum spectre_v2_mitigation_cmd {
+ SPECTRE_V2_CMD_NONE,
+ SPECTRE_V2_CMD_AUTO,
+ SPECTRE_V2_CMD_FORCE,
+ SPECTRE_V2_CMD_RETPOLINE,
+ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+ SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+enum spectre_v2_user_cmd {
+ SPECTRE_V2_USER_CMD_NONE,
+ SPECTRE_V2_USER_CMD_AUTO,
+ SPECTRE_V2_USER_CMD_FORCE,
+ SPECTRE_V2_USER_CMD_PRCTL,
+ SPECTRE_V2_USER_CMD_PRCTL_IBPB,
+ SPECTRE_V2_USER_CMD_SECCOMP,
+ SPECTRE_V2_USER_CMD_SECCOMP_IBPB,
+};
+
+static const char * const spectre_v2_user_strings[] = {
+ [SPECTRE_V2_USER_NONE] = "User space: Vulnerable",
+ [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection",
+ [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl",
+ [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl",
+};
+
+static const struct {
+ const char *option;
+ enum spectre_v2_user_cmd cmd;
+ bool secure;
+} v2_user_options[] __initdata = {
+ { "auto", SPECTRE_V2_USER_CMD_AUTO, false },
+ { "off", SPECTRE_V2_USER_CMD_NONE, false },
+ { "on", SPECTRE_V2_USER_CMD_FORCE, true },
+ { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false },
+ { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false },
+ { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false },
+ { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false },
+};
+
+static void __init spec_v2_user_print_cond(const char *reason, bool secure)
{
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
- pr_info("%s selected on command line.\n", reason);
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
+ pr_info("spectre_v2_user=%s forced on command line.\n", reason);
}
-static inline bool retp_compiler(void)
+static enum spectre_v2_user_cmd __init
+spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
{
- return __is_defined(RETPOLINE);
+ char arg[20];
+ int ret, i;
+
+ switch (v2_cmd) {
+ case SPECTRE_V2_CMD_NONE:
+ return SPECTRE_V2_USER_CMD_NONE;
+ case SPECTRE_V2_CMD_FORCE:
+ return SPECTRE_V2_USER_CMD_FORCE;
+ default:
+ break;
+ }
+
+ ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
+ arg, sizeof(arg));
+ if (ret < 0)
+ return SPECTRE_V2_USER_CMD_AUTO;
+
+ for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
+ if (match_option(arg, ret, v2_user_options[i].option)) {
+ spec_v2_user_print_cond(v2_user_options[i].option,
+ v2_user_options[i].secure);
+ return v2_user_options[i].cmd;
+ }
+ }
+
+ pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg);
+ return SPECTRE_V2_USER_CMD_AUTO;
}
-static inline bool match_option(const char *arg, int arglen, const char *opt)
+static void __init
+spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
{
- int len = strlen(opt);
+ enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
+ bool smt_possible = IS_ENABLED(CONFIG_SMP);
+ enum spectre_v2_user_cmd cmd;
- return len == arglen && !strncmp(arg, opt, len);
+ if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
+ return;
+
+ if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
+ cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+ smt_possible = false;
+
+ cmd = spectre_v2_parse_user_cmdline(v2_cmd);
+ switch (cmd) {
+ case SPECTRE_V2_USER_CMD_NONE:
+ goto set_mode;
+ case SPECTRE_V2_USER_CMD_FORCE:
+ mode = SPECTRE_V2_USER_STRICT;
+ break;
+ case SPECTRE_V2_USER_CMD_PRCTL:
+ case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
+ mode = SPECTRE_V2_USER_PRCTL;
+ break;
+ case SPECTRE_V2_USER_CMD_AUTO:
+ case SPECTRE_V2_USER_CMD_SECCOMP:
+ case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
+ if (IS_ENABLED(CONFIG_SECCOMP))
+ mode = SPECTRE_V2_USER_SECCOMP;
+ else
+ mode = SPECTRE_V2_USER_PRCTL;
+ break;
+ }
+
+ /* Initialize Indirect Branch Prediction Barrier */
+ if (boot_cpu_has(X86_FEATURE_IBPB)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
+
+ switch (cmd) {
+ case SPECTRE_V2_USER_CMD_FORCE:
+ case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
+ case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
+ static_branch_enable(&switch_mm_always_ibpb);
+ break;
+ case SPECTRE_V2_USER_CMD_PRCTL:
+ case SPECTRE_V2_USER_CMD_AUTO:
+ case SPECTRE_V2_USER_CMD_SECCOMP:
+ static_branch_enable(&switch_mm_cond_ibpb);
+ break;
+ default:
+ break;
+ }
+
+ pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
+ static_key_enabled(&switch_mm_always_ibpb) ?
+ "always-on" : "conditional");
+ }
+
+ /* If enhanced IBRS is enabled no STIPB required */
+ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ return;
+
+ /*
+ * If SMT is not possible or STIBP is not available clear the STIPB
+ * mode.
+ */
+ if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
+ mode = SPECTRE_V2_USER_NONE;
+set_mode:
+ spectre_v2_user = mode;
+ /* Only print the STIBP mode when SMT possible */
+ if (smt_possible)
+ pr_info("%s\n", spectre_v2_user_strings[mode]);
}
+static const char * const spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
+ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+ [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
+};
+
static const struct {
const char *option;
enum spectre_v2_mitigation_cmd cmd;
bool secure;
-} mitigation_options[] = {
- { "off", SPECTRE_V2_CMD_NONE, false },
- { "on", SPECTRE_V2_CMD_FORCE, true },
- { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
- { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
- { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
- { "auto", SPECTRE_V2_CMD_AUTO, false },
+} mitigation_options[] __initdata = {
+ { "off", SPECTRE_V2_CMD_NONE, false },
+ { "on", SPECTRE_V2_CMD_FORCE, true },
+ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
+ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
+ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
+ { "auto", SPECTRE_V2_CMD_AUTO, false },
};
+static void __init spec_v2_print_cond(const char *reason, bool secure)
+{
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
+ pr_info("%s selected on command line.\n", reason);
+}
+
static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
{
+ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
char arg[20];
int ret, i;
- enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
return SPECTRE_V2_CMD_NONE;
- else {
- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
- if (ret < 0)
- return SPECTRE_V2_CMD_AUTO;
- for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
- if (!match_option(arg, ret, mitigation_options[i].option))
- continue;
- cmd = mitigation_options[i].cmd;
- break;
- }
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
+ if (ret < 0)
+ return SPECTRE_V2_CMD_AUTO;
- if (i >= ARRAY_SIZE(mitigation_options)) {
- pr_err("unknown option (%s). Switching to AUTO select\n", arg);
- return SPECTRE_V2_CMD_AUTO;
- }
+ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
+ if (!match_option(arg, ret, mitigation_options[i].option))
+ continue;
+ cmd = mitigation_options[i].cmd;
+ break;
+ }
+
+ if (i >= ARRAY_SIZE(mitigation_options)) {
+ pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+ return SPECTRE_V2_CMD_AUTO;
}
if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
@@ -316,54 +462,11 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}
- if (mitigation_options[i].secure)
- spec2_print_if_secure(mitigation_options[i].option);
- else
- spec2_print_if_insecure(mitigation_options[i].option);
-
+ spec_v2_print_cond(mitigation_options[i].option,
+ mitigation_options[i].secure);
return cmd;
}
-static bool stibp_needed(void)
-{
- if (spectre_v2_enabled == SPECTRE_V2_NONE)
- return false;
-
- if (!boot_cpu_has(X86_FEATURE_STIBP))
- return false;
-
- return true;
-}
-
-static void update_stibp_msr(void *info)
-{
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
-}
-
-void arch_smt_update(void)
-{
- u64 mask;
-
- if (!stibp_needed())
- return;
-
- mutex_lock(&spec_ctrl_mutex);
- mask = x86_spec_ctrl_base;
- if (cpu_smt_control == CPU_SMT_ENABLED)
- mask |= SPEC_CTRL_STIBP;
- else
- mask &= ~SPEC_CTRL_STIBP;
-
- if (mask != x86_spec_ctrl_base) {
- pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n",
- cpu_smt_control == CPU_SMT_ENABLED ?
- "Enabling" : "Disabling");
- x86_spec_ctrl_base = mask;
- on_each_cpu(update_stibp_msr, NULL, 1);
- }
- mutex_unlock(&spec_ctrl_mutex);
-}
-
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -417,14 +520,12 @@ retpoline_auto:
pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
goto retpoline_generic;
}
- mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
- SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+ mode = SPECTRE_V2_RETPOLINE_AMD;
setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
} else {
retpoline_generic:
- mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
- SPECTRE_V2_RETPOLINE_MINIMAL;
+ mode = SPECTRE_V2_RETPOLINE_GENERIC;
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
}
@@ -443,12 +544,6 @@ specv2_set_mode:
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
- /* Initialize Indirect Branch Prediction Barrier if supported */
- if (boot_cpu_has(X86_FEATURE_IBPB)) {
- setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
- pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
- }
-
/*
* Retpoline means the kernel is safe because it has no indirect
* branches. Enhanced IBRS protects firmware too, so, enable restricted
@@ -465,10 +560,67 @@ specv2_set_mode:
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
+ /* Set up IBPB and STIBP depending on the general spectre V2 command */
+ spectre_v2_user_select_mitigation(cmd);
+
/* Enable STIBP if appropriate */
arch_smt_update();
}
+static void update_stibp_msr(void * __unused)
+{
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+}
+
+/* Update x86_spec_ctrl_base in case SMT state changed. */
+static void update_stibp_strict(void)
+{
+ u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
+
+ if (sched_smt_active())
+ mask |= SPEC_CTRL_STIBP;
+
+ if (mask == x86_spec_ctrl_base)
+ return;
+
+ pr_info("Update user space SMT mitigation: STIBP %s\n",
+ mask & SPEC_CTRL_STIBP ? "always-on" : "off");
+ x86_spec_ctrl_base = mask;
+ on_each_cpu(update_stibp_msr, NULL, 1);
+}
+
+/* Update the static key controlling the evaluation of TIF_SPEC_IB */
+static void update_indir_branch_cond(void)
+{
+ if (sched_smt_active())
+ static_branch_enable(&switch_to_cond_stibp);
+ else
+ static_branch_disable(&switch_to_cond_stibp);
+}
+
+void arch_smt_update(void)
+{
+ /* Enhanced IBRS implies STIBP. No update required. */
+ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ return;
+
+ mutex_lock(&spec_ctrl_mutex);
+
+ switch (spectre_v2_user) {
+ case SPECTRE_V2_USER_NONE:
+ break;
+ case SPECTRE_V2_USER_STRICT:
+ update_stibp_strict();
+ break;
+ case SPECTRE_V2_USER_PRCTL:
+ case SPECTRE_V2_USER_SECCOMP:
+ update_indir_branch_cond();
+ break;
+ }
+
+ mutex_unlock(&spec_ctrl_mutex);
+}
+
#undef pr_fmt
#define pr_fmt(fmt) "Speculative Store Bypass: " fmt
@@ -483,7 +635,7 @@ enum ssb_mitigation_cmd {
SPEC_STORE_BYPASS_CMD_SECCOMP,
};
-static const char *ssb_strings[] = {
+static const char * const ssb_strings[] = {
[SPEC_STORE_BYPASS_NONE] = "Vulnerable",
[SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
[SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
@@ -493,7 +645,7 @@ static const char *ssb_strings[] = {
static const struct {
const char *option;
enum ssb_mitigation_cmd cmd;
-} ssb_mitigation_options[] = {
+} ssb_mitigation_options[] __initdata = {
{ "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
{ "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
{ "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
@@ -604,10 +756,25 @@ static void ssb_select_mitigation(void)
#undef pr_fmt
#define pr_fmt(fmt) "Speculation prctl: " fmt
-static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+static void task_update_spec_tif(struct task_struct *tsk)
{
- bool update;
+ /* Force the update of the real TIF bits */
+ set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE);
+ /*
+ * Immediately update the speculation control MSRs for the current
+ * task, but for a non-current task delay setting the CPU
+ * mitigation until it is scheduled next.
+ *
+ * This can only happen for SECCOMP mitigation. For PRCTL it's
+ * always the current task.
+ */
+ if (tsk == current)
+ speculation_ctrl_update_current();
+}
+
+static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
return -ENXIO;
@@ -618,28 +785,56 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
if (task_spec_ssb_force_disable(task))
return -EPERM;
task_clear_spec_ssb_disable(task);
- update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
+ task_update_spec_tif(task);
break;
case PR_SPEC_DISABLE:
task_set_spec_ssb_disable(task);
- update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ task_update_spec_tif(task);
break;
case PR_SPEC_FORCE_DISABLE:
task_set_spec_ssb_disable(task);
task_set_spec_ssb_force_disable(task);
- update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ task_update_spec_tif(task);
break;
default:
return -ERANGE;
}
+ return 0;
+}
- /*
- * If being set on non-current task, delay setting the CPU
- * mitigation until it is next scheduled.
- */
- if (task == current && update)
- speculative_store_bypass_update_current();
-
+static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+ return 0;
+ /*
+ * Indirect branch speculation is always disabled in strict
+ * mode.
+ */
+ if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
+ return -EPERM;
+ task_clear_spec_ib_disable(task);
+ task_update_spec_tif(task);
+ break;
+ case PR_SPEC_DISABLE:
+ case PR_SPEC_FORCE_DISABLE:
+ /*
+ * Indirect branch speculation is always allowed when
+ * mitigation is force disabled.
+ */
+ if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+ return -EPERM;
+ if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
+ return 0;
+ task_set_spec_ib_disable(task);
+ if (ctrl == PR_SPEC_FORCE_DISABLE)
+ task_set_spec_ib_force_disable(task);
+ task_update_spec_tif(task);
+ break;
+ default:
+ return -ERANGE;
+ }
return 0;
}
@@ -649,6 +844,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
switch (which) {
case PR_SPEC_STORE_BYPASS:
return ssb_prctl_set(task, ctrl);
+ case PR_SPEC_INDIRECT_BRANCH:
+ return ib_prctl_set(task, ctrl);
default:
return -ENODEV;
}
@@ -659,6 +856,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
{
if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+ if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
+ ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
}
#endif
@@ -681,11 +880,35 @@ static int ssb_prctl_get(struct task_struct *task)
}
}
+static int ib_prctl_get(struct task_struct *task)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ return PR_SPEC_NOT_AFFECTED;
+
+ switch (spectre_v2_user) {
+ case SPECTRE_V2_USER_NONE:
+ return PR_SPEC_ENABLE;
+ case SPECTRE_V2_USER_PRCTL:
+ case SPECTRE_V2_USER_SECCOMP:
+ if (task_spec_ib_force_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+ if (task_spec_ib_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+ case SPECTRE_V2_USER_STRICT:
+ return PR_SPEC_DISABLE;
+ default:
+ return PR_SPEC_NOT_AFFECTED;
+ }
+}
+
int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
{
switch (which) {
case PR_SPEC_STORE_BYPASS:
return ssb_prctl_get(task);
+ case PR_SPEC_INDIRECT_BRANCH:
+ return ib_prctl_get(task);
default:
return -ENODEV;
}
@@ -823,7 +1046,7 @@ early_param("l1tf", l1tf_cmdline);
#define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion"
#if IS_ENABLED(CONFIG_KVM_INTEL)
-static const char *l1tf_vmx_states[] = {
+static const char * const l1tf_vmx_states[] = {
[VMENTER_L1D_FLUSH_AUTO] = "auto",
[VMENTER_L1D_FLUSH_NEVER] = "vulnerable",
[VMENTER_L1D_FLUSH_COND] = "conditional cache flushes",
@@ -839,13 +1062,14 @@ static ssize_t l1tf_show_state(char *buf)
if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED ||
(l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER &&
- cpu_smt_control == CPU_SMT_ENABLED))
+ sched_smt_active())) {
return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation]);
+ }
return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation],
- cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled");
+ sched_smt_active() ? "vulnerable" : "disabled");
}
#else
static ssize_t l1tf_show_state(char *buf)
@@ -854,11 +1078,39 @@ static ssize_t l1tf_show_state(char *buf)
}
#endif
+static char *stibp_state(void)
+{
+ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ return "";
+
+ switch (spectre_v2_user) {
+ case SPECTRE_V2_USER_NONE:
+ return ", STIBP: disabled";
+ case SPECTRE_V2_USER_STRICT:
+ return ", STIBP: forced";
+ case SPECTRE_V2_USER_PRCTL:
+ case SPECTRE_V2_USER_SECCOMP:
+ if (static_key_enabled(&switch_to_cond_stibp))
+ return ", STIBP: conditional";
+ }
+ return "";
+}
+
+static char *ibpb_state(void)
+{
+ if (boot_cpu_has(X86_FEATURE_IBPB)) {
+ if (static_key_enabled(&switch_mm_always_ibpb))
+ return ", IBPB: always-on";
+ if (static_key_enabled(&switch_mm_cond_ibpb))
+ return ", IBPB: conditional";
+ return ", IBPB: disabled";
+ }
+ return "";
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
- int ret;
-
if (!boot_cpu_has_bug(bug))
return sprintf(buf, "Not affected\n");
@@ -876,13 +1128,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
return sprintf(buf, "Mitigation: __user pointer sanitization\n");
case X86_BUG_SPECTRE_V2:
- ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
- boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ ibpb_state(),
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
- (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "",
+ stibp_state(),
boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
spectre_v2_module_string());
- return ret;
case X86_BUG_SPEC_STORE_BYPASS:
return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8c66d2fc8f81..36d2696c9563 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -485,7 +485,7 @@ static void mce_report_event(struct pt_regs *regs)
* be somewhat complicated (e.g. segment offset would require an instruction
* parser). So only support physical addresses up to page granuality for now.
*/
-static int mce_usable_address(struct mce *m)
+int mce_usable_address(struct mce *m)
{
if (!(m->status & MCI_STATUS_ADDRV))
return 0;
@@ -505,6 +505,7 @@ static int mce_usable_address(struct mce *m)
return 1;
}
+EXPORT_SYMBOL_GPL(mce_usable_address);
bool mce_is_memory_error(struct mce *m)
{
@@ -534,7 +535,7 @@ bool mce_is_memory_error(struct mce *m)
}
EXPORT_SYMBOL_GPL(mce_is_memory_error);
-static bool mce_is_correctable(struct mce *m)
+bool mce_is_correctable(struct mce *m)
{
if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
return false;
@@ -547,6 +548,7 @@ static bool mce_is_correctable(struct mce *m)
return true;
}
+EXPORT_SYMBOL_GPL(mce_is_correctable);
static bool cec_add_mce(struct mce *m)
{
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index dd33c357548f..e12454e21b8a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -56,7 +56,7 @@
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF 0xF000
-static bool thresholding_en;
+static bool thresholding_irq_en;
static const char * const th_names[] = {
"load_store",
@@ -534,9 +534,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
set_offset:
offset = setup_APIC_mce_threshold(offset, new);
-
- if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
- mce_threshold_vector = amd_threshold_interrupt;
+ if (offset == new)
+ thresholding_irq_en = true;
done:
mce_threshold_block_init(&b, offset);
@@ -1357,9 +1356,6 @@ int mce_threshold_remove_device(unsigned int cpu)
{
unsigned int bank;
- if (!thresholding_en)
- return 0;
-
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
@@ -1377,9 +1373,6 @@ int mce_threshold_create_device(unsigned int cpu)
struct threshold_bank **bp;
int err = 0;
- if (!thresholding_en)
- return 0;
-
bp = per_cpu(threshold_banks, cpu);
if (bp)
return 0;
@@ -1408,9 +1401,6 @@ static __init int threshold_init_device(void)
{
unsigned lcpu = 0;
- if (mce_threshold_vector == amd_threshold_interrupt)
- thresholding_en = true;
-
/* to hit CPUs online before the notifier is up */
for_each_online_cpu(lcpu) {
int err = mce_threshold_create_device(lcpu);
@@ -1419,6 +1409,9 @@ static __init int threshold_init_device(void)
return err;
}
+ if (thresholding_irq_en)
+ mce_threshold_vector = amd_threshold_interrupt;
+
return 0;
}
/*
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 1c72f3819eb1..e81a2db42df7 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -20,6 +20,7 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kexec.h>
+#include <linux/i8253.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
@@ -295,6 +296,16 @@ static void __init ms_hyperv_init_platform(void)
if (efi_enabled(EFI_BOOT))
x86_platform.get_nmi_reason = hv_get_nmi_reason;
+ /*
+ * Hyper-V VMs have a PIT emulation quirk such that zeroing the
+ * counter register during PIT shutdown restarts the PIT. So it
+ * continues to interrupt @18.2 HZ. Setting i8253_clear_counter
+ * to false tells pit_shutdown() not to zero the counter so that
+ * the PIT really is shutdown. Generation 2 VMs don't have a PIT,
+ * and setting this value has no effect.
+ */
+ i8253_clear_counter_on_shutdown = false;
+
#if IS_ENABLED(CONFIG_HYPERV)
/*
* Setup the hook to get control post apic initialization.
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index d9ab49bed8af..0eda91f8eeac 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -77,7 +77,7 @@ static __init int setup_vmw_sched_clock(char *s)
}
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);
-static unsigned long long vmware_sched_clock(void)
+static unsigned long long notrace vmware_sched_clock(void)
{
unsigned long long ns;
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 61a949d84dfa..d99a8ee9e185 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
}
+ local_bh_disable();
fpu->initialized = 1;
- preempt_disable();
fpu__restore(fpu);
- preempt_enable();
+ local_bh_enable();
return err;
} else {
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 01ebcb6f263e..7ee8067cbf45 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -994,7 +994,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
{
unsigned long old;
int faulted;
- struct ftrace_graph_ent trace;
unsigned long return_hooker = (unsigned long)
&return_to_handler;
@@ -1046,19 +1045,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
return;
}
- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace)) {
+ if (function_graph_enter(old, self_addr, frame_pointer, parent))
*parent = old;
- return;
- }
-
- if (ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer, parent) == -EBUSY) {
- *parent = old;
- return;
- }
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 76fa3b836598..ec6fefbfd3c0 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -37,7 +37,6 @@ asmlinkage __visible void __init i386_start_kernel(void)
cr4_init_shadow();
sanitize_boot_params(&boot_params);
- x86_verify_bootdata_version();
x86_early_init_platform_quirks();
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 7663a8eb602b..16b1cbd3a61e 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -457,8 +457,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
if (!boot_params.hdr.version)
copy_bootdata(__va(real_mode_data));
- x86_verify_bootdata_version();
-
x86_early_init_platform_quirks();
switch (boot_params.hdr.hardware_subarch) {
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index ab18e0884dc6..6135ae8ce036 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -199,14 +199,6 @@ static void sanity_check_ldt_mapping(struct mm_struct *mm)
/*
* If PTI is enabled, this maps the LDT into the kernelmode and
* usermode tables for the given mm.
- *
- * There is no corresponding unmap function. Even if the LDT is freed, we
- * leave the PTEs around until the slot is reused or the mm is destroyed.
- * This is harmless: the LDT is always in ordinary memory, and no one will
- * access the freed slot.
- *
- * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
- * it useful, and the flush would slow down modify_ldt().
*/
static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
@@ -214,8 +206,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
unsigned long va;
bool is_vmalloc;
spinlock_t *ptl;
- pgd_t *pgd;
- int i;
+ int i, nr_pages;
if (!static_cpu_has(X86_FEATURE_PTI))
return 0;
@@ -229,16 +220,11 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
/* Check if the current mappings are sane */
sanity_check_ldt_mapping(mm);
- /*
- * Did we already have the top level entry allocated? We can't
- * use pgd_none() for this because it doens't do anything on
- * 4-level page table kernels.
- */
- pgd = pgd_offset(mm, LDT_BASE_ADDR);
-
is_vmalloc = is_vmalloc_addr(ldt->entries);
- for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
+ nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
+
+ for (i = 0; i < nr_pages; i++) {
unsigned long offset = i << PAGE_SHIFT;
const void *src = (char *)ldt->entries + offset;
unsigned long pfn;
@@ -272,13 +258,39 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
/* Propagate LDT mapping to the user page-table */
map_ldt_struct_to_user(mm);
- va = (unsigned long)ldt_slot_va(slot);
- flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, PAGE_SHIFT, false);
-
ldt->slot = slot;
return 0;
}
+static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
+{
+ unsigned long va;
+ int i, nr_pages;
+
+ if (!ldt)
+ return;
+
+ /* LDT map/unmap is only required for PTI */
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
+
+ for (i = 0; i < nr_pages; i++) {
+ unsigned long offset = i << PAGE_SHIFT;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
+ ptep = get_locked_pte(mm, va, &ptl);
+ pte_clear(mm, va, ptep);
+ pte_unmap_unlock(ptep, ptl);
+ }
+
+ va = (unsigned long)ldt_slot_va(ldt->slot);
+ flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
+}
+
#else /* !CONFIG_PAGE_TABLE_ISOLATION */
static int
@@ -286,6 +298,10 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
return 0;
}
+
+static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
+{
+}
#endif /* CONFIG_PAGE_TABLE_ISOLATION */
static void free_ldt_pgtables(struct mm_struct *mm)
@@ -524,6 +540,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
}
install_ldt(mm, new_ldt);
+ unmap_ldt_struct(mm, old_ldt);
free_ldt_struct(old_ldt);
error = 0;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c93fcfdf1673..7d31192296a8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -40,6 +40,8 @@
#include <asm/prctl.h>
#include <asm/spec-ctrl.h>
+#include "process.h"
+
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
* no more per-task TSS's. The TSS size is kept cacheline-aligned
@@ -252,11 +254,12 @@ void arch_setup_new_exec(void)
enable_cpuid();
}
-static inline void switch_to_bitmap(struct tss_struct *tss,
- struct thread_struct *prev,
+static inline void switch_to_bitmap(struct thread_struct *prev,
struct thread_struct *next,
unsigned long tifp, unsigned long tifn)
{
+ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+
if (tifn & _TIF_IO_BITMAP) {
/*
* Copy the relevant range of the IO bitmap.
@@ -395,32 +398,85 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
}
-static __always_inline void intel_set_ssb_state(unsigned long tifn)
+/*
+ * Update the MSRs managing speculation control, during context switch.
+ *
+ * tifp: Previous task's thread flags
+ * tifn: Next task's thread flags
+ */
+static __always_inline void __speculation_ctrl_update(unsigned long tifp,
+ unsigned long tifn)
{
- u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
+ unsigned long tif_diff = tifp ^ tifn;
+ u64 msr = x86_spec_ctrl_base;
+ bool updmsr = false;
+
+ /*
+ * If TIF_SSBD is different, select the proper mitigation
+ * method. Note that if SSBD mitigation is disabled or permanentely
+ * enabled this branch can't be taken because nothing can set
+ * TIF_SSBD.
+ */
+ if (tif_diff & _TIF_SSBD) {
+ if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+ amd_set_ssb_virt_state(tifn);
+ } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
+ amd_set_core_ssb_state(tifn);
+ } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+ static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+ msr |= ssbd_tif_to_spec_ctrl(tifn);
+ updmsr = true;
+ }
+ }
+
+ /*
+ * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled,
+ * otherwise avoid the MSR write.
+ */
+ if (IS_ENABLED(CONFIG_SMP) &&
+ static_branch_unlikely(&switch_to_cond_stibp)) {
+ updmsr |= !!(tif_diff & _TIF_SPEC_IB);
+ msr |= stibp_tif_to_spec_ctrl(tifn);
+ }
- wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+ if (updmsr)
+ wrmsrl(MSR_IA32_SPEC_CTRL, msr);
}
-static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
+static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
{
- if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
- amd_set_ssb_virt_state(tifn);
- else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
- amd_set_core_ssb_state(tifn);
- else
- intel_set_ssb_state(tifn);
+ if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) {
+ if (task_spec_ssb_disable(tsk))
+ set_tsk_thread_flag(tsk, TIF_SSBD);
+ else
+ clear_tsk_thread_flag(tsk, TIF_SSBD);
+
+ if (task_spec_ib_disable(tsk))
+ set_tsk_thread_flag(tsk, TIF_SPEC_IB);
+ else
+ clear_tsk_thread_flag(tsk, TIF_SPEC_IB);
+ }
+ /* Return the updated threadinfo flags*/
+ return task_thread_info(tsk)->flags;
}
-void speculative_store_bypass_update(unsigned long tif)
+void speculation_ctrl_update(unsigned long tif)
{
+ /* Forced update. Make sure all relevant TIF flags are different */
preempt_disable();
- __speculative_store_bypass_update(tif);
+ __speculation_ctrl_update(~tif, tif);
preempt_enable();
}
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
- struct tss_struct *tss)
+/* Called from seccomp/prctl update */
+void speculation_ctrl_update_current(void)
+{
+ preempt_disable();
+ speculation_ctrl_update(speculation_ctrl_update_tif(current));
+ preempt_enable();
+}
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev, *next;
unsigned long tifp, tifn;
@@ -430,7 +486,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
tifn = READ_ONCE(task_thread_info(next_p)->flags);
tifp = READ_ONCE(task_thread_info(prev_p)->flags);
- switch_to_bitmap(tss, prev, next, tifp, tifn);
+ switch_to_bitmap(prev, next, tifp, tifn);
propagate_user_return_notify(prev_p, next_p);
@@ -451,8 +507,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
if ((tifp ^ tifn) & _TIF_NOCPUID)
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
- if ((tifp ^ tifn) & _TIF_SSBD)
- __speculative_store_bypass_update(tifn);
+ if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) {
+ __speculation_ctrl_update(tifp, tifn);
+ } else {
+ speculation_ctrl_update_tif(prev_p);
+ tifn = speculation_ctrl_update_tif(next_p);
+
+ /* Enforce MSR update to ensure consistent state */
+ __speculation_ctrl_update(~tifn, tifn);
+ }
}
/*
diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h
new file mode 100644
index 000000000000..898e97cf6629
--- /dev/null
+++ b/arch/x86/kernel/process.h
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Code shared between 32 and 64 bit
+
+#include <asm/spec-ctrl.h>
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);
+
+/*
+ * This needs to be inline to optimize for the common case where no extra
+ * work needs to be done.
+ */
+static inline void switch_to_extra(struct task_struct *prev,
+ struct task_struct *next)
+{
+ unsigned long next_tif = task_thread_info(next)->flags;
+ unsigned long prev_tif = task_thread_info(prev)->flags;
+
+ if (IS_ENABLED(CONFIG_SMP)) {
+ /*
+ * Avoid __switch_to_xtra() invocation when conditional
+ * STIPB is disabled and the only different bit is
+ * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not
+ * in the TIF_WORK_CTXSW masks.
+ */
+ if (!static_branch_likely(&switch_to_cond_stibp)) {
+ prev_tif &= ~_TIF_SPEC_IB;
+ next_tif &= ~_TIF_SPEC_IB;
+ }
+ }
+
+ /*
+ * __switch_to_xtra() handles debug registers, i/o bitmaps,
+ * speculation mitigations etc.
+ */
+ if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT ||
+ prev_tif & _TIF_WORK_CTXSW_PREV))
+ __switch_to_xtra(prev, next);
+}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 5046a3c9dec2..d3e593eb189f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -59,6 +59,8 @@
#include <asm/intel_rdt_sched.h>
#include <asm/proto.h>
+#include "process.h"
+
void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -232,7 +234,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
- struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
@@ -264,12 +265,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
set_iopl_mask(next->iopl);
- /*
- * Now maybe handle debug registers and/or IO bitmaps
- */
- if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
- task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
- __switch_to_xtra(prev_p, next_p, tss);
+ switch_to_extra(prev_p, next_p);
/*
* Leave lazy mode, flushing any hypercalls made here.
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0e0b4288a4b2..bbfbf017065c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -60,6 +60,8 @@
#include <asm/unistd_32_ia32.h>
#endif
+#include "process.h"
+
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
@@ -553,7 +555,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
- struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(irq_count) != -1);
@@ -617,12 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* Reload sp0. */
update_task_stack(next_p);
- /*
- * Now maybe reload the debug registers and handle I/O bitmaps
- */
- if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
- task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
- __switch_to_xtra(prev_p, next_p, tss);
+ switch_to_extra(prev_p, next_p);
#ifdef CONFIG_XEN_PV
/*
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b74e7bfed6ab..d494b9bfe618 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1280,23 +1280,6 @@ void __init setup_arch(char **cmdline_p)
unwind_init();
}
-/*
- * From boot protocol 2.14 onwards we expect the bootloader to set the
- * version to "0x8000 | <used version>". In case we find a version >= 2.14
- * without the 0x8000 we assume the boot loader supports 2.13 only and
- * reset the version accordingly. The 0x8000 flag is removed in any case.
- */
-void __init x86_verify_bootdata_version(void)
-{
- if (boot_params.hdr.version & VERSION_WRITTEN)
- boot_params.hdr.version &= ~VERSION_WRITTEN;
- else if (boot_params.hdr.version >= 0x020e)
- boot_params.hdr.version = 0x020d;
-
- if (boot_params.hdr.version < 0x020e)
- boot_params.hdr.acpi_rsdp_addr = 0;
-}
-
#ifdef CONFIG_X86_32
static struct resource video_ram_resource = {
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 1eae5af491c2..891a75dbc131 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -26,65 +26,8 @@
#define TOPOLOGY_REGISTER_OFFSET 0x10
-#if defined CONFIG_PCI && defined CONFIG_PARAVIRT_XXL
-/*
- * Interrupt control on vSMPowered systems:
- * ~AC is a shadow of IF. If IF is 'on' AC should be 'off'
- * and vice versa.
- */
-
-asmlinkage __visible unsigned long vsmp_save_fl(void)
-{
- unsigned long flags = native_save_fl();
-
- if (!(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC))
- flags &= ~X86_EFLAGS_IF;
- return flags;
-}
-PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl);
-
-__visible void vsmp_restore_fl(unsigned long flags)
-{
- if (flags & X86_EFLAGS_IF)
- flags &= ~X86_EFLAGS_AC;
- else
- flags |= X86_EFLAGS_AC;
- native_restore_fl(flags);
-}
-PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl);
-
-asmlinkage __visible void vsmp_irq_disable(void)
-{
- unsigned long flags = native_save_fl();
-
- native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
-}
-PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable);
-
-asmlinkage __visible void vsmp_irq_enable(void)
-{
- unsigned long flags = native_save_fl();
-
- native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
-}
-PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable);
-
-static unsigned __init vsmp_patch(u8 type, void *ibuf,
- unsigned long addr, unsigned len)
-{
- switch (type) {
- case PARAVIRT_PATCH(irq.irq_enable):
- case PARAVIRT_PATCH(irq.irq_disable):
- case PARAVIRT_PATCH(irq.save_fl):
- case PARAVIRT_PATCH(irq.restore_fl):
- return paravirt_patch_default(type, ibuf, addr, len);
- default:
- return native_patch(type, ibuf, addr, len);
- }
-
-}
-
-static void __init set_vsmp_pv_ops(void)
+#ifdef CONFIG_PCI
+static void __init set_vsmp_ctl(void)
{
void __iomem *address;
unsigned int cap, ctl, cfg;
@@ -109,28 +52,12 @@ static void __init set_vsmp_pv_ops(void)
}
#endif
- if (cap & ctl & (1 << 4)) {
- /* Setup irq ops and turn on vSMP IRQ fastpath handling */
- pv_ops.irq.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
- pv_ops.irq.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable);
- pv_ops.irq.save_fl = PV_CALLEE_SAVE(vsmp_save_fl);
- pv_ops.irq.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl);
- pv_ops.init.patch = vsmp_patch;
- ctl &= ~(1 << 4);
- }
writel(ctl, address + 4);
ctl = readl(address + 4);
pr_info("vSMP CTL: control set to:0x%08x\n", ctl);
early_iounmap(address, 8);
}
-#else
-static void __init set_vsmp_pv_ops(void)
-{
-}
-#endif
-
-#ifdef CONFIG_PCI
static int is_vsmp = -1;
static void __init detect_vsmp_box(void)
@@ -164,11 +91,14 @@ static int is_vsmp_box(void)
{
return 0;
}
+static void __init set_vsmp_ctl(void)
+{
+}
#endif
static void __init vsmp_cap_cpus(void)
{
-#if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP)
+#if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
void __iomem *address;
unsigned int cfg, topology, node_shift, maxcpus;
@@ -221,6 +151,6 @@ void __init vsmp_init(void)
vsmp_cap_cpus();
- set_vsmp_pv_ops();
+ set_vsmp_ctl();
return;
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 89db20f8cb70..c4533d05c214 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -55,7 +55,7 @@
#define PRIo64 "o"
/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
-#define apic_debug(fmt, arg...)
+#define apic_debug(fmt, arg...) do {} while (0)
/* 14 is the version for Xeon and Pentium 8.4.8*/
#define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
@@ -576,6 +576,11 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);
+ if (unlikely(!map)) {
+ count = -EOPNOTSUPP;
+ goto out;
+ }
+
if (min > map->max_apic_id)
goto out;
/* Bits above cluster_size are masked in the caller. */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index cf5f572f2305..7c03c0f35444 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5074,9 +5074,9 @@ static bool need_remote_flush(u64 old, u64 new)
}
static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
- const u8 *new, int *bytes)
+ int *bytes)
{
- u64 gentry;
+ u64 gentry = 0;
int r;
/*
@@ -5088,22 +5088,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
*gpa &= ~(gpa_t)7;
*bytes = 8;
- r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8);
- if (r)
- gentry = 0;
- new = (const u8 *)&gentry;
}
- switch (*bytes) {
- case 4:
- gentry = *(const u32 *)new;
- break;
- case 8:
- gentry = *(const u64 *)new;
- break;
- default:
- gentry = 0;
- break;
+ if (*bytes == 4 || *bytes == 8) {
+ r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes);
+ if (r)
+ gentry = 0;
}
return gentry;
@@ -5207,8 +5197,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
- gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
-
/*
* No need to care whether allocation memory is successful
* or not since pte prefetch is skiped if it does not have
@@ -5217,6 +5205,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
mmu_topup_memory_caches(vcpu);
spin_lock(&vcpu->kvm->mmu_lock);
+
+ gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
+
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0e21ccc46792..cc6467b35a85 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1446,7 +1446,7 @@ static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
return vcpu->arch.tsc_offset;
}
-static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 g_tsc_offset = 0;
@@ -1464,6 +1464,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
+ return svm->vmcb->control.tsc_offset;
}
static void avic_init_vmcb(struct vcpu_svm *svm)
@@ -1664,20 +1665,23 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
static int avic_init_access_page(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- int ret;
+ int ret = 0;
+ mutex_lock(&kvm->slots_lock);
if (kvm->arch.apic_access_page_done)
- return 0;
+ goto out;
- ret = x86_set_memory_region(kvm,
- APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
- APIC_DEFAULT_PHYS_BASE,
- PAGE_SIZE);
+ ret = __x86_set_memory_region(kvm,
+ APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+ APIC_DEFAULT_PHYS_BASE,
+ PAGE_SIZE);
if (ret)
- return ret;
+ goto out;
kvm->arch.apic_access_page_done = true;
- return 0;
+out:
+ mutex_unlock(&kvm->slots_lock);
+ return ret;
}
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
@@ -2189,21 +2193,31 @@ out:
return ERR_PTR(err);
}
+static void svm_clear_current_vmcb(struct vmcb *vmcb)
+{
+ int i;
+
+ for_each_online_cpu(i)
+ cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
+}
+
static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ /*
+ * The vmcb page can be recycled, causing a false negative in
+ * svm_vcpu_load(). So, ensure that no logical CPU has this
+ * vmcb page recorded as its current vmcb.
+ */
+ svm_clear_current_vmcb(svm->vmcb);
+
__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
__free_page(virt_to_page(svm->nested.hsave));
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
- /*
- * The vmcb page can be recycled, causing a false negative in
- * svm_vcpu_load(). So do a full IBPB now.
- */
- indirect_branch_prediction_barrier();
}
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -7149,7 +7163,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.has_wbinvd_exit = svm_has_wbinvd_exit,
.read_l1_tsc_offset = svm_read_l1_tsc_offset,
- .write_tsc_offset = svm_write_tsc_offset,
+ .write_l1_tsc_offset = svm_write_l1_tsc_offset,
.set_tdp_cr3 = set_tdp_cr3,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4555077d69ce..02edd9960e9d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -174,6 +174,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
* refer SDM volume 3b section 21.6.13 & 22.1.3.
*/
static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
+module_param(ple_gap, uint, 0444);
static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
module_param(ple_window, uint, 0444);
@@ -984,6 +985,7 @@ struct vcpu_vmx {
struct shared_msr_entry *guest_msrs;
int nmsrs;
int save_nmsrs;
+ bool guest_msrs_dirty;
unsigned long host_idt_base;
#ifdef CONFIG_X86_64
u64 msr_host_kernel_gs_base;
@@ -1306,7 +1308,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
u16 error_code);
static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
-static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
u32 msr, int type);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
@@ -1610,12 +1612,6 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- /* We don't support disabling the feature for simplicity. */
- if (vmx->nested.enlightened_vmcs_enabled)
- return 0;
-
- vmx->nested.enlightened_vmcs_enabled = true;
-
/*
* vmcs_version represents the range of supported Enlightened VMCS
* versions: lower 8 bits is the minimal version, higher 8 bits is the
@@ -1625,6 +1621,12 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
if (vmcs_version)
*vmcs_version = (KVM_EVMCS_VERSION << 8) | 1;
+ /* We don't support disabling the feature for simplicity. */
+ if (vmx->nested.enlightened_vmcs_enabled)
+ return 0;
+
+ vmx->nested.enlightened_vmcs_enabled = true;
+
vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
@@ -2897,6 +2899,20 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
vmx->req_immediate_exit = false;
+ /*
+ * Note that guest MSRs to be saved/restored can also be changed
+ * when guest state is loaded. This happens when guest transitions
+ * to/from long-mode by setting MSR_EFER.LMA.
+ */
+ if (!vmx->loaded_cpu_state || vmx->guest_msrs_dirty) {
+ vmx->guest_msrs_dirty = false;
+ for (i = 0; i < vmx->save_nmsrs; ++i)
+ kvm_set_shared_msr(vmx->guest_msrs[i].index,
+ vmx->guest_msrs[i].data,
+ vmx->guest_msrs[i].mask);
+
+ }
+
if (vmx->loaded_cpu_state)
return;
@@ -2957,11 +2973,6 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
vmcs_writel(HOST_GS_BASE, gs_base);
host_state->gs_base = gs_base;
}
-
- for (i = 0; i < vmx->save_nmsrs; ++i)
- kvm_set_shared_msr(vmx->guest_msrs[i].index,
- vmx->guest_msrs[i].data,
- vmx->guest_msrs[i].mask);
}
static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
@@ -3436,6 +3447,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
move_msr_up(vmx, index, save_nmsrs++);
vmx->save_nmsrs = save_nmsrs;
+ vmx->guest_msrs_dirty = true;
if (cpu_has_vmx_msr_bitmap())
vmx_update_msr_bitmap(&vmx->vcpu);
@@ -3452,11 +3464,9 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
return vcpu->arch.tsc_offset;
}
-/*
- * writes 'offset' into guest's timestamp counter offset register
- */
-static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
+ u64 active_offset = offset;
if (is_guest_mode(vcpu)) {
/*
* We're here if L1 chose not to trap WRMSR to TSC. According
@@ -3464,17 +3474,16 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
* set for L2 remains unchanged, and still needs to be added
* to the newly set TSC to get L2's TSC.
*/
- struct vmcs12 *vmcs12;
- /* recalculate vmcs02.TSC_OFFSET: */
- vmcs12 = get_vmcs12(vcpu);
- vmcs_write64(TSC_OFFSET, offset +
- (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
- vmcs12->tsc_offset : 0));
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING))
+ active_offset += vmcs12->tsc_offset;
} else {
trace_kvm_write_tsc_offset(vcpu->vcpu_id,
vmcs_read64(TSC_OFFSET), offset);
- vmcs_write64(TSC_OFFSET, offset);
}
+
+ vmcs_write64(TSC_OFFSET, active_offset);
+ return active_offset;
}
/*
@@ -5944,7 +5953,7 @@ static void free_vpid(int vpid)
spin_unlock(&vmx_vpid_lock);
}
-static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
u32 msr, int type)
{
int f = sizeof(unsigned long);
@@ -5982,7 +5991,7 @@ static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bit
}
}
-static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
u32 msr, int type)
{
int f = sizeof(unsigned long);
@@ -6020,7 +6029,7 @@ static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitm
}
}
-static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
u32 msr, int type, bool value)
{
if (value)
@@ -8664,8 +8673,6 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
- vmcs12->hdr.revision_id = evmcs->revision_id;
-
/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
vmcs12->tpr_threshold = evmcs->tpr_threshold;
vmcs12->guest_rip = evmcs->guest_rip;
@@ -9369,7 +9376,30 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page);
- if (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION) {
+ /*
+ * Currently, KVM only supports eVMCS version 1
+ * (== KVM_EVMCS_VERSION) and thus we expect guest to set this
+ * value to first u32 field of eVMCS which should specify eVMCS
+ * VersionNumber.
+ *
+ * Guest should be aware of supported eVMCS versions by host by
+ * examining CPUID.0x4000000A.EAX[0:15]. Host userspace VMM is
+ * expected to set this CPUID leaf according to the value
+ * returned in vmcs_version from nested_enable_evmcs().
+ *
+ * However, it turns out that Microsoft Hyper-V fails to comply
+ * to their own invented interface: When Hyper-V use eVMCS, it
+ * just sets first u32 field of eVMCS to revision_id specified
+ * in MSR_IA32_VMX_BASIC. Instead of used eVMCS version number
+ * which is one of the supported versions specified in
+ * CPUID.0x4000000A.EAX[0:15].
+ *
+ * To overcome Hyper-V bug, we accept here either a supported
+ * eVMCS version or VMCS12 revision_id as valid values for first
+ * u32 field of eVMCS.
+ */
+ if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
+ (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
nested_release_evmcs(vcpu);
return 0;
}
@@ -9390,9 +9420,11 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
* present in struct hv_enlightened_vmcs, ...). Make sure there
* are no leftovers.
*/
- if (from_launch)
- memset(vmx->nested.cached_vmcs12, 0,
- sizeof(*vmx->nested.cached_vmcs12));
+ if (from_launch) {
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ memset(vmcs12, 0, sizeof(*vmcs12));
+ vmcs12->hdr.revision_id = VMCS12_REVISION;
+ }
}
return 1;
@@ -15062,7 +15094,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
.read_l1_tsc_offset = vmx_read_l1_tsc_offset,
- .write_tsc_offset = vmx_write_tsc_offset,
+ .write_l1_tsc_offset = vmx_write_l1_tsc_offset,
.set_tdp_cr3 = vmx_set_cr3,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5cd5647120f2..d02937760c3b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1665,8 +1665,7 @@ EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
- kvm_x86_ops->write_tsc_offset(vcpu, offset);
- vcpu->arch.tsc_offset = offset;
+ vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset);
}
static inline bool kvm_check_tsc_unstable(void)
@@ -1794,7 +1793,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
s64 adjustment)
{
- kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment);
+ u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
+ kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
}
static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
@@ -6918,6 +6918,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
clock_pairing.nsec = ts.tv_nsec;
clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
clock_pairing.flags = 0;
+ memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
ret = 0;
if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
@@ -7455,7 +7456,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
else {
if (vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
- kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
+ if (ioapic_in_kernel(vcpu->kvm))
+ kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
if (is_guest_mode(vcpu))
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index bddd6b3cee1d..03b6b4c2238d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -7,7 +7,6 @@
#include <linux/export.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>
-#include <linux/ptrace.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
@@ -31,6 +30,12 @@
*/
/*
+ * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
+ * stored in cpu_tlb_state.last_user_mm_ibpb.
+ */
+#define LAST_USER_MM_IBPB 0x1UL
+
+/*
* We get here when we do something requiring a TLB invalidation
* but could not go invalidate all of the contexts. We do the
* necessary invalidation by clearing out the 'ctx_id' which
@@ -181,17 +186,87 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
}
}
-static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id)
+static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
+{
+ unsigned long next_tif = task_thread_info(next)->flags;
+ unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
+
+ return (unsigned long)next->mm | ibpb;
+}
+
+static void cond_ibpb(struct task_struct *next)
{
+ if (!next || !next->mm)
+ return;
+
/*
- * Check if the current (previous) task has access to the memory
- * of the @tsk (next) task. If access is denied, make sure to
- * issue a IBPB to stop user->user Spectre-v2 attacks.
- *
- * Note: __ptrace_may_access() returns 0 or -ERRNO.
+ * Both, the conditional and the always IBPB mode use the mm
+ * pointer to avoid the IBPB when switching between tasks of the
+ * same process. Using the mm pointer instead of mm->context.ctx_id
+ * opens a hypothetical hole vs. mm_struct reuse, which is more or
+ * less impossible to control by an attacker. Aside of that it
+ * would only affect the first schedule so the theoretically
+ * exposed data is not really interesting.
*/
- return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id &&
- ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB));
+ if (static_branch_likely(&switch_mm_cond_ibpb)) {
+ unsigned long prev_mm, next_mm;
+
+ /*
+ * This is a bit more complex than the always mode because
+ * it has to handle two cases:
+ *
+ * 1) Switch from a user space task (potential attacker)
+ * which has TIF_SPEC_IB set to a user space task
+ * (potential victim) which has TIF_SPEC_IB not set.
+ *
+ * 2) Switch from a user space task (potential attacker)
+ * which has TIF_SPEC_IB not set to a user space task
+ * (potential victim) which has TIF_SPEC_IB set.
+ *
+ * This could be done by unconditionally issuing IBPB when
+ * a task which has TIF_SPEC_IB set is either scheduled in
+ * or out. Though that results in two flushes when:
+ *
+ * - the same user space task is scheduled out and later
+ * scheduled in again and only a kernel thread ran in
+ * between.
+ *
+ * - a user space task belonging to the same process is
+ * scheduled in after a kernel thread ran in between
+ *
+ * - a user space task belonging to the same process is
+ * scheduled in immediately.
+ *
+ * Optimize this with reasonably small overhead for the
+ * above cases. Mangle the TIF_SPEC_IB bit into the mm
+ * pointer of the incoming task which is stored in
+ * cpu_tlbstate.last_user_mm_ibpb for comparison.
+ */
+ next_mm = mm_mangle_tif_spec_ib(next);
+ prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
+
+ /*
+ * Issue IBPB only if the mm's are different and one or
+ * both have the IBPB bit set.
+ */
+ if (next_mm != prev_mm &&
+ (next_mm | prev_mm) & LAST_USER_MM_IBPB)
+ indirect_branch_prediction_barrier();
+
+ this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
+ }
+
+ if (static_branch_unlikely(&switch_mm_always_ibpb)) {
+ /*
+ * Only flush when switching to a user space task with a
+ * different context than the user space task which ran
+ * last on this CPU.
+ */
+ if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
+ indirect_branch_prediction_barrier();
+ this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
+ }
+ }
}
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
@@ -292,22 +367,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
new_asid = prev_asid;
need_flush = true;
} else {
- u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
-
/*
* Avoid user/user BTB poisoning by flushing the branch
* predictor when switching between processes. This stops
* one process from doing Spectre-v2 attacks on another.
- *
- * As an optimization, flush indirect branches only when
- * switching into a processes that can't be ptrace by the
- * current one (as in such case, attacker has much more
- * convenient way how to tamper with the next process than
- * branch buffer poisoning).
*/
- if (static_cpu_has(X86_FEATURE_USE_IBPB) &&
- ibpb_needed(tsk, last_ctx_id))
- indirect_branch_prediction_barrier();
+ cond_ibpb(tsk);
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
@@ -365,14 +430,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
- /*
- * Record last user mm's context id, so we can avoid
- * flushing branch buffer with IBPB if we switch back
- * to the same user.
- */
- if (next != &init_mm)
- this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
-
/* Make sure we write CR3 before loaded_mm. */
barrier();
@@ -441,7 +498,7 @@ void initialize_tlbstate_and_flush(void)
write_cr3(build_cr3(mm->pgd, 0));
/* Reinitialize tlbstate. */
- this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
this_cpu_write(cpu_tlbstate.next_asid, 1);
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e996e8e744cb..750f46ad018a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -10,7 +10,6 @@
#include <xen/xen.h>
#include <xen/features.h>
#include <xen/page.h>
-#include <xen/interface/memory.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
@@ -346,80 +345,3 @@ void xen_arch_unregister_cpu(int num)
}
EXPORT_SYMBOL(xen_arch_unregister_cpu);
#endif
-
-#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
-void __init arch_xen_balloon_init(struct resource *hostmem_resource)
-{
- struct xen_memory_map memmap;
- int rc;
- unsigned int i, last_guest_ram;
- phys_addr_t max_addr = PFN_PHYS(max_pfn);
- struct e820_table *xen_e820_table;
- const struct e820_entry *entry;
- struct resource *res;
-
- if (!xen_initial_domain())
- return;
-
- xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
- if (!xen_e820_table)
- return;
-
- memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
- set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
- rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
- if (rc) {
- pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
- goto out;
- }
-
- last_guest_ram = 0;
- for (i = 0; i < memmap.nr_entries; i++) {
- if (xen_e820_table->entries[i].addr >= max_addr)
- break;
- if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
- last_guest_ram = i;
- }
-
- entry = &xen_e820_table->entries[last_guest_ram];
- if (max_addr >= entry->addr + entry->size)
- goto out; /* No unallocated host RAM. */
-
- hostmem_resource->start = max_addr;
- hostmem_resource->end = entry->addr + entry->size;
-
- /*
- * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
- * as unavailable. The rest of that region can be used for hotplug-based
- * ballooning.
- */
- for (; i < memmap.nr_entries; i++) {
- entry = &xen_e820_table->entries[i];
-
- if (entry->type == E820_TYPE_RAM)
- continue;
-
- if (entry->addr >= hostmem_resource->end)
- break;
-
- res = kzalloc(sizeof(*res), GFP_KERNEL);
- if (!res)
- goto out;
-
- res->name = "Unavailable host RAM";
- res->start = entry->addr;
- res->end = (entry->addr + entry->size < hostmem_resource->end) ?
- entry->addr + entry->size : hostmem_resource->end;
- rc = insert_resource(hostmem_resource, res);
- if (rc) {
- pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
- __func__, res->start, res->end, rc);
- kfree(res);
- goto out;
- }
- }
-
- out:
- kfree(xen_e820_table);
-}
-#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 0d7b3ae4960b..a5d7ed125337 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1905,7 +1905,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
init_top_pgt[0] = __pgd(0);
/* Pre-constructed entries are in pfn, so convert to mfn */
- /* L4[272] -> level3_ident_pgt */
+ /* L4[273] -> level3_ident_pgt */
/* L4[511] -> level3_kernel_pgt */
convert_pfn_mfn(init_top_pgt);
@@ -1925,8 +1925,8 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
addr[0] = (unsigned long)pgd;
addr[1] = (unsigned long)l3;
addr[2] = (unsigned long)l2;
- /* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
- * Both L4[272][0] and L4[511][510] have entries that point to the same
+ /* Graft it onto L4[273][0]. Note that we creating an aliasing problem:
+ * Both L4[273][0] and L4[511][510] have entries that point to the same
* L2 (PMD) tables. Meaning that if you modify it in __va space
* it will be also modified in the __ka space! (But if you just
* modify the PMD table to point to other PTE's or none, then you
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 2bce7958ce8b..0766a08bdf45 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -69,6 +69,11 @@ void xen_mc_flush(void)
trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
+#if MC_DEBUG
+ memcpy(b->debug, b->entries,
+ b->mcidx * sizeof(struct multicall_entry));
+#endif
+
switch (b->mcidx) {
case 0:
/* no-op */
@@ -87,32 +92,34 @@ void xen_mc_flush(void)
break;
default:
-#if MC_DEBUG
- memcpy(b->debug, b->entries,
- b->mcidx * sizeof(struct multicall_entry));
-#endif
-
if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
BUG();
for (i = 0; i < b->mcidx; i++)
if (b->entries[i].result < 0)
ret++;
+ }
+ if (WARN_ON(ret)) {
+ pr_err("%d of %d multicall(s) failed: cpu %d\n",
+ ret, b->mcidx, smp_processor_id());
+ for (i = 0; i < b->mcidx; i++) {
+ if (b->entries[i].result < 0) {
#if MC_DEBUG
- if (ret) {
- printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
- ret, smp_processor_id());
- dump_stack();
- for (i = 0; i < b->mcidx; i++) {
- printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n",
- i+1, b->mcidx,
+ pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n",
+ i + 1,
b->debug[i].op,
b->debug[i].args[0],
b->entries[i].result,
b->caller[i]);
+#else
+ pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\n",
+ i + 1,
+ b->entries[i].op,
+ b->entries[i].args[0],
+ b->entries[i].result);
+#endif
}
}
-#endif
}
b->mcidx = 0;
@@ -126,8 +133,6 @@ void xen_mc_flush(void)
b->cbidx = 0;
local_irq_restore(flags);
-
- WARN_ON(ret);
}
struct multicall_space __xen_mc_entry(size_t args)
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index b06731705529..055e37e43541 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -656,8 +656,7 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
/*
* The interface requires atomic updates on p2m elements.
- * xen_safe_write_ulong() is using __put_user which does an atomic
- * store via asm().
+ * xen_safe_write_ulong() is using an atomic store via asm().
*/
if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn)))
return true;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 1163e33121fb..075ed47993bb 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -808,6 +808,7 @@ char * __init xen_memory_setup(void)
addr = xen_e820_table.entries[0].addr;
size = xen_e820_table.entries[0].size;
while (i < xen_e820_table.nr_entries) {
+ bool discard = false;
chunk_size = size;
type = xen_e820_table.entries[i].type;
@@ -823,10 +824,11 @@ char * __init xen_memory_setup(void)
xen_add_extra_mem(pfn_s, n_pfns);
xen_max_p2m_pfn = pfn_s + n_pfns;
} else
- type = E820_TYPE_UNUSABLE;
+ discard = true;
}
- xen_align_and_add_e820_region(addr, chunk_size, type);
+ if (!discard)
+ xen_align_and_add_e820_region(addr, chunk_size, type);
addr += chunk_size;
size -= chunk_size;
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 441c88262169..3776122c87cc 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -3,24 +3,21 @@
* Split spinlock implementation out into its own file, so it can be
* compiled in a FTRACE-compatible way.
*/
-#include <linux/kernel_stat.h>
+#include <linux/kernel.h>
#include <linux/spinlock.h>
-#include <linux/debugfs.h>
-#include <linux/log2.h>
-#include <linux/gfp.h>
#include <linux/slab.h>
+#include <linux/atomic.h>
#include <asm/paravirt.h>
#include <asm/qspinlock.h>
-#include <xen/interface/xen.h>
#include <xen/events.h>
#include "xen-ops.h"
-#include "debugfs.h"
static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(char *, irq_name);
+static DEFINE_PER_CPU(atomic_t, xen_qlock_wait_nest);
static bool xen_pvspin = true;
static void xen_qlock_kick(int cpu)
@@ -39,25 +36,25 @@ static void xen_qlock_kick(int cpu)
*/
static void xen_qlock_wait(u8 *byte, u8 val)
{
- unsigned long flags;
int irq = __this_cpu_read(lock_kicker_irq);
+ atomic_t *nest_cnt = this_cpu_ptr(&xen_qlock_wait_nest);
/* If kicker interrupts not initialized yet, just spin */
if (irq == -1 || in_nmi())
return;
- /* Guard against reentry. */
- local_irq_save(flags);
+ /* Detect reentry. */
+ atomic_inc(nest_cnt);
- /* If irq pending already clear it. */
- if (xen_test_irq_pending(irq)) {
+ /* If irq pending already and no nested call clear it. */
+ if (atomic_read(nest_cnt) == 1 && xen_test_irq_pending(irq)) {
xen_clear_irq_pending(irq);
} else if (READ_ONCE(*byte) == val) {
/* Block until irq becomes pending (or a spurious wakeup) */
xen_poll_irq(irq);
}
- local_irq_restore(flags);
+ atomic_dec(nest_cnt);
}
static irqreturn_t dummy_handler(int irq, void *dev_id)
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index be9bfd9aa865..34a23016dd14 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -23,7 +23,11 @@
# error Linux requires the Xtensa Windowed Registers Option.
#endif
-#define ARCH_SLAB_MINALIGN XCHAL_DATA_WIDTH
+/* Xtensa ABI requires stack alignment to be at least 16 */
+
+#define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16)
+
+#define ARCH_SLAB_MINALIGN STACK_ALIGN
/*
* User space process size: 1 GB.
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index 67904f55f188..120dd746a147 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -94,14 +94,14 @@ int main(void)
DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
#if XTENSA_HAVE_COPROCESSORS
- DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp));
- DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp));
+ DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
+ DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
+ DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2));
+ DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3));
+ DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4));
+ DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5));
+ DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6));
+ DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7));
#endif
DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user));
DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t));
diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S
index 2f76118ecf62..9053a5622d2c 100644
--- a/arch/xtensa/kernel/head.S
+++ b/arch/xtensa/kernel/head.S
@@ -88,9 +88,12 @@ _SetupMMU:
initialize_mmu
#if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY
rsr a2, excsave1
- movi a3, 0x08000000
+ movi a3, XCHAL_KSEG_PADDR
+ bltu a2, a3, 1f
+ sub a2, a2, a3
+ movi a3, XCHAL_KSEG_SIZE
bgeu a2, a3, 1f
- movi a3, 0xd0000000
+ movi a3, XCHAL_KSEG_CACHED_VADDR
add a2, a2, a3
wsr a2, excsave1
1:
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 483dcfb6e681..4bb68133a72a 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -94,18 +94,21 @@ void coprocessor_release_all(struct thread_info *ti)
void coprocessor_flush_all(struct thread_info *ti)
{
- unsigned long cpenable;
+ unsigned long cpenable, old_cpenable;
int i;
preempt_disable();
+ RSR_CPENABLE(old_cpenable);
cpenable = ti->cpenable;
+ WSR_CPENABLE(cpenable);
for (i = 0; i < XCHAL_CP_MAX; i++) {
if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
coprocessor_flush(ti, i);
cpenable >>= 1;
}
+ WSR_CPENABLE(old_cpenable);
preempt_enable();
}
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index c0845cb1cbb9..d9541be0605a 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -127,12 +127,37 @@ static int ptrace_setregs(struct task_struct *child, void __user *uregs)
}
+#if XTENSA_HAVE_COPROCESSORS
+#define CP_OFFSETS(cp) \
+ { \
+ .elf_xtregs_offset = offsetof(elf_xtregs_t, cp), \
+ .ti_offset = offsetof(struct thread_info, xtregs_cp.cp), \
+ .sz = sizeof(xtregs_ ## cp ## _t), \
+ }
+
+static const struct {
+ size_t elf_xtregs_offset;
+ size_t ti_offset;
+ size_t sz;
+} cp_offsets[] = {
+ CP_OFFSETS(cp0),
+ CP_OFFSETS(cp1),
+ CP_OFFSETS(cp2),
+ CP_OFFSETS(cp3),
+ CP_OFFSETS(cp4),
+ CP_OFFSETS(cp5),
+ CP_OFFSETS(cp6),
+ CP_OFFSETS(cp7),
+};
+#endif
+
static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
{
struct pt_regs *regs = task_pt_regs(child);
struct thread_info *ti = task_thread_info(child);
elf_xtregs_t __user *xtregs = uregs;
int ret = 0;
+ int i __maybe_unused;
if (!access_ok(VERIFY_WRITE, uregs, sizeof(elf_xtregs_t)))
return -EIO;
@@ -140,8 +165,13 @@ static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
#if XTENSA_HAVE_COPROCESSORS
/* Flush all coprocessor registers to memory. */
coprocessor_flush_all(ti);
- ret |= __copy_to_user(&xtregs->cp0, &ti->xtregs_cp,
- sizeof(xtregs_coprocessor_t));
+
+ for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
+ ret |= __copy_to_user((char __user *)xtregs +
+ cp_offsets[i].elf_xtregs_offset,
+ (const char *)ti +
+ cp_offsets[i].ti_offset,
+ cp_offsets[i].sz);
#endif
ret |= __copy_to_user(&xtregs->opt, &regs->xtregs_opt,
sizeof(xtregs->opt));
@@ -157,6 +187,7 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
struct pt_regs *regs = task_pt_regs(child);
elf_xtregs_t *xtregs = uregs;
int ret = 0;
+ int i __maybe_unused;
if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t)))
return -EFAULT;
@@ -166,8 +197,11 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
coprocessor_flush_all(ti);
coprocessor_release_all(ti);
- ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0,
- sizeof(xtregs_coprocessor_t));
+ for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
+ ret |= __copy_from_user((char *)ti + cp_offsets[i].ti_offset,
+ (const char __user *)xtregs +
+ cp_offsets[i].elf_xtregs_offset,
+ cp_offsets[i].sz);
#endif
ret |= __copy_from_user(&regs->xtregs_opt, &xtregs->opt,
sizeof(xtregs->opt));
OpenPOWER on IntegriCloud