Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--   arch/powerpc/kernel/Makefile   7
-rw-r--r--   arch/powerpc/kernel/align.c   774
-rw-r--r--   arch/powerpc/kernel/asm-offsets.c   17
-rw-r--r--   arch/powerpc/kernel/audit.c   1
-rw-r--r--   arch/powerpc/kernel/btext.c   3
-rw-r--r--   arch/powerpc/kernel/cacheinfo.c   34
-rw-r--r--   arch/powerpc/kernel/cacheinfo.h   1
-rw-r--r--   arch/powerpc/kernel/compat_audit.c   1
-rw-r--r--   arch/powerpc/kernel/cputable.c   32
-rw-r--r--   arch/powerpc/kernel/dma-iommu.c   1
-rw-r--r--   arch/powerpc/kernel/dt_cpu_ftrs.c   8
-rw-r--r--   arch/powerpc/kernel/eeh.c   70
-rw-r--r--   arch/powerpc/kernel/eeh_dev.c   25
-rw-r--r--   arch/powerpc/kernel/eeh_driver.c   8
-rw-r--r--   arch/powerpc/kernel/eeh_pe.c   98
-rw-r--r--   arch/powerpc/kernel/eeh_sysfs.c   3
-rw-r--r--   arch/powerpc/kernel/entry_32.S   22
-rw-r--r--   arch/powerpc/kernel/entry_64.S   73
-rw-r--r--   arch/powerpc/kernel/exceptions-64s.S   115
-rw-r--r--   arch/powerpc/kernel/fadump.c   48
-rw-r--r--   arch/powerpc/kernel/fsl_booke_entry_mapping.S   1
-rw-r--r--   arch/powerpc/kernel/head_32.S   6
-rw-r--r--   arch/powerpc/kernel/head_64.S   24
-rw-r--r--   arch/powerpc/kernel/head_8xx.S   109
-rw-r--r--   arch/powerpc/kernel/head_booke.h   1
-rw-r--r--   arch/powerpc/kernel/idle_book3s.S   116
-rw-r--r--   arch/powerpc/kernel/io-workarounds.c   9
-rw-r--r--   arch/powerpc/kernel/iomap.c   1
-rw-r--r--   arch/powerpc/kernel/iommu.c   7
-rw-r--r--   arch/powerpc/kernel/irq.c   129
-rw-r--r--   arch/powerpc/kernel/isa-bridge.c   32
-rw-r--r--   arch/powerpc/kernel/kgdb.c   4
-rw-r--r--   arch/powerpc/kernel/kprobes-ftrace.c   34
-rw-r--r--   arch/powerpc/kernel/kprobes.c   99
-rw-r--r--   arch/powerpc/kernel/kvm.c   7
-rw-r--r--   arch/powerpc/kernel/l2cr_6xx.S   4
-rw-r--r--   arch/powerpc/kernel/legacy_serial.c   13
-rw-r--r--   arch/powerpc/kernel/machine_kexec_64.c   4
-rw-r--r--   arch/powerpc/kernel/machine_kexec_file_64.c   12
-rw-r--r--   arch/powerpc/kernel/mce.c   170
-rw-r--r--   arch/powerpc/kernel/mce_power.c   128
-rw-r--r--   arch/powerpc/kernel/module_64.c   3
-rw-r--r--   arch/powerpc/kernel/of_platform.c   2
-rw-r--r--   arch/powerpc/kernel/optprobes.c   19
-rw-r--r--   arch/powerpc/kernel/optprobes_head.S   8
-rw-r--r--   arch/powerpc/kernel/paca.c   31
-rw-r--r--   arch/powerpc/kernel/pci-common.c   27
-rw-r--r--   arch/powerpc/kernel/pci_32.c   4
-rw-r--r--   arch/powerpc/kernel/pci_64.c   8
-rw-r--r--   arch/powerpc/kernel/pci_dn.c   22
-rw-r--r--   arch/powerpc/kernel/pci_of_scan.c   24
-rw-r--r--   arch/powerpc/kernel/process.c   329
-rw-r--r--   arch/powerpc/kernel/prom.c   37
-rw-r--r--   arch/powerpc/kernel/prom_init.c   34
-rw-r--r--   arch/powerpc/kernel/prom_parse.c   1
-rw-r--r--   arch/powerpc/kernel/ptrace.c   55
-rw-r--r--   arch/powerpc/kernel/reloc_64.S   6
-rw-r--r--   arch/powerpc/kernel/rtas-proc.c   1
-rw-r--r--   arch/powerpc/kernel/rtas-rtc.c   1
-rw-r--r--   arch/powerpc/kernel/rtas.c   6
-rw-r--r--   arch/powerpc/kernel/rtas_pci.c   33
-rw-r--r--   arch/powerpc/kernel/setup-common.c   48
-rw-r--r--   arch/powerpc/kernel/setup.h   6
-rw-r--r--   arch/powerpc/kernel/setup_32.c   7
-rw-r--r--   arch/powerpc/kernel/setup_64.c   58
-rw-r--r--   arch/powerpc/kernel/signal.c   2
-rw-r--r--   arch/powerpc/kernel/signal_32.c   57
-rw-r--r--   arch/powerpc/kernel/signal_64.c   20
-rw-r--r--   arch/powerpc/kernel/smp-tbsync.c   1
-rw-r--r--   arch/powerpc/kernel/smp.c   240
-rw-r--r--   arch/powerpc/kernel/swsusp_32.S   1
-rw-r--r--   arch/powerpc/kernel/swsusp_asm64.S   2
-rw-r--r--   arch/powerpc/kernel/swsusp_booke.S   1
-rw-r--r--   arch/powerpc/kernel/sysfs.c   11
-rw-r--r--   arch/powerpc/kernel/systbl.S   14
-rw-r--r--   arch/powerpc/kernel/tau_6xx.c   6
-rw-r--r--   arch/powerpc/kernel/tm.S   60
-rw-r--r--   arch/powerpc/kernel/trace/Makefile   1
-rw-r--r--   arch/powerpc/kernel/trace/ftrace.c   1
-rw-r--r--   arch/powerpc/kernel/trace/ftrace_64_mprofile.S   49
-rw-r--r--   arch/powerpc/kernel/traps.c   573
-rw-r--r--   arch/powerpc/kernel/uprobes.c   9
-rw-r--r--   arch/powerpc/kernel/vdso32/Makefile   1
-rw-r--r--   arch/powerpc/kernel/vdso32/gettimeofday.S   12
-rw-r--r--   arch/powerpc/kernel/vdso32/vdso32.lds.S   1
-rw-r--r--   arch/powerpc/kernel/vdso32/vdso32_wrapper.S   1
-rw-r--r--   arch/powerpc/kernel/vdso64/Makefile   1
-rw-r--r--   arch/powerpc/kernel/vdso64/vdso64.lds.S   1
-rw-r--r--   arch/powerpc/kernel/vdso64/vdso64_wrapper.S   1
-rw-r--r--   arch/powerpc/kernel/vecemu.c   1
-rw-r--r--   arch/powerpc/kernel/vector.S   1
-rw-r--r--   arch/powerpc/kernel/vmlinux.lds.S   3
-rw-r--r--   arch/powerpc/kernel/watchdog.c   113
93 files changed, 2161 insertions, 2044 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 4aa7c147e447..1b6bc7fba996 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the linux kernel.
#
@@ -38,7 +39,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
paca.o nvram_64.o firmware.o
obj-$(CONFIG_VDSO32) += vdso32/
-obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog.o
+obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
@@ -83,7 +84,7 @@ extra-y := head_$(BITS).o
extra-$(CONFIG_40x) := head_40x.o
extra-$(CONFIG_44x) := head_44x.o
extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
-extra-$(CONFIG_8xx) := head_8xx.o
+extra-$(CONFIG_PPC_8xx) := head_8xx.o
extra-y += vmlinux.lds
obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
@@ -128,7 +129,7 @@ obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o
obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
-ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE),)
+ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)$(CONFIG_PPC_BOOK3S),)
obj-y += ppc_save_regs.o
endif
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index ec7a8b099dd9..3e6c0744c174 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -27,6 +27,7 @@
#include <asm/switch_to.h>
#include <asm/disassemble.h>
#include <asm/cpu_has_feature.h>
+#include <asm/sstep.h>
struct aligninfo {
unsigned char len;
@@ -40,364 +41,9 @@ struct aligninfo {
#define LD 0 /* load */
#define ST 1 /* store */
#define SE 2 /* sign-extend value, or FP ld/st as word */
-#define F 4 /* to/from fp regs */
-#define U 8 /* update index register */
-#define M 0x10 /* multiple load/store */
#define SW 0x20 /* byte swap */
-#define S 0x40 /* single-precision fp or... */
-#define SX 0x40 /* ... byte count in XER */
-#define HARD 0x80 /* string, stwcx. */
#define E4 0x40 /* SPE endianness is word */
#define E8 0x80 /* SPE endianness is double word */
-#define SPLT 0x80 /* VSX SPLAT load */
-
-/* DSISR bits reported for a DCBZ instruction: */
-#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */
-
-/*
- * The PowerPC stores certain bits of the instruction that caused the
- * alignment exception in the DSISR register. This array maps those
- * bits to information about the operand length and what the
- * instruction would do.
- */
-static struct aligninfo aligninfo[128] = {
- { 4, LD }, /* 00 0 0000: lwz / lwarx */
- INVALID, /* 00 0 0001 */
- { 4, ST }, /* 00 0 0010: stw */
- INVALID, /* 00 0 0011 */
- { 2, LD }, /* 00 0 0100: lhz */
- { 2, LD+SE }, /* 00 0 0101: lha */
- { 2, ST }, /* 00 0 0110: sth */
- { 4, LD+M }, /* 00 0 0111: lmw */
- { 4, LD+F+S }, /* 00 0 1000: lfs */
- { 8, LD+F }, /* 00 0 1001: lfd */
- { 4, ST+F+S }, /* 00 0 1010: stfs */
- { 8, ST+F }, /* 00 0 1011: stfd */
- { 16, LD }, /* 00 0 1100: lq */
- { 8, LD }, /* 00 0 1101: ld/ldu/lwa */
- INVALID, /* 00 0 1110 */
- { 8, ST }, /* 00 0 1111: std/stdu */
- { 4, LD+U }, /* 00 1 0000: lwzu */
- INVALID, /* 00 1 0001 */
- { 4, ST+U }, /* 00 1 0010: stwu */
- INVALID, /* 00 1 0011 */
- { 2, LD+U }, /* 00 1 0100: lhzu */
- { 2, LD+SE+U }, /* 00 1 0101: lhau */
- { 2, ST+U }, /* 00 1 0110: sthu */
- { 4, ST+M }, /* 00 1 0111: stmw */
- { 4, LD+F+S+U }, /* 00 1 1000: lfsu */
- { 8, LD+F+U }, /* 00 1 1001: lfdu */
- { 4, ST+F+S+U }, /* 00 1 1010: stfsu */
- { 8, ST+F+U }, /* 00 1 1011: stfdu */
- { 16, LD+F }, /* 00 1 1100: lfdp */
- INVALID, /* 00 1 1101 */
- { 16, ST+F }, /* 00 1 1110: stfdp */
- INVALID, /* 00 1 1111 */
- { 8, LD }, /* 01 0 0000: ldx */
- INVALID, /* 01 0 0001 */
- { 8, ST }, /* 01 0 0010: stdx */
- INVALID, /* 01 0 0011 */
- INVALID, /* 01 0 0100 */
- { 4, LD+SE }, /* 01 0 0101: lwax */
- INVALID, /* 01 0 0110 */
- INVALID, /* 01 0 0111 */
- { 4, LD+M+HARD+SX }, /* 01 0 1000: lswx */
- { 4, LD+M+HARD }, /* 01 0 1001: lswi */
- { 4, ST+M+HARD+SX }, /* 01 0 1010: stswx */
- { 4, ST+M+HARD }, /* 01 0 1011: stswi */
- INVALID, /* 01 0 1100 */
- { 8, LD+U }, /* 01 0 1101: ldu */
- INVALID, /* 01 0 1110 */
- { 8, ST+U }, /* 01 0 1111: stdu */
- { 8, LD+U }, /* 01 1 0000: ldux */
- INVALID, /* 01 1 0001 */
- { 8, ST+U }, /* 01 1 0010: stdux */
- INVALID, /* 01 1 0011 */
- INVALID, /* 01 1 0100 */
- { 4, LD+SE+U }, /* 01 1 0101: lwaux */
- INVALID, /* 01 1 0110 */
- INVALID, /* 01 1 0111 */
- INVALID, /* 01 1 1000 */
- INVALID, /* 01 1 1001 */
- INVALID, /* 01 1 1010 */
- INVALID, /* 01 1 1011 */
- INVALID, /* 01 1 1100 */
- INVALID, /* 01 1 1101 */
- INVALID, /* 01 1 1110 */
- INVALID, /* 01 1 1111 */
- INVALID, /* 10 0 0000 */
- INVALID, /* 10 0 0001 */
- INVALID, /* 10 0 0010: stwcx. */
- INVALID, /* 10 0 0011 */
- INVALID, /* 10 0 0100 */
- INVALID, /* 10 0 0101 */
- INVALID, /* 10 0 0110 */
- INVALID, /* 10 0 0111 */
- { 4, LD+SW }, /* 10 0 1000: lwbrx */
- INVALID, /* 10 0 1001 */
- { 4, ST+SW }, /* 10 0 1010: stwbrx */
- INVALID, /* 10 0 1011 */
- { 2, LD+SW }, /* 10 0 1100: lhbrx */
- { 4, LD+SE }, /* 10 0 1101 lwa */
- { 2, ST+SW }, /* 10 0 1110: sthbrx */
- { 16, ST }, /* 10 0 1111: stq */
- INVALID, /* 10 1 0000 */
- INVALID, /* 10 1 0001 */
- INVALID, /* 10 1 0010 */
- INVALID, /* 10 1 0011 */
- INVALID, /* 10 1 0100 */
- INVALID, /* 10 1 0101 */
- INVALID, /* 10 1 0110 */
- INVALID, /* 10 1 0111 */
- INVALID, /* 10 1 1000 */
- INVALID, /* 10 1 1001 */
- INVALID, /* 10 1 1010 */
- INVALID, /* 10 1 1011 */
- INVALID, /* 10 1 1100 */
- INVALID, /* 10 1 1101 */
- INVALID, /* 10 1 1110 */
- { 0, ST+HARD }, /* 10 1 1111: dcbz */
- { 4, LD }, /* 11 0 0000: lwzx */
- INVALID, /* 11 0 0001 */
- { 4, ST }, /* 11 0 0010: stwx */
- INVALID, /* 11 0 0011 */
- { 2, LD }, /* 11 0 0100: lhzx */
- { 2, LD+SE }, /* 11 0 0101: lhax */
- { 2, ST }, /* 11 0 0110: sthx */
- INVALID, /* 11 0 0111 */
- { 4, LD+F+S }, /* 11 0 1000: lfsx */
- { 8, LD+F }, /* 11 0 1001: lfdx */
- { 4, ST+F+S }, /* 11 0 1010: stfsx */
- { 8, ST+F }, /* 11 0 1011: stfdx */
- { 16, LD+F }, /* 11 0 1100: lfdpx */
- { 4, LD+F+SE }, /* 11 0 1101: lfiwax */
- { 16, ST+F }, /* 11 0 1110: stfdpx */
- { 4, ST+F }, /* 11 0 1111: stfiwx */
- { 4, LD+U }, /* 11 1 0000: lwzux */
- INVALID, /* 11 1 0001 */
- { 4, ST+U }, /* 11 1 0010: stwux */
- INVALID, /* 11 1 0011 */
- { 2, LD+U }, /* 11 1 0100: lhzux */
- { 2, LD+SE+U }, /* 11 1 0101: lhaux */
- { 2, ST+U }, /* 11 1 0110: sthux */
- INVALID, /* 11 1 0111 */
- { 4, LD+F+S+U }, /* 11 1 1000: lfsux */
- { 8, LD+F+U }, /* 11 1 1001: lfdux */
- { 4, ST+F+S+U }, /* 11 1 1010: stfsux */
- { 8, ST+F+U }, /* 11 1 1011: stfdux */
- INVALID, /* 11 1 1100 */
- { 4, LD+F }, /* 11 1 1101: lfiwzx */
- INVALID, /* 11 1 1110 */
- INVALID, /* 11 1 1111 */
-};
-
-/*
- * The dcbz (data cache block zero) instruction
- * gives an alignment fault if used on non-cacheable
- * memory. We handle the fault mainly for the
- * case when we are running with the cache disabled
- * for debugging.
- */
-static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
-{
- long __user *p;
- int i, size;
-
-#ifdef __powerpc64__
- size = ppc64_caches.l1d.block_size;
-#else
- size = L1_CACHE_BYTES;
-#endif
- p = (long __user *) (regs->dar & -size);
- if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
- return -EFAULT;
- for (i = 0; i < size / sizeof(long); ++i)
- if (__put_user_inatomic(0, p+i))
- return -EFAULT;
- return 1;
-}
-
-/*
- * Emulate load & store multiple instructions
- * On 64-bit machines, these instructions only affect/use the
- * bottom 4 bytes of each register, and the loads clear the
- * top 4 bytes of the affected register.
- */
-#ifdef __BIG_ENDIAN__
-#ifdef CONFIG_PPC64
-#define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)
-#else
-#define REG_BYTE(rp, i) *((u8 *)(rp) + (i))
-#endif
-#else
-#define REG_BYTE(rp, i) (*(((u8 *)((rp) + ((i)>>2)) + ((i)&3))))
-#endif
-
-#define SWIZ_PTR(p) ((unsigned char __user *)((p) ^ swiz))
-
-static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
- unsigned int reg, unsigned int nb,
- unsigned int flags, unsigned int instr,
- unsigned long swiz)
-{
- unsigned long *rptr;
- unsigned int nb0, i, bswiz;
- unsigned long p;
-
- /*
- * We do not try to emulate 8 bytes multiple as they aren't really
- * available in our operating environments and we don't try to
- * emulate multiples operations in kernel land as they should never
- * be used/generated there at least not on unaligned boundaries
- */
- if (unlikely((nb > 4) || !user_mode(regs)))
- return 0;
-
- /* lmw, stmw, lswi/x, stswi/x */
- nb0 = 0;
- if (flags & HARD) {
- if (flags & SX) {
- nb = regs->xer & 127;
- if (nb == 0)
- return 1;
- } else {
- unsigned long pc = regs->nip ^ (swiz & 4);
-
- if (__get_user_inatomic(instr,
- (unsigned int __user *)pc))
- return -EFAULT;
- if (swiz == 0 && (flags & SW))
- instr = cpu_to_le32(instr);
- nb = (instr >> 11) & 0x1f;
- if (nb == 0)
- nb = 32;
- }
- if (nb + reg * 4 > 128) {
- nb0 = nb + reg * 4 - 128;
- nb = 128 - reg * 4;
- }
-#ifdef __LITTLE_ENDIAN__
- /*
- * String instructions are endian neutral but the code
- * below is not. Force byte swapping on so that the
- * effects of swizzling are undone in the load/store
- * loops below.
- */
- flags ^= SW;
-#endif
- } else {
- /* lwm, stmw */
- nb = (32 - reg) * 4;
- }
-
- if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0))
- return -EFAULT; /* bad address */
-
- rptr = &regs->gpr[reg];
- p = (unsigned long) addr;
- bswiz = (flags & SW)? 3: 0;
-
- if (!(flags & ST)) {
- /*
- * This zeroes the top 4 bytes of the affected registers
- * in 64-bit mode, and also zeroes out any remaining
- * bytes of the last register for lsw*.
- */
- memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long));
- if (nb0 > 0)
- memset(&regs->gpr[0], 0,
- ((nb0 + 3) / 4) * sizeof(unsigned long));
-
- for (i = 0; i < nb; ++i, ++p)
- if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- if (nb0 > 0) {
- rptr = &regs->gpr[0];
- addr += nb;
- for (i = 0; i < nb0; ++i, ++p)
- if (__get_user_inatomic(REG_BYTE(rptr,
- i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- }
-
- } else {
- for (i = 0; i < nb; ++i, ++p)
- if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- if (nb0 > 0) {
- rptr = &regs->gpr[0];
- addr += nb;
- for (i = 0; i < nb0; ++i, ++p)
- if (__put_user_inatomic(REG_BYTE(rptr,
- i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- }
- }
- return 1;
-}
-
-/*
- * Emulate floating-point pair loads and stores.
- * Only POWER6 has these instructions, and it does true little-endian,
- * so we don't need the address swizzling.
- */
-static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
- unsigned int flags)
-{
- char *ptr0 = (char *) &current->thread.TS_FPR(reg);
- char *ptr1 = (char *) &current->thread.TS_FPR(reg+1);
- int i, ret, sw = 0;
-
- if (reg & 1)
- return 0; /* invalid form: FRS/FRT must be even */
- if (flags & SW)
- sw = 7;
- ret = 0;
- for (i = 0; i < 8; ++i) {
- if (!(flags & ST)) {
- ret |= __get_user(ptr0[i^sw], addr + i);
- ret |= __get_user(ptr1[i^sw], addr + i + 8);
- } else {
- ret |= __put_user(ptr0[i^sw], addr + i);
- ret |= __put_user(ptr1[i^sw], addr + i + 8);
- }
- }
- if (ret)
- return -EFAULT;
- return 1; /* exception handled and fixed up */
-}
-
-#ifdef CONFIG_PPC64
-static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr,
- unsigned int reg, unsigned int flags)
-{
- char *ptr0 = (char *)&regs->gpr[reg];
- char *ptr1 = (char *)&regs->gpr[reg+1];
- int i, ret, sw = 0;
-
- if (reg & 1)
- return 0; /* invalid form: GPR must be even */
- if (flags & SW)
- sw = 7;
- ret = 0;
- for (i = 0; i < 8; ++i) {
- if (!(flags & ST)) {
- ret |= __get_user(ptr0[i^sw], addr + i);
- ret |= __get_user(ptr1[i^sw], addr + i + 8);
- } else {
- ret |= __put_user(ptr0[i^sw], addr + i);
- ret |= __put_user(ptr1[i^sw], addr + i + 8);
- }
- }
- if (ret)
- return -EFAULT;
- return 1; /* exception handled and fixed up */
-}
-#endif /* CONFIG_PPC64 */
#ifdef CONFIG_SPE
@@ -636,133 +282,21 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
}
#endif /* CONFIG_SPE */
-#ifdef CONFIG_VSX
-/*
- * Emulate VSX instructions...
- */
-static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
- unsigned int areg, struct pt_regs *regs,
- unsigned int flags, unsigned int length,
- unsigned int elsize)
-{
- char *ptr;
- unsigned long *lptr;
- int ret = 0;
- int sw = 0;
- int i, j;
-
- /* userland only */
- if (unlikely(!user_mode(regs)))
- return 0;
-
- flush_vsx_to_thread(current);
-
- if (reg < 32)
- ptr = (char *) &current->thread.fp_state.fpr[reg][0];
- else
- ptr = (char *) &current->thread.vr_state.vr[reg - 32];
-
- lptr = (unsigned long *) ptr;
-
-#ifdef __LITTLE_ENDIAN__
- if (flags & SW) {
- elsize = length;
- sw = length-1;
- } else {
- /*
- * The elements are BE ordered, even in LE mode, so process
- * them in reverse order.
- */
- addr += length - elsize;
-
- /* 8 byte memory accesses go in the top 8 bytes of the VR */
- if (length == 8)
- ptr += 8;
- }
-#else
- if (flags & SW)
- sw = elsize-1;
-#endif
-
- for (j = 0; j < length; j += elsize) {
- for (i = 0; i < elsize; ++i) {
- if (flags & ST)
- ret |= __put_user(ptr[i^sw], addr + i);
- else
- ret |= __get_user(ptr[i^sw], addr + i);
- }
- ptr += elsize;
-#ifdef __LITTLE_ENDIAN__
- addr -= elsize;
-#else
- addr += elsize;
-#endif
- }
-
-#ifdef __BIG_ENDIAN__
-#define VSX_HI 0
-#define VSX_LO 1
-#else
-#define VSX_HI 1
-#define VSX_LO 0
-#endif
-
- if (!ret) {
- if (flags & U)
- regs->gpr[areg] = regs->dar;
-
- /* Splat load copies the same data to top and bottom 8 bytes */
- if (flags & SPLT)
- lptr[VSX_LO] = lptr[VSX_HI];
- /* For 8 byte loads, zero the low 8 bytes */
- else if (!(flags & ST) && (8 == length))
- lptr[VSX_LO] = 0;
- } else
- return -EFAULT;
-
- return 1;
-}
-#endif
-
/*
* Called on alignment exception. Attempts to fixup
*
* Return 1 on success
* Return 0 if unable to handle the interrupt
* Return -EFAULT if data address is bad
+ * Other negative return values indicate that the instruction can't
+ * be emulated, and the process should be given a SIGBUS.
*/
int fix_alignment(struct pt_regs *regs)
{
- unsigned int instr, nb, flags, instruction = 0;
- unsigned int reg, areg;
- unsigned int dsisr;
- unsigned char __user *addr;
- unsigned long p, swiz;
- int ret, i;
- union data {
- u64 ll;
- double dd;
- unsigned char v[8];
- struct {
-#ifdef __LITTLE_ENDIAN__
- int low32;
- unsigned hi32;
-#else
- unsigned hi32;
- int low32;
-#endif
- } x32;
- struct {
-#ifdef __LITTLE_ENDIAN__
- short low16;
- unsigned char hi48[6];
-#else
- unsigned char hi48[6];
- short low16;
-#endif
- } x16;
- } data;
+ unsigned int instr;
+ struct instruction_op op;
+ int r, type;
/*
* We require a complete register set, if not, then our assembly
@@ -770,121 +304,23 @@ int fix_alignment(struct pt_regs *regs)
*/
CHECK_FULL_REGS(regs);
- dsisr = regs->dsisr;
-
- /* Some processors don't provide us with a DSISR we can use here,
- * let's make one up from the instruction
- */
- if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
- unsigned long pc = regs->nip;
-
- if (cpu_has_feature(CPU_FTR_PPC_LE) && (regs->msr & MSR_LE))
- pc ^= 4;
- if (unlikely(__get_user_inatomic(instr,
- (unsigned int __user *)pc)))
- return -EFAULT;
- if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE))
- instr = cpu_to_le32(instr);
- dsisr = make_dsisr(instr);
- instruction = instr;
+ if (unlikely(__get_user(instr, (unsigned int __user *)regs->nip)))
+ return -EFAULT;
+ if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
+ /* We don't handle PPC little-endian any more... */
+ if (cpu_has_feature(CPU_FTR_PPC_LE))
+ return -EIO;
+ instr = swab32(instr);
}
- /* extract the operation and registers from the dsisr */
- reg = (dsisr >> 5) & 0x1f; /* source/dest register */
- areg = dsisr & 0x1f; /* register to update */
-
#ifdef CONFIG_SPE
if ((instr >> 26) == 0x4) {
+ int reg = (instr >> 21) & 0x1f;
PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr);
}
#endif
- instr = (dsisr >> 10) & 0x7f;
- instr |= (dsisr >> 13) & 0x60;
-
- /* Lookup the operation in our table */
- nb = aligninfo[instr].len;
- flags = aligninfo[instr].flags;
-
- /*
- * Handle some cases which give overlaps in the DSISR values.
- */
- if (IS_XFORM(instruction)) {
- switch (get_xop(instruction)) {
- case 532: /* ldbrx */
- nb = 8;
- flags = LD+SW;
- break;
- case 660: /* stdbrx */
- nb = 8;
- flags = ST+SW;
- break;
- case 20: /* lwarx */
- case 84: /* ldarx */
- case 116: /* lharx */
- case 276: /* lqarx */
- return 0; /* not emulated ever */
- }
- }
-
- /* Byteswap little endian loads and stores */
- swiz = 0;
- if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
- flags ^= SW;
-#ifdef __BIG_ENDIAN__
- /*
- * So-called "PowerPC little endian" mode works by
- * swizzling addresses rather than by actually doing
- * any byte-swapping. To emulate this, we XOR each
- * byte address with 7. We also byte-swap, because
- * the processor's address swizzling depends on the
- * operand size (it xors the address with 7 for bytes,
- * 6 for halfwords, 4 for words, 0 for doublewords) but
- * we will xor with 7 and load/store each byte separately.
- */
- if (cpu_has_feature(CPU_FTR_PPC_LE))
- swiz = 7;
-#endif
- }
-
- /* DAR has the operand effective address */
- addr = (unsigned char __user *)regs->dar;
-
-#ifdef CONFIG_VSX
- if ((instruction & 0xfc00003e) == 0x7c000018) {
- unsigned int elsize;
-
- /* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */
- reg |= (instruction & 0x1) << 5;
- /* Simple inline decoder instead of a table */
- /* VSX has only 8 and 16 byte memory accesses */
- nb = 8;
- if (instruction & 0x200)
- nb = 16;
-
- /* Vector stores in little-endian mode swap individual
- elements, so process them separately */
- elsize = 4;
- if (instruction & 0x80)
- elsize = 8;
-
- flags = 0;
- if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE))
- flags |= SW;
- if (instruction & 0x100)
- flags |= ST;
- if (instruction & 0x040)
- flags |= U;
- /* splat load needs a special decoder */
- if ((instruction & 0x400) == 0){
- flags |= SPLT;
- nb = 8;
- }
- PPC_WARN_ALIGNMENT(vsx, regs);
- return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize);
- }
-#endif
/*
* ISA 3.0 (such as P9) copy, copy_first, paste and paste_last alignment
@@ -896,173 +332,27 @@ int fix_alignment(struct pt_regs *regs)
* when pasting to a co-processor. Furthermore, paste_last is the
* synchronisation point for preceding copy/paste sequences.
*/
- if ((instruction & 0xfc0006fe) == PPC_INST_COPY)
+ if ((instr & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe))
return -EIO;
- /* A size of 0 indicates an instruction we don't support, with
- * the exception of DCBZ which is handled as a special case here
- */
- if (instr == DCBZ) {
- PPC_WARN_ALIGNMENT(dcbz, regs);
- return emulate_dcbz(regs, addr);
- }
- if (unlikely(nb == 0))
- return 0;
-
- /* Load/Store Multiple instructions are handled in their own
- * function
- */
- if (flags & M) {
- PPC_WARN_ALIGNMENT(multiple, regs);
- return emulate_multiple(regs, addr, reg, nb,
- flags, instr, swiz);
- }
-
- /* Verify the address of the operand */
- if (unlikely(user_mode(regs) &&
- !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
- addr, nb)))
- return -EFAULT;
-
- /* Force the fprs into the save area so we can reference them */
- if (flags & F) {
- /* userland only */
- if (unlikely(!user_mode(regs)))
- return 0;
- flush_fp_to_thread(current);
- }
+ r = analyse_instr(&op, regs, instr);
+ if (r < 0)
+ return -EINVAL;
- if (nb == 16) {
- if (flags & F) {
- /* Special case for 16-byte FP loads and stores */
- PPC_WARN_ALIGNMENT(fp_pair, regs);
- return emulate_fp_pair(addr, reg, flags);
- } else {
-#ifdef CONFIG_PPC64
- /* Special case for 16-byte loads and stores */
- PPC_WARN_ALIGNMENT(lq_stq, regs);
- return emulate_lq_stq(regs, addr, reg, flags);
-#else
- return 0;
-#endif
- }
- }
-
- PPC_WARN_ALIGNMENT(unaligned, regs);
-
- /* If we are loading, get the data from user space, else
- * get it from register values
- */
- if (!(flags & ST)) {
- unsigned int start = 0;
-
- switch (nb) {
- case 4:
- start = offsetof(union data, x32.low32);
- break;
- case 2:
- start = offsetof(union data, x16.low16);
- break;
- }
-
- data.ll = 0;
- ret = 0;
- p = (unsigned long)addr;
-
- for (i = 0; i < nb; i++)
- ret |= __get_user_inatomic(data.v[start + i],
- SWIZ_PTR(p++));
-
- if (unlikely(ret))
- return -EFAULT;
-
- } else if (flags & F) {
- data.ll = current->thread.TS_FPR(reg);
- if (flags & S) {
- /* Single-precision FP store requires conversion... */
-#ifdef CONFIG_PPC_FPU
- preempt_disable();
- enable_kernel_fp();
- cvt_df(&data.dd, (float *)&data.x32.low32);
- disable_kernel_fp();
- preempt_enable();
-#else
- return 0;
-#endif
- }
- } else
- data.ll = regs->gpr[reg];
-
- if (flags & SW) {
- switch (nb) {
- case 8:
- data.ll = swab64(data.ll);
- break;
- case 4:
- data.x32.low32 = swab32(data.x32.low32);
- break;
- case 2:
- data.x16.low16 = swab16(data.x16.low16);
- break;
- }
- }
-
- /* Perform other misc operations like sign extension
- * or floating point single precision conversion
- */
- switch (flags & ~(U|SW)) {
- case LD+SE: /* sign extending integer loads */
- case LD+F+SE: /* sign extend for lfiwax */
- if ( nb == 2 )
- data.ll = data.x16.low16;
- else /* nb must be 4 */
- data.ll = data.x32.low32;
- break;
-
- /* Single-precision FP load requires conversion... */
- case LD+F+S:
-#ifdef CONFIG_PPC_FPU
- preempt_disable();
- enable_kernel_fp();
- cvt_fd((float *)&data.x32.low32, &data.dd);
- disable_kernel_fp();
- preempt_enable();
-#else
- return 0;
-#endif
- break;
+ type = op.type & INSTR_TYPE_MASK;
+ if (!OP_IS_LOAD_STORE(type)) {
+ if (op.type != CACHEOP + DCBZ)
+ return -EINVAL;
+ PPC_WARN_ALIGNMENT(dcbz, regs);
+ r = emulate_dcbz(op.ea, regs);
+ } else {
+ if (type == LARX || type == STCX)
+ return -EIO;
+ PPC_WARN_ALIGNMENT(unaligned, regs);
+ r = emulate_loadstore(regs, &op);
}
- /* Store result to memory or update registers */
- if (flags & ST) {
- unsigned int start = 0;
-
- switch (nb) {
- case 4:
- start = offsetof(union data, x32.low32);
- break;
- case 2:
- start = offsetof(union data, x16.low16);
- break;
- }
-
- ret = 0;
- p = (unsigned long)addr;
-
- for (i = 0; i < nb; i++)
- ret |= __put_user_inatomic(data.v[start + i],
- SWIZ_PTR(p++));
-
- if (unlikely(ret))
- return -EFAULT;
- } else if (flags & F)
- current->thread.TS_FPR(reg) = data.ll;
- else
- regs->gpr[reg] = data.ll;
-
- /* Update RA as needed */
- if (flags & U)
- regs->gpr[areg] = regs->dar;
-
- return 1;
+ if (!r)
+ return 1;
+ return r;
}
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6e95c2c19a7e..6b958414b4e0 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -185,7 +185,7 @@ int main(void)
#ifdef CONFIG_PPC_MM_SLICES
OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
- DEFINE(PACA_ADDR_LIMIT, offsetof(struct paca_struct, addr_limit));
+ OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit);
DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
#endif /* CONFIG_PPC_MM_SLICES */
#endif
@@ -208,7 +208,7 @@ int main(void)
OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first);
#endif /* CONFIG_PPC_BOOK3E */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
OFFSET(PACASLBCACHE, paca_struct, slb_cache);
OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
@@ -230,7 +230,7 @@ int main(void)
OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
#ifdef CONFIG_PPC_BOOK3S_64
OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp);
@@ -642,6 +642,7 @@ int main(void)
HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
+ HSTATE_FIELD(HSTATE_TID, tid);
HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]);
@@ -667,6 +668,8 @@ int main(void)
OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar);
OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap);
OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped);
+ OFFSET(KVM_SPLIT_DO_SET, kvm_split_mode, do_set);
+ OFFSET(KVM_SPLIT_DO_RESTORE, kvm_split_mode, do_restore);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_PPC_BOOK3S_64
@@ -746,6 +749,14 @@ int main(void)
OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
+#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
+ STOP_SPR(STOP_PID, pid);
+ STOP_SPR(STOP_LDBAR, ldbar);
+ STOP_SPR(STOP_FSCR, fscr);
+ STOP_SPR(STOP_HFSCR, hfscr);
+ STOP_SPR(STOP_MMCR1, mmcr1);
+ STOP_SPR(STOP_MMCR2, mmcr2);
+ STOP_SPR(STOP_MMCRA, mmcra);
#endif
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c
index a4dab7cab348..a2dddd7f3d09 100644
--- a/arch/powerpc/kernel/audit.c
+++ b/arch/powerpc/kernel/audit.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/types.h>
#include <linux/audit.h>
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 8275858a434d..6537cba1a758 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Procedures for drawing on the screen early on in the boot process.
*
@@ -253,7 +254,7 @@ int __init btext_find_display(int allow_nonstdout)
for_each_node_by_type(np, "display") {
if (of_get_property(np, "linux,opened", NULL)) {
- printk("trying %s ...\n", np->full_name);
+ printk("trying %pOF ...\n", np);
rc = btext_initialize(np);
printk("result: %d\n", rc);
}
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index c641983bbdd6..a8f20e5928e1 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -167,10 +167,10 @@ static void release_cache_debugcheck(struct cache *cache)
list_for_each_entry(iter, &cache_list, list)
WARN_ONCE(iter->next_local == cache,
- "cache for %s(%s) refers to cache for %s(%s)\n",
- iter->ofnode->full_name,
+ "cache for %pOF(%s) refers to cache for %pOF(%s)\n",
+ iter->ofnode,
cache_type_string(iter),
- cache->ofnode->full_name,
+ cache->ofnode,
cache_type_string(cache));
}
@@ -179,8 +179,8 @@ static void release_cache(struct cache *cache)
if (!cache)
return;
- pr_debug("freeing L%d %s cache for %s\n", cache->level,
- cache_type_string(cache), cache->ofnode->full_name);
+ pr_debug("freeing L%d %s cache for %pOF\n", cache->level,
+ cache_type_string(cache), cache->ofnode);
release_cache_debugcheck(cache);
list_del(&cache->list);
@@ -194,8 +194,8 @@ static void cache_cpu_set(struct cache *cache, int cpu)
while (next) {
WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
- "CPU %i already accounted in %s(%s)\n",
- cpu, next->ofnode->full_name,
+ "CPU %i already accounted in %pOF(%s)\n",
+ cpu, next->ofnode,
cache_type_string(next));
cpumask_set_cpu(cpu, &next->shared_cpu_map);
next = next->next_local;
@@ -355,7 +355,7 @@ static int cache_is_unified_d(const struct device_node *np)
*/
static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
{
- pr_debug("creating L%d ucache for %s\n", level, node->full_name);
+ pr_debug("creating L%d ucache for %pOF\n", level, node);
return new_cache(cache_is_unified_d(node), level, node);
}
@@ -365,8 +365,8 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node,
{
struct cache *dcache, *icache;
- pr_debug("creating L%d dcache and icache for %s\n", level,
- node->full_name);
+ pr_debug("creating L%d dcache and icache for %pOF\n", level,
+ node);
dcache = new_cache(CACHE_TYPE_DATA, level, node);
icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
@@ -679,7 +679,6 @@ static struct kobj_type cache_index_type = {
static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
{
- const char *cache_name;
const char *cache_type;
struct cache *cache;
char *buf;
@@ -690,7 +689,6 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
return;
cache = dir->cache;
- cache_name = cache->ofnode->full_name;
cache_type = cache_type_string(cache);
/* We don't want to create an attribute that can't provide a
@@ -707,14 +705,14 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
rc = attr->show(&dir->kobj, attr, buf);
if (rc <= 0) {
pr_debug("not creating %s attribute for "
- "%s(%s) (rc = %zd)\n",
- attr->attr.name, cache_name,
+ "%pOF(%s) (rc = %zd)\n",
+ attr->attr.name, cache->ofnode,
cache_type, rc);
continue;
}
if (sysfs_create_file(&dir->kobj, &attr->attr))
- pr_debug("could not create %s attribute for %s(%s)\n",
- attr->attr.name, cache_name, cache_type);
+ pr_debug("could not create %s attribute for %pOF(%s)\n",
+ attr->attr.name, cache->ofnode, cache_type);
}
kfree(buf);
@@ -831,8 +829,8 @@ static void cache_cpu_clear(struct cache *cache, int cpu)
struct cache *next = cache->next_local;
WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
- "CPU %i not accounted in %s(%s)\n",
- cpu, cache->ofnode->full_name,
+ "CPU %i not accounted in %pOF(%s)\n",
+ cpu, cache->ofnode,
cache_type_string(cache));
cpumask_clear_cpu(cpu, &cache->shared_cpu_map);
diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h
index a7b74d36acd7..955f5e999f1b 100644
--- a/arch/powerpc/kernel/cacheinfo.h
+++ b/arch/powerpc/kernel/cacheinfo.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_CACHEINFO_H
#define _PPC_CACHEINFO_H
diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c
index 108ff14e2122..55c6ccda0a85 100644
--- a/arch/powerpc/kernel/compat_audit.c
+++ b/arch/powerpc/kernel/compat_audit.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#undef __powerpc64__
#include <asm/unistd.h>
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 6f849832a669..1350f49d81a8 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -547,11 +547,31 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
- { /* Power9 */
+ { /* Power9 DD2.0 */
+ .pvr_mask = 0xffffefff,
+ .pvr_value = 0x004e0200,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD2_0,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power9",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .flush_tlb = __flush_tlb_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
+ { /* Power9 DD 2.1 or later (see DD2.0 above) */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004e0000,
.cpu_name = "POWER9 (raw)",
- .cpu_features = CPU_FTRS_POWER9,
+ .cpu_features = CPU_FTRS_POWER9_DD2_1,
.cpu_user_features = COMMON_USER_POWER9,
.cpu_user_features2 = COMMON_USER2_POWER9,
.mmu_features = MMU_FTRS_POWER9,
@@ -1259,10 +1279,10 @@ static struct cpu_spec __initdata cpu_specs[] = {
.platform = "ppc603",
},
#endif /* CONFIG_PPC_BOOK3S_32 */
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
{ /* 8xx */
.pvr_mask = 0xffff0000,
- .pvr_value = 0x00500000,
+ .pvr_value = PVR_8xx,
.cpu_name = "8xx",
/* CPU_FTR_MAYBE_CAN_DOZE is possible,
* if the 8xx code is there.... */
@@ -1274,7 +1294,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_8xx,
.platform = "ppc823",
},
-#endif /* CONFIG_8xx */
+#endif /* CONFIG_PPC_8xx */
#ifdef CONFIG_40x
{ /* 403GC */
.pvr_mask = 0xffffff00,
@@ -1936,6 +1956,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_440A,
.platform = "ppc440",
},
+#ifdef CONFIG_PPC_47x
{ /* 476 DD2 core */
.pvr_mask = 0xffffffff,
.pvr_value = 0x11a52080,
@@ -1992,6 +2013,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_47x,
.platform = "ppc470",
},
+#endif /* CONFIG_PPC_47x */
{ /* default match */
.pvr_mask = 0x00000000,
.pvr_value = 0x00000000,
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 8f7abf9baa63..66f33e7f8d40 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
*
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 1df770e8cbe0..8bdc2f96c5d6 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -102,10 +102,10 @@ static void cpufeatures_flush_tlb(void)
case PVR_POWER8:
case PVR_POWER8E:
case PVR_POWER8NVL:
- __flush_tlb_power8(POWER8_TLB_SETS);
+ __flush_tlb_power8(TLB_INVAL_SCOPE_GLOBAL);
break;
case PVR_POWER9:
- __flush_tlb_power9(POWER9_TLB_SETS_HASH);
+ __flush_tlb_power9(TLB_INVAL_SCOPE_GLOBAL);
break;
default:
pr_err("unknown CPU version for boot TLB flush\n");
@@ -634,7 +634,7 @@ static struct dt_cpu_feature_match __initdata
{"no-execute", feat_enable, 0},
{"strong-access-ordering", feat_enable, CPU_FTR_SAO},
{"cache-inhibited-large-page", feat_enable_large_ci, 0},
- {"coprocessor-icswx", feat_enable, CPU_FTR_ICSWX},
+ {"coprocessor-icswx", feat_enable, 0},
{"hypervisor-virtualization-interrupt", feat_enable_hvi, 0},
{"program-priority-register", feat_enable, CPU_FTR_HAS_PPR},
{"wait", feat_enable, 0},
@@ -735,6 +735,8 @@ static __init void cpufeatures_cpu_quirks(void)
*/
if ((version & 0xffffff00) == 0x004e0100)
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
+ else if ((version & 0xffffefff) == 0x004e0201)
+ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
}
static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 63992b2d8e15..cbca0a667682 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -44,6 +44,7 @@
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
+#include <asm/pte-walk.h>
/** Overview:
@@ -169,10 +170,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
char buffer[128];
n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
- edev->phb->global_number, pdn->busno,
+ pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
- edev->phb->global_number, pdn->busno,
+ pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
@@ -352,8 +353,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
* worried about _PAGE_SPLITTING/collapse. Also we will not hit
* page table free, because of init_mm.
*/
- ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token,
- NULL, &hugepage_shift);
+ ptep = find_init_mm_pte(token, &hugepage_shift);
if (!ptep)
return token;
WARN_ON(hugepage_shift);
@@ -435,7 +435,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
int ret;
int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
unsigned long flags;
- struct pci_dn *pdn;
+ struct device_node *dn;
struct pci_dev *dev;
struct eeh_pe *pe, *parent_pe, *phb_pe;
int rc = 0;
@@ -493,9 +493,10 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
if (pe->state & EEH_PE_ISOLATED) {
pe->check_count++;
if (pe->check_count % EEH_MAX_FAILS == 0) {
- pdn = eeh_dev_to_pdn(edev);
- if (pdn->node)
- location = of_get_property(pdn->node, "ibm,loc-code", NULL);
+ dn = pci_device_to_OF_node(dev);
+ if (dn)
+ location = of_get_property(dn, "ibm,loc-code",
+ NULL);
printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
"location=%s driver=%s pci addr=%s\n",
pe->check_count,
@@ -971,6 +972,18 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
+void eeh_probe_devices(void)
+{
+ struct pci_controller *hose, *tmp;
+ struct pci_dn *pdn;
+
+ /* Enable EEH for all adapters */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ pdn = hose->pci_data;
+ traverse_pci_dn(pdn, eeh_ops->probe, NULL);
+ }
+}
+
/**
* eeh_init - EEH initialization
*
@@ -986,22 +999,11 @@ static struct notifier_block eeh_reboot_nb = {
* Even if force-off is set, the EEH hardware is still enabled, so that
* newer systems can boot.
*/
-int eeh_init(void)
+static int eeh_init(void)
{
struct pci_controller *hose, *tmp;
- struct pci_dn *pdn;
- static int cnt = 0;
int ret = 0;
- /*
- * We have to delay the initialization on PowerNV after
- * the PCI hierarchy tree has been built because the PEs
- * are figured out based on PCI devices instead of device
- * tree nodes
- */
- if (machine_is(powernv) && cnt++ <= 0)
- return ret;
-
/* Register reboot notifier */
ret = register_reboot_notifier(&eeh_reboot_nb);
if (ret) {
@@ -1018,27 +1020,16 @@ int eeh_init(void)
} else if ((ret = eeh_ops->init()))
return ret;
+ /* Initialize PHB PEs */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ eeh_dev_phb_init_dynamic(hose);
+
/* Initialize EEH event */
ret = eeh_event_init();
if (ret)
return ret;
- /* Enable EEH for all adapters */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- pdn = hose->pci_data;
- traverse_pci_dn(pdn, eeh_ops->probe, NULL);
- }
-
- /*
- * Call platform post-initialization. Actually, It's good chance
- * to inform platform that EEH is ready to supply service if the
- * I/O cache stuff has been built up.
- */
- if (eeh_ops->post_init) {
- ret = eeh_ops->post_init();
- if (ret)
- return ret;
- }
+ eeh_probe_devices();
if (eeh_enabled())
pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
@@ -1064,7 +1055,7 @@ core_initcall_sync(eeh_init);
*/
void eeh_add_device_early(struct pci_dn *pdn)
{
- struct pci_controller *phb;
+ struct pci_controller *phb = pdn ? pdn->phb : NULL;
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
if (!edev)
@@ -1074,7 +1065,6 @@ void eeh_add_device_early(struct pci_dn *pdn)
return;
/* USB Bus children of PCI devices will not have BUID's */
- phb = edev->phb;
if (NULL == phb ||
(eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
return;
@@ -1753,10 +1743,6 @@ static int eeh_enable_dbgfs_set(void *data, u64 val)
else
eeh_add_flag(EEH_FORCE_DISABLED);
- /* Notify the backend */
- if (eeh_ops->post_init)
- eeh_ops->post_init();
-
return 0;
}
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index d6b2ca70d14d..a34e6912c15e 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -50,21 +50,16 @@
*/
struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
{
- struct pci_controller *phb = pdn->phb;
struct eeh_dev *edev;
/* Allocate EEH device */
edev = kzalloc(sizeof(*edev), GFP_KERNEL);
- if (!edev) {
- pr_warn("%s: out of memory\n",
- __func__);
+ if (!edev)
return NULL;
- }
/* Associate EEH device with OF node */
pdn->edev = edev;
edev->pdn = pdn;
- edev->phb = phb;
INIT_LIST_HEAD(&edev->list);
INIT_LIST_HEAD(&edev->rmv_list);
@@ -83,21 +78,3 @@ void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
/* EEH PE for PHB */
eeh_phb_pe_create(phb);
}
-
-/**
- * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
- *
- * Scan all the existing PHBs and create EEH devices for their OF
- * nodes and their children OF nodes
- */
-static int __init eeh_dev_phb_init(void)
-{
- struct pci_controller *phb, *tmp;
-
- list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
- eeh_dev_phb_init_dynamic(phb);
-
- return 0;
-}
-
-core_initcall(eeh_dev_phb_init);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index c405c79e50cd..4f71e4c9beb7 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -428,7 +428,7 @@ static void *eeh_add_virt_device(void *data, void *userdata)
if (!(edev->physfn)) {
pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
- __func__, edev->phb->global_number, pdn->busno,
+ __func__, pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
return NULL;
}
@@ -441,7 +441,7 @@ static void *eeh_add_virt_device(void *data, void *userdata)
}
#ifdef CONFIG_PPC_POWERNV
- pci_iov_add_virtfn(edev->physfn, pdn->vf_index, 0);
+ pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
#endif
return NULL;
}
@@ -499,7 +499,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
#ifdef CONFIG_PPC_POWERNV
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
- pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0);
+ pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
edev->pdev = NULL;
/*
@@ -623,7 +623,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
struct eeh_rmv_data *rmv_data)
{
struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
- struct timeval tstamp;
+ time64_t tstamp;
int cnt, rc;
struct eeh_dev *edev;
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index cc4b206f77e4..2d4956e97aa9 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -230,10 +230,15 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
* Bus/Device/Function number. The extra data referred by flag
* indicates which type of address should be used.
*/
+struct eeh_pe_get_flag {
+ int pe_no;
+ int config_addr;
+};
+
static void *__eeh_pe_get(void *data, void *flag)
{
struct eeh_pe *pe = (struct eeh_pe *)data;
- struct eeh_dev *edev = (struct eeh_dev *)flag;
+ struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag;
/* Unexpected PHB PE */
if (pe->type & EEH_PE_PHB)
@@ -244,17 +249,17 @@ static void *__eeh_pe_get(void *data, void *flag)
* have non-zero PE address
*/
if (eeh_has_flag(EEH_VALID_PE_ZERO)) {
- if (edev->pe_config_addr == pe->addr)
+ if (tmp->pe_no == pe->addr)
return pe;
} else {
- if (edev->pe_config_addr &&
- (edev->pe_config_addr == pe->addr))
+ if (tmp->pe_no &&
+ (tmp->pe_no == pe->addr))
return pe;
}
/* Try BDF address */
- if (edev->config_addr &&
- (edev->config_addr == pe->config_addr))
+ if (tmp->config_addr &&
+ (tmp->config_addr == pe->config_addr))
return pe;
return NULL;
@@ -262,7 +267,9 @@ static void *__eeh_pe_get(void *data, void *flag)
/**
* eeh_pe_get - Search PE based on the given address
- * @edev: EEH device
+ * @phb: PCI controller
+ * @pe_no: PE number
+ * @config_addr: Config address
*
* Search the corresponding PE based on the specified address which
* is included in the eeh device. The function is used to check if
@@ -271,12 +278,14 @@ static void *__eeh_pe_get(void *data, void *flag)
* which is composed of PCI bus/device/function number, or unified
* PE address.
*/
-struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
+ int pe_no, int config_addr)
{
- struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
+ struct eeh_pe *root = eeh_phb_pe_get(phb);
+ struct eeh_pe_get_flag tmp = { pe_no, config_addr };
struct eeh_pe *pe;
- pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
+ pe = eeh_pe_traverse(root, __eeh_pe_get, &tmp);
return pe;
}
@@ -330,11 +339,13 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
int eeh_add_to_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent;
+ struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+ int config_addr = (pdn->busno << 8) | (pdn->devfn);
/* Check if the PE number is valid */
if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n",
- __func__, edev->config_addr, edev->phb->global_number);
+ __func__, config_addr, pdn->phb->global_number);
return -EINVAL;
}
@@ -344,7 +355,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* PE should be composed of PCI bus and its subordinate
* components.
*/
- pe = eeh_pe_get(edev);
+ pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
if (pe && !(pe->type & EEH_PE_INVALID)) {
/* Mark the PE as type of PCI bus */
pe->type = EEH_PE_BUS;
@@ -353,11 +364,11 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Put the edev to PE */
list_add_tail(&edev->list, &pe->edevs);
pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
- edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF),
- pe->addr);
+ pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn),
+ pe->addr);
return 0;
} else if (pe && (pe->type & EEH_PE_INVALID)) {
list_add_tail(&edev->list, &pe->edevs);
@@ -376,25 +387,25 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device "
"PE#%x, Parent PE#%x\n",
- edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF),
- pe->addr, pe->parent->addr);
+ pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn),
+ pe->addr, pe->parent->addr);
return 0;
}
/* Create a new EEH PE */
if (edev->physfn)
- pe = eeh_pe_alloc(edev->phb, EEH_PE_VF);
+ pe = eeh_pe_alloc(pdn->phb, EEH_PE_VF);
else
- pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
+ pe = eeh_pe_alloc(pdn->phb, EEH_PE_DEVICE);
if (!pe) {
pr_err("%s: out of memory!\n", __func__);
return -ENOMEM;
}
pe->addr = edev->pe_config_addr;
- pe->config_addr = edev->config_addr;
+ pe->config_addr = config_addr;
/*
* Put the new EEH PE into hierarchy tree. If the parent
@@ -404,10 +415,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
*/
parent = eeh_pe_get_parent(edev);
if (!parent) {
- parent = eeh_phb_pe_get(edev->phb);
+ parent = eeh_phb_pe_get(pdn->phb);
if (!parent) {
pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
- __func__, edev->phb->global_number);
+ __func__, pdn->phb->global_number);
edev->pe = NULL;
kfree(pe);
return -EEXIST;
@@ -424,10 +435,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
edev->pe = pe;
pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
"Device PE#%x, Parent PE#%x\n",
- edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF),
+ pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn),
pe->addr, pe->parent->addr);
return 0;
@@ -446,13 +457,14 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent, *child;
int cnt;
+ struct pci_dn *pdn = eeh_dev_to_pdn(edev);
if (!edev->pe) {
pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
- __func__, edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF));
+ __func__, pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn));
return -EEXIST;
}
@@ -514,16 +526,16 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
*/
void eeh_pe_update_time_stamp(struct eeh_pe *pe)
{
- struct timeval tstamp;
+ time64_t tstamp;
if (!pe) return;
if (pe->freeze_count <= 0) {
pe->freeze_count = 0;
- do_gettimeofday(&pe->tstamp);
+ pe->tstamp = ktime_get_seconds();
} else {
- do_gettimeofday(&tstamp);
- if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) {
+ tstamp = ktime_get_seconds();
+ if (tstamp - pe->tstamp > 3600) {
pe->tstamp = tstamp;
pe->freeze_count = 0;
}
@@ -712,10 +724,10 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
return;
pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
- __func__, edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF));
+ __func__, pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn));
/* Check slot status */
cap = edev->pcie_cap;
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 1ceecdda810b..797549289798 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -51,7 +51,6 @@ static ssize_t eeh_show_##_name(struct device *dev, \
static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
-EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x");
EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
static ssize_t eeh_pe_state_show(struct device *dev,
@@ -103,7 +102,6 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
return;
rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
- rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state);
@@ -128,7 +126,6 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
}
device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
- device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 8587059ad848..e780e1fbf6c2 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -43,6 +43,13 @@
#define LOAD_MSR_KERNEL(r, x) li r,(x)
#endif
+/*
+ * Align to 4k in order to ensure that all functions modifying srr0/srr1
+ * fit into one page in order to not encounter a TLB miss between the
+ * modification of srr0/srr1 and the associated rfi.
+ */
+ .align 12
+
#ifdef CONFIG_BOOKE
.globl mcheck_transfer_to_handler
mcheck_transfer_to_handler:
@@ -586,6 +593,10 @@ ppc_swapcontext:
handle_page_fault:
stw r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_6xx
+ andis. r0,r5,DSISR_DABRMATCH@h
+ bne- handle_dabr_fault
+#endif
bl do_page_fault
cmpwi r3,0
beq+ ret_from_except
@@ -599,6 +610,17 @@ handle_page_fault:
bl bad_page_fault
b ret_from_except_full
+#ifdef CONFIG_6xx
+ /* We have a data breakpoint exception - handle it */
+handle_dabr_fault:
+ SAVE_NVGPRS(r1)
+ lwz r0,_TRAP(r1)
+ clrrwi r0,r0,1
+ stw r0,_TRAP(r1)
+ bl do_break
+ b ret_from_except_full
+#endif
+
/*
* This routine switches between two different tasks. The process
* state of one is saved on its kernel stack. Then the state
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 49d8422767b4..3320bcac7192 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -223,17 +223,27 @@ system_call_exit:
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne- .Lsyscall_exit_work
- /* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */
- li r7,MSR_FP
+ andi. r0,r8,MSR_FP
+ beq 2f
#ifdef CONFIG_ALTIVEC
- oris r7,r7,MSR_VEC@h
+ andis. r0,r8,MSR_VEC@h
+ bne 3f
#endif
- and r0,r8,r7
- cmpd r0,r7
- bne .Lsyscall_restore_math
-.Lsyscall_restore_math_cont:
+2: addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_BOOK3S
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Restore RI */
+#endif
+ bl restore_math
+#ifdef CONFIG_PPC_BOOK3S
+ li r11,0
+ mtmsrd r11,1
+#endif
+ ld r8,_MSR(r1)
+ ld r3,RESULT(r1)
+ li r11,-MAX_ERRNO
- cmpld r3,r11
+3: cmpld r3,r11
ld r5,_CCR(r1)
bge- .Lsyscall_error
.Lsyscall_error_cont:
@@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r5,_CCR(r1)
b .Lsyscall_error_cont
-.Lsyscall_restore_math:
- /*
- * Some initial tests from restore_math to avoid the heavyweight
- * C code entry and MSR manipulations.
- */
- LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK)
- and. r0,r0,r8
- bne 1f
-
- ld r7,PACACURRENT(r13)
- lbz r0,THREAD+THREAD_LOAD_FP(r7)
-#ifdef CONFIG_ALTIVEC
- lbz r6,THREAD+THREAD_LOAD_VEC(r7)
- add r0,r0,r6
-#endif
- cmpdi r0,0
- beq .Lsyscall_restore_math_cont
-
-1: addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S
- li r10,MSR_RI
- mtmsrd r10,1 /* Restore RI */
-#endif
- bl restore_math
-#ifdef CONFIG_PPC_BOOK3S
- li r11,0
- mtmsrd r11,1
-#endif
- /* Restore volatiles, reload MSR from updated one */
- ld r8,_MSR(r1)
- ld r3,RESULT(r1)
- li r11,-MAX_ERRNO
- b .Lsyscall_restore_math_cont
-
/* Traced system call support */
.Lsyscall_dotrace:
bl save_nvgprs
@@ -563,7 +539,7 @@ _GLOBAL(_switch)
std r6,PACACURRENT(r13) /* Set new 'current' */
ld r8,KSP(r4) /* new stack pointer */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_MMU_FTR_SECTION
b 2f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
@@ -612,7 +588,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
slbmte r7,r0
isync
2:
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
CURRENT_THREAD_INFO(r7, r8) /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
@@ -990,16 +966,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
#ifdef CONFIG_PPC_BOOK3E
cmpwi cr0,r3,0x280
#else
- BEGIN_FTR_SECTION
- cmpwi cr0,r3,0xe80
- FTR_SECTION_ELSE
- cmpwi cr0,r3,0xa00
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+ cmpwi cr0,r3,0xa00
#endif /* CONFIG_PPC_BOOK3E */
bne 1f
addi r3,r1,STACK_FRAME_OVERHEAD;
bl doorbell_exception
- b ret_from_except
#endif /* CONFIG_PPC_DOORBELL */
1: b ret_from_except /* What else to do here ? */
@@ -1133,7 +1104,7 @@ _ASM_NOKPROBE_SYMBOL(__enter_rtas)
_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
.align 3
-1: .llong rtas_restore_regs
+1: .8byte rtas_restore_regs
rtas_restore_regs:
/* relocation is on at this point */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9029afd1fa2a..e441b469dc8f 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* This file contains the 64-bit "server" PowerPC variant
* of the low level exception handling including exception
@@ -113,6 +114,7 @@ EXC_VIRT_NONE(0x4000, 0x100)
cmpwi cr3,r10,2 ; \
BRANCH_TO_C000(r10, system_reset_idle_common) ; \
1: \
+ KVMTEST_PR(n) ; \
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#else
#define IDLETEST NOTEST
@@ -129,6 +131,7 @@ EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
EXC_REAL_END(system_reset, 0x100, 0x100)
EXC_VIRT_NONE(0x4100, 0x100)
+TRAMP_KVM(PACA_EXNMI, 0x100)
#ifdef CONFIG_PPC_P7_NAP
EXC_COMMON_BEGIN(system_reset_idle_common)
@@ -232,7 +235,7 @@ BEGIN_FTR_SECTION
addi r10,r10,1 /* increment paca->in_mce */
sth r10,PACA_IN_MCE(r13)
/* Limit nested MCE to level 4 to avoid stack overflow */
- cmpwi r10,4
+ cmpwi r10,MAX_MCE_DEPTH
bgt 2f /* Check if we hit limit of 4 */
std r11,GPR1(r1) /* Save r1 on the stack. */
std r11,0(r1) /* make stack chain pointer */
@@ -541,7 +544,7 @@ EXC_COMMON_BEGIN(instruction_access_common)
RECONCILE_IRQ_STATE(r10, r11)
ld r12,_MSR(r1)
ld r3,_NIP(r1)
- andis. r4,r12,0x5820
+ andis. r4,r12,DSISR_SRR1_MATCH_64S@h
li r5,0x400
std r3,_DAR(r1)
std r4,_DSISR(r1)
@@ -605,7 +608,7 @@ EXC_COMMON_BEGIN(slb_miss_common)
cmpdi cr5,r11,MSR_RI
crset 4*cr0+eq
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_MMU_FTR_SECTION
bl slb_allocate
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
@@ -734,7 +737,29 @@ EXC_REAL(program_check, 0x700, 0x100)
EXC_VIRT(program_check, 0x4700, 0x100, 0x700)
TRAMP_KVM(PACA_EXGEN, 0x700)
EXC_COMMON_BEGIN(program_check_common)
- EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
+ /*
+ * It's possible to receive a TM Bad Thing type program check with
+ * userspace register values (in particular r1), but with SRR1 reporting
+ * that we came from the kernel. Normally that would confuse the bad
+ * stack logic, and we would report a bad kernel stack pointer. Instead
+ * we switch to the emergency stack if we're taking a TM Bad Thing from
+ * the kernel.
+ */
+ li r10,MSR_PR /* Build a mask of MSR_PR .. */
+ oris r10,r10,0x200000@h /* .. and SRR1_PROGTM */
+ and r10,r10,r12 /* Mask SRR1 with that. */
+ srdi r10,r10,8 /* Shift it so we can compare */
+ cmpldi r10,(0x200000 >> 8) /* .. with an immediate. */
+ bne 1f /* If != go to normal path. */
+
+ /* SRR1 had PR=0 and SRR1_PROGTM=1, so use the emergency stack */
+ andi. r10,r12,MSR_PR; /* Set CR0 correctly for label */
+ /* 3 in EXCEPTION_PROLOG_COMMON */
+ mr r10,r1 /* Save r1 */
+ ld r1,PACAEMERGSP(r13) /* Use emergency stack */
+ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ b 3f /* Jump into the macro !! */
+1: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
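The mask-and-compare sequence added above can be restated in C as a minimal sketch (the MSR_PR and 0x200000 SRR1_PROGTM constants are taken from the assembly; needs_emergency_stack() is a made-up helper name):

#include <stdio.h>
#include <stdint.h>

#define MSR_PR		0x4000UL	/* problem state (userspace) bit in SRR1 */
#define SRR1_PROGTM	0x200000UL	/* "TM Bad Thing" program check reason */

/* Use the emergency stack only for a TM Bad Thing taken from the kernel:
 * PROGTM set and PR clear. */
static int needs_emergency_stack(uint64_t srr1)
{
	return (srr1 & (MSR_PR | SRR1_PROGTM)) == SRR1_PROGTM;
}

int main(void)
{
	printf("%d\n", needs_emergency_stack(SRR1_PROGTM));		/* kernel TM Bad Thing -> 1 */
	printf("%d\n", needs_emergency_stack(SRR1_PROGTM | MSR_PR));	/* from userspace -> 0 */
	printf("%d\n", needs_emergency_stack(0));			/* unrelated program check -> 0 */
	return 0;
}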
@@ -865,12 +890,6 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
#define LOAD_SYSCALL_HANDLER(reg) \
__LOAD_HANDLER(reg, system_call_common)
-#define SYSCALL_FASTENDIAN_TEST \
-BEGIN_FTR_SECTION \
- cmpdi r0,0x1ebe ; \
- beq- 1f ; \
-END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
-
/*
* After SYSCALL_KVMTEST, we reach here with PACA in r13, r13 in r9,
* and HMT_MEDIUM.
@@ -885,6 +904,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
rfid ; \
b . ; /* prevent speculative execution */
+#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
+#define SYSCALL_FASTENDIAN_TEST \
+BEGIN_FTR_SECTION \
+ cmpdi r0,0x1ebe ; \
+ beq- 1f ; \
+END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
+
#define SYSCALL_FASTENDIAN \
/* Fast LE/BE switch system call */ \
1: mfspr r12,SPRN_SRR1 ; \
@@ -893,6 +919,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
mr r13,r9 ; \
rfid ; /* return to userspace */ \
b . ; /* prevent speculative execution */
+#else
+#define SYSCALL_FASTENDIAN_TEST
+#define SYSCALL_FASTENDIAN
+#endif /* CONFIG_PPC_FAST_ENDIAN_SWITCH */
#if defined(CONFIG_RELOCATABLE)
/*
@@ -1010,6 +1040,8 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
EXCEPTION_PROLOG_COMMON_3(0xe60)
addi r3,r1,STACK_FRAME_OVERHEAD
BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */
+ cmpdi cr0,r3,0
+
/* Windup the stack. */
/* Move original HSRR0 and HSRR1 into the respective regs */
ld r9,_MSR(r1)
@@ -1026,10 +1058,15 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
REST_8GPRS(2, r1)
REST_GPR(10, r1)
ld r11,_CCR(r1)
+ REST_2GPRS(12, r1)
+ bne 1f
mtcr r11
REST_GPR(11, r1)
- REST_2GPRS(12, r1)
- /* restore original r1. */
+ ld r1,GPR1(r1)
+ hrfid
+
+1: mtcr r11
+ REST_GPR(11, r1)
ld r1,GPR1(r1)
/*
@@ -1042,8 +1079,9 @@ hmi_exception_after_realmode:
EXCEPTION_PROLOG_0(PACA_EXGEN)
b tramp_real_hmi_exception
-EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception)
-
+EXC_COMMON_BEGIN(hmi_exception_common)
+EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
+ ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20)
EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80)
@@ -1314,7 +1352,7 @@ EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
#endif
-#if defined(CONFIG_HARDLOCKUP_DETECTOR) && defined(CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH)
+#ifdef CONFIG_PPC_WATCHDOG
#define MASKED_DEC_HANDLER_LABEL 3f
@@ -1325,20 +1363,28 @@ EXC_VIRT_NONE(0x5800, 0x100)
std r10,PACA_EXGEN+EX_R13(r13); \
EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H)
+/*
+ * Branch to soft_nmi_interrupt using the emergency stack. The emergency
+ * stack is one that is usable by maskable interrupts so long as MSR_EE
+ * remains off. It is used for recovery when something has corrupted the
+ * normal kernel stack, for example. The "soft NMI" must not use the process
+ * stack because we want irq disabled sections to avoid touching the stack
+ * at all (other than PMU interrupts), so use the emergency stack for this,
+ * and run it entirely with interrupts hard disabled.
+ */
EXC_COMMON_BEGIN(soft_nmi_common)
mr r10,r1
ld r1,PACAEMERGSP(r13)
- ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900,
system_reset, soft_nmi_interrupt,
ADD_NVGPRS;ADD_RECONCILE)
b ret_from_except
-#else
+#else /* CONFIG_PPC_WATCHDOG */
#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
#define MASKED_DEC_HANDLER(_H)
-#endif
+#endif /* CONFIG_PPC_WATCHDOG */
/*
* An interrupt came in while soft-disabled. We set paca->irq_happened, then:
@@ -1362,19 +1408,16 @@ masked_##_H##interrupt: \
ori r10,r10,0xffff; \
mtspr SPRN_DEC,r10; \
b MASKED_DEC_HANDLER_LABEL; \
-1: cmpwi r10,PACA_IRQ_DBELL; \
- beq 2f; \
- cmpwi r10,PACA_IRQ_HMI; \
- beq 2f; \
+1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \
+ bne 2f; \
mfspr r10,SPRN_##_H##SRR1; \
- rldicl r10,r10,48,1; /* clear MSR_EE */ \
- rotldi r10,r10,16; \
+ xori r10,r10,MSR_EE; /* clear MSR_EE */ \
mtspr SPRN_##_H##SRR1,r10; \
2: mtcrf 0x80,r9; \
ld r9,PACA_EXGEN+EX_R9(r13); \
ld r10,PACA_EXGEN+EX_R10(r13); \
ld r11,PACA_EXGEN+EX_R11(r13); \
- GET_SCRATCH0(r13); \
+ /* returns to kernel where r13 must be set up, so don't restore it */ \
##_H##rfid; \
b .; \
MASKED_DEC_HANDLER(_H)
@@ -1477,8 +1520,10 @@ USE_TEXT_SECTION()
*/
.balign IFETCH_ALIGN_BYTES
do_hash_page:
-#ifdef CONFIG_PPC_STD_MMU_64
- andis. r0,r4,0xa450 /* weird error? */
+#ifdef CONFIG_PPC_BOOK3S_64
+ lis r0,(DSISR_BAD_FAULT_64S|DSISR_DABRMATCH)@h
+ ori r0,r0,DSISR_BAD_FAULT_64S@l
+ and. r0,r4,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
CURRENT_THREAD_INFO(r11, r1)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
@@ -1506,7 +1551,7 @@ do_hash_page:
/* Reload DSISR into r4 for the DABR check below */
ld r4,_DSISR(r1)
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
@@ -1535,7 +1580,7 @@ handle_dabr_fault:
12: b ret_from_except_lite
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/* We have a page fault that hash_page could handle but HV refused
* the PTE insertion
*/
@@ -1661,25 +1706,27 @@ _GLOBAL(__replay_interrupt)
* we don't give a damn about, so we don't bother storing them.
*/
mfmsr r12
- LOAD_REG_ADDR(r11, 1f)
+ LOAD_REG_ADDR(r11, replay_interrupt_return)
mfcr r9
ori r12,r12,MSR_EE
cmpwi r3,0x900
beq decrementer_common
cmpwi r3,0x500
+BEGIN_FTR_SECTION
+ beq h_virt_irq_common
+FTR_SECTION_ELSE
beq hardware_interrupt_common
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
- cmpwi r3,0xe80
+ cmpwi r3,0xa00
beq h_doorbell_common_msgclr
- cmpwi r3,0xea0
- beq h_virt_irq_common
cmpwi r3,0xe60
beq hmi_exception_common
FTR_SECTION_ELSE
cmpwi r3,0xa00
beq doorbell_super_common_msgclr
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-1:
+replay_interrupt_return:
blr
_ASM_NOKPROBE_SYMBOL(__replay_interrupt)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index dc0c49cfd90a..04ea5c04fd24 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -125,6 +125,13 @@ int is_fadump_boot_memory_area(u64 addr, ulong size)
return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
}
+int should_fadump_crash(void)
+{
+ if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
+ return 0;
+ return 1;
+}
+
int is_fadump_active(void)
{
return fw_dump.dump_active;
@@ -518,7 +525,7 @@ void crash_fadump(struct pt_regs *regs, const char *str)
struct fadump_crash_info_header *fdh = NULL;
int old_cpu, this_cpu;
- if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
+ if (!should_fadump_crash())
return;
/*
@@ -1263,10 +1270,15 @@ static ssize_t fadump_release_memory_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
+ int input = -1;
+
if (!fw_dump.dump_active)
return -EPERM;
- if (buf[0] == '1') {
+ if (kstrtoint(buf, 0, &input))
+ return -EINVAL;
+
+ if (input == 1) {
/*
* Take away the '/proc/vmcore'. We are releasing the dump
* memory, hence it will not be valid anymore.
@@ -1300,21 +1312,25 @@ static ssize_t fadump_register_store(struct kobject *kobj,
const char *buf, size_t count)
{
int ret = 0;
+ int input = -1;
if (!fw_dump.fadump_enabled || fdm_active)
return -EPERM;
+ if (kstrtoint(buf, 0, &input))
+ return -EINVAL;
+
mutex_lock(&fadump_mutex);
- switch (buf[0]) {
- case '0':
+ switch (input) {
+ case 0:
if (fw_dump.dump_registered == 0) {
goto unlock_out;
}
/* Un-register Firmware-assisted dump */
fadump_unregister_dump(&fdm);
break;
- case '1':
+ case 1:
if (fw_dump.dump_registered == 1) {
ret = -EEXIST;
goto unlock_out;
@@ -1446,6 +1462,25 @@ static void fadump_init_files(void)
return;
}
+static int fadump_panic_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ /*
+ * If firmware-assisted dump has been registered then trigger
+ * firmware-assisted dump and let firmware handle everything
+ * else. If this returns, then fadump was not registered, so
+ * go through the rest of the panic path.
+ */
+ crash_fadump(NULL, ptr);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block fadump_panic_block = {
+ .notifier_call = fadump_panic_event,
+ .priority = INT_MIN /* may not return; must be done last */
+};
+
/*
* Prepare for firmware-assisted dump.
*/
@@ -1478,6 +1513,9 @@ int __init setup_fadump(void)
init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
fadump_init_files();
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &fadump_panic_block);
+
return 1;
}
subsys_initcall(setup_fadump);
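The panic hook added above follows the standard notifier pattern: register on panic_notifier_list with the lowest priority so it runs after every other notifier, since crash_fadump() may not return. A minimal out-of-tree module showing the same registration (illustrative only; the demo_* names are made up, and on kernels newer than this patch the list is declared in <linux/panic_notifier.h> rather than <linux/kernel.h>):

#include <linux/module.h>
#include <linux/kernel.h>	/* panic_notifier_list on kernels of this era */
#include <linux/notifier.h>

static int demo_panic_event(struct notifier_block *nb,
			    unsigned long event, void *ptr)
{
	/* ptr is the panic message; the real fadump hook calls crash_fadump() */
	pr_emerg("demo: panic notifier invoked (%s)\n", (char *)ptr);
	return NOTIFY_DONE;
}

static struct notifier_block demo_panic_block = {
	.notifier_call = demo_panic_event,
	.priority = INT_MIN,	/* run after every other panic notifier */
};

static int __init demo_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &demo_panic_block);
	return 0;
}

static void __exit demo_exit(void)
{
	atomic_notifier_chain_unregister(&panic_notifier_list, &demo_panic_block);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");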
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index 83dd0f6776b3..ea065282b303 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* 1. Find the index of the entry we're executing in */
bl invstr /* Find our address */
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index e22734278458..29b2fed93289 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -388,7 +388,7 @@ DataAccess:
EXCEPTION_PROLOG
mfspr r10,SPRN_DSISR
stw r10,_DSISR(r11)
- andis. r0,r10,0xa470 /* weird error? */
+ andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h
bne 1f /* if not, try to put a PTE */
mfspr r4,SPRN_DAR /* into the hash table */
rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
@@ -403,13 +403,13 @@ DataAccess:
DO_KVM 0x400
InstructionAccess:
EXCEPTION_PROLOG
- andis. r0,r9,0x4000 /* no pte found? */
+ andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */
beq 1f /* if so, try to put a PTE */
li r3,0 /* into the hash table */
mr r4,r12 /* SRR0 is fault address */
bl hash_page
1: mr r4,r12
- mr r5,r9
+ andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
EXC_XFER_LITE(0x400, handle_page_fault)
/* External interrupt */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 0ddc602b33a4..aa71a90f5222 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -55,12 +55,18 @@
*
* For pSeries or server processors:
* 1. The MMU is off & open firmware is running in real mode.
- * 2. The kernel is entered at __start
+ * 2. The primary CPU enters at __start.
+ * 3. If the RTAS supports "query-cpu-stopped-state", then secondary
+ * CPUs will enter as directed by "start-cpu" RTAS call, which is
+ * generic_secondary_smp_init, with PIR in r3.
+ * 4. Else the secondary CPUs will enter at secondary_hold (0x60) as
+ * directed by the "start-cpu" RTAS call, with PIR in r3.
* -or- For OPAL entry:
- * 1. The MMU is off, processor in HV mode, primary CPU enters at 0
- * with device-tree in gpr3. We also get OPAL base in r8 and
- * entry in r9 for debugging purposes
- * 2. Secondary processors enter at 0x60 with PIR in gpr3
+ * 1. The MMU is off, processor in HV mode.
+ * 2. The primary CPU enters at 0 with device-tree in r3, OPAL base
+ * in r8, and entry in r9 for debugging purposes.
+ * 3. Secondary CPUs enter as directed by OPAL_START_CPU call, which
+ * is at generic_secondary_smp_init, with PIR in r3.
*
* For Book3E processors:
* 1. The MMU is on running in AS0 in a state defined in ePAPR
@@ -92,13 +98,13 @@ END_FTR_SECTION(0, 1)
.balign 8
.globl __secondary_hold_spinloop
__secondary_hold_spinloop:
- .llong 0x0
+ .8byte 0x0
/* Secondary processors write this value with their cpu # */
/* after they enter the spin loop immediately below. */
.globl __secondary_hold_acknowledge
__secondary_hold_acknowledge:
- .llong 0x0
+ .8byte 0x0
#ifdef CONFIG_RELOCATABLE
/* This flag is set to 1 by a loader if the kernel should run
@@ -650,7 +656,7 @@ __after_prom_start:
bctr
.balign 8
-p_end: .llong _end - copy_to_here
+p_end: .8byte _end - copy_to_here
4:
/*
@@ -892,7 +898,7 @@ _GLOBAL(relative_toc)
blr
.balign 8
-p_toc: .llong __toc_start + 0x8000 - 0b
+p_toc: .8byte __toc_start + 0x8000 - 0b
/*
* This is where the main kernel code starts.
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index c032fe8c2d26..4fee00d414e8 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -50,18 +50,20 @@
mtspr spr, reg
#endif
-/* Macro to test if an address is a kernel address */
#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
-#define IS_KERNEL(tmp, addr) \
- andis. tmp, addr, 0x8000 /* Address >= 0x80000000 */
-#define BRANCH_UNLESS_KERNEL(label) beq label
-#else
-#define IS_KERNEL(tmp, addr) \
- rlwinm tmp, addr, 16, 16, 31; \
- cmpli cr0, tmp, PAGE_OFFSET >> 16
-#define BRANCH_UNLESS_KERNEL(label) blt label
+/* By simply checking Address >= 0x80000000, we know if it's a kernel address */
+#define SIMPLE_KERNEL_ADDRESS 1
#endif
+/*
+ * We need an ITLB miss handler for kernel addresses if:
+ * - Either we have modules
+ * - Or we have not pinned the first 8M
+ */
+#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) || \
+ defined(CONFIG_DEBUG_PAGEALLOC)
+#define ITLB_MISS_KERNEL 1
+#endif
/*
* Value for the bits that have fixed value in RPN entries.
@@ -123,7 +125,6 @@ turn_on_mmu:
lis r0,start_here@h
ori r0,r0,start_here@l
mtspr SPRN_SRR0,r0
- SYNC
rfi /* enables MMU */
/*
@@ -170,7 +171,7 @@ turn_on_mmu:
stw r1,0(r11); \
tovirt(r1,r11); /* set new kernel sp */ \
li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
- MTMSRD(r10); /* (except for mach check in rtas) */ \
+ mtmsr r10; \
stw r0,GPR0(r11); \
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
@@ -300,7 +301,7 @@ SystemCall:
/* On the MPC8xx, this is a software emulation interrupt. It occurs
* for all unimplemented and illegal instructions.
*/
- EXCEPTION(0x1000, SoftEmu, SoftwareEmulation, EXC_XFER_STD)
+ EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD)
. = 0x1100
/*
@@ -325,7 +326,7 @@ SystemCall:
#endif
InstructionTLBMiss:
-#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mtspr SPRN_SPRG_SCRATCH2, r3
#endif
EXCEPTION_PROLOG_0
@@ -343,15 +344,32 @@ InstructionTLBMiss:
INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
/* Only modules will cause ITLB Misses as we always
* pin the first 8MB of kernel memory */
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mfcr r3
#endif
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
- IS_KERNEL(r11, r10)
+#ifdef ITLB_MISS_KERNEL
+#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
+ andis. r11, r10, 0x8000 /* Address >= 0x80000000 */
+#else
+ rlwinm r11, r10, 16, 0xfff8
+ cmpli cr0, r11, PAGE_OFFSET@h
+#ifndef CONFIG_PIN_TLB_TEXT
+ /* It is assumed that kernel code fits into the first 8M page */
+_ENTRY(ITLBMiss_cmp)
+ cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h
+#endif
+#endif
#endif
mfspr r11, SPRN_M_TW /* Get level 1 table */
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
- BRANCH_UNLESS_KERNEL(3f)
+#ifdef ITLB_MISS_KERNEL
+#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
+ beq+ 3f
+#else
+ blt+ 3f
+#endif
+#ifndef CONFIG_PIN_TLB_TEXT
+ blt cr7, ITLBMissLinear
+#endif
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
3:
#endif
@@ -369,7 +387,7 @@ InstructionTLBMiss:
rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
4:
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mtcr r3
#endif
/* Insert the APG into the TWC from the Linux PTE. */
@@ -400,7 +418,7 @@ InstructionTLBMiss:
MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */
/* Restore registers */
-#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mfspr r3, SPRN_SPRG_SCRATCH2
#endif
EXCEPTION_EPILOG_0
@@ -447,23 +465,23 @@ DataStoreTLBMiss:
* kernel page tables.
*/
mfspr r10, SPRN_MD_EPN
- rlwinm r10, r10, 16, 0xfff8
- cmpli cr0, r10, PAGE_OFFSET@h
+ rlwinm r11, r10, 16, 0xfff8
+ cmpli cr0, r11, PAGE_OFFSET@h
mfspr r11, SPRN_M_TW /* Get level 1 table */
blt+ 3f
+ rlwinm r11, r10, 16, 0xfff8
#ifndef CONFIG_PIN_TLB_IMMR
- cmpli cr0, r10, VIRT_IMMR_BASE@h
+ cmpli cr0, r11, VIRT_IMMR_BASE@h
#endif
_ENTRY(DTLBMiss_cmp)
- cmpli cr7, r10, (PAGE_OFFSET + 0x1800000)@h
- lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
+ cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
#ifndef CONFIG_PIN_TLB_IMMR
_ENTRY(DTLBMiss_jmp)
beq- DTLBMissIMMR
#endif
blt cr7, DTLBMissLinear
+ lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
3:
- mfspr r10, SPRN_MD_EPN
/* Insert level 1 index */
rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
@@ -569,8 +587,8 @@ _ENTRY(DTLBMiss_jmp)
InstructionTLBError:
EXCEPTION_PROLOG
mr r4,r12
- mr r5,r9
- andis. r10,r5,0x4000
+ andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
+ andis. r10,r9,SRR1_ISI_NOPT@h
beq+ 1f
tlbie r4
itlbie:
@@ -595,7 +613,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
mfspr r4,SPRN_DAR
- andis. r10,r5,0x4000
+ andis. r10,r5,DSISR_NOHPTE@h
beq+ 1f
tlbie r4
dtlbie:
@@ -684,7 +702,7 @@ DTLBMissLinear:
/* Set 8M byte page and mark it valid */
li r11, MD_PS8MEG | MD_SVALID
MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
- rlwinm r10, r10, 16, 0x0f800000 /* 8xx supports max 256Mb RAM */
+ rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
_PAGE_PRESENT
MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */
@@ -695,6 +713,22 @@ DTLBMissLinear:
EXCEPTION_EPILOG_0
rfi
+#ifndef CONFIG_PIN_TLB_TEXT
+ITLBMissLinear:
+ mtcr r3
+ /* Set 8M byte page and mark it valid */
+ li r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC
+ MTSPR_CPU6(SPRN_MI_TWC, r11, r3)
+ rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
+ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+ _PAGE_PRESENT
+ MTSPR_CPU6(SPRN_MI_RPN, r10, r11) /* Update TLB entry */
+
+ mfspr r3, SPRN_SPRG_SCRATCH2
+ EXCEPTION_EPILOG_0
+ rfi
+#endif
+
/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
* by decoding the registers used by the dcbx instruction and adding them.
* DAR is set to the calculated address.
@@ -705,9 +739,10 @@ FixupDAR:/* Entry point for dcbx workaround. */
mtspr SPRN_SPRG_SCRATCH2, r10
/* fetch instruction from memory. */
mfspr r10, SPRN_SRR0
- IS_KERNEL(r11, r10)
+ rlwinm r11, r10, 16, 0xfff8
+ cmpli cr0, r11, PAGE_OFFSET@h
mfspr r11, SPRN_M_TW /* Get level 1 table */
- BRANCH_UNLESS_KERNEL(3f)
+ blt+ 3f
rlwinm r11, r10, 16, 0xfff8
_ENTRY(FixupDAR_cmp)
cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
@@ -915,10 +950,8 @@ start_here:
rfi
/* Load up the kernel context */
2:
- SYNC /* Force all PTE updates to finish */
tlbia /* Clear all TLB entries */
sync /* wait for tlbia/tlbie to finish */
- TLBSYNC /* ... on all CPUs */
/* set up the PTE pointers for the Abatron bdiGDB.
*/
@@ -955,15 +988,14 @@ initial_mmu:
mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */
tlbia /* Invalidate all TLB entries */
-/* Always pin the first 8 MB ITLB to prevent ITLB
- misses while mucking around with SRR0/SRR1 in asm
-*/
+#ifdef CONFIG_PIN_TLB_TEXT
lis r8, MI_RSV4I@h
ori r8, r8, 0x1c00
mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
+#endif
-#ifdef CONFIG_PIN_TLB
+#ifdef CONFIG_PIN_TLB_DATA
oris r10, r10, MD_RSV4I@h
mtspr SPRN_MD_CTR, r10 /* Set data TLB control */
#endif
@@ -989,6 +1021,7 @@ initial_mmu:
* internal registers (among other things).
*/
#ifdef CONFIG_PIN_TLB_IMMR
+ oris r10, r10, MD_RSV4I@h
ori r10, r10, 0x1c00
mtspr SPRN_MD_CTR, r10
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index a620203f7de3..d0862a100d29 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __HEAD_BOOKE_H__
#define __HEAD_BOOKE_H__
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 516ebef905c0..01e1c1997893 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -85,7 +85,66 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
std r3,_WORT(r1)
mfspr r3,SPRN_WORC
std r3,_WORC(r1)
+/*
+ * On POWER9, there are idle states such as stop4, invoked via cpuidle,
+ * that lose hypervisor resources. In such cases, we need to save
+ * additional SPRs before entering those idle states so that they can
+ * be restored to their older values on wakeup from the idle state.
+ *
+ * On POWER8, the only such deep idle state is winkle which is used
+ * only in the context of CPU-Hotplug, where these additional SPRs are
+ * reinitialized to a sane value. Hence there is no need to save/restore
+ * these SPRs.
+ */
+BEGIN_FTR_SECTION
+ blr
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+power9_save_additional_sprs:
+ mfspr r3, SPRN_PID
+ mfspr r4, SPRN_LDBAR
+ std r3, STOP_PID(r13)
+ std r4, STOP_LDBAR(r13)
+
+ mfspr r3, SPRN_FSCR
+ mfspr r4, SPRN_HFSCR
+ std r3, STOP_FSCR(r13)
+ std r4, STOP_HFSCR(r13)
+
+ mfspr r3, SPRN_MMCRA
+ mfspr r4, SPRN_MMCR0
+ std r3, STOP_MMCRA(r13)
+ std r4, _MMCR0(r1)
+
+ mfspr r3, SPRN_MMCR1
+ mfspr r4, SPRN_MMCR2
+ std r3, STOP_MMCR1(r13)
+ std r4, STOP_MMCR2(r13)
+ blr
+
+power9_restore_additional_sprs:
+ ld r3,_LPCR(r1)
+ ld r4, STOP_PID(r13)
+ mtspr SPRN_LPCR,r3
+ mtspr SPRN_PID, r4
+
+ ld r3, STOP_LDBAR(r13)
+ ld r4, STOP_FSCR(r13)
+ mtspr SPRN_LDBAR, r3
+ mtspr SPRN_FSCR, r4
+
+ ld r3, STOP_HFSCR(r13)
+ ld r4, STOP_MMCRA(r13)
+ mtspr SPRN_HFSCR, r3
+ mtspr SPRN_MMCRA, r4
+
+ ld r3, _MMCR0(r1)
+ ld r4, STOP_MMCR1(r13)
+ mtspr SPRN_MMCR0, r3
+ mtspr SPRN_MMCR1, r4
+
+ ld r3, STOP_MMCR2(r13)
+ mtspr SPRN_MMCR2, r3
blr
/*
@@ -141,7 +200,16 @@ pnv_powersave_common:
std r5,_CCR(r1)
std r1,PACAR1(r13)
+BEGIN_FTR_SECTION
/*
+ * POWER9 does not require real mode to stop, and presently does not
+ * set hwthread_state for KVM (threads don't share MMU context), so
+ * we can remain in virtual mode for this.
+ */
+ bctr
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ /*
+ * POWER8
* Go to real mode to do the nap, as required by the architecture.
* Also, we need to be in real mode before setting hwthread_state,
* because as soon as we do that, another thread can switch
@@ -151,6 +219,20 @@ pnv_powersave_common:
mtmsrd r7,0
bctr
+/*
+ * This is the sequence required to execute idle instructions, as
+ * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
+ */
+#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
+ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
+ std r0,0(r1); \
+ ptesync; \
+ ld r0,0(r1); \
+236: cmpd cr0,r0,r0; \
+ bne 236b; \
+ IDLE_INST;
+
+
.globl pnv_enter_arch207_idle_mode
pnv_enter_arch207_idle_mode:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -255,7 +337,7 @@ power_enter_stop:
andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
bne .Lhandle_esl_ec_set
- IDLE_STATE_ENTER_SEQ(PPC_STOP)
+ PPC_STOP
li r3,0 /* Since we didn't lose state, return 0 */
/*
@@ -273,13 +355,15 @@ power_enter_stop:
b pnv_wakeup_noloss
.Lhandle_esl_ec_set:
+BEGIN_FTR_SECTION
/*
- * POWER9 DD2 can incorrectly set PMAO when waking up after a
- * state-loss idle. Saving and restoring MMCR0 over idle is a
+ * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after
+ * a state-loss idle. Saving and restoring MMCR0 over idle is a
* workaround.
*/
mfspr r4,SPRN_MMCR0
std r4,_MMCR0(r1)
+END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
/*
* Check if the requested state is a deep idle state.
@@ -288,7 +372,8 @@ power_enter_stop:
ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
cmpd r3,r4
bge .Lhandle_deep_stop
- IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
+ PPC_STOP /* Does not return (system reset interrupt) */
+
.Lhandle_deep_stop:
/*
* Entering deep idle state.
@@ -310,7 +395,7 @@ lwarx_loop_stop:
bl save_sprs_to_stack
- IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
+ PPC_STOP /* Does not return (system reset interrupt) */
/*
* Entered with MSR[EE]=0 and no soft-masked interrupts pending.
@@ -460,13 +545,21 @@ pnv_restore_hyp_resource_arch300:
/*
* Workaround for POWER9, if we lost resources, the ERAT
* might have been mixed up and needs flushing. We also need
- * to reload MMCR0 (see comment above).
+ * to reload MMCR0 (see comment above). We also need to set
+ * then clear bit 60 in MMCRA to ensure the PMU starts running.
*/
blt cr3,1f
+BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
ld r1,PACAR1(r13)
ld r4,_MMCR0(r1)
mtspr SPRN_MMCR0,r4
+END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
+ mfspr r4,SPRN_MMCRA
+ ori r4,r4,(1 << (63-60))
+ mtspr SPRN_MMCRA,r4
+ xori r4,r4,(1 << (63-60))
+ mtspr SPRN_MMCRA,r4
1:
/*
* POWER ISA 3. Use PSSCR to determine if we
@@ -803,9 +896,16 @@ no_segments:
mtctr r12
bctrl
+/*
+ * On POWER9, we can come here on wakeup from a cpuidle stop state.
+ * Hence restore the additional SPRs to their saved values.
+ *
+ * On POWER8, we come here only on winkle. Since winkle is used
+ * only in the case of CPU-Hotplug, we don't need to restore
+ * the additional SPRs.
+ */
BEGIN_FTR_SECTION
- ld r4,_LPCR(r1)
- mtspr SPRN_LPCR,r4
+ bl power9_restore_additional_sprs
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
hypervisor_state_restored:
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index a582e0d42525..aa9f1b8261db 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -19,6 +19,8 @@
#include <asm/pgtable.h>
#include <asm/ppc-pci.h>
#include <asm/io-workarounds.h>
+#include <asm/pte-walk.h>
+
#define IOWA_MAX_BUS 8
@@ -75,8 +77,7 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
* We won't find huge pages here (iomem). Also can't hit
* a page table free due to init_mm
*/
- ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
- NULL, &hugepage_shift);
+ ptep = find_init_mm_pte(vaddr, &hugepage_shift);
if (ptep == NULL)
paddr = 0;
else {
@@ -192,7 +193,7 @@ void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops,
if (iowa_bus_count >= IOWA_MAX_BUS) {
pr_err("IOWA:Too many pci bridges, "
- "workarounds disabled for %s\n", np->full_name);
+ "workarounds disabled for %pOF\n", np);
return;
}
@@ -207,6 +208,6 @@ void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops,
iowa_bus_count++;
- pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
+ pr_debug("IOWA:[%d]Add bus, %pOF.\n", iowa_bus_count-1, np);
}
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index a1854d1ded8b..aab456ed2a00 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* ppc64 "iomap" interface implementation.
*
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 233ca3fe4754..af7a20dc6e09 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -127,8 +127,7 @@ static ssize_t fail_iommu_store(struct device *dev,
return count;
}
-static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show,
- fail_iommu_store);
+static DEVICE_ATTR_RW(fail_iommu);
static int fail_iommu_bus_notify(struct notifier_block *nb,
unsigned long action, void *data)
@@ -190,7 +189,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
unsigned int pool_nr;
struct iommu_pool *pool;
- align_mask = 0xffffffffffffffffl >> (64 - align_order);
+ align_mask = (1ull << align_order) - 1;
/* This allocator was derived from x86_64's bit string search */
@@ -208,7 +207,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
* We don't need to disable preemption here because any CPU can
* safely use any IOMMU pool.
*/
- pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);
+ pool_nr = raw_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);
if (largealloc)
pool = &(tbl->large_pool);
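The align_mask change above replaces 0xffffffffffffffffl >> (64 - align_order) with (1ull << align_order) - 1: when align_order is 0 the old form shifts a 64-bit value by 64, which C leaves undefined, while the new form produces the expected mask for every order. A small standalone check (illustrative only):

#include <stdio.h>
#include <stdint.h>

/* Same computation as the patched iommu_range_alloc() line above. */
static uint64_t align_mask(unsigned int align_order)
{
	return (1ull << align_order) - 1;
}

int main(void)
{
	for (unsigned int order = 0; order <= 4; order++)
		printf("order %u -> mask 0x%llx\n", order,
		       (unsigned long long)align_mask(order));
	return 0;
}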
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 0bcec745a672..b7a84522e652 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -24,7 +24,7 @@
* mask register (of which only 16 are defined), hence the weird shifting
* and complement of the cached_irq_mask. I want to be able to stuff
* this right into the SIU SMASK register.
- * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx
+ * Many of the prep/chrp functions are conditional compiled on CONFIG_PPC_8xx
* to reduce code space and undefined function references.
*/
@@ -143,8 +143,29 @@ notrace unsigned int __check_irq_replay(void)
*/
unsigned char happened = local_paca->irq_happened;
- /* Clear bit 0 which we wouldn't clear otherwise */
- local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ /*
+ * We are responding to the next interrupt, so interrupt-off
+ * latencies should be reset here.
+ */
+ trace_hardirqs_on();
+ trace_hardirqs_off();
+
+ if (happened & PACA_IRQ_HARD_DIS) {
+ /* Clear bit 0 which we wouldn't clear otherwise */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+ /*
+ * We may have missed a decrementer interrupt if hard disabled.
+ * Check the decrementer register in case we had a rollover
+ * while hard disabled.
+ */
+ if (!(happened & PACA_IRQ_DEC)) {
+ if (decrementer_check_overflow()) {
+ local_paca->irq_happened |= PACA_IRQ_DEC;
+ happened |= PACA_IRQ_DEC;
+ }
+ }
+ }
/*
* Force the delivery of pending soft-disabled interrupts on PS3.
@@ -160,41 +181,39 @@ notrace unsigned int __check_irq_replay(void)
* This is a higher priority interrupt than the others, so
* replay it first.
*/
- local_paca->irq_happened &= ~PACA_IRQ_HMI;
- if (happened & PACA_IRQ_HMI)
+ if (happened & PACA_IRQ_HMI) {
+ local_paca->irq_happened &= ~PACA_IRQ_HMI;
return 0xe60;
+ }
- /*
- * We may have missed a decrementer interrupt. We check the
- * decrementer itself rather than the paca irq_happened field
- * in case we also had a rollover while hard disabled
- */
- local_paca->irq_happened &= ~PACA_IRQ_DEC;
- if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow())
+ if (happened & PACA_IRQ_DEC) {
+ local_paca->irq_happened &= ~PACA_IRQ_DEC;
return 0x900;
+ }
- /* Finally check if an external interrupt happened */
- local_paca->irq_happened &= ~PACA_IRQ_EE;
- if (happened & PACA_IRQ_EE)
+ if (happened & PACA_IRQ_EE) {
+ local_paca->irq_happened &= ~PACA_IRQ_EE;
return 0x500;
+ }
#ifdef CONFIG_PPC_BOOK3E
- /* Finally check if an EPR external interrupt happened
- * this bit is typically set if we need to handle another
- * "edge" interrupt from within the MPIC "EPR" handler
+ /*
+	 * Check if an EPR external interrupt happened. This bit is typically
+ * set if we need to handle another "edge" interrupt from within the
+ * MPIC "EPR" handler.
*/
- local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
- if (happened & PACA_IRQ_EE_EDGE)
+ if (happened & PACA_IRQ_EE_EDGE) {
+ local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
return 0x500;
+ }
- local_paca->irq_happened &= ~PACA_IRQ_DBELL;
- if (happened & PACA_IRQ_DBELL)
+ if (happened & PACA_IRQ_DBELL) {
+ local_paca->irq_happened &= ~PACA_IRQ_DBELL;
return 0x280;
+ }
#else
- local_paca->irq_happened &= ~PACA_IRQ_DBELL;
if (happened & PACA_IRQ_DBELL) {
- if (cpu_has_feature(CPU_FTR_HVMODE))
- return 0xe80;
+ local_paca->irq_happened &= ~PACA_IRQ_DBELL;
return 0xa00;
}
#endif /* CONFIG_PPC_BOOK3E */
@@ -258,6 +277,7 @@ notrace void arch_local_irq_restore(unsigned long en)
#endif /* CONFIG_TRACE_IRQFLAGS */
set_soft_enabled(0);
+ trace_hardirqs_off();
/*
* Check if anything needs to be re-emitted. We haven't
@@ -267,6 +287,7 @@ notrace void arch_local_irq_restore(unsigned long en)
replay = __check_irq_replay();
/* We can soft-enable now */
+ trace_hardirqs_on();
set_soft_enabled(1);
/*
@@ -382,11 +403,19 @@ bool prep_irq_for_idle_irqsoff(void)
/*
* Take the SRR1 wakeup reason, index into this table to find the
* appropriate irq_happened bit.
+ *
+ * System reset exceptions taken in idle state also come through here,
+ * but they are NMI interrupts so do not need to wait for IRQs to be
+ * restored, and should be taken as early as practical. These are marked
+ * with 0xff in the table. The Power ISA specifies 0100b as the system
+ * reset interrupt reason.
*/
+#define IRQ_SYSTEM_RESET 0xff
+
static const u8 srr1_to_lazyirq[0x10] = {
0, 0, 0,
PACA_IRQ_DBELL,
- 0,
+ IRQ_SYSTEM_RESET,
PACA_IRQ_DBELL,
PACA_IRQ_DEC,
0,
@@ -395,15 +424,43 @@ static const u8 srr1_to_lazyirq[0x10] = {
PACA_IRQ_HMI,
0, 0, 0, 0, 0 };
+void replay_system_reset(void)
+{
+ struct pt_regs regs;
+
+ ppc_save_regs(&regs);
+ regs.trap = 0x100;
+ get_paca()->in_nmi = 1;
+ system_reset_exception(&regs);
+ get_paca()->in_nmi = 0;
+}
+EXPORT_SYMBOL_GPL(replay_system_reset);
+
void irq_set_pending_from_srr1(unsigned long srr1)
{
unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18;
+ u8 reason = srr1_to_lazyirq[idx];
+
+ /*
+ * Take the system reset now, which is immediately after registers
+ * are restored from idle. It's an NMI, so interrupts need not be
+ * re-enabled before it is taken.
+ */
+ if (unlikely(reason == IRQ_SYSTEM_RESET)) {
+ replay_system_reset();
+ return;
+ }
/*
* The 0 index (SRR1[42:45]=b0000) must always evaluate to 0,
- * so this can be called unconditionally with srr1 wake reason.
+ * so this can be called unconditionally with the SRR1 wake
+ * reason as returned by the idle code, which uses 0 to mean no
+ * interrupt.
+ *
+ * If a future CPU was to designate this as an interrupt reason,
+ * then a new index for no interrupt must be assigned.
*/
- local_paca->irq_happened |= srr1_to_lazyirq[idx];
+ local_paca->irq_happened |= reason;
}
#endif /* CONFIG_PPC_BOOK3S */
@@ -470,6 +527,18 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, " Hypervisor Maintenance Interrupts\n");
}
+ seq_printf(p, "%*s: ", prec, "NMI");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs);
+ seq_printf(p, " System Reset interrupts\n");
+
+#ifdef CONFIG_PPC_WATCHDOG
+ seq_printf(p, "%*s: ", prec, "WDG");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs);
+ seq_printf(p, " Watchdog soft-NMI interrupts\n");
+#endif
+
#ifdef CONFIG_PPC_DOORBELL
if (cpu_has_feature(CPU_FTR_DBELL)) {
seq_printf(p, "%*s: ", prec, "DBL");
@@ -494,6 +563,10 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += per_cpu(irq_stat, cpu).spurious_irqs;
sum += per_cpu(irq_stat, cpu).timer_irqs_others;
sum += per_cpu(irq_stat, cpu).hmi_exceptions;
+ sum += per_cpu(irq_stat, cpu).sreset_irqs;
+#ifdef CONFIG_PPC_WATCHDOG
+ sum += per_cpu(irq_stat, cpu).soft_nmi_irqs;
+#endif
#ifdef CONFIG_PPC_DOORBELL
sum += per_cpu(irq_stat, cpu).doorbell_irqs;
#endif
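irq_set_pending_from_srr1() above now decodes the SRR1 wake reason once and treats the 0xff sentinel as a system reset to be replayed immediately rather than queued in irq_happened. A standalone sketch of the same decode (SRR1_WAKEMASK_P8 is copied from the kernel headers as I recall them; the PACA_IRQ_* values in the table are placeholders, not the kernel's definitions):

#include <stdio.h>
#include <stdint.h>

#define SRR1_WAKEMASK_P8	0x003c0000UL	/* SRR1[42:45] wake reason field */
#define IRQ_SYSTEM_RESET	0xff		/* sentinel added by the patch */

/* Placeholder bit values standing in for PACA_IRQ_DBELL/DEC/EE/HMI. */
static const uint8_t srr1_to_lazyirq[0x10] = {
	0, 0, 0,
	0x08,			/* doorbell */
	IRQ_SYSTEM_RESET,	/* 0b0100: system reset, per the ISA */
	0x08,			/* doorbell */
	0x02,			/* decrementer */
	0,
	0x04,			/* external */
	0x04,			/* external */
	0x10,			/* HMI */
	0, 0, 0, 0, 0 };

int main(void)
{
	uint64_t srr1 = 0x00100000;	/* example: wake reason 0b0100 */
	unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18;
	uint8_t reason = srr1_to_lazyirq[idx];

	if (reason == IRQ_SYSTEM_RESET)
		printf("replay system reset immediately (NMI)\n");
	else
		printf("queue irq_happened |= 0x%x\n", reason);
	return 0;
}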
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index bb6f8993412e..1df6c74aa731 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -164,7 +164,7 @@ void __init isa_bridge_find_early(struct pci_controller *hose)
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
- pr_debug("ISA bridge (early) is %s\n", np->full_name);
+ pr_debug("ISA bridge (early) is %pOF\n", np);
}
/**
@@ -187,15 +187,15 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
pna = of_n_addr_cells(np);
if (of_property_read_u32(np, "#address-cells", &na) ||
of_property_read_u32(np, "#size-cells", &ns)) {
- pr_warn("ISA: Non-PCI bridge %s is missing address format\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF is missing address format\n",
+ np);
return;
}
/* Check it's a supported address format */
if (na != 2 || ns != 1) {
- pr_warn("ISA: Non-PCI bridge %s has unsupported address format\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF has unsupported address format\n",
+ np);
return;
}
rs = na + ns + pna;
@@ -203,8 +203,8 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
/* Grab the ranges property */
ranges = of_get_property(np, "ranges", &rlen);
if (ranges == NULL || rlen < rs) {
- pr_warn("ISA: Non-PCI bridge %s has absent or invalid ranges\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF has absent or invalid ranges\n",
+ np);
return;
}
@@ -220,8 +220,8 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
/* Got something ? */
if (!size || !pbasep) {
- pr_warn("ISA: Non-PCI bridge %s has no usable IO range\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF has no usable IO range\n",
+ np);
return;
}
@@ -233,15 +233,15 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
/* Map pbase */
pbase = of_translate_address(np, pbasep);
if (pbase == OF_BAD_ADDR) {
- pr_warn("ISA: Non-PCI bridge %s failed to translate IO base\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF failed to translate IO base\n",
+ np);
return;
}
/* We need page alignment */
if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) {
- pr_warn("ISA: Non-PCI bridge %s has non aligned IO range\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF has non aligned IO range\n",
+ np);
return;
}
@@ -255,7 +255,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
__ioremap_at(pbase, (void *)ISA_IO_BASE,
size, pgprot_val(pgprot_noncached(__pgprot(0))));
- pr_debug("ISA: Non-PCI bridge is %s\n", np->full_name);
+ pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
}
/**
@@ -277,8 +277,8 @@ static void isa_bridge_find_late(struct pci_dev *pdev,
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
- pr_debug("ISA bridge (late) is %s on %s\n",
- devnode->full_name, pci_name(pdev));
+ pr_debug("ISA bridge (late) is %pOF on %s\n",
+ devnode, pci_name(pdev));
}
/**
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index dbf098121ce6..35e240a0a408 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -67,9 +67,9 @@ static struct hard_trap_info
#endif
#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
{ 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */
-#if defined(CONFIG_8xx)
+#if defined(CONFIG_PPC_8xx)
{ 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
-#else /* ! CONFIG_8xx */
+#else /* ! CONFIG_PPC_8xx */
{ 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */
{ 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */
{ 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 6c089d9757c9..7a1f99f1b47f 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -25,6 +25,21 @@
#include <linux/preempt.h>
#include <linux/ftrace.h>
+/*
+ * This is called from ftrace code after invoking registered handlers to
+ * disambiguate regs->nip changes done by jprobes and livepatch. We check if
+ * there is an active jprobe at the provided address (mcount location).
+ */
+int __is_active_jprobe(unsigned long addr)
+{
+ if (!preemptible()) {
+ struct kprobe *p = raw_cpu_read(current_kprobe);
+ return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+ }
+
+ return 0;
+}
+
static nokprobe_inline
int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb, unsigned long orig_nip)
@@ -60,11 +75,8 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
{
struct kprobe *p;
struct kprobe_ctlblk *kcb;
- unsigned long flags;
- /* Disable irq for emulating a breakpoint and avoiding preempt */
- local_irq_save(flags);
- hard_irq_disable();
+ preempt_disable();
p = get_kprobe((kprobe_opcode_t *)nip);
if (unlikely(!p) || kprobe_disabled(p))
@@ -86,13 +98,17 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs))
__skip_singlestep(p, regs, kcb, orig_nip);
- /*
- * If pre_handler returns !0, it sets regs->nip and
- * resets current kprobe.
- */
+ else {
+ /*
+ * If pre_handler returns !0, it sets regs->nip and
+ * resets current kprobe. In this case, we should not
+ * re-enable preemption.
+ */
+ return;
+ }
}
end:
- local_irq_restore(flags);
+ preempt_enable_no_resched();
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 367494dc67d9..ca5d5a081e75 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,12 +43,6 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
-int is_current_kprobe_addr(unsigned long addr)
-{
- struct kprobe *p = kprobe_running();
- return (p && (unsigned long)p->addr == addr) ? 1 : 0;
-}
-
bool arch_within_kprobe_blacklist(unsigned long addr)
{
return (addr >= (unsigned long)__kprobes_text_start &&
@@ -59,7 +53,7 @@ bool arch_within_kprobe_blacklist(unsigned long addr)
kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
{
- kprobe_opcode_t *addr;
+ kprobe_opcode_t *addr = NULL;
#ifdef PPC64_ELF_ABI_v2
/* PPC64 ABIv2 needs local entry point */
@@ -91,36 +85,29 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
* Also handle <module:symbol> format.
*/
char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN];
- const char *modsym;
bool dot_appended = false;
- if ((modsym = strchr(name, ':')) != NULL) {
- modsym++;
- if (*modsym != '\0' && *modsym != '.') {
- /* Convert to <module:.symbol> */
- strncpy(dot_name, name, modsym - name);
- dot_name[modsym - name] = '.';
- dot_name[modsym - name + 1] = '\0';
- strncat(dot_name, modsym,
- sizeof(dot_name) - (modsym - name) - 2);
- dot_appended = true;
- } else {
- dot_name[0] = '\0';
- strncat(dot_name, name, sizeof(dot_name) - 1);
- }
- } else if (name[0] != '.') {
- dot_name[0] = '.';
- dot_name[1] = '\0';
- strncat(dot_name, name, KSYM_NAME_LEN - 2);
+ const char *c;
+ ssize_t ret = 0;
+ int len = 0;
+
+ if ((c = strnchr(name, MODULE_NAME_LEN, ':')) != NULL) {
+ c++;
+ len = c - name;
+ memcpy(dot_name, name, len);
+ } else
+ c = name;
+
+ if (*c != '\0' && *c != '.') {
+ dot_name[len++] = '.';
dot_appended = true;
- } else {
- dot_name[0] = '\0';
- strncat(dot_name, name, KSYM_NAME_LEN - 1);
}
- addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name);
- if (!addr && dot_appended) {
- /* Let's try the original non-dot symbol lookup */
+ ret = strscpy(dot_name + len, c, KSYM_NAME_LEN);
+ if (ret > 0)
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name);
+
+	/* Fall back to the original non-dot symbol lookup */
+ if (!addr && dot_appended)
addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
- }
#else
addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
#endif
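For the PPC64 ELF ABIv1 case, kprobe_lookup_name() above rebuilds the symbol as a dot-prefixed name while keeping any "module:" prefix intact. A userspace sketch of the same string handling (plain libc calls stand in for the kernel's strnchr()/strscpy(); the length constants are illustrative):

#include <stdio.h>
#include <string.h>

#define MODULE_NAME_LEN	56
#define KSYM_NAME_LEN	128

static void build_dot_name(const char *name, char *dot_name, size_t size)
{
	const char *c = strchr(name, ':');
	size_t len = 0;

	if (c) {
		c++;				/* symbol part after "module:" */
		len = (size_t)(c - name);
		memcpy(dot_name, name, len);	/* keep the "module:" prefix */
	} else {
		c = name;
	}

	if (*c != '\0' && *c != '.')
		dot_name[len++] = '.';		/* ABIv1 text entry points are dot-prefixed */

	snprintf(dot_name + len, size - len, "%s", c);
}

int main(void)
{
	char buf[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN];

	build_dot_name("ext4:ext4_file_open", buf, sizeof(buf));
	printf("%s\n", buf);			/* ext4:.ext4_file_open */
	build_dot_name("schedule", buf, sizeof(buf));
	printf("%s\n", buf);			/* .schedule */
	return 0;
}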
@@ -239,7 +226,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(arch_prepare_kretprobe);
-int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
+static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
{
int ret;
unsigned int insn = *p->ainsn.insn;
@@ -261,9 +248,20 @@ int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
*/
printk("Can't step on instruction %x\n", insn);
BUG();
- } else if (ret == 0)
- /* This instruction can't be boosted */
- p->ainsn.boostable = -1;
+ } else {
+ /*
+ * If we haven't previously emulated this instruction, then it
+ * can't be boosted. Note it down so we don't try to do so again.
+ *
+ * If, however, we had emulated this instruction in the past,
+ * then this is just an error with the current run (for
+ * instance, exceptions due to a load/store). We return 0 so
+ * that this is now single-stepped, but continue to try
+ * emulating it in subsequent probe hits.
+ */
+ if (unlikely(p->ainsn.boostable != 1))
+ p->ainsn.boostable = -1;
+ }
return ret;
}
@@ -600,7 +598,12 @@ NOKPROBE_SYMBOL(kprobe_fault_handler);
unsigned long arch_deref_entry_point(void *entry)
{
- return ppc_global_function_entry(entry);
+#ifdef PPC64_ELF_ABI_v1
+ if (!kernel_text_address((unsigned long)entry))
+ return ppc_global_function_entry(entry);
+ else
+#endif
+ return (unsigned long)entry;
}
NOKPROBE_SYMBOL(arch_deref_entry_point);
@@ -634,24 +637,22 @@ NOKPROBE_SYMBOL(setjmp_pre_handler);
void __used jprobe_return(void)
{
- asm volatile("trap" ::: "memory");
+ asm volatile("jprobe_return_trap:\n"
+ "trap\n"
+ ::: "memory");
}
NOKPROBE_SYMBOL(jprobe_return);
-static void __used jprobe_return_end(void)
-{
-}
-NOKPROBE_SYMBOL(jprobe_return_end);
-
int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- /*
- * FIXME - we should ideally be validating that we got here 'cos
- * of the "trap" in jprobe_return() above, before restoring the
- * saved regs...
- */
+ if (regs->nip != ppc_kallsyms_lookup_name("jprobe_return_trap")) {
+ pr_debug("longjmp_break_handler NIP (0x%lx) does not match jprobe_return_trap (0x%lx)\n",
+ regs->nip, ppc_kallsyms_lookup_name("jprobe_return_trap"));
+ return 0;
+ }
+
memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
/* It's OK to start function graph tracing again */
unpause_graph_tracing();
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 1086ea37c832..9ad37f827a97 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -25,7 +25,6 @@
#include <linux/kvm_para.h>
#include <linux/slab.h>
#include <linux/of.h>
-#include <linux/nmi.h> /* hardlockup_detector_disable() */
#include <asm/reg.h>
#include <asm/sections.h>
@@ -719,12 +718,6 @@ static __init void kvm_free_tmp(void)
static int __init kvm_guest_init(void)
{
- /*
- * The hardlockup detector is likely to get false positives in
- * KVM guests, so disable it by default.
- */
- hardlockup_detector_disable();
-
if (!kvm_para_available())
goto free_tmp;
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 97ec8557f974..6408f09dbbd9 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -181,7 +181,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
mtctr r4
li r4,0
1:
- lwzx r0,r0,r4
+ lwzx r0,0,r4
addi r4,r4,32 /* Go to start of next cache line */
bdnz 1b
isync
@@ -328,7 +328,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
mtctr r4
li r4,0
1:
- lwzx r0,r0,r4
+ lwzx r0,0,r4
dcbf 0,r4
addi r4,r4,32 /* Go to start of next cache line */
bdnz 1b
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 0694d20f85b6..33b34a58fc62 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
@@ -147,8 +148,8 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
legacy_serial_ports[index].serial_out = tsi_serial_out;
}
- printk(KERN_DEBUG "Found legacy serial port %d for %s\n",
- index, np->full_name);
+ printk(KERN_DEBUG "Found legacy serial port %d for %pOF\n",
+ index, np);
printk(KERN_DEBUG " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n",
(iotype == UPIO_PORT) ? "port" : "mem",
(unsigned long long)base, (unsigned long long)taddr, irq,
@@ -207,7 +208,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
int index = -1;
u64 taddr;
- DBG(" -> add_legacy_isa_port(%s)\n", np->full_name);
+ DBG(" -> add_legacy_isa_port(%pOF)\n", np);
/* Get the ISA port number */
reg = of_get_property(np, "reg", NULL);
@@ -256,7 +257,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
unsigned int flags;
int iotype, index = -1, lindex = 0;
- DBG(" -> add_legacy_pci_port(%s)\n", np->full_name);
+ DBG(" -> add_legacy_pci_port(%pOF)\n", np);
/* We only support ports that have a clock frequency properly
* encoded in the device-tree (that is have an fcode). Anything
@@ -374,7 +375,7 @@ void __init find_legacy_serial_ports(void)
if (path != NULL) {
stdout = of_find_node_by_path(path);
if (stdout)
- DBG("stdout is %s\n", stdout->full_name);
+ DBG("stdout is %pOF\n", stdout);
} else {
DBG(" no linux,stdout-path !\n");
}
@@ -603,7 +604,7 @@ static int __init check_legacy_serial_console(void)
DBG(" can't find stdout package %s !\n", name);
return -ENODEV;
}
- DBG("stdout is %s\n", prom_stdout->full_name);
+ DBG("stdout is %pOF\n", prom_stdout);
name = of_get_property(prom_stdout, "name", NULL);
if (!name) {
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 5c12e21d0d1a..49d34d7271e7 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -360,7 +360,7 @@ void default_machine_kexec(struct kimage *image)
/* NOTREACHED */
}
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/* Values we need to export to the second kernel via the device tree. */
static unsigned long htab_base;
static unsigned long htab_size;
@@ -402,4 +402,4 @@ static int __init export_htab_values(void)
return 0;
}
late_initcall(export_htab_values);
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
index 992c0d258e5d..e4395f937d63 100644
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -91,11 +91,13 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
* and that value will be returned. If all free regions are visited without
* func returning non-zero, then zero will be returned.
*/
-int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *))
+int arch_kexec_walk_mem(struct kexec_buf *kbuf,
+ int (*func)(struct resource *, void *))
{
int ret = 0;
u64 i;
phys_addr_t mstart, mend;
+ struct resource res = { };
if (kbuf->top_down) {
for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0,
@@ -105,7 +107,9 @@ int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *))
* range while in kexec, end points to the last byte
* in the range.
*/
- ret = func(mstart, mend - 1, kbuf);
+ res.start = mstart;
+ res.end = mend - 1;
+ ret = func(&res, kbuf);
if (ret)
break;
}
@@ -117,7 +121,9 @@ int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *))
* range while in kexec, end points to the last byte
* in the range.
*/
- ret = func(mstart, mend - 1, kbuf);
+ res.start = mstart;
+ res.end = mend - 1;
+ ret = func(&res, kbuf);
if (ret)
break;
}
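With the signature change above, arch_kexec_walk_mem() hands each free range to the callback as a struct resource whose ->end is the last byte of the range rather than a raw start/end pair. A sketch of a callback written against the new prototype (illustrative only; print_free_range() is a made-up name, and the real user is the generic kexec_file hole-locating code):

#include <linux/kernel.h>
#include <linux/ioport.h>	/* struct resource */
#include <linux/kexec.h>	/* struct kexec_buf */

/* Called once per free memory range; returning non-zero stops the walk. */
static int print_free_range(struct resource *res, void *arg)
{
	struct kexec_buf *kbuf = arg;

	pr_info("free range %pa..%pa (need %lu bytes)\n",
		&res->start, &res->end, kbuf->memsz);
	return 0;	/* keep walking */
}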
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index e0e131e662ed..742e4658c5dc 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -22,11 +22,14 @@
#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt
+#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
+
+#include <asm/machdep.h>
#include <asm/mce.h>
static DEFINE_PER_CPU(int, mce_nest_count);
@@ -36,11 +39,21 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
+/* Queue for delayed MCE UE events. */
+static DEFINE_PER_CPU(int, mce_ue_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
+ mce_ue_event_queue);
+
static void machine_check_process_queued_event(struct irq_work *work);
+void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_process_ue_event(struct work_struct *work);
+
static struct irq_work mce_event_process_work = {
.func = machine_check_process_queued_event,
};
+DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
+
static void mce_set_error_info(struct machine_check_event *mce,
struct mce_error_info *mce_err)
{
@@ -79,7 +92,7 @@ static void mce_set_error_info(struct machine_check_event *mce,
*/
void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err,
- uint64_t nip, uint64_t addr)
+ uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
int index = __this_cpu_inc_return(mce_nest_count) - 1;
struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
@@ -137,6 +150,11 @@ void save_mce_event(struct pt_regs *regs, long handled,
} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
mce->u.ue_error.effective_address_provided = true;
mce->u.ue_error.effective_address = addr;
+ if (phys_addr != ULONG_MAX) {
+ mce->u.ue_error.physical_address_provided = true;
+ mce->u.ue_error.physical_address = phys_addr;
+ machine_check_ue_event(mce);
+ }
}
return;
}
@@ -190,6 +208,26 @@ void release_mce_event(void)
get_mce_event(NULL, true);
}
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+void machine_check_ue_event(struct machine_check_event *evt)
+{
+ int index;
+
+ index = __this_cpu_inc_return(mce_ue_count) - 1;
+ /* If queue is full, just return for now. */
+ if (index >= MAX_MC_EVT) {
+ __this_cpu_dec(mce_ue_count);
+ return;
+ }
+ memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
+
+ /* Queue work to process this event later. */
+ schedule_work(&mce_ue_event_work);
+}
+
/*
* Queue up the MCE event which then can be handled later.
*/
@@ -212,7 +250,39 @@ void machine_check_queue_event(void)
/* Queue irq work to process this event later. */
irq_work_queue(&mce_event_process_work);
}
-
+/*
+ * Process the pending MCE UE events from the MCE UE event queue. This
+ * function runs later from work queue context.
+ */
+static void machine_process_ue_event(struct work_struct *work)
+{
+ int index;
+ struct machine_check_event *evt;
+
+ while (__this_cpu_read(mce_ue_count) > 0) {
+ index = __this_cpu_read(mce_ue_count) - 1;
+ evt = this_cpu_ptr(&mce_ue_event_queue[index]);
+#ifdef CONFIG_MEMORY_FAILURE
+ /*
+			 * This should probably be queued elsewhere, but
+			 * oh well.
+ */
+ if (evt->error_type == MCE_ERROR_TYPE_UE) {
+ if (evt->u.ue_error.physical_address_provided) {
+ unsigned long pfn;
+
+ pfn = evt->u.ue_error.physical_address >>
+ PAGE_SHIFT;
+ memory_failure(pfn, SIGBUS, 0);
+			} else {
+				pr_warn("Failed to identify bad address from where the uncorrectable error (UE) was generated\n");
+			}
+ }
+#endif
+ __this_cpu_dec(mce_ue_count);
+ }
+}
/*
* process pending MCE event from the mce event queue. This function will be
* called during syscall exit.
@@ -220,6 +290,7 @@ void machine_check_queue_event(void)
static void machine_check_process_queued_event(struct irq_work *work)
{
int index;
+ struct machine_check_event *evt;
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -229,8 +300,8 @@ static void machine_check_process_queued_event(struct irq_work *work)
*/
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
- machine_check_print_event_info(
- this_cpu_ptr(&mce_event_queue[index]), false);
+ evt = this_cpu_ptr(&mce_event_queue[index]);
+ machine_check_print_event_info(evt, false);
__this_cpu_dec(mce_queue_count);
}
}
@@ -337,7 +408,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
printk("%s Effective address: %016llx\n",
level, evt->u.ue_error.effective_address);
if (evt->u.ue_error.physical_address_provided)
- printk("%s Physical address: %016llx\n",
+ printk("%s Physical address: %016llx\n",
level, evt->u.ue_error.physical_address);
break;
case MCE_ERROR_TYPE_SLB:
@@ -408,41 +479,60 @@ void machine_check_print_event_info(struct machine_check_event *evt,
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
-uint64_t get_mce_fault_addr(struct machine_check_event *evt)
+/*
+ * This function is called in real mode. Strictly no printk's please.
+ *
+ * regs->nip and regs->msr contain SRR0 and SRR1.
+ */
+long machine_check_early(struct pt_regs *regs)
{
- switch (evt->error_type) {
- case MCE_ERROR_TYPE_UE:
- if (evt->u.ue_error.effective_address_provided)
- return evt->u.ue_error.effective_address;
- break;
- case MCE_ERROR_TYPE_SLB:
- if (evt->u.slb_error.effective_address_provided)
- return evt->u.slb_error.effective_address;
- break;
- case MCE_ERROR_TYPE_ERAT:
- if (evt->u.erat_error.effective_address_provided)
- return evt->u.erat_error.effective_address;
- break;
- case MCE_ERROR_TYPE_TLB:
- if (evt->u.tlb_error.effective_address_provided)
- return evt->u.tlb_error.effective_address;
- break;
- case MCE_ERROR_TYPE_USER:
- if (evt->u.user_error.effective_address_provided)
- return evt->u.user_error.effective_address;
- break;
- case MCE_ERROR_TYPE_RA:
- if (evt->u.ra_error.effective_address_provided)
- return evt->u.ra_error.effective_address;
- break;
- case MCE_ERROR_TYPE_LINK:
- if (evt->u.link_error.effective_address_provided)
- return evt->u.link_error.effective_address;
- break;
- default:
- case MCE_ERROR_TYPE_UNKNOWN:
- break;
+ long handled = 0;
+
+ __this_cpu_inc(irq_stat.mce_exceptions);
+
+ if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+ handled = cur_cpu_spec->machine_check_early(regs);
+ return handled;
+}
+
+long hmi_exception_realmode(struct pt_regs *regs)
+{
+ __this_cpu_inc(irq_stat.hmi_exceptions);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
+ if (pvr_version_is(PVR_POWER9)) {
+ unsigned long hmer = mfspr(SPRN_HMER);
+
+		/* Do we have the debug bit set? */
+ if (hmer & PPC_BIT(17)) {
+ hmer &= ~PPC_BIT(17);
+ mtspr(SPRN_HMER, hmer);
+
+ /*
+ * Now to avoid problems with soft-disable we
+ * only do the emulation if we are coming from
+ * user space
+ */
+ if (user_mode(regs))
+ local_paca->hmi_p9_special_emu = 1;
+
+ /*
+ * Don't bother going to OPAL if that's the
+ * only relevant bit.
+ */
+ if (!(hmer & mfspr(SPRN_HMEER)))
+ return local_paca->hmi_p9_special_emu;
+ }
}
- return 0;
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+ wait_for_subcore_guest_exit();
+
+ if (ppc_md.hmi_exception_early)
+ ppc_md.hmi_exception_early(regs);
+
+ wait_for_tb_resync();
+
+ return 1;
}
-EXPORT_SYMBOL(get_mce_fault_addr);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index b76ca198e09c..644f7040b91c 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -27,6 +27,36 @@
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>
+#include <asm/pgtable.h>
+#include <asm/pte-walk.h>
+#include <asm/sstep.h>
+#include <asm/exception-64s.h>
+
+/*
+ * Convert an address related to an mm to a PFN. NOTE: we are in real
+ * mode, so we could potentially race with page table updates.
+ */
+static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
+{
+ pte_t *ptep;
+ unsigned long flags;
+ struct mm_struct *mm;
+
+ if (user_mode(regs))
+ mm = current->mm;
+ else
+ mm = &init_mm;
+
+ local_irq_save(flags);
+ if (mm == current->mm)
+ ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
+ else
+ ptep = find_init_mm_pte(addr, NULL);
+ local_irq_restore(flags);
+ if (!ptep || pte_special(*ptep))
+ return ULONG_MAX;
+ return pte_pfn(*ptep);
+}
static void flush_tlb_206(unsigned int num_sets, unsigned int action)
{
@@ -128,7 +158,7 @@ void __flush_tlb_power9(unsigned int action)
{
unsigned int num_sets;
- if (radix_enabled())
+ if (early_radix_enabled())
num_sets = POWER9_TLB_SETS_RADIX;
else
num_sets = POWER9_TLB_SETS_HASH;
@@ -138,7 +168,7 @@ void __flush_tlb_power9(unsigned int action)
/* flush SLBs and reload */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
static void flush_and_reload_slb(void)
{
struct slb_shadow *slb;
@@ -185,7 +215,7 @@ static void flush_erat(void)
static int mce_flush(int what)
{
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
if (what == MCE_FLUSH_SLB) {
flush_and_reload_slb();
return 1;
@@ -421,9 +451,45 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
{ 0, false, 0, 0, 0, 0 } };
+static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
+ uint64_t *phys_addr)
+{
+ /*
+	 * Carefully look at the NIP to determine the instruction to
+	 * analyse. Reading the NIP in real mode is tricky and can lead
+	 * to recursive faults.
+ */
+ int instr;
+ unsigned long pfn, instr_addr;
+ struct instruction_op op;
+ struct pt_regs tmp = *regs;
+
+ pfn = addr_to_pfn(regs, regs->nip);
+ if (pfn != ULONG_MAX) {
+ instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
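+		/* Fetch the instruction via its real address; we are in real mode. */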
+ instr = *(unsigned int *)(instr_addr);
+ if (!analyse_instr(&op, &tmp, instr)) {
+ pfn = addr_to_pfn(regs, op.ea);
+ *addr = op.ea;
+ *phys_addr = (pfn << PAGE_SHIFT);
+ return 0;
+ }
+ /*
+		 * analyse_instr() might fail if the instruction is not a
+		 * load/store (unexpected for a load/store error), or if we
+		 * got the NIP wrong.
+ */
+ }
+ *addr = 0;
+ return -1;
+}
+
static int mce_handle_ierror(struct pt_regs *regs,
const struct mce_ierror_table table[],
- struct mce_error_info *mce_err, uint64_t *addr)
+ struct mce_error_info *mce_err, uint64_t *addr,
+ uint64_t *phys_addr)
{
uint64_t srr1 = regs->msr;
int handled = 0;
@@ -475,8 +541,22 @@ static int mce_handle_ierror(struct pt_regs *regs,
}
mce_err->severity = table[i].severity;
mce_err->initiator = table[i].initiator;
- if (table[i].nip_valid)
+ if (table[i].nip_valid) {
*addr = regs->nip;
+ if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+ table[i].error_type == MCE_ERROR_TYPE_UE) {
+ unsigned long pfn;
+
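+			/*
+			 * Only attempt the real-mode pfn lookup while we
+			 * still have MCE nesting depth to spare.
+			 */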
+ if (get_paca()->in_mce < MAX_MCE_DEPTH) {
+ pfn = addr_to_pfn(regs, regs->nip);
+ if (pfn != ULONG_MAX) {
+ *phys_addr =
+ (pfn << PAGE_SHIFT);
+ handled = 1;
+ }
+ }
+ }
+ }
return handled;
}
@@ -489,7 +569,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
static int mce_handle_derror(struct pt_regs *regs,
const struct mce_derror_table table[],
- struct mce_error_info *mce_err, uint64_t *addr)
+ struct mce_error_info *mce_err, uint64_t *addr,
+ uint64_t *phys_addr)
{
uint64_t dsisr = regs->dsisr;
int handled = 0;
@@ -555,7 +636,17 @@ static int mce_handle_derror(struct pt_regs *regs,
mce_err->initiator = table[i].initiator;
if (table[i].dar_valid)
*addr = regs->dar;
-
+ else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+ table[i].error_type == MCE_ERROR_TYPE_UE) {
+ /*
+ * We do a maximum of 4 nested MCE calls, see
+ * kernel/exception-64s.h
+ */
+ if (get_paca()->in_mce < MAX_MCE_DEPTH)
+ if (!mce_find_instr_ea_and_pfn(regs, addr,
+ phys_addr))
+ handled = 1;
+ }
found = 1;
}
@@ -592,19 +683,21 @@ static long mce_handle_error(struct pt_regs *regs,
const struct mce_ierror_table itable[])
{
struct mce_error_info mce_err = { 0 };
- uint64_t addr;
+ uint64_t addr, phys_addr;
uint64_t srr1 = regs->msr;
long handled;
if (SRR1_MC_LOADSTORE(srr1))
- handled = mce_handle_derror(regs, dtable, &mce_err, &addr);
+ handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
+ &phys_addr);
else
- handled = mce_handle_ierror(regs, itable, &mce_err, &addr);
+ handled = mce_handle_ierror(regs, itable, &mce_err, &addr,
+ &phys_addr);
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
handled = mce_handle_ue_error(regs);
- save_mce_event(regs, handled, &mce_err, regs->nip, addr);
+ save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
return handled;
}
@@ -624,5 +717,18 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
long __machine_check_early_realmode_p9(struct pt_regs *regs)
{
+ /*
+ * On POWER9 DD2.1 and below, it's possible to get a machine check
+ * caused by a paste instruction where only DSISR bit 25 is set. This
+ * will result in the MCE handler seeing an unknown event and the kernel
+ * crashing. An MCE that occurs like this is spurious, so we don't need
+ * to do anything in terms of servicing it. If there is something that
+ * needs to be serviced, the CPU will raise the MCE again with the
+ * correct DSISR so that it can be serviced properly. So detect this
+ * case and mark it as handled.
+ */
+ if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000)
+ return 1;
+
return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 0b0f89685b67..759104b99f9f 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -429,7 +429,8 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
/* Find this stub, or if that fails, the next avail. entry */
stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
for (i = 0; stub_func_addr(stubs[i].funcdata); i++) {
- BUG_ON(i >= num_stubs);
+ if (WARN_ON(i >= num_stubs))
+ return 0;
if (stub_func_addr(stubs[i].funcdata) == func_addr(addr))
return (unsigned long)&stubs[i];
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 34aeac54f120..becaec990140 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -45,7 +45,7 @@ static int of_pci_phb_probe(struct platform_device *dev)
if (ppc_md.pci_setup_phb == NULL)
return -ENODEV;
- pr_info("Setting up PCI bus %s\n", dev->dev.of_node->full_name);
+ pr_info("Setting up PCI bus %pOF\n", dev->dev.of_node);
/* Alloc and setup PHB data structure */
phb = pcibios_alloc_controller(dev->dev.of_node);
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 6f8273f5e988..8237884ca389 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -104,8 +104,10 @@ static unsigned long can_optimize(struct kprobe *p)
* and that can be emulated.
*/
if (!is_conditional_branch(*p->ainsn.insn) &&
- analyse_instr(&op, &regs, *p->ainsn.insn))
+ analyse_instr(&op, &regs, *p->ainsn.insn) == 1) {
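+		/*
+		 * analyse_instr() returning 1 means the instruction can be
+		 * emulated just by updating regs; apply it to get the NIP
+		 * we would end up at.
+		 */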
+ emulate_update_regs(&regs, &op);
nip = regs.nip;
+ }
return nip;
}
@@ -113,32 +115,23 @@ static unsigned long can_optimize(struct kprobe *p)
static void optimized_callback(struct optimized_kprobe *op,
struct pt_regs *regs)
{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long flags;
-
/* This is possible if op is under delayed unoptimizing */
if (kprobe_disabled(&op->kp))
return;
- local_irq_save(flags);
- hard_irq_disable();
+ preempt_disable();
if (kprobe_running()) {
kprobes_inc_nmissed_count(&op->kp);
} else {
__this_cpu_write(current_kprobe, &op->kp);
regs->nip = (unsigned long)op->kp.addr;
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
opt_pre_handler(&op->kp, regs);
__this_cpu_write(current_kprobe, NULL);
}
- /*
- * No need for an explicit __hard_irq_enable() here.
- * local_irq_restore() will re-enable interrupts,
- * if they were hard disabled.
- */
- local_irq_restore(flags);
+ preempt_enable_no_resched();
}
NOKPROBE_SYMBOL(optimized_callback);
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 4937bef7652f..52fc864cdec4 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -60,10 +60,6 @@ optprobe_template_entry:
std r5,_CCR(r1)
lbz r5,PACASOFTIRQEN(r13)
std r5,SOFTE(r1)
- mfdar r5
- std r5,_DAR(r1)
- mfdsisr r5
- std r5,_DSISR(r1)
/*
* We may get here from a module, so load the kernel TOC in r2.
@@ -122,10 +118,6 @@ optprobe_template_call_emulate:
mtxer r5
ld r5,_CCR(r1)
mtcr r5
- ld r5,_DAR(r1)
- mtdar r5
- ld r5,_DSISR(r1)
- mtdsisr r5
REST_GPR(0,r1)
REST_10GPRS(2,r1)
REST_10GPRS(12,r1)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 8d63627e067f..d6597038931d 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -90,7 +90,7 @@ static inline void free_lppacas(void) { }
#endif /* CONFIG_PPC_BOOK3S */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/*
* 3 persistent SLBs are registered here. The buffer will be zero
@@ -99,18 +99,27 @@ static inline void free_lppacas(void) { }
* If you make the number of persistent SLB entries dynamic, please also
* update PR KVM to flush and restore them accordingly.
*/
-static struct slb_shadow *slb_shadow;
+static struct slb_shadow * __initdata slb_shadow;
static void __init allocate_slb_shadows(int nr_cpus, int limit)
{
int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
+
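+	/* The radix MMU does not use the SLB, so no shadow buffer is needed. */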
+ if (early_radix_enabled())
+ return;
+
slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
memset(slb_shadow, 0, size);
}
static struct slb_shadow * __init init_slb_shadow(int cpu)
{
- struct slb_shadow *s = &slb_shadow[cpu];
+ struct slb_shadow *s;
+
+ if (early_radix_enabled())
+ return NULL;
+
+ s = &slb_shadow[cpu];
/*
* When we come through here to initialise boot_paca, the slb_shadow
@@ -126,11 +135,11 @@ static struct slb_shadow * __init init_slb_shadow(int cpu)
return s;
}
-#else /* CONFIG_PPC_STD_MMU_64 */
+#else /* !CONFIG_PPC_BOOK3S_64 */
static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
/* The Paca is an array with one entry per processor. Each contains an
* lppaca, which contains the information shared between the
@@ -161,9 +170,9 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->kexec_state = KEXEC_STATE_NONE;
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif
#ifdef CONFIG_PPC_BOOK3E
/* For now -- if we have threads this will be adjusted later */
@@ -215,7 +224,7 @@ void __init allocate_pacas(void)
paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
memset(paca, 0, paca_size);
- printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n",
+ printk(KERN_DEBUG "Allocated %u bytes for %u pacas at %p\n",
paca_size, nr_cpu_ids, paca);
allocate_lppacas(nr_cpu_ids, limit);
@@ -253,8 +262,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
get_paca()->mm_ctx_id = context->id;
#ifdef CONFIG_PPC_MM_SLICES
- VM_BUG_ON(!mm->context.addr_limit);
- get_paca()->addr_limit = mm->context.addr_limit;
+ VM_BUG_ON(!mm->context.slb_addr_limit);
+ get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
memcpy(&get_paca()->mm_ctx_high_slices_psize,
&context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
@@ -262,7 +271,7 @@ void copy_mm_to_paca(struct mm_struct *mm)
get_paca()->mm_ctx_user_psize = context->user_psize;
get_paca()->mm_ctx_sllp = context->sllp;
#endif
-#else /* CONFIG_PPC_BOOK3S */
+#else /* !CONFIG_PPC_BOOK3S */
return;
#endif
}
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 341a7469cab8..0ac7aa346c69 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -373,9 +373,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
if (virq)
irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
} else {
- pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
- oirq.args_count, oirq.args[0], oirq.args[1],
- of_node_full_name(oirq.np));
+ pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %pOF\n",
+ oirq.args_count, oirq.args[0], oirq.args[1], oirq.np);
virq = irq_create_of_mapping(&oirq);
}
@@ -741,8 +740,8 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
struct of_pci_range range;
struct of_pci_range_parser parser;
- printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
- dev->full_name, primary ? "(primary)" : "");
+ printk(KERN_INFO "PCI host bridge %pOF %s ranges:\n",
+ dev, primary ? "(primary)" : "");
/* Check for ranges property */
if (of_pci_range_parser_init(&parser, dev))
@@ -1556,8 +1555,8 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose,
if (!res->flags) {
pr_debug("PCI: I/O resource not set for host"
- " bridge %s (domain %d)\n",
- hose->dn->full_name, hose->global_number);
+ " bridge %pOF (domain %d)\n",
+ hose->dn, hose->global_number);
} else {
offset = pcibios_io_space_offset(hose);
@@ -1668,7 +1667,7 @@ void pcibios_scan_phb(struct pci_controller *hose)
struct device_node *node = hose->dn;
int mode;
- pr_debug("PCI: Scanning PHB %s\n", of_node_full_name(node));
+ pr_debug("PCI: Scanning PHB %pOF\n", node);
/* Get some IO space for the new PHB */
pcibios_setup_phb_io_space(hose);
@@ -1741,15 +1740,3 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl);
-
-static void fixup_vga(struct pci_dev *pdev)
-{
- u16 cmd;
-
- pci_read_config_word(pdev, PCI_COMMAND, &cmd);
- if ((cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) || !vga_default_device())
- vga_set_default_device(pdev);
-
-}
-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_DISPLAY_VGA, 8, fixup_vga);
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 41c86c6b6e4d..1d817f4d97d9 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -79,8 +79,8 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
return;
bus_range = of_get_property(node, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
- printk(KERN_WARNING "Can't get bus-range for %s, "
- "assuming it starts at 0\n", node->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF, "
+ "assuming it starts at 0\n", node);
pci_to_OF_bus_map[pci_bus] = 0;
} else
pci_to_OF_bus_map[pci_bus] = bus_range[0];
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index ed5e9ff61a68..15ce0306b092 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -90,14 +90,14 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
* to do an appropriate TLB flush here too
*/
if (bus->self) {
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
struct resource *res = bus->resource[0];
#endif
pr_debug("IO unmapping for PCI-PCI bridge %s\n",
pci_name(bus->self));
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
__flush_hash_table_range(&init_mm, res->start + _IO_BASE,
res->end + _IO_BASE + 1);
#endif
@@ -111,7 +111,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
if (hose->io_base_alloc == NULL)
return 0;
- pr_debug("IO unmapping for PHB %s\n", hose->dn->full_name);
+ pr_debug("IO unmapping for PHB %pOF\n", hose->dn);
pr_debug(" alloc=0x%p\n", hose->io_base_alloc);
/* This is a PHB, we fully unmap the IO area */
@@ -151,7 +151,7 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
hose->io_base_virt = (void __iomem *)(area->addr +
hose->io_base_phys - phys_page);
- pr_debug("IO mapping for PHB %s\n", hose->dn->full_name);
+ pr_debug("IO mapping for PHB %pOF\n", hose->dn);
pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n",
hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc);
pr_debug(" size=0x%016llx (alloc=0x%016lx)\n",
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 592693437070..0e395afbf0f4 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -139,7 +139,6 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
#ifdef CONFIG_PCI_IOV
static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
- struct pci_dev *pdev,
int vf_index,
int busno, int devfn)
{
@@ -150,10 +149,8 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
return NULL;
pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
- if (!pdn) {
- dev_warn(&pdev->dev, "%s: Out of memory!\n", __func__);
+ if (!pdn)
return NULL;
- }
pdn->phb = parent->phb;
pdn->parent = parent;
@@ -167,13 +164,6 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
INIT_LIST_HEAD(&pdn->list);
list_add_tail(&pdn->list, &parent->child_list);
- /*
- * If we already have PCI device instance, lets
- * bind them.
- */
- if (pdev)
- pdev->dev.archdata.pci_data = pdn;
-
return pdn;
}
#endif
@@ -201,7 +191,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
struct eeh_dev *edev __maybe_unused;
- pdn = add_one_dev_pci_data(parent, NULL, i,
+ pdn = add_one_dev_pci_data(parent, i,
pci_iov_virtfn_bus(pdev, i),
pci_iov_virtfn_devfn(pdev, i));
if (!pdn) {
@@ -303,7 +293,6 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
if (pdn == NULL)
return NULL;
dn->data = pdn;
- pdn->node = dn;
pdn->phb = hose;
#ifdef CONFIG_PPC_POWERNV
pdn->pe_number = IODA_INVALID_PE;
@@ -352,6 +341,7 @@ EXPORT_SYMBOL_GPL(pci_add_device_node_info);
void pci_remove_device_node_info(struct device_node *dn)
{
struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
+ struct device_node *parent;
#ifdef CONFIG_EEH
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -364,8 +354,10 @@ void pci_remove_device_node_info(struct device_node *dn)
WARN_ON(!list_empty(&pdn->child_list));
list_del(&pdn->list);
- if (pdn->parent)
- of_node_put(pdn->parent->node);
+
+ parent = of_get_parent(dn);
+ if (parent)
+ of_node_put(parent);
dn->data = NULL;
kfree(pdn);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index ea3d98115b88..0d790f8432d2 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -211,19 +211,19 @@ void of_scan_pci_bridge(struct pci_dev *dev)
unsigned int flags;
u64 size;
- pr_debug("of_scan_pci_bridge(%s)\n", node->full_name);
+ pr_debug("of_scan_pci_bridge(%pOF)\n", node);
/* parse bus-range property */
busrange = of_get_property(node, "bus-range", &len);
if (busrange == NULL || len != 8) {
- printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
- node->full_name);
+ printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %pOF\n",
+ node);
return;
}
ranges = of_get_property(node, "ranges", &len);
if (ranges == NULL) {
- printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n",
- node->full_name);
+ printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %pOF\n",
+ node);
return;
}
@@ -233,8 +233,8 @@ void of_scan_pci_bridge(struct pci_dev *dev)
bus = pci_add_new_bus(dev->bus, dev,
of_read_number(busrange, 1));
if (!bus) {
- printk(KERN_ERR "Failed to create pci bus for %s\n",
- node->full_name);
+ printk(KERN_ERR "Failed to create pci bus for %pOF\n",
+ node);
return;
}
}
@@ -262,13 +262,13 @@ void of_scan_pci_bridge(struct pci_dev *dev)
res = bus->resource[0];
if (res->flags) {
printk(KERN_ERR "PCI: ignoring extra I/O range"
- " for bridge %s\n", node->full_name);
+ " for bridge %pOF\n", node);
continue;
}
} else {
if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
printk(KERN_ERR "PCI: too many memory ranges"
- " for bridge %s\n", node->full_name);
+ " for bridge %pOF\n", node);
continue;
}
res = bus->resource[i];
@@ -307,7 +307,7 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
struct eeh_dev *edev = pdn_to_eeh_dev(PCI_DN(dn));
#endif
- pr_debug(" * %s\n", dn->full_name);
+ pr_debug(" * %pOF\n", dn);
if (!of_device_is_available(dn))
return NULL;
@@ -350,8 +350,8 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
struct device_node *child;
struct pci_dev *dev;
- pr_debug("of_scan_bus(%s) bus no %d...\n",
- node->full_name, bus->number);
+ pr_debug("of_scan_bus(%pOF) bus no %d...\n",
+ node, bus->number);
/* Scan direct children */
for_each_child_of_node(node, child) {
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9f3e2c932dcc..bfdd783e3916 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -77,6 +77,13 @@
extern unsigned long _get_SP(void);
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Are we running in "Suspend disabled" mode? If so we have to block any
+ * sigreturn that would get us into suspended state, and we also warn in some
+ * other paths that we should never reach with suspend disabled.
+ */
+bool tm_suspend_disabled __ro_after_init = false;
+
static void check_if_tm_restore_required(struct task_struct *tsk)
{
/*
@@ -97,9 +104,23 @@ static inline bool msr_tm_active(unsigned long msr)
{
return MSR_TM_ACTIVE(msr);
}
+
+static bool tm_active_with_fp(struct task_struct *tsk)
+{
+ return msr_tm_active(tsk->thread.regs->msr) &&
+ (tsk->thread.ckpt_regs.msr & MSR_FP);
+}
+
+static bool tm_active_with_altivec(struct task_struct *tsk)
+{
+ return msr_tm_active(tsk->thread.regs->msr) &&
+ (tsk->thread.ckpt_regs.msr & MSR_VEC);
+}
#else
static inline bool msr_tm_active(unsigned long msr) { return false; }
static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
+static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; }
+static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
bool strict_msr_control;
@@ -230,8 +251,9 @@ void enable_kernel_fp(void)
}
EXPORT_SYMBOL(enable_kernel_fp);
-static int restore_fp(struct task_struct *tsk) {
- if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) {
+static int restore_fp(struct task_struct *tsk)
+{
+ if (tsk->thread.load_fp || tm_active_with_fp(tsk)) {
load_fp_state(&current->thread.fp_state);
current->thread.load_fp++;
return 1;
@@ -313,7 +335,7 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
static int restore_altivec(struct task_struct *tsk)
{
if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
- (tsk->thread.load_vec || msr_tm_active(tsk->thread.regs->msr))) {
+ (tsk->thread.load_vec || tm_active_with_altivec(tsk))) {
load_vr_state(&tsk->thread.vr_state);
tsk->thread.used_vr = 1;
tsk->thread.load_vec++;
@@ -330,11 +352,19 @@ static inline int restore_altivec(struct task_struct *tsk) { return 0; }
#ifdef CONFIG_VSX
static void __giveup_vsx(struct task_struct *tsk)
{
- if (tsk->thread.regs->msr & MSR_FP)
+ unsigned long msr = tsk->thread.regs->msr;
+
+ /*
+	 * We should never be setting MSR_VSX without also setting
+ * MSR_FP and MSR_VEC
+ */
+ WARN_ON((msr & MSR_VSX) && !((msr & MSR_FP) && (msr & MSR_VEC)));
+
+ /* __giveup_fpu will clear MSR_VSX */
+ if (msr & MSR_FP)
__giveup_fpu(tsk);
- if (tsk->thread.regs->msr & MSR_VEC)
+ if (msr & MSR_VEC)
__giveup_altivec(tsk);
- tsk->thread.regs->msr &= ~MSR_VSX;
}
static void giveup_vsx(struct task_struct *tsk)
@@ -346,14 +376,6 @@ static void giveup_vsx(struct task_struct *tsk)
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
}
-static void save_vsx(struct task_struct *tsk)
-{
- if (tsk->thread.regs->msr & MSR_FP)
- save_fpu(tsk);
- if (tsk->thread.regs->msr & MSR_VEC)
- save_altivec(tsk);
-}
-
void enable_kernel_vsx(void)
{
unsigned long cpumsr;
@@ -362,7 +384,8 @@ void enable_kernel_vsx(void)
cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
- if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) {
+ if (current->thread.regs &&
+ (current->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP))) {
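+		/*
+		 * __giveup_vsx() gives up FP and VEC as well as VSX, so a
+		 * single call below flushes all of the user state we may
+		 * clobber.
+		 */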
check_if_tm_restore_required(current);
/*
* If a thread has already been reclaimed then the
@@ -373,10 +396,6 @@ void enable_kernel_vsx(void)
*/
if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
return;
- if (current->thread.regs->msr & MSR_FP)
- __giveup_fpu(current);
- if (current->thread.regs->msr & MSR_VEC)
- __giveup_altivec(current);
__giveup_vsx(current);
}
}
@@ -386,7 +405,7 @@ void flush_vsx_to_thread(struct task_struct *tsk)
{
if (tsk->thread.regs) {
preempt_disable();
- if (tsk->thread.regs->msr & MSR_VSX) {
+ if (tsk->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)) {
BUG_ON(tsk != current);
giveup_vsx(tsk);
}
@@ -406,7 +425,6 @@ static int restore_vsx(struct task_struct *tsk)
}
#else
static inline int restore_vsx(struct task_struct *tsk) { return 0; }
-static inline void save_vsx(struct task_struct *tsk) { }
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
@@ -486,6 +504,8 @@ void giveup_all(struct task_struct *tsk)
msr_check_and_set(msr_all_available);
check_if_tm_restore_required(tsk);
+ WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
+
#ifdef CONFIG_PPC_FPU
if (usermsr & MSR_FP)
__giveup_fpu(tsk);
@@ -494,10 +514,6 @@ void giveup_all(struct task_struct *tsk)
if (usermsr & MSR_VEC)
__giveup_altivec(tsk);
#endif
-#ifdef CONFIG_VSX
- if (usermsr & MSR_VSX)
- __giveup_vsx(tsk);
-#endif
#ifdef CONFIG_SPE
if (usermsr & MSR_SPE)
__giveup_spe(tsk);
@@ -511,10 +527,6 @@ void restore_math(struct pt_regs *regs)
{
unsigned long msr;
- /*
- * Syscall exit makes a similar initial check before branching
- * to restore_math. Keep them in synch.
- */
if (!msr_tm_active(regs->msr) &&
!current->thread.load_fp && !loadvec(current->thread))
return;
@@ -556,19 +568,13 @@ void save_all(struct task_struct *tsk)
msr_check_and_set(msr_all_available);
- /*
- * Saving the way the register space is in hardware, save_vsx boils
- * down to a save_fpu() and save_altivec()
- */
- if (usermsr & MSR_VSX) {
- save_vsx(tsk);
- } else {
- if (usermsr & MSR_FP)
- save_fpu(tsk);
+ WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
- if (usermsr & MSR_VEC)
- save_altivec(tsk);
- }
+ if (usermsr & MSR_FP)
+ save_fpu(tsk);
+
+ if (usermsr & MSR_VEC)
+ save_altivec(tsk);
if (usermsr & MSR_SPE)
__giveup_spe(tsk);
@@ -868,6 +874,10 @@ static void tm_reclaim_thread(struct thread_struct *thr,
if (!MSR_TM_SUSPENDED(mfmsr()))
return;
+ giveup_all(container_of(thr, struct task_struct, thread));
+
+ tm_reclaim(thr, cause);
+
/*
* If we are in a transaction and FP is off then we can't have
* used FP inside that transaction. Hence the checkpointed
@@ -886,10 +896,6 @@ static void tm_reclaim_thread(struct thread_struct *thr,
if ((thr->ckpt_regs.msr & MSR_VEC) == 0)
memcpy(&thr->ckvr_state, &thr->vr_state,
sizeof(struct thread_vr_state));
-
- giveup_all(container_of(thr, struct task_struct, thread));
-
- tm_reclaim(thr, thr->ckpt_regs.msr, cause);
}
void tm_reclaim_current(uint8_t cause)
@@ -918,6 +924,8 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
if (!MSR_TM_ACTIVE(thr->regs->msr))
goto out_and_saveregs;
+ WARN_ON(tm_suspend_disabled);
+
TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
"ccr=%lx, msr=%lx, trap=%lx)\n",
tsk->pid, thr->regs->nip,
@@ -938,11 +946,9 @@ out_and_saveregs:
tm_save_sprs(thr);
}
-extern void __tm_recheckpoint(struct thread_struct *thread,
- unsigned long orig_msr);
+extern void __tm_recheckpoint(struct thread_struct *thread);
-void tm_recheckpoint(struct thread_struct *thread,
- unsigned long orig_msr)
+void tm_recheckpoint(struct thread_struct *thread)
{
unsigned long flags;
@@ -961,15 +967,13 @@ void tm_recheckpoint(struct thread_struct *thread,
*/
tm_restore_sprs(thread);
- __tm_recheckpoint(thread, orig_msr);
+ __tm_recheckpoint(thread);
local_irq_restore(flags);
}
static inline void tm_recheckpoint_new_task(struct task_struct *new)
{
- unsigned long msr;
-
if (!cpu_has_feature(CPU_FTR_TM))
return;
@@ -988,13 +992,11 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new)
tm_restore_sprs(&new->thread);
return;
}
- msr = new->thread.ckpt_regs.msr;
/* Recheckpoint to restore original checkpointed register state. */
- TM_DEBUG("*** tm_recheckpoint of pid %d "
- "(new->msr 0x%lx, new->origmsr 0x%lx)\n",
- new->pid, new->thread.regs->msr, msr);
+ TM_DEBUG("*** tm_recheckpoint of pid %d (new->msr 0x%lx)\n",
+ new->pid, new->thread.regs->msr);
- tm_recheckpoint(&new->thread, msr);
+ tm_recheckpoint(&new->thread);
/*
* The checkpointed state has been restored but the live state has
@@ -1134,6 +1136,10 @@ static inline void restore_sprs(struct thread_struct *old_thread,
if (old_thread->tar != new_thread->tar)
mtspr(SPRN_TAR, new_thread->tar);
}
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+ old_thread->tidr != new_thread->tidr)
+ mtspr(SPRN_TIDR, new_thread->tidr);
#endif
}
@@ -1170,7 +1176,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
}
#endif /* CONFIG_PPC64 */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
batch = this_cpu_ptr(&ppc64_tlb_batch);
if (batch->active) {
current_thread_info()->local_flags |= _TLF_LAZY_MMU;
@@ -1178,7 +1184,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
__flush_tlb_pending(batch);
batch->active = 0;
}
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
switch_booke_debug_regs(&new->thread.debug);
@@ -1224,7 +1230,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
last = _switch(old_thread, new_thread);
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
batch = this_cpu_ptr(&ppc64_tlb_batch);
@@ -1238,22 +1244,22 @@ struct task_struct *__switch_to(struct task_struct *prev,
* The copy-paste buffer can only store into foreign real
* addresses, so unprivileged processes can not see the
* data or use it in any way unless they have foreign real
- * mappings. We don't have a VAS driver that allocates those
- * yet, so no cpabort is required.
+ * mappings. If the new process has the foreign real address
+ * mappings, we must issue a cp_abort to clear any state and
+ * prevent snooping, corruption or a covert channel.
+ *
+ * DD1 allows paste into normal system memory so we do an
+ * unpaired copy, rather than cp_abort, to clear the buffer,
+ * since cp_abort is quite expensive.
*/
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- /*
- * DD1 allows paste into normal system memory, so we
- * do an unpaired copy here to clear the buffer and
- * prevent a covert channel being set up.
- *
- * cpabort is not used because it is quite expensive.
- */
+ if (current_thread_info()->task->thread.used_vas) {
+ asm volatile(PPC_CP_ABORT);
+ } else if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
asm volatile(PPC_COPY(%0, %1)
: : "r"(dummy_copy_buffer), "r"(0));
}
}
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
return last;
}
@@ -1395,13 +1401,13 @@ void show_regs(struct pt_regs * regs)
show_regs_print_info(KERN_DEFAULT);
- printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
+ printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
regs->nip, regs->link, regs->ctr);
printk("REGS: %p TRAP: %04lx %s (%s)\n",
regs, regs->trap, print_tainted(), init_utsname()->release);
- printk("MSR: "REG" ", regs->msr);
+ printk("MSR: "REG" ", regs->msr);
print_msr_bits(regs->msr);
- printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
+ pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
trap = TRAP(regs);
if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
pr_cont("CFAR: "REG" ", regs->orig_gpr3);
@@ -1449,6 +1455,137 @@ void flush_thread(void)
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
}
+int set_thread_uses_vas(void)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -EINVAL;
+
+ current->thread.used_vas = 1;
+
+ /*
+ * Even a process that has no foreign real address mapping can use
+ * an unpaired COPY instruction (to no real effect). Issue CP_ABORT
+ * to clear any pending COPY and prevent a covert channel.
+ *
+ * __switch_to() will issue CP_ABORT on future context switches.
+ */
+ asm volatile(PPC_CP_ABORT);
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+ return 0;
+}
+
+#ifdef CONFIG_PPC64
+static DEFINE_SPINLOCK(vas_thread_id_lock);
+static DEFINE_IDA(vas_thread_ida);
+
+/*
+ * We need to assign a unique thread id to each thread in a process.
+ *
+ * This thread id, referred to as TIDR, is separate from Linux's tgid and
+ * is intended to be used to direct an ASB_Notify from the hardware to the
+ * thread, when a suitable event occurs in the system.
+ *
+ * One such event is a "paste" instruction in the context of Fast Thread
+ * Wakeup (aka Core-to-core wake up) in the Virtual Accelerator
+ * Switchboard (VAS) in POWER9.
+ *
+ * To get a unique TIDR per process we could simply reuse task_pid_nr() but
+ * the problem is that task_pid_nr() is not yet available when copy_thread()
+ * is called. Fixing that would require more intrusive changes to arch-neutral
+ * code in the copy_process() code path.
+ *
+ * Further, to assign unique TIDRs within each process, we need an atomic
+ * field (or an IDR) in task_struct, which again intrudes into the arch-
+ * neutral code. So try to assign globally unique TIDRs for now.
+ *
+ * NOTE: TIDR 0 indicates that the thread does not need a TIDR value.
+ * For now, only threads that expect to be notified by the VAS
+ * hardware need a TIDR value and we assign values > 0 for those.
+ */
+#define MAX_THREAD_CONTEXT ((1 << 16) - 1)
+static int assign_thread_tidr(void)
+{
+ int index;
+ int err;
+
+again:
+ if (!ida_pre_get(&vas_thread_ida, GFP_KERNEL))
+ return -ENOMEM;
+
+ spin_lock(&vas_thread_id_lock);
+ err = ida_get_new_above(&vas_thread_ida, 1, &index);
+ spin_unlock(&vas_thread_id_lock);
+
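+	/*
+	 * -EAGAIN means the IDA ran out of preallocated memory, so go
+	 * back, preallocate with ida_pre_get() and retry.
+	 */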
+ if (err == -EAGAIN)
+ goto again;
+ else if (err)
+ return err;
+
+ if (index > MAX_THREAD_CONTEXT) {
+ spin_lock(&vas_thread_id_lock);
+ ida_remove(&vas_thread_ida, index);
+ spin_unlock(&vas_thread_id_lock);
+ return -ENOMEM;
+ }
+
+ return index;
+}
+
+static void free_thread_tidr(int id)
+{
+ spin_lock(&vas_thread_id_lock);
+ ida_remove(&vas_thread_ida, id);
+ spin_unlock(&vas_thread_id_lock);
+}
+
+/*
+ * Clear any TIDR value assigned to this thread.
+ */
+void clear_thread_tidr(struct task_struct *t)
+{
+ if (!t->thread.tidr)
+ return;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ mtspr(SPRN_TIDR, 0);
+ free_thread_tidr(t->thread.tidr);
+ t->thread.tidr = 0;
+}
+
+void arch_release_task_struct(struct task_struct *t)
+{
+ clear_thread_tidr(t);
+}
+
+/*
+ * Assign a unique TIDR (thread id) for task @t and set it in the thread
+ * structure. For now, we only support setting TIDR for 'current' task.
+ */
+int set_thread_tidr(struct task_struct *t)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -EINVAL;
+
+ if (t != current)
+ return -EINVAL;
+
+ t->thread.tidr = assign_thread_tidr();
+ if (t->thread.tidr < 0)
+ return t->thread.tidr;
+
+ mtspr(SPRN_TIDR, t->thread.tidr);
+
+ return 0;
+}
+
+#endif /* CONFIG_PPC64 */
+
void
release_thread(struct task_struct *t)
{
@@ -1482,7 +1619,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
{
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
unsigned long sp_vsid;
unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
@@ -1595,6 +1732,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
}
if (cpu_has_feature(CPU_FTR_HAS_PPR))
p->thread.ppr = INIT_PPR;
+
+ p->thread.tidr = 0;
#endif
kregs->nip = ppc_function_entry(f);
return 0;
@@ -1913,7 +2052,8 @@ unsigned long get_wchan(struct task_struct *p)
do {
sp = *(unsigned long *)sp;
- if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
+ p->state == TASK_RUNNING)
return 0;
if (count > 0) {
ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
@@ -1994,11 +2134,25 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
void notrace __ppc64_runlatch_on(void)
{
struct thread_info *ti = current_thread_info();
- unsigned long ctrl;
- ctrl = mfspr(SPRN_CTRLF);
- ctrl |= CTRL_RUNLATCH;
- mtspr(SPRN_CTRLT, ctrl);
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ /*
+ * Least significant bit (RUN) is the only writable bit of
+ * the CTRL register, so we can avoid mfspr. 2.06 is not the
+ * earliest ISA where this is the case, but it's convenient.
+ */
+ mtspr(SPRN_CTRLT, CTRL_RUNLATCH);
+ } else {
+ unsigned long ctrl;
+
+ /*
+ * Some architectures (e.g., Cell) have writable fields other
+ * than RUN, so do the read-modify-write.
+ */
+ ctrl = mfspr(SPRN_CTRLF);
+ ctrl |= CTRL_RUNLATCH;
+ mtspr(SPRN_CTRLT, ctrl);
+ }
ti->local_flags |= _TLF_RUNLATCH;
}
@@ -2007,13 +2161,18 @@ void notrace __ppc64_runlatch_on(void)
void notrace __ppc64_runlatch_off(void)
{
struct thread_info *ti = current_thread_info();
- unsigned long ctrl;
ti->local_flags &= ~_TLF_RUNLATCH;
- ctrl = mfspr(SPRN_CTRLF);
- ctrl &= ~CTRL_RUNLATCH;
- mtspr(SPRN_CTRLT, ctrl);
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ mtspr(SPRN_CTRLT, 0);
+ } else {
+ unsigned long ctrl;
+
+ ctrl = mfspr(SPRN_CTRLF);
+ ctrl &= ~CTRL_RUNLATCH;
+ mtspr(SPRN_CTRLT, ctrl);
+ }
}
#endif /* CONFIG_PPC64 */
@@ -2042,7 +2201,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
unsigned long base = mm->brk;
unsigned long ret;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/*
* If we are using 1TB segments and we are allowed to randomise
* the heap, we can put it above 1TB so it is backed by a 1TB
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f83056297441..b15bae265c90 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -47,6 +47,7 @@
#include <asm/mmu.h>
#include <asm/paca.h>
#include <asm/pgtable.h>
+#include <asm/powernv.h>
#include <asm/iommu.h>
#include <asm/btext.h>
#include <asm/sections.h>
@@ -228,7 +229,7 @@ static void __init check_cpu_pa_features(unsigned long node)
ibm_pa_features, ARRAY_SIZE(ibm_pa_features));
}
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
static void __init init_mmu_slb_size(unsigned long node)
{
const __be32 *slb_size_ptr;
@@ -658,6 +659,38 @@ static void __init early_reserve_mem(void)
#endif
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static bool tm_disabled __initdata;
+
+static int __init parse_ppc_tm(char *str)
+{
+ bool res;
+
+ if (kstrtobool(str, &res))
+ return -EINVAL;
+
+ tm_disabled = !res;
+
+ return 0;
+}
+early_param("ppc_tm", parse_ppc_tm);
+
+static void __init tm_init(void)
+{
+ if (tm_disabled) {
+ pr_info("Disabling hardware transactional memory (HTM)\n");
+ cur_cpu_spec->cpu_user_features2 &=
+ ~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
+ cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
+ return;
+ }
+
+ pnv_tm_init();
+}
+#else
+static void tm_init(void) { }
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
void __init early_init_devtree(void *params)
{
phys_addr_t limit;
@@ -767,6 +800,8 @@ void __init early_init_devtree(void *params)
powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE;
#endif
+ tm_init();
+
DBG(" <- early_init_devtree()\n");
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 613f79f03877..02190e90c7ae 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -177,6 +177,7 @@ struct platform_support {
bool hash_mmu;
bool radix_mmu;
bool radix_gtse;
+ bool xive;
};
/* Platforms codes are now obsolete in the kernel. Now only used within this
@@ -1041,6 +1042,27 @@ static void __init prom_parse_mmu_model(u8 val,
}
}
+static void __init prom_parse_xive_model(u8 val,
+ struct platform_support *support)
+{
+ switch (val) {
+ case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */
+ prom_debug("XIVE - either mode supported\n");
+ support->xive = true;
+ break;
+ case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */
+ prom_debug("XIVE - exploitation mode supported\n");
+ support->xive = true;
+ break;
+ case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */
+ prom_debug("XIVE - legacy mode supported\n");
+ break;
+ default:
+ prom_debug("Unknown xive support option: 0x%x\n", val);
+ break;
+ }
+}
+
static void __init prom_parse_platform_support(u8 index, u8 val,
struct platform_support *support)
{
@@ -1054,6 +1076,10 @@ static void __init prom_parse_platform_support(u8 index, u8 val,
support->radix_gtse = true;
}
break;
+ case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */
+ prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT),
+ support);
+ break;
}
}
@@ -1062,7 +1088,8 @@ static void __init prom_check_platform_support(void)
struct platform_support supported = {
.hash_mmu = false,
.radix_mmu = false,
- .radix_gtse = false
+ .radix_gtse = false,
+ .xive = false
};
int prop_len = prom_getproplen(prom.chosen,
"ibm,arch-vec-5-platform-support");
@@ -1095,6 +1122,11 @@ static void __init prom_check_platform_support(void)
/* We're probably on a legacy hypervisor */
prom_debug("Assuming legacy hash support\n");
}
+
+ if (supported.xive) {
+ prom_debug("Asking for XIVE\n");
+ ibm_architecture_vec.vec5.intarch = OV5_FEAT(OV5_XIVE_EXPLOIT);
+ }
}
static void __init prom_send_capabilities(void)
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 6295e646f78c..9cb7f88df563 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#undef DEBUG
#include <linux/kernel.h>
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 925a4ef90559..f52ad5bb7109 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -127,12 +127,19 @@ static void flush_tmregs_to_thread(struct task_struct *tsk)
* If task is not current, it will have been flushed already to
* it's thread_struct during __switch_to().
*
- * A reclaim flushes ALL the state.
+	 * A reclaim flushes ALL the state; if we are not in a transaction
+	 * we just save the TM SPRs from the live registers instead.
*/
- if (tsk == current && MSR_TM_SUSPENDED(mfmsr()))
- tm_reclaim_current(TM_CAUSE_SIGNAL);
+ if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current))
+ return;
+ if (MSR_TM_SUSPENDED(mfmsr())) {
+ tm_reclaim_current(TM_CAUSE_SIGNAL);
+ } else {
+ tm_enable();
+ tm_save_sprs(&(tsk->thread));
+ }
}
#else
static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
@@ -1587,11 +1594,8 @@ static int ppr_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ppr, 0, sizeof(u64));
- return ret;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ppr, 0, sizeof(u64));
}
static int ppr_set(struct task_struct *target,
@@ -1599,11 +1603,8 @@ static int ppr_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ppr, 0, sizeof(u64));
- return ret;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ppr, 0, sizeof(u64));
}
static int dscr_get(struct task_struct *target,
@@ -1611,22 +1612,16 @@ static int dscr_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.dscr, 0, sizeof(u64));
- return ret;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.dscr, 0, sizeof(u64));
}
static int dscr_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.dscr, 0, sizeof(u64));
- return ret;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.dscr, 0, sizeof(u64));
}
#endif
#ifdef CONFIG_PPC_BOOK3S_64
@@ -1635,22 +1630,16 @@ static int tar_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tar, 0, sizeof(u64));
- return ret;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tar, 0, sizeof(u64));
}
static int tar_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tar, 0, sizeof(u64));
- return ret;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tar, 0, sizeof(u64));
}
static int ebb_active(struct task_struct *target,
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
index d88736fbece6..e8cfc69f59ae 100644
--- a/arch/powerpc/kernel/reloc_64.S
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -82,7 +82,7 @@ _GLOBAL(relocate)
6: blr
.balign 8
-p_dyn: .llong __dynamic_start - 0b
-p_rela: .llong __rela_dyn_start - 0b
-p_st: .llong _stext - 0b
+p_dyn: .8byte __dynamic_start - 0b
+p_rela: .8byte __rela_dyn_start - 0b
+p_st: .8byte _stext - 0b
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index df56dfc4b681..c8c5f3a550c2 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2000 Tilmann Bitterberg
* (tilmann@bitterberg.de)
diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c
index c57c19358a26..49600985c7ef 100644
--- a/arch/powerpc/kernel/rtas-rtc.c
+++ b/arch/powerpc/kernel/rtas-rtc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/time.h>
#include <linux/timer.h>
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b8a4987f58cf..3f1c4fcbe0aa 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -78,7 +78,7 @@ static unsigned long lock_rtas(void)
local_irq_save(flags);
preempt_disable();
- arch_spin_lock_flags(&rtas.lock, flags);
+ arch_spin_lock(&rtas.lock);
return flags;
}
@@ -914,7 +914,7 @@ int rtas_online_cpus_mask(cpumask_var_t cpus)
if (ret) {
cpumask_var_t tmp_mask;
- if (!alloc_cpumask_var(&tmp_mask, GFP_TEMPORARY))
+ if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL))
return ret;
/* Use tmp_mask to preserve cpus mask from first failure */
@@ -962,7 +962,7 @@ int rtas_ibm_suspend_me(u64 handle)
return -EIO;
}
- if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY))
+ if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL))
return -ENOMEM;
atomic_set(&data.working, 0);
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 73f1934582c2..c2b148b1634a 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -91,26 +91,14 @@ static int rtas_pci_read_config(struct pci_bus *bus,
unsigned int devfn,
int where, int size, u32 *val)
{
- struct device_node *busdn, *dn;
struct pci_dn *pdn;
- bool found = false;
int ret;
- /* Search only direct children of the bus */
*val = 0xFFFFFFFF;
- busdn = pci_bus_to_OF_node(bus);
- for (dn = busdn->child; dn; dn = dn->sibling) {
- pdn = PCI_DN(dn);
- if (pdn && pdn->devfn == devfn
- && of_device_is_available(dn)) {
- found = true;
- break;
- }
- }
- if (!found)
- return PCIBIOS_DEVICE_NOT_FOUND;
+ pdn = pci_get_pdn_by_devfn(bus, devfn);
+ /* Validity of pdn is checked in here */
ret = rtas_read_config(pdn, where, size, val);
if (*val == EEH_IO_ERROR_VALUE(size) &&
eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
@@ -153,24 +141,11 @@ static int rtas_pci_write_config(struct pci_bus *bus,
unsigned int devfn,
int where, int size, u32 val)
{
- struct device_node *busdn, *dn;
struct pci_dn *pdn;
- bool found = false;
-
- /* Search only direct children of the bus */
- busdn = pci_bus_to_OF_node(bus);
- for (dn = busdn->child; dn; dn = dn->sibling) {
- pdn = PCI_DN(dn);
- if (pdn && pdn->devfn == devfn
- && of_device_is_available(dn)) {
- found = true;
- break;
- }
- }
- if (!found)
- return PCIBIOS_DEVICE_NOT_FOUND;
+ pdn = pci_get_pdn_by_devfn(bus, devfn);
+ /* Validity of pdn is checked in here. */
return rtas_write_config(pdn, where, size, val);
}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 94a948207cd2..2075322cd225 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -481,7 +481,7 @@ void __init smp_setup_cpu_maps(void)
__be32 cpu_be;
int j, len;
- DBG(" * %s...\n", dn->full_name);
+ DBG(" * %pOF...\n", dn);
intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
&len);
@@ -551,7 +551,7 @@ void __init smp_setup_cpu_maps(void)
if (maxcpus > nr_cpu_ids) {
printk(KERN_WARNING
"Partition configured for %d cpus, "
- "operating system maximum is %d.\n",
+ "operating system maximum is %u.\n",
maxcpus, nr_cpu_ids);
maxcpus = nr_cpu_ids;
} else
@@ -704,30 +704,6 @@ int check_legacy_ioport(unsigned long base_port)
}
EXPORT_SYMBOL(check_legacy_ioport);
-static int ppc_panic_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- /*
- * If firmware-assisted dump has been registered then trigger
- * firmware-assisted dump and let firmware handle everything else.
- */
- crash_fadump(NULL, ptr);
- ppc_md.panic(ptr); /* May not return */
- return NOTIFY_DONE;
-}
-
-static struct notifier_block ppc_panic_block = {
- .notifier_call = ppc_panic_event,
- .priority = INT_MIN /* may not return; must be done last */
-};
-
-void __init setup_panic(void)
-{
- if (!ppc_md.panic)
- return;
- atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
-}
-
#ifdef CONFIG_CHECK_CACHE_COHERENCY
/*
* For platforms that have configurable cache-coherency. This function
@@ -797,7 +773,7 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
static __init void print_system_info(void)
{
pr_info("-----------------------------------------------------\n");
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
#endif
#ifdef CONFIG_PPC_STD_MMU_32
@@ -824,7 +800,7 @@ static __init void print_system_info(void)
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
#endif
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
if (htab_address)
pr_info("htab_address = 0x%p\n", htab_address);
if (htab_hash_mask)
@@ -872,9 +848,6 @@ void __init setup_arch(char **cmdline_p)
/* Probe the machine type, establish ppc_md. */
probe_machine();
- /* Setup panic notifier if requested by the platform. */
- setup_panic();
-
/*
* Configure ppc_md.power_save (ppc32 only, 64-bit machines do
* it from their respective probe() function.
@@ -916,13 +889,6 @@ void __init setup_arch(char **cmdline_p)
/* Reserve large chunks of memory for use by CMA for KVM. */
kvm_cma_reserve();
- /*
- * Reserve any gigantic pages requested on the command line.
- * memblock needs to have been initialized by the time this is
- * called since this will reserve memory.
- */
- reserve_hugetlb_gpages();
-
klp_init_thread_info(&init_thread_info);
init_mm.start_code = (unsigned long)_stext;
@@ -932,15 +898,13 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_PPC_MM_SLICES
#ifdef CONFIG_PPC64
- init_mm.context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
+ if (!radix_enabled())
+ init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
#else
#error "context.addr_limit not initialized."
#endif
#endif
-#ifdef CONFIG_PPC_64K_PAGES
- init_mm.context.pte_frag = NULL;
-#endif
#ifdef CONFIG_SPAPR_TCE_IOMMU
mm_iommu_init(&init_mm);
#endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index cfba134b3024..21c18071d9d5 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -45,6 +45,12 @@ void emergency_stack_init(void);
static inline void emergency_stack_init(void) { };
#endif
+#ifdef CONFIG_PPC64
+void record_spr_defaults(void);
+#else
+static inline void record_spr_defaults(void) { };
+#endif
+
/*
* Having this in kvm_ppc.h makes include dependencies too
* tricky to solve for setup-common.c so have it here.
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 2f88f6cf1a42..51ebc01fff52 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -98,6 +98,9 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
notrace void __init machine_init(u64 dt_ptr)
{
+ unsigned int *addr = &memset_nocache_branch;
+ unsigned long insn;
+
/* Configure static keys first, now that we're relocated. */
setup_feature_keys();
@@ -105,7 +108,9 @@ notrace void __init machine_init(u64 dt_ptr)
udbg_early_init();
patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP);
- patch_instruction(&memset_nocache_branch, PPC_INST_NOP);
+
+ insn = create_cond_branch(addr, branch_target(addr), 0x820000);
+ patch_instruction(addr, insn); /* replace b by bne cr0 */
/* Do some early initialization based on the flat device tree */
early_init_devtree(__va(dt_ptr));
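
The 0x820000 flags value passed to create_cond_branch() above supplies the BO/BI fields of the generated conditional branch, turning the original unconditional branch into a "bne cr0" as the in-line comment notes. An illustrative, standalone decode of that constant (assuming the standard Power ISA B-form layout, BO in bits 21-25 and BI in bits 16-20 of the word):

#include <stdio.h>

int main(void)
{
	unsigned int flags = 0x820000;
	unsigned int bo = (flags >> 21) & 0x1f;	/* 4: branch if the CR bit is 0 */
	unsigned int bi = (flags >> 16) & 0x1f;	/* 2: CR0[EQ] */

	/* BO=4, BI=2 selects "branch if CR0[EQ] == 0", i.e. bne cr0 */
	printf("BO=%u BI=%u\n", bo, bi);
	return 0;
}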
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index af23d4b576ec..8956a9856604 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -69,6 +69,8 @@
#include <asm/opal.h>
#include <asm/cputhreads.h>
+#include "setup.h"
+
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
@@ -317,6 +319,13 @@ void __init early_setup(unsigned long dt_ptr)
early_init_mmu();
/*
+ * After firmware and early platform setup code has set things up,
+ * we note the SPR values for configurable control/performance
+ * registers, and use those as initial defaults.
+ */
+ record_spr_defaults();
+
+ /*
* At this point, we can let interrupts switch to virtual mode
* (the MMU has been setup), so adjust the MSR in the PACA to
* have IR and DR set and enable AIL if it exists
@@ -360,8 +369,16 @@ void early_setup_secondary(void)
#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
static bool use_spinloop(void)
{
- if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S)) {
+ /*
+ * See comments in head_64.S -- not all platforms insert
+ * secondaries at __secondary_hold and wait at the spin
+ * loop.
+ */
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ return false;
return true;
+ }
/*
* When book3e boots from kexec, the ePAPR spin table does
@@ -564,6 +581,9 @@ static __init u64 safe_stack_limit(void)
/* Other BookE, we assume the first GB is bolted */
return 1ul << 30;
#else
+ if (early_radix_enabled())
+ return ULONG_MAX;
+
/* BookS, the first segment is bolted */
if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
return 1UL << SID_SHIFT_1T;
@@ -578,7 +598,8 @@ void __init irqstack_early_init(void)
/*
* Interrupt stacks must be in the first segment since we
- * cannot afford to take SLB misses on them.
+ * cannot afford to take SLB misses on them. They are not
+ * accessed in realmode.
*/
for_each_possible_cpu(i) {
softirq_ctx[i] = (struct thread_info *)
@@ -649,8 +670,9 @@ void __init emergency_stack_init(void)
* aligned.
*
* Since we use these as temporary stacks during secondary CPU
- * bringup, we need to get at them in real mode. This means they
- * must also be within the RMO region.
+ * bringup, machine check, system reset, and HMI, we need to get
+ * at them in real mode. This means they must also be within the RMO
+ * region.
*
* The IRQ stacks allocated elsewhere in this file are zeroed and
* initialized in kernel/irq.c. These are initialized here in order
@@ -751,3 +773,31 @@ unsigned long memory_block_size_bytes(void)
struct ppc_pci_io ppc_pci_io;
EXPORT_SYMBOL(ppc_pci_io);
#endif
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+ return ppc_proc_freq * watchdog_thresh;
+}
+#endif
+
+/*
+ * The perf based hardlockup detector breaks PMU event based branches, so
+ * disable it by default. Book3S has a soft-nmi hardlockup detector based
+ * on the decrementer interrupt, so it does not suffer from this problem.
+ *
+ * It is likely to get false positives in VM guests, so disable it there
+ * by default too.
+ */
+static int __init disable_hardlockup_detector(void)
+{
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+ hardlockup_detector_disable();
+#else
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ hardlockup_detector_disable();
+#endif
+
+ return 0;
+}
+early_initcall(disable_hardlockup_detector);
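
The hw_nmi_get_sample_period() added above sizes the perf cycles event used by the generic hardlockup detector so that it overflows after watchdog_thresh seconds' worth of cycles. A worked example with assumed values (3.5 GHz ppc_proc_freq and the usual 10 second threshold):

#include <stdio.h>

int main(void)
{
	unsigned long long ppc_proc_freq = 3500000000ULL;	/* assumed 3.5 GHz, in Hz */
	int watchdog_thresh = 10;				/* assumed default, in seconds */

	/* 3.5e9 cycles/s * 10 s = 3.5e10 cycles between NMI samples */
	printf("sample period = %llu cycles\n",
	       ppc_proc_freq * (unsigned long long)watchdog_thresh);
	return 0;
}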
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index e9436c5e1e09..3d7539b90010 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -103,7 +103,7 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
static void do_signal(struct task_struct *tsk)
{
sigset_t *oldset = sigmask_to_save();
- struct ksignal ksig;
+ struct ksignal ksig = { .sig = 0 };
int ret;
int is32 = is_32bit_task();
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 97bb1385e771..9ffd73296f64 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -94,40 +94,13 @@
*/
static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
{
- compat_sigset_t cset;
-
- switch (_NSIG_WORDS) {
- case 4: cset.sig[6] = set->sig[3] & 0xffffffffull;
- cset.sig[7] = set->sig[3] >> 32;
- case 3: cset.sig[4] = set->sig[2] & 0xffffffffull;
- cset.sig[5] = set->sig[2] >> 32;
- case 2: cset.sig[2] = set->sig[1] & 0xffffffffull;
- cset.sig[3] = set->sig[1] >> 32;
- case 1: cset.sig[0] = set->sig[0] & 0xffffffffull;
- cset.sig[1] = set->sig[0] >> 32;
- }
- return copy_to_user(uset, &cset, sizeof(*uset));
+ return put_compat_sigset(uset, set, sizeof(*uset));
}
static inline int get_sigset_t(sigset_t *set,
const compat_sigset_t __user *uset)
{
- compat_sigset_t s32;
-
- if (copy_from_user(&s32, uset, sizeof(*uset)))
- return -EFAULT;
-
- /*
- * Swap the 2 words of the 64-bit sigset_t (they are stored
- * in the "wrong" endian in 32-bit user storage).
- */
- switch (_NSIG_WORDS) {
- case 4: set->sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
- case 3: set->sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
- case 2: set->sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
- case 1: set->sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
- }
- return 0;
+ return get_compat_sigset(set, uset);
}
#define to_user_ptr(p) ptr_to_compat(p)
@@ -519,6 +492,8 @@ static int save_tm_user_regs(struct pt_regs *regs,
{
unsigned long msr = regs->msr;
+ WARN_ON(tm_suspend_disabled);
+
/* Remove TM bits from thread's MSR. The MSR in the sigcontext
* just indicates to userland that we were doing a transaction, but we
* don't want to return in transactional state. This also ensures
@@ -769,6 +744,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
int i;
#endif
+ if (tm_suspend_disabled)
+ return 1;
/*
* restore general registers but not including MSR or SOFTE. Also
* take care of keeping r2 (TLS) intact if not a signal.
@@ -876,7 +853,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
/* Make sure the transaction is marked as failed */
current->thread.tm_texasr |= TEXASR_FS;
/* This loads the checkpointed FP/VEC state, if used */
- tm_recheckpoint(&current->thread, msr);
+ tm_recheckpoint(&current->thread);
/* This loads the speculative FP/VEC state, if used */
msr_check_and_set(msr & (MSR_FP | MSR_VEC));
@@ -913,42 +890,40 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s)
*/
err = __put_user(s->si_signo, &d->si_signo);
err |= __put_user(s->si_errno, &d->si_errno);
- err |= __put_user((short)s->si_code, &d->si_code);
+ err |= __put_user(s->si_code, &d->si_code);
if (s->si_code < 0)
err |= __copy_to_user(&d->_sifields._pad, &s->_sifields._pad,
SI_PAD_SIZE32);
- else switch(s->si_code >> 16) {
- case __SI_CHLD >> 16:
+ else switch(siginfo_layout(s->si_signo, s->si_code)) {
+ case SIL_CHLD:
err |= __put_user(s->si_pid, &d->si_pid);
err |= __put_user(s->si_uid, &d->si_uid);
err |= __put_user(s->si_utime, &d->si_utime);
err |= __put_user(s->si_stime, &d->si_stime);
err |= __put_user(s->si_status, &d->si_status);
break;
- case __SI_FAULT >> 16:
+ case SIL_FAULT:
err |= __put_user((unsigned int)(unsigned long)s->si_addr,
&d->si_addr);
break;
- case __SI_POLL >> 16:
+ case SIL_POLL:
err |= __put_user(s->si_band, &d->si_band);
err |= __put_user(s->si_fd, &d->si_fd);
break;
- case __SI_TIMER >> 16:
+ case SIL_TIMER:
err |= __put_user(s->si_tid, &d->si_tid);
err |= __put_user(s->si_overrun, &d->si_overrun);
err |= __put_user(s->si_int, &d->si_int);
break;
- case __SI_SYS >> 16:
+ case SIL_SYS:
err |= __put_user(ptr_to_compat(s->si_call_addr), &d->si_call_addr);
err |= __put_user(s->si_syscall, &d->si_syscall);
err |= __put_user(s->si_arch, &d->si_arch);
break;
- case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
- case __SI_MESGQ >> 16:
+ case SIL_RT:
err |= __put_user(s->si_int, &d->si_int);
/* fallthrough */
- case __SI_KILL >> 16:
- default:
+ case SIL_KILL:
err |= __put_user(s->si_pid, &d->si_pid);
err |= __put_user(s->si_uid, &d->si_uid);
break;
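
The put_compat_sigset()/get_compat_sigset() helpers used above centralise the word splitting that the removed switch statements did by hand: each 64-bit sigset word becomes a low and a high 32-bit compat word. A userspace model of that conversion (powerpc needs only one 64-bit word, since _NSIG is 64):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t set = 0x123456789abcdef0ULL;		/* one 64-bit sigset word */
	uint32_t cset[2];				/* the two compat words */

	cset[0] = (uint32_t)(set & 0xffffffffULL);	/* low half first */
	cset[1] = (uint32_t)(set >> 32);		/* then the high half */

	printf("sig[0]=%08x sig[1]=%08x\n", cset[0], cset[1]);
	return 0;
}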
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index c83c115858c1..4b9ca3570344 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -214,6 +214,8 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
BUG_ON(!MSR_TM_ACTIVE(regs->msr));
+ WARN_ON(tm_suspend_disabled);
+
/* Remove TM bits from thread's MSR. The MSR in the sigcontext
* just indicates to userland that we were doing a transaction, but we
* don't want to return in transactional state. This also ensures
@@ -430,6 +432,9 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
BUG_ON(tsk != current);
+ if (tm_suspend_disabled)
+ return -EINVAL;
+
/* copy the GPRs */
err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr));
err |= __copy_from_user(&tsk->thread.ckpt_regs, sc->gp_regs,
@@ -452,9 +457,20 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
if (MSR_TM_RESV(msr))
return -EINVAL;
- /* pull in MSR TM from user context */
+ /* pull in MSR TS bits from user context */
regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
+ /*
+ * Ensure that TM is enabled in regs->msr before we leave the signal
+ * handler. It could be the case that (a) user disabled the TM bit
+ * through the manipulation of the MSR bits in uc_mcontext or (b) the
+ * TM bit was disabled because a sufficient number of context switches
+ * happened whilst in the signal handler and load_tm overflowed,
+ * disabling the TM bit. In either case we can end up with an illegal
+ * TM state leading to a TM Bad Thing when we return to userspace.
+ */
+ regs->msr |= MSR_TM;
+
/* pull in MSR LE from user context */
regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
@@ -547,7 +563,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
/* Make sure the transaction is marked as failed */
tsk->thread.tm_texasr |= TEXASR_FS;
/* This loads the checkpointed FP/VEC state, if used */
- tm_recheckpoint(&tsk->thread, msr);
+ tm_recheckpoint(&tsk->thread);
msr_check_and_set(msr & (MSR_FP | MSR_VEC));
if (msr & MSR_FP) {
diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c
index 7a37ecd3afa3..21c39355b25e 100644
--- a/arch/powerpc/kernel/smp-tbsync.c
+++ b/arch/powerpc/kernel/smp-tbsync.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Smp timebase synchronization for ppc.
*
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 997c88d54acf..e0a4c1f82e25 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -75,9 +75,11 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
struct thread_info *secondary_ti;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
/* SMP operations for this machine */
@@ -351,7 +353,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
hard_irq_disable();
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
raw_local_irq_restore(*flags);
- cpu_relax();
+ spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
raw_local_irq_save(*flags);
hard_irq_disable();
}
@@ -360,7 +362,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
static void nmi_ipi_lock(void)
{
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
- cpu_relax();
+ spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
}
static void nmi_ipi_unlock(void)
@@ -475,7 +477,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
nmi_ipi_lock_start(&flags);
while (nmi_ipi_busy_count) {
nmi_ipi_unlock_end(&flags);
- cpu_relax();
+ spin_until_cond(nmi_ipi_busy_count == 0);
nmi_ipi_lock_start(&flags);
}
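
The spin_until_cond() calls introduced above (from include/linux/processor.h) are conceptually a cpu_relax() loop; the real macro additionally brackets the wait with spin_begin()/spin_end() so SMT hardware can lower the thread's priority while it spins. A rough model, for kernel context:

/* Simplified model only; the in-tree macro also calls spin_begin()/spin_end(). */
#define spin_until_cond_model(cond)		\
do {						\
	while (!(cond))				\
		cpu_relax();			\
} while (0)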
@@ -571,6 +573,26 @@ static void smp_store_cpu_info(int id)
#endif
}
+/*
+ * Relationships between CPUs are maintained in a set of per-cpu cpumasks, so
+ * rather than just passing around the cpumask we pass around a function that
+ * returns that cpumask for the given CPU.
+ */
+static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
+{
+ cpumask_set_cpu(i, get_cpumask(j));
+ cpumask_set_cpu(j, get_cpumask(i));
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void set_cpus_unrelated(int i, int j,
+ struct cpumask *(*get_cpumask)(int))
+{
+ cpumask_clear_cpu(i, get_cpumask(j));
+ cpumask_clear_cpu(j, get_cpumask(i));
+}
+#endif
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
unsigned int cpu;
@@ -590,6 +612,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(cpu) {
zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
+ zalloc_cpumask_var_node(&per_cpu(cpu_l2_cache_map, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
/*
@@ -602,7 +626,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
}
}
+ /* Init the cpumasks so the boot CPU is related to itself */
cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
+ cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
if (smp_ops && smp_ops->probe)
@@ -828,33 +854,6 @@ int cpu_first_thread_of_core(int core)
}
EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
-static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
-{
- const struct cpumask *mask;
- struct device_node *np;
- int i, plen;
- const __be32 *prop;
-
- mask = add ? cpu_online_mask : cpu_present_mask;
- for_each_cpu(i, mask) {
- np = of_get_cpu_node(i, NULL);
- if (!np)
- continue;
- prop = of_get_property(np, "ibm,chip-id", &plen);
- if (prop && plen == sizeof(int) &&
- of_read_number(prop, 1) == chipid) {
- if (add) {
- cpumask_set_cpu(cpu, cpu_core_mask(i));
- cpumask_set_cpu(i, cpu_core_mask(cpu));
- } else {
- cpumask_clear_cpu(cpu, cpu_core_mask(i));
- cpumask_clear_cpu(i, cpu_core_mask(cpu));
- }
- }
- of_node_put(np);
- }
-}
-
/* Must be called when no change can occur to cpu_present_mask,
* i.e. during cpu online or offline.
*/
@@ -877,52 +876,93 @@ static struct device_node *cpu_to_l2cache(int cpu)
return cache;
}
-static void traverse_core_siblings(int cpu, bool add)
+static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
{
struct device_node *l2_cache, *np;
- const struct cpumask *mask;
- int i, chip, plen;
- const __be32 *prop;
-
- /* First see if we have ibm,chip-id properties in cpu nodes */
- np = of_get_cpu_node(cpu, NULL);
- if (np) {
- chip = -1;
- prop = of_get_property(np, "ibm,chip-id", &plen);
- if (prop && plen == sizeof(int))
- chip = of_read_number(prop, 1);
- of_node_put(np);
- if (chip >= 0) {
- traverse_siblings_chip_id(cpu, add, chip);
- return;
- }
- }
+ int i;
l2_cache = cpu_to_l2cache(cpu);
- mask = add ? cpu_online_mask : cpu_present_mask;
- for_each_cpu(i, mask) {
+ if (!l2_cache)
+ return false;
+
+ for_each_cpu(i, cpu_online_mask) {
+ /*
+		 * When updating the masks, the current CPU has not yet been
+		 * marked online, but we still need to update the cache masks
+ */
np = cpu_to_l2cache(i);
if (!np)
continue;
- if (np == l2_cache) {
- if (add) {
- cpumask_set_cpu(cpu, cpu_core_mask(i));
- cpumask_set_cpu(i, cpu_core_mask(cpu));
- } else {
- cpumask_clear_cpu(cpu, cpu_core_mask(i));
- cpumask_clear_cpu(i, cpu_core_mask(cpu));
- }
- }
+
+ if (np == l2_cache)
+ set_cpus_related(cpu, i, mask_fn);
+
of_node_put(np);
}
of_node_put(l2_cache);
+
+ return true;
}
+#ifdef CONFIG_HOTPLUG_CPU
+static void remove_cpu_from_masks(int cpu)
+{
+ int i;
+
+ /* NB: cpu_core_mask is a superset of the others */
+ for_each_cpu(i, cpu_core_mask(cpu)) {
+ set_cpus_unrelated(cpu, i, cpu_core_mask);
+ set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
+ set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+ }
+}
+#endif
+
+static void add_cpu_to_masks(int cpu)
+{
+ int first_thread = cpu_first_thread_sibling(cpu);
+ int chipid = cpu_to_chip_id(cpu);
+ int i;
+
+ /*
+	 * This CPU will not be in the online mask yet, so we need to manually
+	 * add it to its own thread sibling mask.
+ */
+ cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++)
+ if (cpu_online(i))
+ set_cpus_related(i, cpu, cpu_sibling_mask);
+
+ /*
+ * Copy the thread sibling mask into the cache sibling mask
+ * and mark any CPUs that share an L2 with this CPU.
+ */
+ for_each_cpu(i, cpu_sibling_mask(cpu))
+ set_cpus_related(cpu, i, cpu_l2_cache_mask);
+ update_mask_by_l2(cpu, cpu_l2_cache_mask);
+
+ /*
+ * Copy the cache sibling mask into core sibling mask and mark
+ * any CPUs on the same chip as this CPU.
+ */
+ for_each_cpu(i, cpu_l2_cache_mask(cpu))
+ set_cpus_related(cpu, i, cpu_core_mask);
+
+ if (chipid == -1)
+ return;
+
+ for_each_cpu(i, cpu_online_mask)
+ if (cpu_to_chip_id(i) == chipid)
+ set_cpus_related(cpu, i, cpu_core_mask);
+}
+
+static bool shared_caches;
+
/* Activate a secondary processor. */
void start_secondary(void *unused)
{
unsigned int cpu = smp_processor_id();
- int i, base;
mmgrab(&init_mm);
current->active_mm = &init_mm;
@@ -945,22 +985,15 @@ void start_secondary(void *unused)
vdso_getcpu_init();
#endif
- /* Update sibling maps */
- base = cpu_first_thread_sibling(cpu);
- for (i = 0; i < threads_per_core; i++) {
- if (cpu_is_offline(base + i) && (cpu != base + i))
- continue;
- cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
- cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
+ /* Update topology CPU masks */
+ add_cpu_to_masks(cpu);
- /* cpu_core_map should be a superset of
- * cpu_sibling_map even if we don't have cache
- * information, so update the former here, too.
- */
- cpumask_set_cpu(cpu, cpu_core_mask(base + i));
- cpumask_set_cpu(base + i, cpu_core_mask(cpu));
- }
- traverse_core_siblings(cpu, true);
+ /*
+ * Check for any shared caches. Note that this must be done on a
+ * per-core basis because one core in the pair might be disabled.
+ */
+ if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
+ shared_caches = true;
set_numa_node(numa_cpu_lookup_table[cpu]);
set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
@@ -1003,35 +1036,65 @@ static struct sched_domain_topology_level powerpc_topology[] = {
{ NULL, },
};
-static __init long smp_setup_cpu_workfn(void *data __always_unused)
+/*
+ * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
+ * This topology makes it *much* cheaper to migrate tasks between adjacent cores
+ * since the migrated task remains cache hot. We want to take advantage of this
+ * at the scheduler level so an extra topology level is required.
+ */
+static int powerpc_shared_cache_flags(void)
{
- smp_ops->setup_cpu(boot_cpuid);
- return 0;
+ return SD_SHARE_PKG_RESOURCES;
}
+/*
+ * We can't just pass cpu_l2_cache_mask() directly because it returns a
+ * non-const pointer and the compiler barfs on that.
+ */
+static const struct cpumask *shared_cache_mask(int cpu)
+{
+ return cpu_l2_cache_mask(cpu);
+}
+
+static struct sched_domain_topology_level power9_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+ { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+ { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
void __init smp_cpus_done(unsigned int max_cpus)
{
/*
- * We want the setup_cpu() here to be called on the boot CPU, but
- * init might run on any CPU, so make sure it's invoked on the boot
- * CPU.
+ * We are running pinned to the boot CPU, see rest_init().
*/
if (smp_ops && smp_ops->setup_cpu)
- work_on_cpu_safe(boot_cpuid, smp_setup_cpu_workfn, NULL);
+ smp_ops->setup_cpu(boot_cpuid);
if (smp_ops && smp_ops->bringup_done)
smp_ops->bringup_done();
dump_numa_cpu_topology();
- set_sched_topology(powerpc_topology);
+ /*
+ * If any CPU detects that it's sharing a cache with another CPU then
+ * use the deeper topology that is aware of this sharing.
+ */
+ if (shared_caches) {
+ pr_info("Using shared cache scheduler topology\n");
+ set_sched_topology(power9_topology);
+ } else {
+ pr_info("Using standard scheduler topology\n");
+ set_sched_topology(powerpc_topology);
+ }
}
#ifdef CONFIG_HOTPLUG_CPU
int __cpu_disable(void)
{
int cpu = smp_processor_id();
- int base, i;
int err;
if (!smp_ops->cpu_disable)
@@ -1042,14 +1105,7 @@ int __cpu_disable(void)
return err;
/* Update sibling maps */
- base = cpu_first_thread_sibling(cpu);
- for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
- cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
- cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
- cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
- cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
- }
- traverse_core_siblings(cpu, false);
+ remove_cpu_from_masks(cpu);
return 0;
}
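
The set_cpus_related()/set_cpus_unrelated() helpers above take a function that returns the per-CPU mask instead of the mask itself, so one helper can maintain the sibling, L2-cache and core masks alike. A minimal userspace sketch of the same pattern, with plain bit masks standing in for cpumasks:

#include <stdio.h>

#define NR_CPUS 8

static unsigned long sibling_mask[NR_CPUS];
static unsigned long core_mask[NR_CPUS];

static unsigned long *get_sibling_mask(int cpu) { return &sibling_mask[cpu]; }
static unsigned long *get_core_mask(int cpu)    { return &core_mask[cpu]; }

/* Mark i and j as related in whichever mask the accessor selects */
static void mark_related(int i, int j, unsigned long *(*get_mask)(int))
{
	*get_mask(j) |= 1UL << i;
	*get_mask(i) |= 1UL << j;
}

int main(void)
{
	mark_related(0, 1, get_sibling_mask);	/* threads of the same core */
	mark_related(0, 4, get_core_mask);	/* cores of the same chip */

	printf("sibling[0]=%#lx core[0]=%#lx\n", sibling_mask[0], core_mask[0]);
	return 0;
}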
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
index ba4dee3d233f..34b73a262709 100644
--- a/arch/powerpc/kernel/swsusp_32.S
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/threads.h>
#include <asm/processor.h>
#include <asm/page.h>
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 988f38dced0f..82d8aae81c6a 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -179,7 +179,7 @@ nothing_to_copy:
sld r3, r3, r0
li r0, 0
1:
- dcbf r0,r3
+ dcbf 0,r3
addi r3,r3,0x20
bdnz 1b
diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S
index 553c1405ee05..88cfdbd530f1 100644
--- a/arch/powerpc/kernel/swsusp_booke.S
+++ b/arch/powerpc/kernel/swsusp_booke.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Based on swsusp_32.S, modified for FSL BookE by
* Anton Vorontsov <avorontsov@ru.mvista.com>
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 4437c70c7c2b..b8d4a1dac39f 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -590,6 +590,17 @@ static void sysfs_create_dscr_default(void)
if (cpu_has_feature(CPU_FTR_DSCR))
err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
}
+
+void __init record_spr_defaults(void)
+{
+ int cpu;
+
+ if (cpu_has_feature(CPU_FTR_DSCR)) {
+ dscr_default = mfspr(SPRN_DSCR);
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ paca[cpu].dscr_default = dscr_default;
+ }
+}
#endif /* CONFIG_PPC64 */
#ifdef HAS_PPC_PMC_PA6T
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 4d6b1d3a747f..7ccb7f81f8db 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -17,13 +17,13 @@
#include <asm/ppc_asm.h>
#ifdef CONFIG_PPC64
-#define SYSCALL(func) .llong DOTSYM(sys_##func),DOTSYM(sys_##func)
-#define COMPAT_SYS(func) .llong DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
-#define PPC_SYS(func) .llong DOTSYM(ppc_##func),DOTSYM(ppc_##func)
-#define OLDSYS(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
-#define SYS32ONLY(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
-#define PPC64ONLY(func) .llong DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall)
-#define SYSX(f, f3264, f32) .llong DOTSYM(f),DOTSYM(f3264)
+#define SYSCALL(func) .8byte DOTSYM(sys_##func),DOTSYM(sys_##func)
+#define COMPAT_SYS(func) .8byte DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
+#define PPC_SYS(func) .8byte DOTSYM(ppc_##func),DOTSYM(ppc_##func)
+#define OLDSYS(func) .8byte DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
+#define SYS32ONLY(func) .8byte DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
+#define PPC64ONLY(func) .8byte DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall)
+#define SYSX(f, f3264, f32) .8byte DOTSYM(f),DOTSYM(f3264)
#else
#define SYSCALL(func) .long sys_##func
#define COMPAT_SYS(func) .long sys_##func
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index a753b72efbc0..8cdd852aedd1 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* temp.c Thermal management for cpu's with Thermal Assist Units
*
@@ -187,7 +188,7 @@ static void tau_timeout(void * info)
local_irq_restore(flags);
}
-static void tau_timeout_smp(unsigned long unused)
+static void tau_timeout_smp(struct timer_list *unused)
{
/* schedule ourselves to be run again */
@@ -229,8 +230,7 @@ int __init TAU_init(void)
/* first, set up the window shrinking timer */
- init_timer(&tau_timer);
- tau_timer.function = tau_timeout_smp;
+ timer_setup(&tau_timer, tau_timeout_smp, 0);
tau_timer.expires = jiffies + shrink_timer;
add_timer(&tau_timer);
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index c4ba37822ba0..b92ac8e711db 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Transactional memory support routines to reclaim and recheckpoint
* transactional process state.
@@ -79,15 +80,12 @@ _GLOBAL(tm_abort)
blr
/* void tm_reclaim(struct thread_struct *thread,
- * unsigned long orig_msr,
* uint8_t cause)
*
* - Performs a full reclaim. This destroys outstanding
* transactions and updates thread->regs.tm_ckpt_* with the
* original checkpointed state. Note that thread->regs is
* unchanged.
- * - FP regs are written back to thread->transact_fpr before
- * reclaiming. These are the transactional (current) versions.
*
* Purpose is to both abort transactions of, and preserve the state of,
  * a transaction at a context switch. We preserve/restore both sets of process
@@ -98,9 +96,9 @@ _GLOBAL(tm_abort)
* Call with IRQs off, stacks get all out of sync for some periods in here!
*/
_GLOBAL(tm_reclaim)
- mfcr r6
+ mfcr r5
mflr r0
- stw r6, 8(r1)
+ stw r5, 8(r1)
std r0, 16(r1)
std r2, STK_GOT(r1)
stdu r1, -TM_FRAME_SIZE(r1)
@@ -108,7 +106,6 @@ _GLOBAL(tm_reclaim)
/* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */
std r3, STK_PARAM(R3)(r1)
- std r4, STK_PARAM(R4)(r1)
SAVE_NVGPRS(r1)
/* We need to setup MSR for VSX register save instructions. */
@@ -138,8 +135,8 @@ _GLOBAL(tm_reclaim)
std r1, PACAR1(r13)
/* Clear MSR RI since we are about to change r1, EE is already off. */
- li r4, 0
- mtmsrd r4, 1
+ li r5, 0
+ mtmsrd r5, 1
/*
* BE CAREFUL HERE:
@@ -151,7 +148,7 @@ _GLOBAL(tm_reclaim)
* to user register state. (FPRs, CCR etc. also!)
* Use an sprg and a tm_scratch in the PACA to shuffle.
*/
- TRECLAIM(R5) /* Cause in r5 */
+ TRECLAIM(R4) /* Cause in r4 */
/* ******************** GPRs ******************** */
/* Stash the checkpointed r13 away in the scratch SPR and get the real
@@ -242,40 +239,30 @@ _GLOBAL(tm_reclaim)
/* ******************** FPR/VR/VSRs ************
- * After reclaiming, capture the checkpointed FPRs/VRs /if used/.
- *
- * (If VSX used, FP and VMX are implied. Or, we don't need to look
- * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.)
- *
- * We're passed the thread's MSR as the second parameter
+ * After reclaiming, capture the checkpointed FPRs/VRs.
*
* We enabled VEC/FP/VSX in the msr above, so we can execute these
* instructions!
*/
- ld r4, STK_PARAM(R4)(r1) /* Second parameter, MSR * */
mr r3, r12
- andis. r0, r4, MSR_VEC@h
- beq dont_backup_vec
+ /* Altivec (VEC/VMX/VR)*/
addi r7, r3, THREAD_CKVRSTATE
SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */
mfvscr v0
li r6, VRSTATE_VSCR
stvx v0, r7, r6
-dont_backup_vec:
+
+ /* VRSAVE */
mfspr r0, SPRN_VRSAVE
std r0, THREAD_CKVRSAVE(r3)
- andi. r0, r4, MSR_FP
- beq dont_backup_fp
-
+ /* Floating Point (FP) */
addi r7, r3, THREAD_CKFPSTATE
SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */
-
mffs fr0
stfd fr0,FPSTATE_FPSCR(r7)
-dont_backup_fp:
/* TM regs, incl TEXASR -- these live in thread_struct. Note they've
* been updated by the treclaim, to explain to userland the failure
@@ -343,22 +330,19 @@ _GLOBAL(__tm_recheckpoint)
*/
subi r7, r7, STACK_FRAME_OVERHEAD
+ /* We need to setup MSR for FP/VMX/VSX register save instructions. */
mfmsr r6
- /* R4 = original MSR to indicate whether thread used FP/Vector etc. */
-
- /* Enable FP/vec in MSR if necessary! */
- lis r5, MSR_VEC@h
+ mr r5, r6
ori r5, r5, MSR_FP
- and. r5, r4, r5
- beq restore_gprs /* if neither, skip both */
-
+#ifdef CONFIG_ALTIVEC
+ oris r5, r5, MSR_VEC@h
+#endif
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
- oris r5, r5, MSR_VSX@h
+	oris	r5, r5, MSR_VSX@h
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- or r5, r6, r5 /* Set MSR.FP+.VSX/.VEC */
- mtmsr r5
+ mtmsrd r5
#ifdef CONFIG_ALTIVEC
/*
@@ -367,28 +351,20 @@ _GLOBAL(__tm_recheckpoint)
* thread.fp_state[] version holds the 'live' (transactional)
* and will be loaded subsequently by any FPUnavailable trap.
*/
- andis. r0, r4, MSR_VEC@h
- beq dont_restore_vec
-
addi r8, r3, THREAD_CKVRSTATE
li r5, VRSTATE_VSCR
lvx v0, r8, r5
mtvscr v0
REST_32VRS(0, r5, r8) /* r5 scratch, r8 ptr */
-dont_restore_vec:
ld r5, THREAD_CKVRSAVE(r3)
mtspr SPRN_VRSAVE, r5
#endif
- andi. r0, r4, MSR_FP
- beq dont_restore_fp
-
addi r8, r3, THREAD_CKFPSTATE
lfd fr0, FPSTATE_FPSCR(r8)
MTFSF_L(fr0)
REST_32FPRS_VSRS(0, R4, R8)
-dont_restore_fp:
mtmsr r6 /* FP/Vec off again! */
restore_gprs:
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
index 729dffc5f7bc..d22d8bafb643 100644
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the powerpc trace subsystem
#
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 32509de6ce4c..4741fe112f05 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Code for replacing ftrace calls with jumps.
*
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index c98e90b4ea7b..3f3e81852422 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -110,9 +110,9 @@ ftrace_call:
/* NIP has not been altered, skip over further checks */
beq 1f
- /* Check if there is an active kprobe on us */
+ /* Check if there is an active jprobe on us */
subi r3, r14, 4
- bl is_current_kprobe_addr
+ bl __is_active_jprobe
nop
/*
@@ -181,34 +181,25 @@ _GLOBAL(ftrace_stub)
* - we have no stack frame and can not allocate one
* - LR points back to the original caller (in A)
* - CTR holds the new NIP in C
- * - r0 & r12 are free
- *
- * r0 can't be used as the base register for a DS-form load or store, so
- * we temporarily shuffle r1 (stack pointer) into r0 and then put it back.
+ * - r0, r11 & r12 are free
*/
livepatch_handler:
CURRENT_THREAD_INFO(r12, r1)
- /* Save stack pointer into r0 */
- mr r0, r1
-
/* Allocate 3 x 8 bytes */
- ld r1, TI_livepatch_sp(r12)
- addi r1, r1, 24
- std r1, TI_livepatch_sp(r12)
+ ld r11, TI_livepatch_sp(r12)
+ addi r11, r11, 24
+ std r11, TI_livepatch_sp(r12)
/* Save toc & real LR on livepatch stack */
- std r2, -24(r1)
+ std r2, -24(r11)
mflr r12
- std r12, -16(r1)
+ std r12, -16(r11)
/* Store stack end marker */
lis r12, STACK_END_MAGIC@h
ori r12, r12, STACK_END_MAGIC@l
- std r12, -8(r1)
-
- /* Restore real stack pointer */
- mr r1, r0
+ std r12, -8(r11)
/* Put ctr in r12 for global entry and branch there */
mfctr r12
@@ -216,36 +207,30 @@ livepatch_handler:
/*
* Now we are returning from the patched function to the original
- * caller A. We are free to use r0 and r12, and we can use r2 until we
+ * caller A. We are free to use r11 and r12, and we can use r2 until we
* restore it.
*/
CURRENT_THREAD_INFO(r12, r1)
- /* Save stack pointer into r0 */
- mr r0, r1
-
- ld r1, TI_livepatch_sp(r12)
+ ld r11, TI_livepatch_sp(r12)
/* Check stack marker hasn't been trashed */
lis r2, STACK_END_MAGIC@h
ori r2, r2, STACK_END_MAGIC@l
- ld r12, -8(r1)
+ ld r12, -8(r11)
1: tdne r12, r2
EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0
/* Restore LR & toc from livepatch stack */
- ld r12, -16(r1)
+ ld r12, -16(r11)
mtlr r12
- ld r2, -24(r1)
+ ld r2, -24(r11)
/* Pop livepatch stack frame */
- CURRENT_THREAD_INFO(r12, r0)
- subi r1, r1, 24
- std r1, TI_livepatch_sp(r12)
-
- /* Restore real stack pointer */
- mr r1, r0
+ CURRENT_THREAD_INFO(r12, r1)
+ subi r11, r11, 24
+ std r11, TI_livepatch_sp(r12)
/* Return to original caller of live patched function */
blr
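
The reworked livepatch_handler above still pushes a 24-byte frame onto the per-thread livepatch stack; only the scratch register changes, from shuffling r1 through r0 to using the now-free r11. An illustrative C model of that frame, with offsets relative to the updated TI_livepatch_sp value:

#include <stdio.h>

struct livepatch_frame {
	unsigned long toc;	/* saved r2,        stored at sp - 24 */
	unsigned long lr;	/* saved LR,        stored at sp - 16 */
	unsigned long magic;	/* STACK_END_MAGIC, stored at sp -  8 */
};

int main(void)
{
	printf("frame size = %zu bytes\n", sizeof(struct livepatch_frame));
	return 0;
}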
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index bfcfd9ef09f2..f3eb61be0d30 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -37,6 +37,7 @@
#include <linux/kdebug.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
+#include <linux/smp.h>
#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
@@ -114,6 +115,28 @@ static void pmac_backlight_unblank(void)
static inline void pmac_backlight_unblank(void) { }
#endif
+/*
+ * If oops/die is expected to crash the machine, return true here.
+ *
+ * This should not be expected to be 100% accurate; there may be
+ * notifiers registered or other unexpected conditions that may bring
+ * down the kernel. Or if the current process in the kernel is holding
+ * locks or has other critical state, the kernel may become effectively
+ * unusable anyway.
+ */
+bool die_will_crash(void)
+{
+ if (should_fadump_crash())
+ return true;
+ if (kexec_should_crash(current))
+ return true;
+ if (in_interrupt() || panic_on_oops ||
+ !current->pid || is_global_init(current))
+ return true;
+
+ return false;
+}
+
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
@@ -162,21 +185,9 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
crash_fadump(regs, "die oops");
- /*
- * A system reset (0x100) is a request to dump, so we always send
- * it through the crashdump code.
- */
- if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) {
+ if (kexec_should_crash(current))
crash_kexec(regs);
- /*
- * We aren't the primary crash CPU. We need to send it
- * to a holding pattern to avoid it ending up in the panic
- * code.
- */
- crash_kexec_secondary(regs);
- }
-
if (!signr)
return;
@@ -202,18 +213,25 @@ NOKPROBE_SYMBOL(oops_end);
static int __die(const char *str, struct pt_regs *regs, long err)
{
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP NR_CPUS=%d ", NR_CPUS);
-#endif
+
+ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ printk("LE ");
+ else
+ printk("BE ");
+
+ if (IS_ENABLED(CONFIG_PREEMPT))
+ pr_cont("PREEMPT ");
+
+ if (IS_ENABLED(CONFIG_SMP))
+ pr_cont("SMP NR_CPUS=%d ", NR_CPUS);
+
if (debug_pagealloc_enabled())
- printk("DEBUG_PAGEALLOC ");
-#ifdef CONFIG_NUMA
- printk("NUMA ");
-#endif
- printk("%s\n", ppc_md.name ? ppc_md.name : "");
+ pr_cont("DEBUG_PAGEALLOC ");
+
+ if (IS_ENABLED(CONFIG_NUMA))
+ pr_cont("NUMA ");
+
+ pr_cont("%s\n", ppc_md.name ? ppc_md.name : "");
if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
return 1;
@@ -288,23 +306,52 @@ void system_reset_exception(struct pt_regs *regs)
if (!nested)
nmi_enter();
+ __this_cpu_inc(irq_stat.sreset_irqs);
+
/* See if any machine dependent calls */
if (ppc_md.system_reset_exception) {
if (ppc_md.system_reset_exception(regs))
goto out;
}
- die("System Reset", regs, SIGABRT);
+ if (debugger(regs))
+ goto out;
+
+ /*
+ * A system reset is a request to dump, so we always send
+ * it through the crashdump code (if fadump or kdump are
+ * registered).
+ */
+ crash_fadump(regs, "System Reset");
+
+ crash_kexec(regs);
+
+ /*
+ * We aren't the primary crash CPU. We need to send it
+ * to a holding pattern to avoid it ending up in the panic
+ * code.
+ */
+ crash_kexec_secondary(regs);
+
+ /*
+	 * No debugger or crash dump registered: print the logs, then
+	 * panic.
+ */
+ __die("System Reset", regs, SIGABRT);
+
+ mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+ nmi_panic(regs, "System Reset");
out:
#ifdef CONFIG_PPC_BOOK3S_64
BUG_ON(get_paca()->in_nmi == 0);
if (get_paca()->in_nmi > 1)
- panic("Unrecoverable nested System Reset");
+ nmi_panic(regs, "Unrecoverable nested System Reset");
#endif
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
- panic("Unrecoverable System Reset");
+ nmi_panic(regs, "Unrecoverable System Reset");
if (!nested)
nmi_exit();
@@ -312,39 +359,6 @@ out:
/* What should we do here? We could issue a shutdown or hard reset. */
}
-#ifdef CONFIG_PPC64
-/*
- * This function is called in real mode. Strictly no printk's please.
- *
- * regs->nip and regs->msr contains srr0 and ssr1.
- */
-long machine_check_early(struct pt_regs *regs)
-{
- long handled = 0;
-
- __this_cpu_inc(irq_stat.mce_exceptions);
-
- if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
- handled = cur_cpu_spec->machine_check_early(regs);
- return handled;
-}
-
-long hmi_exception_realmode(struct pt_regs *regs)
-{
- __this_cpu_inc(irq_stat.hmi_exceptions);
-
- wait_for_subcore_guest_exit();
-
- if (ppc_md.hmi_exception_early)
- ppc_md.hmi_exception_early(regs);
-
- wait_for_tb_resync();
-
- return 0;
-}
-
-#endif
-
/*
* I/O accesses can cause machine checks on powermacs.
* Check if the NIP corresponds to the address of a sync
@@ -397,11 +411,6 @@ static inline int check_io_access(struct pt_regs *regs)
/* On 4xx, the reason for the machine check or program exception
is in the ESR. */
#define get_reason(regs) ((regs)->dsisr)
-#ifndef CONFIG_FSL_BOOKE
-#define get_mc_reason(regs) ((regs)->dsisr)
-#else
-#define get_mc_reason(regs) (mfspr(SPRN_MCSR))
-#endif
#define REASON_FP ESR_FP
#define REASON_ILLEGAL (ESR_PIL | ESR_PUO)
#define REASON_PRIVILEGED ESR_PPR
@@ -415,111 +424,21 @@ static inline int check_io_access(struct pt_regs *regs)
/* On non-4xx, the reason for the machine check or program
exception is in the MSR. */
#define get_reason(regs) ((regs)->msr)
-#define get_mc_reason(regs) ((regs)->msr)
-#define REASON_TM 0x200000
-#define REASON_FP 0x100000
-#define REASON_ILLEGAL 0x80000
-#define REASON_PRIVILEGED 0x40000
-#define REASON_TRAP 0x20000
+#define REASON_TM SRR1_PROGTM
+#define REASON_FP SRR1_PROGFPE
+#define REASON_ILLEGAL SRR1_PROGILL
+#define REASON_PRIVILEGED SRR1_PROGPRIV
+#define REASON_TRAP SRR1_PROGTRAP
#define single_stepping(regs) ((regs)->msr & MSR_SE)
#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE)
#endif
-#if defined(CONFIG_4xx)
-int machine_check_4xx(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
-
- if (reason & ESR_IMCP) {
- printk("Instruction");
- mtspr(SPRN_ESR, reason & ~ESR_IMCP);
- } else
- printk("Data");
- printk(" machine check in kernel mode.\n");
-
- return 0;
-}
-
-int machine_check_440A(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
-
- printk("Machine check in kernel mode.\n");
- if (reason & ESR_IMCP){
- printk("Instruction Synchronous Machine Check exception\n");
- mtspr(SPRN_ESR, reason & ~ESR_IMCP);
- }
- else {
- u32 mcsr = mfspr(SPRN_MCSR);
- if (mcsr & MCSR_IB)
- printk("Instruction Read PLB Error\n");
- if (mcsr & MCSR_DRB)
- printk("Data Read PLB Error\n");
- if (mcsr & MCSR_DWB)
- printk("Data Write PLB Error\n");
- if (mcsr & MCSR_TLBP)
- printk("TLB Parity Error\n");
- if (mcsr & MCSR_ICP){
- flush_instruction_cache();
- printk("I-Cache Parity Error\n");
- }
- if (mcsr & MCSR_DCSP)
- printk("D-Cache Search Parity Error\n");
- if (mcsr & MCSR_DCFP)
- printk("D-Cache Flush Parity Error\n");
- if (mcsr & MCSR_IMPE)
- printk("Machine Check exception is imprecise\n");
-
- /* Clear MCSR */
- mtspr(SPRN_MCSR, mcsr);
- }
- return 0;
-}
-
-int machine_check_47x(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
- u32 mcsr;
-
- printk(KERN_ERR "Machine check in kernel mode.\n");
- if (reason & ESR_IMCP) {
- printk(KERN_ERR
- "Instruction Synchronous Machine Check exception\n");
- mtspr(SPRN_ESR, reason & ~ESR_IMCP);
- return 0;
- }
- mcsr = mfspr(SPRN_MCSR);
- if (mcsr & MCSR_IB)
- printk(KERN_ERR "Instruction Read PLB Error\n");
- if (mcsr & MCSR_DRB)
- printk(KERN_ERR "Data Read PLB Error\n");
- if (mcsr & MCSR_DWB)
- printk(KERN_ERR "Data Write PLB Error\n");
- if (mcsr & MCSR_TLBP)
- printk(KERN_ERR "TLB Parity Error\n");
- if (mcsr & MCSR_ICP) {
- flush_instruction_cache();
- printk(KERN_ERR "I-Cache Parity Error\n");
- }
- if (mcsr & MCSR_DCSP)
- printk(KERN_ERR "D-Cache Search Parity Error\n");
- if (mcsr & PPC47x_MCSR_GPR)
- printk(KERN_ERR "GPR Parity Error\n");
- if (mcsr & PPC47x_MCSR_FPR)
- printk(KERN_ERR "FPR Parity Error\n");
- if (mcsr & PPC47x_MCSR_IPR)
- printk(KERN_ERR "Machine Check exception is imprecise\n");
-
- /* Clear MCSR */
- mtspr(SPRN_MCSR, mcsr);
-
- return 0;
-}
-#elif defined(CONFIG_E500)
+#if defined(CONFIG_E500)
int machine_check_e500mc(struct pt_regs *regs)
{
unsigned long mcsr = mfspr(SPRN_MCSR);
+ unsigned long pvr = mfspr(SPRN_PVR);
unsigned long reason = mcsr;
int recoverable = 1;
@@ -561,8 +480,15 @@ int machine_check_e500mc(struct pt_regs *regs)
* may still get logged and cause a machine check. We should
* only treat the non-write shadow case as non-recoverable.
*/
- if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
- recoverable = 0;
+	/* On the e6500 core, the L1 DCWS (data cache write shadow mode) bit
+	 * is not implemented, but the L1 data cache always runs in write
+	 * shadow mode. Hence, on data cache parity errors the hardware will
+ * automatically invalidate the L1 Data Cache.
+ */
+ if (PVR_VER(pvr) != PVR_VER_E6500) {
+ if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
+ recoverable = 0;
+ }
}
if (reason & MCSR_L2MMU_MHIT) {
@@ -618,7 +544,7 @@ silent_out:
int machine_check_e500(struct pt_regs *regs)
{
- unsigned long reason = get_mc_reason(regs);
+ unsigned long reason = mfspr(SPRN_MCSR);
if (reason & MCSR_BUS_RBERR) {
if (fsl_rio_mcheck_exception(regs))
@@ -665,7 +591,7 @@ int machine_check_generic(struct pt_regs *regs)
#elif defined(CONFIG_E200)
int machine_check_e200(struct pt_regs *regs)
{
- unsigned long reason = get_mc_reason(regs);
+ unsigned long reason = mfspr(SPRN_MCSR);
printk("Machine check in kernel mode.\n");
printk("Caused by (from MCSR=%lx): ", reason);
@@ -687,35 +613,10 @@ int machine_check_e200(struct pt_regs *regs)
return 0;
}
-#elif defined(CONFIG_PPC_8xx)
-int machine_check_8xx(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
-
- pr_err("Machine check in kernel mode.\n");
- pr_err("Caused by (from SRR1=%lx): ", reason);
- if (reason & 0x40000000)
- pr_err("Fetch error at address %lx\n", regs->nip);
- else
- pr_err("Data access error at address %lx\n", regs->dar);
-
-#ifdef CONFIG_PCI
- /* the qspan pci read routines can cause machine checks -- Cort
- *
- * yuck !!! that totally needs to go away ! There are better ways
- * to deal with that than having a wart in the mcheck handler.
- * -- BenH
- */
- bad_page_fault(regs, regs->dar, SIGBUS);
- return 1;
-#else
- return 0;
-#endif
-}
-#else
+#elif defined(CONFIG_PPC32)
int machine_check_generic(struct pt_regs *regs)
{
- unsigned long reason = get_mc_reason(regs);
+ unsigned long reason = regs->msr;
printk("Machine check in kernel mode.\n");
printk("Caused by (from SRR1=%lx): ", reason);
@@ -752,10 +653,14 @@ int machine_check_generic(struct pt_regs *regs)
void machine_check_exception(struct pt_regs *regs)
{
- enum ctx_state prev_state = exception_enter();
int recover = 0;
+ bool nested = in_nmi();
+ if (!nested)
+ nmi_enter();
- __this_cpu_inc(irq_stat.mce_exceptions);
+ /* 64s accounts the mce in machine_check_early when in HVMODE */
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !cpu_has_feature(CPU_FTR_HVMODE))
+ __this_cpu_inc(irq_stat.mce_exceptions);
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -783,10 +688,11 @@ void machine_check_exception(struct pt_regs *regs)
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
- panic("Unrecoverable Machine check");
+ nmi_panic(regs, "Unrecoverable Machine check");
bail:
- exception_exit(prev_state);
+ if (!nested)
+ nmi_exit();
}
void SMIException(struct pt_regs *regs)
@@ -794,6 +700,187 @@ void SMIException(struct pt_regs *regs)
die("System Management Interrupt", regs, SIGABRT);
}
+#ifdef CONFIG_VSX
+static void p9_hmi_special_emu(struct pt_regs *regs)
+{
+ unsigned int ra, rb, t, i, sel, instr, rc;
+ const void __user *addr;
+ u8 vbuf[16], *vdst;
+ unsigned long ea, msr, msr_mask;
+ bool swap;
+
+ if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip))
+ return;
+
+ /*
+ * lxvb16x opcode: 0x7c0006d8
+ * lxvd2x opcode: 0x7c000698
+ * lxvh8x opcode: 0x7c000658
+ * lxvw4x opcode: 0x7c000618
+ */
+ if ((instr & 0xfc00073e) != 0x7c000618) {
+ pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
+ " instr=%08x\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr);
+ return;
+ }
+
+ /* Grab vector registers into the task struct */
+ msr = regs->msr; /* Grab msr before we flush the bits */
+ flush_vsx_to_thread(current);
+ enable_kernel_altivec();
+
+ /*
+	 * Is userspace running with a different endianness? (This is rare
+	 * but not impossible.)
+ */
+ swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+
+ /* Decode the instruction */
+ ra = (instr >> 16) & 0x1f;
+ rb = (instr >> 11) & 0x1f;
+ t = (instr >> 21) & 0x1f;
+ if (instr & 1)
+ vdst = (u8 *)&current->thread.vr_state.vr[t];
+ else
+ vdst = (u8 *)&current->thread.fp_state.fpr[t][0];
+
+ /* Grab the vector address */
+ ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0);
+ if (is_32bit_task())
+ ea &= 0xfffffffful;
+ addr = (__force const void __user *)ea;
+
+ /* Check it */
+ if (!access_ok(VERIFY_READ, addr, 16)) {
+ pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
+ " instr=%08x addr=%016lx\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr, (unsigned long)addr);
+ return;
+ }
+
+ /* Read the vector */
+ rc = 0;
+ if ((unsigned long)addr & 0xfUL)
+ /* unaligned case */
+ rc = __copy_from_user_inatomic(vbuf, addr, 16);
+ else
+ __get_user_atomic_128_aligned(vbuf, addr, rc);
+ if (rc) {
+ pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
+ " instr=%08x addr=%016lx\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr, (unsigned long)addr);
+ return;
+ }
+
+ pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
+ " instr=%08x addr=%016lx\n",
+ smp_processor_id(), current->comm, current->pid, regs->nip,
+ instr, (unsigned long) addr);
+
+ /* Grab instruction "selector" */
+ sel = (instr >> 6) & 3;
+
+ /*
+	 * Check to make sure the facility is actually enabled. It might
+	 * not be, if we got a false positive hit.
+ *
+ * lxvd2x/lxvw4x always check MSR VSX sel = 0,2
+ * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3
+ */
+ msr_mask = MSR_VSX;
+ if ((sel & 1) && (instr & 1)) /* lxvh8x & lxvb16x + VSR >= 32 */
+ msr_mask = MSR_VEC;
+ if (!(msr & msr_mask)) {
+ pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
+ " instr=%08x msr:%016lx\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr, msr);
+ return;
+ }
+
+ /* Do logging here before we modify sel based on endian */
+ switch (sel) {
+ case 0: /* lxvw4x */
+ PPC_WARN_EMULATED(lxvw4x, regs);
+ break;
+ case 1: /* lxvh8x */
+ PPC_WARN_EMULATED(lxvh8x, regs);
+ break;
+ case 2: /* lxvd2x */
+ PPC_WARN_EMULATED(lxvd2x, regs);
+ break;
+ case 3: /* lxvb16x */
+ PPC_WARN_EMULATED(lxvb16x, regs);
+ break;
+ }
+
+#ifdef __LITTLE_ENDIAN__
+ /*
+ * An LE kernel stores the vector in the task struct as an LE
+ * byte array (effectively swapping both the components and
+ * the content of the components). Those instructions expect
+ * the components to remain in ascending address order, so we
+ * swap them back.
+ *
+ * If we are running a BE user space, the expectation is that
+ * of a simple memcpy, so forcing the emulation to look like
+ * a lxvb16x should do the trick.
+ */
+ if (swap)
+ sel = 3;
+
+ switch (sel) {
+ case 0: /* lxvw4x */
+ for (i = 0; i < 4; i++)
+ ((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i];
+ break;
+ case 1: /* lxvh8x */
+ for (i = 0; i < 8; i++)
+ ((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i];
+ break;
+ case 2: /* lxvd2x */
+ for (i = 0; i < 2; i++)
+ ((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i];
+ break;
+ case 3: /* lxvb16x */
+ for (i = 0; i < 16; i++)
+ vdst[i] = vbuf[15-i];
+ break;
+ }
+#else /* __LITTLE_ENDIAN__ */
+ /* On a big endian kernel, a BE userspace only needs a memcpy */
+ if (!swap)
+ sel = 3;
+
+ /* Otherwise, we need to swap the content of the components */
+ switch (sel) {
+ case 0: /* lxvw4x */
+ for (i = 0; i < 4; i++)
+ ((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]);
+ break;
+ case 1: /* lxvh8x */
+ for (i = 0; i < 8; i++)
+ ((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]);
+ break;
+ case 2: /* lxvd2x */
+ for (i = 0; i < 2; i++)
+ ((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]);
+ break;
+ case 3: /* lxvb16x */
+ memcpy(vdst, vbuf, 16);
+ break;
+ }
+#endif /* !__LITTLE_ENDIAN__ */
+
+ /* Go to next instruction */
+ regs->nip += 4;
+}
+#endif /* CONFIG_VSX */
+
void handle_hmi_exception(struct pt_regs *regs)
{
struct pt_regs *old_regs;
@@ -801,6 +888,21 @@ void handle_hmi_exception(struct pt_regs *regs)
old_regs = set_irq_regs(regs);
irq_enter();
+#ifdef CONFIG_VSX
+ /* Real mode flagged P9 special emu is needed */
+ if (local_paca->hmi_p9_special_emu) {
+ local_paca->hmi_p9_special_emu = 0;
+
+ /*
+	 * We don't want to take page faults while doing the
+	 * emulation; we just replay the instruction if necessary.
+ */
+ pagefault_disable();
+ p9_hmi_special_emu(regs);
+ pagefault_enable();
+ }
+#endif /* CONFIG_VSX */
+
if (ppc_md.handle_hmi_exception)
ppc_md.handle_hmi_exception(regs);
@@ -1235,13 +1337,8 @@ void program_check_exception(struct pt_regs *regs)
* - A treclaim is attempted when non transactional.
* - A tend is illegally attempted.
* - writing a TM SPR when transactional.
- */
- if (!user_mode(regs) &&
- report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
- regs->nip += 4;
- goto bail;
- }
- /* If usermode caused this, it's done something illegal and
+ *
+ * If usermode caused this, it's done something illegal and
* gets a SIGILL slap on the wrist. We call it an illegal
* operand to distinguish from the instruction just being bad
* (e.g. executing a 'tend' on a CPU without TM!); it's an
@@ -1582,7 +1679,7 @@ void fp_unavailable_tm(struct pt_regs *regs)
/* Reclaim didn't save out any FPRs to transact_fprs. */
/* Enable FP for the task: */
- regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+ current->thread.load_fp = 1;
/* This loads and recheckpoints the FP registers from
* thread.fpr[]. They will remain in registers after the
@@ -1590,15 +1687,7 @@ void fp_unavailable_tm(struct pt_regs *regs)
* If VMX is in use, the VRs now hold checkpointed values,
* so we don't want to load the VRs from the thread_struct.
*/
- tm_recheckpoint(&current->thread, MSR_FP);
-
- /* If VMX is in use, get the transactional values back */
- if (regs->msr & MSR_VEC) {
- msr_check_and_set(MSR_VEC);
- load_vr_state(&current->thread.vr_state);
- /* At this point all the VSX state is loaded, so enable it */
- regs->msr |= MSR_VSX;
- }
+ tm_recheckpoint(&current->thread);
}
void altivec_unavailable_tm(struct pt_regs *regs)
@@ -1611,21 +1700,13 @@ void altivec_unavailable_tm(struct pt_regs *regs)
"MSR=%lx\n",
regs->nip, regs->msr);
tm_reclaim_current(TM_CAUSE_FAC_UNAV);
- regs->msr |= MSR_VEC;
- tm_recheckpoint(&current->thread, MSR_VEC);
+ current->thread.load_vec = 1;
+ tm_recheckpoint(&current->thread);
current->thread.used_vr = 1;
-
- if (regs->msr & MSR_FP) {
- msr_check_and_set(MSR_FP);
- load_fp_state(&current->thread.fp_state);
- regs->msr |= MSR_VSX;
- }
}
void vsx_unavailable_tm(struct pt_regs *regs)
{
- unsigned long orig_msr = regs->msr;
-
/* See the comments in fp_unavailable_tm(). This works similarly,
* though we're loading both FP and VEC registers in here.
*
@@ -1639,29 +1720,13 @@ void vsx_unavailable_tm(struct pt_regs *regs)
current->thread.used_vsr = 1;
- /* If FP and VMX are already loaded, we have all the state we need */
- if ((orig_msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) {
- regs->msr |= MSR_VSX;
- return;
- }
-
/* This reclaims FP and/or VR regs if they're already enabled */
tm_reclaim_current(TM_CAUSE_FAC_UNAV);
- regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode |
- MSR_VSX;
-
- /* This loads & recheckpoints FP and VRs; but we have
- * to be sure not to overwrite previously-valid state.
- */
- tm_recheckpoint(&current->thread, regs->msr & ~orig_msr);
-
- msr_check_and_set(orig_msr & (MSR_FP | MSR_VEC));
+ current->thread.load_vec = 1;
+ current->thread.load_fp = 1;
- if (orig_msr & MSR_FP)
- load_fp_state(&current->thread.fp_state);
- if (orig_msr & MSR_VEC)
- load_vr_state(&current->thread.vr_state);
+ tm_recheckpoint(&current->thread);
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -1672,24 +1737,6 @@ void performance_monitor_exception(struct pt_regs *regs)
perf_irq(regs);
}
-#ifdef CONFIG_8xx
-void SoftwareEmulation(struct pt_regs *regs)
-{
- CHECK_FULL_REGS(regs);
-
- if (!user_mode(regs)) {
- debugger(regs);
- die("Kernel Mode Unimplemented Instruction or SW FPU Emulation",
- regs, SIGFPE);
- }
-
- if (!emulate_math(regs))
- return;
-
- _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-}
-#endif /* CONFIG_8xx */
-
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
{
@@ -2037,6 +2084,10 @@ struct ppc_emulated ppc_emulated = {
WARN_EMULATED_SETUP(mfdscr),
WARN_EMULATED_SETUP(mtdscr),
WARN_EMULATED_SETUP(lq_stq),
+ WARN_EMULATED_SETUP(lxvw4x),
+ WARN_EMULATED_SETUP(lxvh8x),
+ WARN_EMULATED_SETUP(lxvd2x),
+ WARN_EMULATED_SETUP(lxvb16x),
#endif
};
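
In the little-endian fixups added to p9_hmi_special_emu() above, the lxvw4x case keeps each 32-bit word intact but reverses the order of the four words before storing them into the register image. A small userspace model of that step:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	uint8_t vbuf[16], vdst[16];
	int i;

	for (i = 0; i < 16; i++)
		vbuf[i] = i;			/* bytes as fetched from memory */

	for (i = 0; i < 4; i++)			/* lxvw4x on LE: reverse word order */
		memcpy(vdst + 4 * i, vbuf + 4 * (3 - i), 4);

	for (i = 0; i < 16; i++)
		printf("%02x%c", vdst[i], i == 15 ? '\n' : ' ');
	return 0;
}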
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 003b20964ea0..5d105b8eeece 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -205,3 +205,12 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
return orig_ret_vaddr;
}
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+ struct pt_regs *regs)
+{
+ if (ctx == RP_CHECK_CHAIN_CALL)
+ return regs->gpr[1] <= ret->stack;
+ else
+ return regs->gpr[1] < ret->stack;
+}
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 78a7449bf489..b8c434d1d459 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
# List of files in the vdso, has to be asm only for now
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 6b2b69616e77..769c2624e0a6 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -232,15 +232,9 @@ __do_get_tspec:
lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
/* Get a stable TB value */
-#ifdef CONFIG_8xx
-2: mftbu r3
- mftbl r4
- mftbu r0
-#else
-2: mfspr r3, SPRN_TBRU
- mfspr r4, SPRN_TBRL
- mfspr r0, SPRN_TBRU
-#endif
+2: MFTBU(r3)
+ MFTBL(r4)
+ MFTBU(r0)
cmplw cr0,r3,r0
bne- 2b
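
The MFTBU/MFTBL macros above hide the difference between the 8xx mftbu/mftbl instructions and the TBRU/TBRL SPR reads used elsewhere, but the surrounding loop is unchanged: read the upper half, the lower half, then the upper half again, and retry if the upper half ticked over in between. An equivalent C sketch of that torn-read guard (illustrative only; the vdso keeps this in assembly, and the accessor names here are invented):

    #include <stdint.h>

    /* Hypothetical accessors standing in for MFTBU()/MFTBL(). */
    extern uint32_t read_tb_upper(void);
    extern uint32_t read_tb_lower(void);

    /* Sketch: assemble a stable 64-bit timebase value on a 32-bit CPU.
     * If the upper word changes between the two reads, the lower word
     * may belong to either epoch, so the whole read is retried.
     */
    static uint64_t read_timebase64(void)
    {
            uint32_t hi, lo, hi2;

            do {
                    hi  = read_tb_upper();
                    lo  = read_tb_lower();
                    hi2 = read_tb_upper();
            } while (hi != hi2);

            return ((uint64_t)hi << 32) | lo;
    }
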
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index e58ee10fa5c0..099a6db14e67 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* This is the infamous ld script for the 32 bits vdso
* library
diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
index 6ac107ac402a..3f5ef035b0a9 100644
--- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
+++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/page.h>
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 31107bf5a61f..69cecb346269 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
# List of files in the vdso, has to be asm only for now
obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S
index 64fb183a47c2..256fb9720298 100644
--- a/arch/powerpc/kernel/vdso64/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* This is the infamous ld script for the 64 bits vdso
* library
diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
index df60fca6a13d..1d56d81fe3b3 100644
--- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
+++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/page.h>
diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
index 2d8f6d8ccafc..8812085883fd 100644
--- a/arch/powerpc/kernel/vecemu.c
+++ b/arch/powerpc/kernel/vecemu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Routines to emulate some Altivec/VMX instructions, specifically
* those that can trap when given denormalized operands in Java mode.
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 0c123f3406cd..f314fd475491 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index b1a250560198..0494e1566ee2 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifdef CONFIG_PPC64
#define PROVIDE32(x) PROVIDE(__unused__##x)
#else
@@ -8,7 +9,7 @@
#include <asm/cache.h>
#include <asm/thread_info.h>
-#ifdef CONFIG_STRICT_KERNEL_RWX
+#if defined(CONFIG_STRICT_KERNEL_RWX) && !defined(CONFIG_PPC32)
#define STRICT_ALIGN_SIZE (1 << 24)
#else
#define STRICT_ALIGN_SIZE PAGE_SIZE
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index b67f8b03a32d..87da80ccced1 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Watchdog support on powerpc systems.
*
@@ -71,15 +72,20 @@ static inline void wd_smp_lock(unsigned long *flags)
* This may be called from low level interrupt handlers at some
* point in future.
*/
- local_irq_save(*flags);
- while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
- cpu_relax();
+ raw_local_irq_save(*flags);
+ hard_irq_disable(); /* Make it soft-NMI safe */
+ while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
+ raw_local_irq_restore(*flags);
+ spin_until_cond(!test_bit(0, &__wd_smp_lock));
+ raw_local_irq_save(*flags);
+ hard_irq_disable();
+ }
}
static inline void wd_smp_unlock(unsigned long *flags)
{
clear_bit_unlock(0, &__wd_smp_lock);
- local_irq_restore(*flags);
+ raw_local_irq_restore(*flags);
}
static void wd_lockup_ipi(struct pt_regs *regs)
@@ -92,14 +98,13 @@ static void wd_lockup_ipi(struct pt_regs *regs)
else
dump_stack();
- if (hardlockup_panic)
- nmi_panic(regs, "Hard LOCKUP");
+ /* Do not panic from here because that can recurse into NMI IPI layer */
}
-static void set_cpu_stuck(int cpu, u64 tb)
+static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
{
- cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
- cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+ cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
+ cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
if (cpumask_empty(&wd_smp_cpus_pending)) {
wd_smp_last_reset_tb = tb;
cpumask_andnot(&wd_smp_cpus_pending,
@@ -107,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb)
&wd_smp_cpus_stuck);
}
}
+static void set_cpu_stuck(int cpu, u64 tb)
+{
+ set_cpumask_stuck(cpumask_of(cpu), tb);
+}
static void watchdog_smp_panic(int cpu, u64 tb)
{
@@ -125,21 +134,22 @@ static void watchdog_smp_panic(int cpu, u64 tb)
pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
cpu, cpumask_pr_args(&wd_smp_cpus_pending));
- /*
- * Try to trigger the stuck CPUs.
- */
- for_each_cpu(c, &wd_smp_cpus_pending) {
- if (c == cpu)
- continue;
- smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+ if (!sysctl_hardlockup_all_cpu_backtrace) {
+ /*
+ * Try to trigger the stuck CPUs, unless we are going to
+ * get a backtrace on all of them anyway.
+ */
+ for_each_cpu(c, &wd_smp_cpus_pending) {
+ if (c == cpu)
+ continue;
+ smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+ }
+ smp_flush_nmi_ipi(1000000);
}
- smp_flush_nmi_ipi(1000000);
- /* Take the stuck CPU out of the watch group */
- for_each_cpu(c, &wd_smp_cpus_pending)
- set_cpu_stuck(c, tb);
+ /* Take the stuck CPUs out of the watch group */
+ set_cpumask_stuck(&wd_smp_cpus_pending, tb);
-out:
wd_smp_unlock(&flags);
printk_safe_flush();
@@ -152,6 +162,11 @@ out:
if (hardlockup_panic)
nmi_panic(NULL, "Hard LOCKUP");
+
+ return;
+
+out:
+ wd_smp_unlock(&flags);
}
static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
@@ -204,6 +219,9 @@ void soft_nmi_interrupt(struct pt_regs *regs)
return;
nmi_enter();
+
+ __this_cpu_inc(irq_stat.soft_nmi_irqs);
+
tb = get_tb();
if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
per_cpu(wd_timer_tb, cpu) = tb;
@@ -246,9 +264,8 @@ static void wd_timer_reset(unsigned int cpu, struct timer_list *t)
add_timer_on(t, cpu);
}
-static void wd_timer_fn(unsigned long data)
+static void wd_timer_fn(struct timer_list *t)
{
- struct timer_list *t = this_cpu_ptr(&wd_timer);
int cpu = smp_processor_id();
watchdog_timer_interrupt(cpu);
@@ -258,9 +275,14 @@ static void wd_timer_fn(unsigned long data)
void arch_touch_nmi_watchdog(void)
{
+ unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
int cpu = smp_processor_id();
+ u64 tb = get_tb();
- watchdog_timer_interrupt(cpu);
+ if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
+ per_cpu(wd_timer_tb, cpu) = tb;
+ wd_smp_clear_cpu_pending(cpu, tb);
+ }
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
@@ -270,7 +292,7 @@ static void start_watchdog_timer_on(unsigned int cpu)
per_cpu(wd_timer_tb, cpu) = get_tb();
- setup_pinned_timer(t, wd_timer_fn, 0);
+ timer_setup(t, wd_timer_fn, TIMER_PINNED);
wd_timer_reset(cpu, t);
}
@@ -283,6 +305,8 @@ static void stop_watchdog_timer_on(unsigned int cpu)
static int start_wd_on_cpu(unsigned int cpu)
{
+ unsigned long flags;
+
if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
WARN_ON(1);
return 0;
@@ -291,18 +315,17 @@ static int start_wd_on_cpu(unsigned int cpu)
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
return 0;
- if (watchdog_suspended)
- return 0;
-
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
return 0;
+ wd_smp_lock(&flags);
cpumask_set_cpu(cpu, &wd_cpus_enabled);
if (cpumask_weight(&wd_cpus_enabled) == 1) {
cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
wd_smp_last_reset_tb = get_tb();
}
- smp_wmb();
+ wd_smp_unlock(&flags);
+
start_watchdog_timer_on(cpu);
return 0;
@@ -310,12 +333,17 @@ static int start_wd_on_cpu(unsigned int cpu)
static int stop_wd_on_cpu(unsigned int cpu)
{
+ unsigned long flags;
+
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
return 0; /* Can happen in CPU unplug case */
stop_watchdog_timer_on(cpu);
+ wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+ wd_smp_unlock(&flags);
+
wd_smp_clear_cpu_pending(cpu, get_tb());
return 0;
@@ -332,36 +360,39 @@ static void watchdog_calc_timeouts(void)
wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
}
-void watchdog_nmi_reconfigure(void)
+void watchdog_nmi_stop(void)
{
int cpu;
- watchdog_calc_timeouts();
-
for_each_cpu(cpu, &wd_cpus_enabled)
stop_wd_on_cpu(cpu);
+}
+
+void watchdog_nmi_start(void)
+{
+ int cpu;
+ watchdog_calc_timeouts();
for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
start_wd_on_cpu(cpu);
}
/*
- * This runs after lockup_detector_init() which sets up watchdog_cpumask.
+ * Invoked from core watchdog init.
*/
-static int __init powerpc_watchdog_init(void)
+int __init watchdog_nmi_probe(void)
{
int err;
- watchdog_calc_timeouts();
-
- err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online",
- start_wd_on_cpu, stop_wd_on_cpu);
- if (err < 0)
+ err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "powerpc/watchdog:online",
+ start_wd_on_cpu, stop_wd_on_cpu);
+ if (err < 0) {
pr_warn("Watchdog could not be initialized");
-
+ return err;
+ }
return 0;
}
-arch_initcall(powerpc_watchdog_init);
static void handle_backtrace_ipi(struct pt_regs *regs)
{
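
One mechanical change in the watchdog diff above is the move from setup_pinned_timer() with an unsigned-long callback argument to timer_setup() with TIMER_PINNED, where the callback receives the timer_list pointer itself. A minimal sketch of the newer calling convention follows; the my_state/my_timer_fn names are hypothetical, and the watchdog callback itself simply uses smp_processor_id() rather than from_timer():

    #include <linux/timer.h>

    struct my_state {
            struct timer_list timer;
            int hits;
    };

    /* New-style callback: the timer itself is passed in, and from_timer()
     * recovers the containing structure when per-timer data is needed.
     */
    static void my_timer_fn(struct timer_list *t)
    {
            struct my_state *s = from_timer(s, t, timer);

            s->hits++;
            mod_timer(&s->timer, jiffies + HZ);     /* re-arm one second out */
    }

    static void my_timer_start(struct my_state *s)
    {
            timer_setup(&s->timer, my_timer_fn, TIMER_PINNED);
            mod_timer(&s->timer, jiffies + HZ);
    }
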