diff options
Diffstat (limited to 'openpower/linux/0002-powerpc-64s-Add-workaround-for-P9-vector-CI-load-iss.patch')
-rw-r--r-- | openpower/linux/0002-powerpc-64s-Add-workaround-for-P9-vector-CI-load-iss.patch | 438 |
1 files changed, 0 insertions, 438 deletions
diff --git a/openpower/linux/0002-powerpc-64s-Add-workaround-for-P9-vector-CI-load-iss.patch b/openpower/linux/0002-powerpc-64s-Add-workaround-for-P9-vector-CI-load-iss.patch deleted file mode 100644 index a0d90697..00000000 --- a/openpower/linux/0002-powerpc-64s-Add-workaround-for-P9-vector-CI-load-iss.patch +++ /dev/null @@ -1,438 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Michael Neuling <mikey@neuling.org> -Date: Fri, 15 Sep 2017 15:25:48 +1000 -Subject: [PATCH 2/3] powerpc/64s: Add workaround for P9 vector CI load issue - -POWER9 DD2.1 and earlier has an issue where some cache inhibited -vector load will return bad data. The workaround is two part, one -firmware/microcode part triggers HMI interrupts when hitting such -loads, the other part is this patch which then emulates the -instructions in Linux. - -The affected instructions are limited to lxvd2x, lxvw4x, lxvb16x and -lxvh8x. - -When an instruction triggers the HMI, all threads in the core will be -sent to the HMI handler, not just the one running the vector load. - -In general, these spurious HMIs are detected by the emulation code and -we just return back to the running process. Unfortunately, if a -spurious interrupt occurs on a vector load that's to normal memory we -have no way to detect that it's spurious (unless we walk the page -tables, which is very expensive). In this case we emulate the load but -we need do so using a vector load itself to ensure 128bit atomicity is -preserved. - -Some additional debugfs emulated instruction counters are added also. 
- -Signed-off-by: Michael Neuling <mikey@neuling.org> -Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> -[mpe: Switch CONFIG_PPC_BOOK3S_64 to CONFIG_VSX to unbreak the build] -Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> -(cherry picked from commit 5080332c2c893118dbc18755f35c8b0131cf0fc4) -Signed-off-by: Joel Stanley <joel@jms.id.au> ---- - arch/powerpc/include/asm/emulated_ops.h | 4 + - arch/powerpc/include/asm/paca.h | 1 + - arch/powerpc/include/asm/uaccess.h | 17 +++ - arch/powerpc/kernel/exceptions-64s.S | 16 ++- - arch/powerpc/kernel/mce.c | 30 ++++- - arch/powerpc/kernel/traps.c | 201 ++++++++++++++++++++++++++++++++ - arch/powerpc/platforms/powernv/smp.c | 7 ++ - 7 files changed, 271 insertions(+), 5 deletions(-) - -diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h -index f00e10e2a335..651e1354498e 100644 ---- a/arch/powerpc/include/asm/emulated_ops.h -+++ b/arch/powerpc/include/asm/emulated_ops.h -@@ -55,6 +55,10 @@ extern struct ppc_emulated { - struct ppc_emulated_entry mfdscr; - struct ppc_emulated_entry mtdscr; - struct ppc_emulated_entry lq_stq; -+ struct ppc_emulated_entry lxvw4x; -+ struct ppc_emulated_entry lxvh8x; -+ struct ppc_emulated_entry lxvd2x; -+ struct ppc_emulated_entry lxvb16x; - #endif - } ppc_emulated; - -diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h -index 04b60af027ae..1e06310ccc09 100644 ---- a/arch/powerpc/include/asm/paca.h -+++ b/arch/powerpc/include/asm/paca.h -@@ -210,6 +210,7 @@ struct paca_struct { - */ - u16 in_mce; - u8 hmi_event_available; /* HMI event is available */ -+ u8 hmi_p9_special_emu; /* HMI P9 special emulation */ - #endif - - /* Stuff for accurate time accounting */ -diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h -index 11f4bd07cce0..1e6ead577459 100644 ---- a/arch/powerpc/include/asm/uaccess.h -+++ b/arch/powerpc/include/asm/uaccess.h -@@ -174,6 +174,23 @@ do { \ - - extern 
long __get_user_bad(void); - -+/* -+ * This does an atomic 128-bit (16-byte) aligned load from userspace. -+ * Up to the caller to do enable_kernel_vmx() before calling! -+ */ -+#define __get_user_atomic_128_aligned(kaddr, uaddr, err) \ -+ __asm__ __volatile__( \ -+ "1: lvx 0,0,%1 # get user\n" \ -+ " stvx 0,0,%2 # put kernel\n" \ -+ "2:\n" \ -+ ".section .fixup,\"ax\"\n" \ -+ "3: li %0,%3\n" \ -+ " b 2b\n" \ -+ ".previous\n" \ -+ EX_TABLE(1b, 3b) \ -+ : "=r" (err) \ -+ : "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err)) -+ - #define __get_user_asm(x, addr, err, op) \ - __asm__ __volatile__( \ - "1: "op" %1,0(%2) # get_user\n" \ -diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S -index 06598142d755..7ac85fde9ece 100644 ---- a/arch/powerpc/kernel/exceptions-64s.S -+++ b/arch/powerpc/kernel/exceptions-64s.S -@@ -1033,6 +1033,8 @@ TRAMP_REAL_BEGIN(hmi_exception_early) - EXCEPTION_PROLOG_COMMON_3(0xe60) - addi r3,r1,STACK_FRAME_OVERHEAD - BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */ -+ cmpdi cr0,r3,0 -+ - /* Windup the stack. */ - /* Move original HSRR0 and HSRR1 into the respective regs */ - ld r9,_MSR(r1) -@@ -1049,10 +1051,15 @@ TRAMP_REAL_BEGIN(hmi_exception_early) - REST_8GPRS(2, r1) - REST_GPR(10, r1) - ld r11,_CCR(r1) -+ REST_2GPRS(12, r1) -+ bne 1f - mtcr r11 - REST_GPR(11, r1) -- REST_2GPRS(12, r1) -- /* restore original r1. 
*/ -+ ld r1,GPR1(r1) -+ hrfid -+ -+1: mtcr r11 -+ REST_GPR(11, r1) - ld r1,GPR1(r1) - - /* -@@ -1065,8 +1072,9 @@ hmi_exception_after_realmode: - EXCEPTION_PROLOG_0(PACA_EXGEN) - b tramp_real_hmi_exception - --EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception) -- -+EXC_COMMON_BEGIN(hmi_exception_common) -+EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception, -+ ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON) - - EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20) - EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80) -diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c -index 9b2ea7e71c06..f588951b171d 100644 ---- a/arch/powerpc/kernel/mce.c -+++ b/arch/powerpc/kernel/mce.c -@@ -470,6 +470,34 @@ long hmi_exception_realmode(struct pt_regs *regs) - { - __this_cpu_inc(irq_stat.hmi_exceptions); - -+#ifdef CONFIG_PPC_BOOK3S_64 -+ /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */ -+ if (pvr_version_is(PVR_POWER9)) { -+ unsigned long hmer = mfspr(SPRN_HMER); -+ -+ /* Do we have the debug bit set */ -+ if (hmer & PPC_BIT(17)) { -+ hmer &= ~PPC_BIT(17); -+ mtspr(SPRN_HMER, hmer); -+ -+ /* -+ * Now to avoid problems with soft-disable we -+ * only do the emulation if we are coming from -+ * user space -+ */ -+ if (user_mode(regs)) -+ local_paca->hmi_p9_special_emu = 1; -+ -+ /* -+ * Don't bother going to OPAL if that's the -+ * only relevant bit. 
-+ */ -+ if (!(hmer & mfspr(SPRN_HMEER))) -+ return local_paca->hmi_p9_special_emu; -+ } -+ } -+#endif /* CONFIG_PPC_BOOK3S_64 */ -+ - wait_for_subcore_guest_exit(); - - if (ppc_md.hmi_exception_early) -@@ -477,5 +505,5 @@ long hmi_exception_realmode(struct pt_regs *regs) - - wait_for_tb_resync(); - -- return 0; -+ return 1; - } -diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c -index 13c9dcdcba69..9ae1924c7d1a 100644 ---- a/arch/powerpc/kernel/traps.c -+++ b/arch/powerpc/kernel/traps.c -@@ -37,6 +37,7 @@ - #include <linux/kdebug.h> - #include <linux/ratelimit.h> - #include <linux/context_tracking.h> -+#include <linux/smp.h> - - #include <asm/emulated_ops.h> - #include <asm/pgtable.h> -@@ -699,6 +700,187 @@ void SMIException(struct pt_regs *regs) - die("System Management Interrupt", regs, SIGABRT); - } - -+#ifdef CONFIG_VSX -+static void p9_hmi_special_emu(struct pt_regs *regs) -+{ -+ unsigned int ra, rb, t, i, sel, instr, rc; -+ const void __user *addr; -+ u8 vbuf[16], *vdst; -+ unsigned long ea, msr, msr_mask; -+ bool swap; -+ -+ if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip)) -+ return; -+ -+ /* -+ * lxvb16x opcode: 0x7c0006d8 -+ * lxvd2x opcode: 0x7c000698 -+ * lxvh8x opcode: 0x7c000658 -+ * lxvw4x opcode: 0x7c000618 -+ */ -+ if ((instr & 0xfc00073e) != 0x7c000618) { -+ pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx" -+ " instr=%08x\n", -+ smp_processor_id(), current->comm, current->pid, -+ regs->nip, instr); -+ return; -+ } -+ -+ /* Grab vector registers into the task struct */ -+ msr = regs->msr; /* Grab msr before we flush the bits */ -+ flush_vsx_to_thread(current); -+ enable_kernel_altivec(); -+ -+ /* -+ * Is userspace running with a different endian (this is rare but -+ * not impossible) -+ */ -+ swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE); -+ -+ /* Decode the instruction */ -+ ra = (instr >> 16) & 0x1f; -+ rb = (instr >> 11) & 0x1f; -+ t = (instr >> 21) & 0x1f; -+ if (instr & 1) -+ vdst = (u8 
*)&current->thread.vr_state.vr[t]; -+ else -+ vdst = (u8 *)&current->thread.fp_state.fpr[t][0]; -+ -+ /* Grab the vector address */ -+ ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0); -+ if (is_32bit_task()) -+ ea &= 0xfffffffful; -+ addr = (__force const void __user *)ea; -+ -+ /* Check it */ -+ if (!access_ok(VERIFY_READ, addr, 16)) { -+ pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx" -+ " instr=%08x addr=%016lx\n", -+ smp_processor_id(), current->comm, current->pid, -+ regs->nip, instr, (unsigned long)addr); -+ return; -+ } -+ -+ /* Read the vector */ -+ rc = 0; -+ if ((unsigned long)addr & 0xfUL) -+ /* unaligned case */ -+ rc = __copy_from_user_inatomic(vbuf, addr, 16); -+ else -+ __get_user_atomic_128_aligned(vbuf, addr, rc); -+ if (rc) { -+ pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx" -+ " instr=%08x addr=%016lx\n", -+ smp_processor_id(), current->comm, current->pid, -+ regs->nip, instr, (unsigned long)addr); -+ return; -+ } -+ -+ pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx" -+ " instr=%08x addr=%016lx\n", -+ smp_processor_id(), current->comm, current->pid, regs->nip, -+ instr, (unsigned long) addr); -+ -+ /* Grab instruction "selector" */ -+ sel = (instr >> 6) & 3; -+ -+ /* -+ * Check to make sure the facility is actually enabled. This -+ * could happen if we get a false positive hit. 
-+ * -+ * lxvd2x/lxvw4x always check MSR VSX sel = 0,2 -+ * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3 -+ */ -+ msr_mask = MSR_VSX; -+ if ((sel & 1) && (instr & 1)) /* lxvh8x & lxvb16x + VSR >= 32 */ -+ msr_mask = MSR_VEC; -+ if (!(msr & msr_mask)) { -+ pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx" -+ " instr=%08x msr:%016lx\n", -+ smp_processor_id(), current->comm, current->pid, -+ regs->nip, instr, msr); -+ return; -+ } -+ -+ /* Do logging here before we modify sel based on endian */ -+ switch (sel) { -+ case 0: /* lxvw4x */ -+ PPC_WARN_EMULATED(lxvw4x, regs); -+ break; -+ case 1: /* lxvh8x */ -+ PPC_WARN_EMULATED(lxvh8x, regs); -+ break; -+ case 2: /* lxvd2x */ -+ PPC_WARN_EMULATED(lxvd2x, regs); -+ break; -+ case 3: /* lxvb16x */ -+ PPC_WARN_EMULATED(lxvb16x, regs); -+ break; -+ } -+ -+#ifdef __LITTLE_ENDIAN__ -+ /* -+ * An LE kernel stores the vector in the task struct as an LE -+ * byte array (effectively swapping both the components and -+ * the content of the components). Those instructions expect -+ * the components to remain in ascending address order, so we -+ * swap them back. -+ * -+ * If we are running a BE user space, the expectation is that -+ * of a simple memcpy, so forcing the emulation to look like -+ * a lxvb16x should do the trick. 
-+ */ -+ if (swap) -+ sel = 3; -+ -+ switch (sel) { -+ case 0: /* lxvw4x */ -+ for (i = 0; i < 4; i++) -+ ((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i]; -+ break; -+ case 1: /* lxvh8x */ -+ for (i = 0; i < 8; i++) -+ ((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i]; -+ break; -+ case 2: /* lxvd2x */ -+ for (i = 0; i < 2; i++) -+ ((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i]; -+ break; -+ case 3: /* lxvb16x */ -+ for (i = 0; i < 16; i++) -+ vdst[i] = vbuf[15-i]; -+ break; -+ } -+#else /* __LITTLE_ENDIAN__ */ -+ /* On a big endian kernel, a BE userspace only needs a memcpy */ -+ if (!swap) -+ sel = 3; -+ -+ /* Otherwise, we need to swap the content of the components */ -+ switch (sel) { -+ case 0: /* lxvw4x */ -+ for (i = 0; i < 4; i++) -+ ((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]); -+ break; -+ case 1: /* lxvh8x */ -+ for (i = 0; i < 8; i++) -+ ((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]); -+ break; -+ case 2: /* lxvd2x */ -+ for (i = 0; i < 2; i++) -+ ((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]); -+ break; -+ case 3: /* lxvb16x */ -+ memcpy(vdst, vbuf, 16); -+ break; -+ } -+#endif /* !__LITTLE_ENDIAN__ */ -+ -+ /* Go to next instruction */ -+ regs->nip += 4; -+} -+#endif /* CONFIG_VSX */ -+ - void handle_hmi_exception(struct pt_regs *regs) - { - struct pt_regs *old_regs; -@@ -706,6 +888,21 @@ void handle_hmi_exception(struct pt_regs *regs) - old_regs = set_irq_regs(regs); - irq_enter(); - -+#ifdef CONFIG_VSX -+ /* Real mode flagged P9 special emu is needed */ -+ if (local_paca->hmi_p9_special_emu) { -+ local_paca->hmi_p9_special_emu = 0; -+ -+ /* -+ * We don't want to take page faults while doing the -+ * emulation, we just replay the instruction if necessary. 
-+ */ -+ pagefault_disable(); -+ p9_hmi_special_emu(regs); -+ pagefault_enable(); -+ } -+#endif /* CONFIG_VSX */ -+ - if (ppc_md.handle_hmi_exception) - ppc_md.handle_hmi_exception(regs); - -@@ -1924,6 +2121,10 @@ struct ppc_emulated ppc_emulated = { - WARN_EMULATED_SETUP(mfdscr), - WARN_EMULATED_SETUP(mtdscr), - WARN_EMULATED_SETUP(lq_stq), -+ WARN_EMULATED_SETUP(lxvw4x), -+ WARN_EMULATED_SETUP(lxvh8x), -+ WARN_EMULATED_SETUP(lxvd2x), -+ WARN_EMULATED_SETUP(lxvb16x), - #endif - }; - -diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c -index c17f81e433f7..355d3f99cafb 100644 ---- a/arch/powerpc/platforms/powernv/smp.c -+++ b/arch/powerpc/platforms/powernv/smp.c -@@ -49,6 +49,13 @@ - - static void pnv_smp_setup_cpu(int cpu) - { -+ /* -+ * P9 workaround for CI vector load (see traps.c), -+ * enable the corresponding HMI interrupt -+ */ -+ if (pvr_version_is(PVR_POWER9)) -+ mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17)); -+ - if (xive_enabled()) - xive_smp_setup_cpu(); - else if (cpu != boot_cpuid) |