Diffstat (limited to 'arch/powerpc')
78 files changed, 3363 insertions(+), 1189 deletions(-)
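The functional heart of this series is the new shared vpmsum CRC core (crc32-vpmsum_core.S), onto which crc32c is rebased and from which a new CRC-T10DIF driver is built, plus a self-test module (crc-vpmsum_test.c) that checks the accelerated transforms against the generic implementations over random buffers, lengths and offsets. As a rough, hypothetical user-space sketch of that verification strategy (all names here are illustrative and not part of the patch; only the CRC32C polynomial 0x82F63B78 is fixed), one optimized and one reference implementation are run over the same random inputs and compared:

/*
 * Hypothetical stand-alone sketch of the crc-vpmsum_test strategy:
 * compare a table-driven CRC32C (standing in for the accelerated
 * one) against a bitwise reference over random lengths and offsets.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define POLY 0x82F63B78u	/* CRC32C polynomial, bit-reflected */

/* Reference: one bit at a time. */
static uint32_t crc32c_bitwise(uint32_t crc, const uint8_t *p, size_t len)
{
	crc = ~crc;
	while (len--) {
		crc ^= *p++;
		for (int k = 0; k < 8; k++)
			crc = (crc >> 1) ^ (POLY & -(crc & 1));
	}
	return ~crc;
}

/* "Optimized": one byte at a time via a 256-entry table. */
static uint32_t table[256];

static void crc32c_init_table(void)
{
	for (uint32_t i = 0; i < 256; i++) {
		uint32_t c = i;
		for (int k = 0; k < 8; k++)
			c = (c >> 1) ^ (POLY & -(c & 1));
		table[i] = c;
	}
}

static uint32_t crc32c_table(uint32_t crc, const uint8_t *p, size_t len)
{
	crc = ~crc;
	while (len--)
		crc = (crc >> 8) ^ table[(crc ^ *p++) & 0xff];
	return ~crc;
}

int main(void)
{
	enum { MAX_CRC_LENGTH = 65535, ITERATIONS = 10000 };
	static uint8_t data[MAX_CRC_LENGTH];

	crc32c_init_table();
	for (long i = 0; i < ITERATIONS; i++) {
		size_t len = (size_t)rand() % MAX_CRC_LENGTH;
		size_t offset = (size_t)rand() & 15;

		if (len <= offset)
			continue;
		len -= offset;
		for (size_t j = 0; j < offset + len; j++)
			data[j] = (uint8_t)rand();

		uint32_t a = crc32c_bitwise(0, data + offset, len);
		uint32_t b = crc32c_table(0, data + offset, len);

		if (a != b) {
			printf("FAILURE: got 0x%08x expected 0x%08x (len %zu)\n",
			       b, a, len);
			return 1;
		}
	}
	printf("all iterations passed\n");
	return 0;
}

Both helpers fold the standard ~0 initial value and final inversion into themselves; the kernel's crc32c shash instead seeds the running CRC from the tfm context, which is why the glue change further down corrects the default seed from 0 to ~0.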
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 494091762bd7..053382616533 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -80,93 +80,98 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK config PPC bool default y - select BUILDTIME_EXTABLE_SORT + # + # Please keep this list sorted alphabetically. + # + select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE + select ARCH_HAS_SG_CHAIN + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_WANT_IPC_PARSE_VERSION select BINFMT_ELF - select ARCH_HAS_ELF_RANDOMIZE - select OF - select OF_EARLY_FLATTREE - select OF_RESERVED_MEM - select HAVE_FTRACE_MCOUNT_RECORD + select BUILDTIME_EXTABLE_SORT + select CLONE_BACKWARDS + select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select EDAC_ATOMIC_SCRUB + select EDAC_SUPPORT + select GENERIC_ATOMIC64 if PPC32 + select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CMOS_UPDATE + select GENERIC_CPU_AUTOPROBE + select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL + select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select GENERIC_TIME_VSYSCALL_OLD + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK + select HAVE_CBPF_JIT if !PPC64 + select HAVE_CONTEXT_TRACKING if PPC64 + select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_STACKOVERFLOW + select HAVE_DMA_API_DEBUG select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL - select HAVE_FUNCTION_TRACER + select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL + select HAVE_EBPF_JIT if PPC64 + select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS - select SYSCTL_EXCEPTION_TRACE - select VIRT_TO_BUS if !PPC64 + select HAVE_GENERIC_RCU_GUP + select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IDE select HAVE_IOREMAP_PROT - select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_KERNEL_GZIP select HAVE_KPROBES - select HAVE_OPTPROBES if PPC64 - select HAVE_ARCH_KGDB select HAVE_KRETPROBES - select HAVE_ARCH_TRACEHOOK + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP - select HAVE_DMA_API_DEBUG + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI if PERF_EVENTS select HAVE_OPROFILE - select HAVE_DEBUG_KMEMLEAK - select ARCH_HAS_SG_CHAIN - select GENERIC_ATOMIC64 if PPC32 + select HAVE_OPTPROBES if PPC64 select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_RCU_TABLE_FREE if SMP select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) - select ARCH_WANT_IPC_PARSE_VERSION - select SPARSE_IRQ + select HAVE_SYSCALL_TRACEPOINTS + select 
HAVE_VIRT_CPU_ACCOUNTING select IRQ_DOMAIN - select GENERIC_IRQ_SHOW - select GENERIC_IRQ_SHOW_LEVEL select IRQ_FORCED_THREADING - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CBPF_JIT if !PPC64 - select HAVE_EBPF_JIT if PPC64 - select HAVE_ARCH_JUMP_LABEL - select ARCH_HAVE_NMI_SAFE_CMPXCHG - select ARCH_HAS_GCOV_PROFILE_ALL - select GENERIC_SMP_IDLE_THREAD - select GENERIC_CMOS_UPDATE - select GENERIC_TIME_VSYSCALL_OLD - select GENERIC_CLOCKEVENTS - select GENERIC_CLOCKEVENTS_BROADCAST if SMP - select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST - select GENERIC_STRNCPY_FROM_USER - select GENERIC_STRNLEN_USER - select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select CLONE_BACKWARDS - select ARCH_USE_BUILTIN_BSWAP - select OLD_SIGSUSPEND - select OLD_SIGACTION if PPC32 - select HAVE_DEBUG_STACKOVERFLOW - select HAVE_IRQ_EXIT_ON_IRQ_STACK - select ARCH_USE_CMPXCHG_LOCKREF if PPC64 - select HAVE_ARCH_AUDITSYSCALL - select ARCH_SUPPORTS_ATOMIC_RMW - select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select NO_BOOTMEM - select HAVE_GENERIC_RCU_GUP - select HAVE_PERF_EVENTS_NMI if PPC64 - select HAVE_NMI if PERF_EVENTS - select EDAC_SUPPORT - select EDAC_ATOMIC_SCRUB - select ARCH_HAS_DMA_SET_COHERENT_MASK - select ARCH_HAS_DEVMEM_IS_ALLOWED - select HAVE_ARCH_SECCOMP_FILTER - select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS - select GENERIC_CPU_AUTOPROBE - select HAVE_VIRT_CPU_ACCOUNTING - select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE - select HAVE_ARCH_HARDENED_USERCOPY - select HAVE_KERNEL_GZIP - select HAVE_CONTEXT_TRACKING if PPC64 + select OF + select OF_EARLY_FLATTREE + select OF_RESERVED_MEM + select OLD_SIGACTION if PPC32 + select OLD_SIGSUSPEND + select SPARSE_IRQ + select SYSCTL_EXCEPTION_TRACE + select VIRT_TO_BUS if !PPC64 + # + # Please keep this list sorted alphabetically. 
+ # config GENERIC_CSUM def_bool n diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 31286fa7873c..19b0d1a81959 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -72,8 +72,15 @@ GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +ifdef CONFIG_PPC64 +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc) +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 +endif + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) ifneq ($(cc-name),clang) cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align endif @@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) +AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index 861e72109df2..f080abfc2f83 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -68,6 +68,7 @@ SECTIONS } #ifdef CONFIG_PPC64_BOOT_WRAPPER + . = ALIGN(256); .got : { __toc_start = .; diff --git a/arch/powerpc/configs/85xx-hw.config b/arch/powerpc/configs/85xx-hw.config index 528ff0e714e6..c03d0fb16665 100644 --- a/arch/powerpc/configs/85xx-hw.config +++ b/arch/powerpc/configs/85xx-hw.config @@ -16,9 +16,8 @@ CONFIG_DAVICOM_PHY=y CONFIG_DMADEVICES=y CONFIG_E1000E=y CONFIG_E1000=y -CONFIG_EDAC_MM_EDAC=y -CONFIG_EDAC_MPC85XX=y CONFIG_EDAC=y +CONFIG_EDAC_MPC85XX=y CONFIG_EEPROM_AT24=y CONFIG_EEPROM_LEGACY=y CONFIG_FB_FSL_DIU=y diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig index c79283be5680..a917f7afb4f9 100644 --- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig +++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig @@ -155,7 +155,6 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y CONFIG_USB_OHCI_HCD_PPC_OF_LE=y CONFIG_USB_STORAGE=y CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_EDAC_MPC85XX=y CONFIG_RTC_CLASS=y # CONFIG_RTC_INTF_PROC is not set diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index dbd961de251e..72900b84d3e0 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -116,7 +116,6 @@ CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_CMOS=y diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 2d7fcbe047ac..aa564599e368 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -179,7 +179,6 @@ CONFIG_INFINIBAND_MTHCA=m CONFIG_INFINIBAND_IPOIB=m CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_EDAC_CELL=y CONFIG_UIO=m CONFIG_EXT2_FS=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 
5553c5ce4274..fe43ff47bd2f 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -142,7 +142,6 @@ CONFIG_USB_UHCI_HCD=y CONFIG_USB_SL811_HCD=y CONFIG_USB_STORAGE=y CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_EDAC_PASEMI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 4f1288b04303..f2e03f032041 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -262,7 +262,6 @@ CONFIG_INFINIBAND_IPOIB_CM=y CONFIG_INFINIBAND_SRP=m CONFIG_INFINIBAND_ISER=m CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_EDAC_PASEMI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 11a3473f9e2e..6340e6c53c54 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -173,7 +173,6 @@ CONFIG_INFINIBAND_MTHCA=m CONFIG_INFINIBAND_IPOIB=m CONFIG_INFINIBAND_ISER=m CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_FS_DAX=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 1d2d69dd6409..18d0d60dadbf 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -988,8 +988,7 @@ CONFIG_LEDS_TRIGGER_BACKLIGHT=m CONFIG_LEDS_TRIGGER_DEFAULT_ON=m CONFIG_ACCESSIBILITY=y CONFIG_A11Y_BRAILLE_CONSOLE=y -CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=m +CONFIG_EDAC=m CONFIG_RTC_CLASS=y # CONFIG_RTC_HCTOSYS is not set CONFIG_RTC_DRV_DS1307=m diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 87f40454bad3..67eca3af9fc7 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -10,6 +10,8 @@ obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o +obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) += crc-vpmsum_test.o aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o md5-ppc-y := md5-asm.o md5-glue.o @@ -17,3 +19,4 @@ sha1-powerpc-y := sha1-powerpc-asm.o sha1.o sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o +crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o diff --git a/arch/powerpc/crypto/crc-vpmsum_test.c b/arch/powerpc/crypto/crc-vpmsum_test.c new file mode 100644 index 000000000000..0153a9c6f4af --- /dev/null +++ b/arch/powerpc/crypto/crc-vpmsum_test.c @@ -0,0 +1,137 @@ +/* + * CRC vpmsum tester + * Copyright 2017 Daniel Axtens, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/crc-t10dif.h> +#include <linux/crc32.h> +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/cpufeature.h> +#include <asm/switch_to.h> + +static unsigned long iterations = 10000; + +#define MAX_CRC_LENGTH 65535 + + +static int __init crc_test_init(void) +{ + u16 crc16 = 0, verify16 = 0; + u32 crc32 = 0, verify32 = 0; + __le32 verify32le = 0; + unsigned char *data; + unsigned long i; + int ret; + + struct crypto_shash *crct10dif_tfm; + struct crypto_shash *crc32c_tfm; + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + data = kmalloc(MAX_CRC_LENGTH, GFP_KERNEL); + if (!data) + return -ENOMEM; + + crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0); + + if (IS_ERR(crct10dif_tfm)) { + pr_err("Error allocating crc-t10dif\n"); + goto free_buf; + } + + crc32c_tfm = crypto_alloc_shash("crc32c", 0, 0); + + if (IS_ERR(crc32c_tfm)) { + pr_err("Error allocating crc32c\n"); + goto free_16; + } + + do { + SHASH_DESC_ON_STACK(crct10dif_shash, crct10dif_tfm); + SHASH_DESC_ON_STACK(crc32c_shash, crc32c_tfm); + + crct10dif_shash->tfm = crct10dif_tfm; + ret = crypto_shash_init(crct10dif_shash); + + if (ret) { + pr_err("Error initing crc-t10dif\n"); + goto free_32; + } + + + crc32c_shash->tfm = crc32c_tfm; + ret = crypto_shash_init(crc32c_shash); + + if (ret) { + pr_err("Error initing crc32c\n"); + goto free_32; + } + + pr_info("crc-vpmsum_test begins, %lu iterations\n", iterations); + for (i=0; i<iterations; i++) { + size_t len, offset; + + get_random_bytes(data, MAX_CRC_LENGTH); + get_random_bytes(&len, sizeof(len)); + get_random_bytes(&offset, sizeof(offset)); + + len %= MAX_CRC_LENGTH; + offset &= 15; + if (len <= offset) + continue; + len -= offset; + + crypto_shash_update(crct10dif_shash, data+offset, len); + crypto_shash_final(crct10dif_shash, (u8 *)(&crc16)); + verify16 = crc_t10dif_generic(verify16, data+offset, len); + + + if (crc16 != verify16) { + pr_err("FAILURE in CRC16: got 0x%04x expected 0x%04x (len %lu)\n", + crc16, verify16, len); + break; + } + + crypto_shash_update(crc32c_shash, data+offset, len); + crypto_shash_final(crc32c_shash, (u8 *)(&crc32)); + verify32 = le32_to_cpu(verify32le); + verify32le = ~cpu_to_le32(__crc32c_le(~verify32, data+offset, len)); + if (crc32 != (u32)verify32le) { + pr_err("FAILURE in CRC32: got 0x%08x expected 0x%08x (len %lu)\n", + crc32, verify32, len); + break; + } + } + pr_info("crc-vpmsum_test done, completed %lu iterations\n", i); + } while (0); + +free_32: + crypto_free_shash(crc32c_tfm); + +free_16: + crypto_free_shash(crct10dif_tfm); + +free_buf: + kfree(data); + + return 0; +} + +static void __exit crc_test_exit(void) {} + +module_init(crc_test_init); +module_exit(crc_test_exit); +module_param(iterations, long, 0400); + +MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>"); +MODULE_DESCRIPTION("Vector polynomial multiply-sum CRC tester"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/crypto/crc32-vpmsum_core.S b/arch/powerpc/crypto/crc32-vpmsum_core.S new file mode 100644 index 000000000000..aadb59c96a27 --- /dev/null +++ b/arch/powerpc/crypto/crc32-vpmsum_core.S @@ -0,0 +1,755 @@ +/* + * Core of the accelerated CRC algorithm. + * In your file, define the constants and CRC_FUNCTION_NAME + * Then include this file. + * + * Calculate the checksum of data that is 16 byte aligned and a multiple of + * 16 bytes. + * + * The first step is to reduce it to 1024 bits. 
We do this in 8 parallel + * chunks in order to mask the latency of the vpmsum instructions. If we + * have more than 32 kB of data to checksum we repeat this step multiple + * times, passing in the previous 1024 bits. + * + * The next step is to reduce the 1024 bits to 64 bits. This step adds + * 32 bits of 0s to the end - this matches what a CRC does. We just + * calculate constants that land the data in this 32 bits. + * + * We then use fixed point Barrett reduction to compute a mod n over GF(2) + * for n = CRC using POWER8 instructions. We use x = 32. + * + * http://en.wikipedia.org/wiki/Barrett_reduction + * + * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. +*/ + +#include <asm/ppc_asm.h> +#include <asm/ppc-opcode.h> + +#define MAX_SIZE 32768 + + .text + +#if defined(__BIG_ENDIAN__) && defined(REFLECT) +#define BYTESWAP_DATA +#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT) +#define BYTESWAP_DATA +#else +#undef BYTESWAP_DATA +#endif + +#define off16 r25 +#define off32 r26 +#define off48 r27 +#define off64 r28 +#define off80 r29 +#define off96 r30 +#define off112 r31 + +#define const1 v24 +#define const2 v25 + +#define byteswap v26 +#define mask_32bit v27 +#define mask_64bit v28 +#define zeroes v29 + +#ifdef BYTESWAP_DATA +#define VPERM(A, B, C, D) vperm A, B, C, D +#else +#define VPERM(A, B, C, D) +#endif + +/* unsigned int CRC_FUNCTION_NAME(unsigned int crc, void *p, unsigned long len) */ +FUNC_START(CRC_FUNCTION_NAME) + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + std r26,-48(r1) + std r25,-56(r1) + + li off16,16 + li off32,32 + li off48,48 + li off64,64 + li off80,80 + li off96,96 + li off112,112 + li r0,0 + + /* Enough room for saving 10 non volatile VMX registers */ + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + stvx v20,0,r6 + stvx v21,off16,r6 + stvx v22,off32,r6 + stvx v23,off48,r6 + stvx v24,off64,r6 + stvx v25,off80,r6 + stvx v26,off96,r6 + stvx v27,off112,r6 + stvx v28,0,r7 + stvx v29,off16,r7 + + mr r10,r3 + + vxor zeroes,zeroes,zeroes + vspltisw v0,-1 + + vsldoi mask_32bit,zeroes,v0,4 + vsldoi mask_64bit,zeroes,v0,8 + + /* Get the initial value into v8 */ + vxor v8,v8,v8 + MTVRD(v8, R3) +#ifdef REFLECT + vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */ +#else + vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */ +#endif + +#ifdef BYTESWAP_DATA + addis r3,r2,.byteswap_constant@toc@ha + addi r3,r3,.byteswap_constant@toc@l + + lvx byteswap,0,r3 + addi r3,r3,16 +#endif + + cmpdi r5,256 + blt .Lshort + + rldicr r6,r5,0,56 + + /* Checksum in blocks of MAX_SIZE */ +1: lis r7,MAX_SIZE@h + ori r7,r7,MAX_SIZE@l + mr r9,r7 + cmpd r6,r7 + bgt 2f + mr r7,r6 +2: subf r6,r7,r6 + + /* our main loop does 128 bytes at a time */ + srdi r7,r7,7 + + /* + * Work out the offset into the constants table to start at. 
Each + * constant is 16 bytes, and it is used against 128 bytes of input + * data - 128 / 16 = 8 + */ + sldi r8,r7,4 + srdi r9,r9,3 + subf r8,r8,r9 + + /* We reduce our final 128 bytes in a separate step */ + addi r7,r7,-1 + mtctr r7 + + addis r3,r2,.constants@toc@ha + addi r3,r3,.constants@toc@l + + /* Find the start of our constants */ + add r3,r3,r8 + + /* zero v0-v7 which will contain our checksums */ + vxor v0,v0,v0 + vxor v1,v1,v1 + vxor v2,v2,v2 + vxor v3,v3,v3 + vxor v4,v4,v4 + vxor v5,v5,v5 + vxor v6,v6,v6 + vxor v7,v7,v7 + + lvx const1,0,r3 + + /* + * If we are looping back to consume more data we use the values + * already in v16-v23. + */ + cmpdi r0,1 + beq 2f + + /* First warm up pass */ + lvx v16,0,r4 + lvx v17,off16,r4 + VPERM(v16,v16,v16,byteswap) + VPERM(v17,v17,v17,byteswap) + lvx v18,off32,r4 + lvx v19,off48,r4 + VPERM(v18,v18,v18,byteswap) + VPERM(v19,v19,v19,byteswap) + lvx v20,off64,r4 + lvx v21,off80,r4 + VPERM(v20,v20,v20,byteswap) + VPERM(v21,v21,v21,byteswap) + lvx v22,off96,r4 + lvx v23,off112,r4 + VPERM(v22,v22,v22,byteswap) + VPERM(v23,v23,v23,byteswap) + addi r4,r4,8*16 + + /* xor in initial value */ + vxor v16,v16,v8 + +2: bdz .Lfirst_warm_up_done + + addi r3,r3,16 + lvx const2,0,r3 + + /* Second warm up pass */ + VPMSUMD(v8,v16,const1) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + VPMSUMD(v9,v17,const1) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + VPMSUMD(v10,v18,const1) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + VPMSUMD(v11,v19,const1) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + ori r2,r2,0 + + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdz .Lfirst_cool_down + + /* + * main loop. We modulo schedule it such that it takes three iterations + * to complete - first iteration load, second iteration vpmsum, third + * iteration xor. 
+ */ + .balign 16 +4: lvx const1,0,r3 + addi r3,r3,16 + ori r2,r2,0 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const2) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const2) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const2) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const2) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + lvx const2,0,r3 + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdnz 4b + +.Lfirst_cool_down: + /* First cool down pass */ + lvx const1,0,r3 + addi r3,r3,16 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const1) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const1) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const1) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const1) + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + ori r2,r2,0 + +.Lsecond_cool_down: + /* Second cool down pass */ + vxor v0,v0,v8 + vxor v1,v1,v9 + vxor v2,v2,v10 + vxor v3,v3,v11 + vxor v4,v4,v12 + vxor v5,v5,v13 + vxor v6,v6,v14 + vxor v7,v7,v15 + +#ifdef REFLECT + /* + * vpmsumd produces a 96 bit result in the least significant bits + * of the register. Since we are bit reflected we have to shift it + * left 32 bits so it occupies the least significant bits in the + * bit reflected domain. + */ + vsldoi v0,v0,zeroes,4 + vsldoi v1,v1,zeroes,4 + vsldoi v2,v2,zeroes,4 + vsldoi v3,v3,zeroes,4 + vsldoi v4,v4,zeroes,4 + vsldoi v5,v5,zeroes,4 + vsldoi v6,v6,zeroes,4 + vsldoi v7,v7,zeroes,4 +#endif + + /* xor with last 1024 bits */ + lvx v8,0,r4 + lvx v9,off16,r4 + VPERM(v8,v8,v8,byteswap) + VPERM(v9,v9,v9,byteswap) + lvx v10,off32,r4 + lvx v11,off48,r4 + VPERM(v10,v10,v10,byteswap) + VPERM(v11,v11,v11,byteswap) + lvx v12,off64,r4 + lvx v13,off80,r4 + VPERM(v12,v12,v12,byteswap) + VPERM(v13,v13,v13,byteswap) + lvx v14,off96,r4 + lvx v15,off112,r4 + VPERM(v14,v14,v14,byteswap) + VPERM(v15,v15,v15,byteswap) + + addi r4,r4,8*16 + + vxor v16,v0,v8 + vxor v17,v1,v9 + vxor v18,v2,v10 + vxor v19,v3,v11 + vxor v20,v4,v12 + vxor v21,v5,v13 + vxor v22,v6,v14 + vxor v23,v7,v15 + + li r0,1 + cmpdi r6,0 + addi r6,r6,128 + bne 1b + + /* Work out how many bytes we have left */ + andi. 
r5,r5,127 + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,128 + add r3,r3,r6 + + /* How many 16 byte chunks are in the tail */ + srdi r7,r5,4 + mtctr r7 + + /* + * Reduce the previously calculated 1024 bits to 64 bits, shifting + * 32 bits to include the trailing 32 bits of zeros + */ + lvx v0,0,r3 + lvx v1,off16,r3 + lvx v2,off32,r3 + lvx v3,off48,r3 + lvx v4,off64,r3 + lvx v5,off80,r3 + lvx v6,off96,r3 + lvx v7,off112,r3 + addi r3,r3,8*16 + + VPMSUMW(v0,v16,v0) + VPMSUMW(v1,v17,v1) + VPMSUMW(v2,v18,v2) + VPMSUMW(v3,v19,v3) + VPMSUMW(v4,v20,v4) + VPMSUMW(v5,v21,v5) + VPMSUMW(v6,v22,v6) + VPMSUMW(v7,v23,v7) + + /* Now reduce the tail (0 - 112 bytes) */ + cmpdi r7,0 + beq 1f + + lvx v16,0,r4 + lvx v17,0,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off16,r4 + lvx v17,off16,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off32,r4 + lvx v17,off32,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off48,r4 + lvx v17,off48,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off64,r4 + lvx v17,off64,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off80,r4 + lvx v17,off80,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off96,r4 + lvx v17,off96,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + + /* Now xor all the parallel chunks together */ +1: vxor v0,v0,v1 + vxor v2,v2,v3 + vxor v4,v4,v5 + vxor v6,v6,v7 + + vxor v0,v0,v2 + vxor v4,v4,v6 + + vxor v0,v0,v4 + +.Lbarrett_reduction: + /* Barrett constants */ + addis r3,r2,.barrett_constants@toc@ha + addi r3,r3,.barrett_constants@toc@l + + lvx const1,0,r3 + lvx const2,off16,r3 + + vsldoi v1,v0,v0,8 + vxor v0,v0,v1 /* xor two 64 bit results together */ + +#ifdef REFLECT + /* shift left one bit */ + vspltisb v1,1 + vsl v0,v0,v1 +#endif + + vand v0,v0,mask_64bit +#ifndef REFLECT + /* + * Now for the Barrett reduction algorithm. The idea is to calculate q, + * the multiple of our polynomial that we need to subtract. By + * doing the computation 2x bits higher (ie 64 bits) and shifting the + * result back down 2x bits, we round down to the nearest multiple. + */ + VPMSUMD(v1,v0,const1) /* ma */ + vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Get the result into r3. We need to shift it left 8 bytes: + * V0 [ 0 1 2 X ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */ +#else + /* + * The reflected version of Barrett reduction. Instead of bit + * reflecting our data (which is expensive to do), we bit reflect our + * constants and our algorithm, which means the intermediate data in + * our vector registers goes from 0-63 instead of 63-0. We can reflect + * the algorithm because we don't carry in mod 2 arithmetic. + */ + vand v1,v0,mask_32bit /* bottom 32 bits of a */ + VPMSUMD(v1,v1,const1) /* ma */ + vand v1,v1,mask_32bit /* bottom 32bits of ma */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Since we are bit reflected, the result (ie the low 32 bits) is in + * the high 32 bits. 
We just need to shift it left 4 bytes + * V0 [ 0 1 X 3 ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */ +#endif + + /* Get it into r3 */ + MFVRD(R3, v0) + +.Lout: + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + lvx v20,0,r6 + lvx v21,off16,r6 + lvx v22,off32,r6 + lvx v23,off48,r6 + lvx v24,off64,r6 + lvx v25,off80,r6 + lvx v26,off96,r6 + lvx v27,off112,r6 + lvx v28,0,r7 + lvx v29,off16,r7 + + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + ld r26,-48(r1) + ld r25,-56(r1) + + blr + +.Lfirst_warm_up_done: + lvx const1,0,r3 + addi r3,r3,16 + + VPMSUMD(v8,v16,const1) + VPMSUMD(v9,v17,const1) + VPMSUMD(v10,v18,const1) + VPMSUMD(v11,v19,const1) + VPMSUMD(v12,v20,const1) + VPMSUMD(v13,v21,const1) + VPMSUMD(v14,v22,const1) + VPMSUMD(v15,v23,const1) + + b .Lsecond_cool_down + +.Lshort: + cmpdi r5,0 + beq .Lzero + + addis r3,r2,.short_constants@toc@ha + addi r3,r3,.short_constants@toc@l + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,256 + add r3,r3,r6 + + /* How many 16 byte chunks? */ + srdi r7,r5,4 + mtctr r7 + + vxor v19,v19,v19 + vxor v20,v20,v20 + + lvx v0,0,r4 + lvx v16,0,r3 + VPERM(v0,v0,v16,byteswap) + vxor v0,v0,v8 /* xor in initial value */ + VPMSUMW(v0,v0,v16) + bdz .Lv0 + + lvx v1,off16,r4 + lvx v17,off16,r3 + VPERM(v1,v1,v17,byteswap) + VPMSUMW(v1,v1,v17) + bdz .Lv1 + + lvx v2,off32,r4 + lvx v16,off32,r3 + VPERM(v2,v2,v16,byteswap) + VPMSUMW(v2,v2,v16) + bdz .Lv2 + + lvx v3,off48,r4 + lvx v17,off48,r3 + VPERM(v3,v3,v17,byteswap) + VPMSUMW(v3,v3,v17) + bdz .Lv3 + + lvx v4,off64,r4 + lvx v16,off64,r3 + VPERM(v4,v4,v16,byteswap) + VPMSUMW(v4,v4,v16) + bdz .Lv4 + + lvx v5,off80,r4 + lvx v17,off80,r3 + VPERM(v5,v5,v17,byteswap) + VPMSUMW(v5,v5,v17) + bdz .Lv5 + + lvx v6,off96,r4 + lvx v16,off96,r3 + VPERM(v6,v6,v16,byteswap) + VPMSUMW(v6,v6,v16) + bdz .Lv6 + + lvx v7,off112,r4 + lvx v17,off112,r3 + VPERM(v7,v7,v17,byteswap) + VPMSUMW(v7,v7,v17) + bdz .Lv7 + + addi r3,r3,128 + addi r4,r4,128 + + lvx v8,0,r4 + lvx v16,0,r3 + VPERM(v8,v8,v16,byteswap) + VPMSUMW(v8,v8,v16) + bdz .Lv8 + + lvx v9,off16,r4 + lvx v17,off16,r3 + VPERM(v9,v9,v17,byteswap) + VPMSUMW(v9,v9,v17) + bdz .Lv9 + + lvx v10,off32,r4 + lvx v16,off32,r3 + VPERM(v10,v10,v16,byteswap) + VPMSUMW(v10,v10,v16) + bdz .Lv10 + + lvx v11,off48,r4 + lvx v17,off48,r3 + VPERM(v11,v11,v17,byteswap) + VPMSUMW(v11,v11,v17) + bdz .Lv11 + + lvx v12,off64,r4 + lvx v16,off64,r3 + VPERM(v12,v12,v16,byteswap) + VPMSUMW(v12,v12,v16) + bdz .Lv12 + + lvx v13,off80,r4 + lvx v17,off80,r3 + VPERM(v13,v13,v17,byteswap) + VPMSUMW(v13,v13,v17) + bdz .Lv13 + + lvx v14,off96,r4 + lvx v16,off96,r3 + VPERM(v14,v14,v16,byteswap) + VPMSUMW(v14,v14,v16) + bdz .Lv14 + + lvx v15,off112,r4 + lvx v17,off112,r3 + VPERM(v15,v15,v17,byteswap) + VPMSUMW(v15,v15,v17) + +.Lv15: vxor v19,v19,v15 +.Lv14: vxor v20,v20,v14 +.Lv13: vxor v19,v19,v13 +.Lv12: vxor v20,v20,v12 +.Lv11: vxor v19,v19,v11 +.Lv10: vxor v20,v20,v10 +.Lv9: vxor v19,v19,v9 +.Lv8: vxor v20,v20,v8 +.Lv7: vxor v19,v19,v7 +.Lv6: vxor v20,v20,v6 +.Lv5: vxor v19,v19,v5 +.Lv4: vxor v20,v20,v4 +.Lv3: vxor v19,v19,v3 +.Lv2: vxor v20,v20,v2 +.Lv1: vxor v19,v19,v1 +.Lv0: vxor v20,v20,v0 + + vxor v0,v19,v20 + + b .Lbarrett_reduction + +.Lzero: + mr r3,r10 + b .Lout + +FUNC_END(CRC_FUNCTION_NAME) diff --git a/arch/powerpc/crypto/crc32c-vpmsum_asm.S b/arch/powerpc/crypto/crc32c-vpmsum_asm.S index dc640b212299..d2bea48051a0 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_asm.S +++ 
b/arch/powerpc/crypto/crc32c-vpmsum_asm.S @@ -1,20 +1,5 @@ /* - * Calculate the checksum of data that is 16 byte aligned and a multiple of - * 16 bytes. - * - * The first step is to reduce it to 1024 bits. We do this in 8 parallel - * chunks in order to mask the latency of the vpmsum instructions. If we - * have more than 32 kB of data to checksum we repeat this step multiple - * times, passing in the previous 1024 bits. - * - * The next step is to reduce the 1024 bits to 64 bits. This step adds - * 32 bits of 0s to the end - this matches what a CRC does. We just - * calculate constants that land the data in this 32 bits. - * - * We then use fixed point Barrett reduction to compute a mod n over GF(2) - * for n = CRC using POWER8 instructions. We use x = 32. - * - * http://en.wikipedia.org/wiki/Barrett_reduction + * Calculate a crc32c with vpmsum acceleration * * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM * @@ -23,9 +8,6 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include <asm/ppc_asm.h> -#include <asm/ppc-opcode.h> - .section .rodata .balign 16 @@ -33,7 +15,6 @@ /* byte reverse permute constant */ .octa 0x0F0E0D0C0B0A09080706050403020100 -#define MAX_SIZE 32768 .constants: /* Reduce 262144 kbits to 1024 bits */ @@ -860,694 +841,6 @@ /* 33 bit reflected Barrett constant n */ .octa 0x00000000000000000000000105ec76f1 - .text - -#if defined(__BIG_ENDIAN__) -#define BYTESWAP_DATA -#else -#undef BYTESWAP_DATA -#endif - -#define off16 r25 -#define off32 r26 -#define off48 r27 -#define off64 r28 -#define off80 r29 -#define off96 r30 -#define off112 r31 - -#define const1 v24 -#define const2 v25 - -#define byteswap v26 -#define mask_32bit v27 -#define mask_64bit v28 -#define zeroes v29 - -#ifdef BYTESWAP_DATA -#define VPERM(A, B, C, D) vperm A, B, C, D -#else -#define VPERM(A, B, C, D) -#endif - -/* unsigned int __crc32c_vpmsum(unsigned int crc, void *p, unsigned long len) */ -FUNC_START(__crc32c_vpmsum) - std r31,-8(r1) - std r30,-16(r1) - std r29,-24(r1) - std r28,-32(r1) - std r27,-40(r1) - std r26,-48(r1) - std r25,-56(r1) - - li off16,16 - li off32,32 - li off48,48 - li off64,64 - li off80,80 - li off96,96 - li off112,112 - li r0,0 - - /* Enough room for saving 10 non volatile VMX registers */ - subi r6,r1,56+10*16 - subi r7,r1,56+2*16 - - stvx v20,0,r6 - stvx v21,off16,r6 - stvx v22,off32,r6 - stvx v23,off48,r6 - stvx v24,off64,r6 - stvx v25,off80,r6 - stvx v26,off96,r6 - stvx v27,off112,r6 - stvx v28,0,r7 - stvx v29,off16,r7 - - mr r10,r3 - - vxor zeroes,zeroes,zeroes - vspltisw v0,-1 - - vsldoi mask_32bit,zeroes,v0,4 - vsldoi mask_64bit,zeroes,v0,8 - - /* Get the initial value into v8 */ - vxor v8,v8,v8 - MTVRD(v8, R3) - vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */ - -#ifdef BYTESWAP_DATA - addis r3,r2,.byteswap_constant@toc@ha - addi r3,r3,.byteswap_constant@toc@l - - lvx byteswap,0,r3 - addi r3,r3,16 -#endif - - cmpdi r5,256 - blt .Lshort - - rldicr r6,r5,0,56 - - /* Checksum in blocks of MAX_SIZE */ -1: lis r7,MAX_SIZE@h - ori r7,r7,MAX_SIZE@l - mr r9,r7 - cmpd r6,r7 - bgt 2f - mr r7,r6 -2: subf r6,r7,r6 - - /* our main loop does 128 bytes at a time */ - srdi r7,r7,7 - - /* - * Work out the offset into the constants table to start at. 
Each - * constant is 16 bytes, and it is used against 128 bytes of input - * data - 128 / 16 = 8 - */ - sldi r8,r7,4 - srdi r9,r9,3 - subf r8,r8,r9 - - /* We reduce our final 128 bytes in a separate step */ - addi r7,r7,-1 - mtctr r7 - - addis r3,r2,.constants@toc@ha - addi r3,r3,.constants@toc@l - - /* Find the start of our constants */ - add r3,r3,r8 - - /* zero v0-v7 which will contain our checksums */ - vxor v0,v0,v0 - vxor v1,v1,v1 - vxor v2,v2,v2 - vxor v3,v3,v3 - vxor v4,v4,v4 - vxor v5,v5,v5 - vxor v6,v6,v6 - vxor v7,v7,v7 - - lvx const1,0,r3 - - /* - * If we are looping back to consume more data we use the values - * already in v16-v23. - */ - cmpdi r0,1 - beq 2f - - /* First warm up pass */ - lvx v16,0,r4 - lvx v17,off16,r4 - VPERM(v16,v16,v16,byteswap) - VPERM(v17,v17,v17,byteswap) - lvx v18,off32,r4 - lvx v19,off48,r4 - VPERM(v18,v18,v18,byteswap) - VPERM(v19,v19,v19,byteswap) - lvx v20,off64,r4 - lvx v21,off80,r4 - VPERM(v20,v20,v20,byteswap) - VPERM(v21,v21,v21,byteswap) - lvx v22,off96,r4 - lvx v23,off112,r4 - VPERM(v22,v22,v22,byteswap) - VPERM(v23,v23,v23,byteswap) - addi r4,r4,8*16 - - /* xor in initial value */ - vxor v16,v16,v8 - -2: bdz .Lfirst_warm_up_done - - addi r3,r3,16 - lvx const2,0,r3 - - /* Second warm up pass */ - VPMSUMD(v8,v16,const1) - lvx v16,0,r4 - VPERM(v16,v16,v16,byteswap) - ori r2,r2,0 - - VPMSUMD(v9,v17,const1) - lvx v17,off16,r4 - VPERM(v17,v17,v17,byteswap) - ori r2,r2,0 - - VPMSUMD(v10,v18,const1) - lvx v18,off32,r4 - VPERM(v18,v18,v18,byteswap) - ori r2,r2,0 - - VPMSUMD(v11,v19,const1) - lvx v19,off48,r4 - VPERM(v19,v19,v19,byteswap) - ori r2,r2,0 - - VPMSUMD(v12,v20,const1) - lvx v20,off64,r4 - VPERM(v20,v20,v20,byteswap) - ori r2,r2,0 - - VPMSUMD(v13,v21,const1) - lvx v21,off80,r4 - VPERM(v21,v21,v21,byteswap) - ori r2,r2,0 - - VPMSUMD(v14,v22,const1) - lvx v22,off96,r4 - VPERM(v22,v22,v22,byteswap) - ori r2,r2,0 - - VPMSUMD(v15,v23,const1) - lvx v23,off112,r4 - VPERM(v23,v23,v23,byteswap) - - addi r4,r4,8*16 - - bdz .Lfirst_cool_down - - /* - * main loop. We modulo schedule it such that it takes three iterations - * to complete - first iteration load, second iteration vpmsum, third - * iteration xor. 
- */ - .balign 16 -4: lvx const1,0,r3 - addi r3,r3,16 - ori r2,r2,0 - - vxor v0,v0,v8 - VPMSUMD(v8,v16,const2) - lvx v16,0,r4 - VPERM(v16,v16,v16,byteswap) - ori r2,r2,0 - - vxor v1,v1,v9 - VPMSUMD(v9,v17,const2) - lvx v17,off16,r4 - VPERM(v17,v17,v17,byteswap) - ori r2,r2,0 - - vxor v2,v2,v10 - VPMSUMD(v10,v18,const2) - lvx v18,off32,r4 - VPERM(v18,v18,v18,byteswap) - ori r2,r2,0 - - vxor v3,v3,v11 - VPMSUMD(v11,v19,const2) - lvx v19,off48,r4 - VPERM(v19,v19,v19,byteswap) - lvx const2,0,r3 - ori r2,r2,0 - - vxor v4,v4,v12 - VPMSUMD(v12,v20,const1) - lvx v20,off64,r4 - VPERM(v20,v20,v20,byteswap) - ori r2,r2,0 - - vxor v5,v5,v13 - VPMSUMD(v13,v21,const1) - lvx v21,off80,r4 - VPERM(v21,v21,v21,byteswap) - ori r2,r2,0 - - vxor v6,v6,v14 - VPMSUMD(v14,v22,const1) - lvx v22,off96,r4 - VPERM(v22,v22,v22,byteswap) - ori r2,r2,0 - - vxor v7,v7,v15 - VPMSUMD(v15,v23,const1) - lvx v23,off112,r4 - VPERM(v23,v23,v23,byteswap) - - addi r4,r4,8*16 - - bdnz 4b - -.Lfirst_cool_down: - /* First cool down pass */ - lvx const1,0,r3 - addi r3,r3,16 - - vxor v0,v0,v8 - VPMSUMD(v8,v16,const1) - ori r2,r2,0 - - vxor v1,v1,v9 - VPMSUMD(v9,v17,const1) - ori r2,r2,0 - - vxor v2,v2,v10 - VPMSUMD(v10,v18,const1) - ori r2,r2,0 - - vxor v3,v3,v11 - VPMSUMD(v11,v19,const1) - ori r2,r2,0 - - vxor v4,v4,v12 - VPMSUMD(v12,v20,const1) - ori r2,r2,0 - - vxor v5,v5,v13 - VPMSUMD(v13,v21,const1) - ori r2,r2,0 - - vxor v6,v6,v14 - VPMSUMD(v14,v22,const1) - ori r2,r2,0 - - vxor v7,v7,v15 - VPMSUMD(v15,v23,const1) - ori r2,r2,0 - -.Lsecond_cool_down: - /* Second cool down pass */ - vxor v0,v0,v8 - vxor v1,v1,v9 - vxor v2,v2,v10 - vxor v3,v3,v11 - vxor v4,v4,v12 - vxor v5,v5,v13 - vxor v6,v6,v14 - vxor v7,v7,v15 - - /* - * vpmsumd produces a 96 bit result in the least significant bits - * of the register. Since we are bit reflected we have to shift it - * left 32 bits so it occupies the least significant bits in the - * bit reflected domain. - */ - vsldoi v0,v0,zeroes,4 - vsldoi v1,v1,zeroes,4 - vsldoi v2,v2,zeroes,4 - vsldoi v3,v3,zeroes,4 - vsldoi v4,v4,zeroes,4 - vsldoi v5,v5,zeroes,4 - vsldoi v6,v6,zeroes,4 - vsldoi v7,v7,zeroes,4 - - /* xor with last 1024 bits */ - lvx v8,0,r4 - lvx v9,off16,r4 - VPERM(v8,v8,v8,byteswap) - VPERM(v9,v9,v9,byteswap) - lvx v10,off32,r4 - lvx v11,off48,r4 - VPERM(v10,v10,v10,byteswap) - VPERM(v11,v11,v11,byteswap) - lvx v12,off64,r4 - lvx v13,off80,r4 - VPERM(v12,v12,v12,byteswap) - VPERM(v13,v13,v13,byteswap) - lvx v14,off96,r4 - lvx v15,off112,r4 - VPERM(v14,v14,v14,byteswap) - VPERM(v15,v15,v15,byteswap) - - addi r4,r4,8*16 - - vxor v16,v0,v8 - vxor v17,v1,v9 - vxor v18,v2,v10 - vxor v19,v3,v11 - vxor v20,v4,v12 - vxor v21,v5,v13 - vxor v22,v6,v14 - vxor v23,v7,v15 - - li r0,1 - cmpdi r6,0 - addi r6,r6,128 - bne 1b - - /* Work out how many bytes we have left */ - andi. 
r5,r5,127 - - /* Calculate where in the constant table we need to start */ - subfic r6,r5,128 - add r3,r3,r6 - - /* How many 16 byte chunks are in the tail */ - srdi r7,r5,4 - mtctr r7 - - /* - * Reduce the previously calculated 1024 bits to 64 bits, shifting - * 32 bits to include the trailing 32 bits of zeros - */ - lvx v0,0,r3 - lvx v1,off16,r3 - lvx v2,off32,r3 - lvx v3,off48,r3 - lvx v4,off64,r3 - lvx v5,off80,r3 - lvx v6,off96,r3 - lvx v7,off112,r3 - addi r3,r3,8*16 - - VPMSUMW(v0,v16,v0) - VPMSUMW(v1,v17,v1) - VPMSUMW(v2,v18,v2) - VPMSUMW(v3,v19,v3) - VPMSUMW(v4,v20,v4) - VPMSUMW(v5,v21,v5) - VPMSUMW(v6,v22,v6) - VPMSUMW(v7,v23,v7) - - /* Now reduce the tail (0 - 112 bytes) */ - cmpdi r7,0 - beq 1f - - lvx v16,0,r4 - lvx v17,0,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off16,r4 - lvx v17,off16,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off32,r4 - lvx v17,off32,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off48,r4 - lvx v17,off48,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off64,r4 - lvx v17,off64,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off80,r4 - lvx v17,off80,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - bdz 1f - - lvx v16,off96,r4 - lvx v17,off96,r3 - VPERM(v16,v16,v16,byteswap) - VPMSUMW(v16,v16,v17) - vxor v0,v0,v16 - - /* Now xor all the parallel chunks together */ -1: vxor v0,v0,v1 - vxor v2,v2,v3 - vxor v4,v4,v5 - vxor v6,v6,v7 - - vxor v0,v0,v2 - vxor v4,v4,v6 - - vxor v0,v0,v4 - -.Lbarrett_reduction: - /* Barrett constants */ - addis r3,r2,.barrett_constants@toc@ha - addi r3,r3,.barrett_constants@toc@l - - lvx const1,0,r3 - lvx const2,off16,r3 - - vsldoi v1,v0,v0,8 - vxor v0,v0,v1 /* xor two 64 bit results together */ - - /* shift left one bit */ - vspltisb v1,1 - vsl v0,v0,v1 - - vand v0,v0,mask_64bit - - /* - * The reflected version of Barrett reduction. Instead of bit - * reflecting our data (which is expensive to do), we bit reflect our - * constants and our algorithm, which means the intermediate data in - * our vector registers goes from 0-63 instead of 63-0. We can reflect - * the algorithm because we don't carry in mod 2 arithmetic. - */ - vand v1,v0,mask_32bit /* bottom 32 bits of a */ - VPMSUMD(v1,v1,const1) /* ma */ - vand v1,v1,mask_32bit /* bottom 32bits of ma */ - VPMSUMD(v1,v1,const2) /* qn */ - vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ - - /* - * Since we are bit reflected, the result (ie the low 32 bits) is in - * the high 32 bits. 
We just need to shift it left 4 bytes - * V0 [ 0 1 X 3 ] - * V0 [ 0 X 2 3 ] - */ - vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */ - - /* Get it into r3 */ - MFVRD(R3, v0) - -.Lout: - subi r6,r1,56+10*16 - subi r7,r1,56+2*16 - - lvx v20,0,r6 - lvx v21,off16,r6 - lvx v22,off32,r6 - lvx v23,off48,r6 - lvx v24,off64,r6 - lvx v25,off80,r6 - lvx v26,off96,r6 - lvx v27,off112,r6 - lvx v28,0,r7 - lvx v29,off16,r7 - - ld r31,-8(r1) - ld r30,-16(r1) - ld r29,-24(r1) - ld r28,-32(r1) - ld r27,-40(r1) - ld r26,-48(r1) - ld r25,-56(r1) - - blr - -.Lfirst_warm_up_done: - lvx const1,0,r3 - addi r3,r3,16 - - VPMSUMD(v8,v16,const1) - VPMSUMD(v9,v17,const1) - VPMSUMD(v10,v18,const1) - VPMSUMD(v11,v19,const1) - VPMSUMD(v12,v20,const1) - VPMSUMD(v13,v21,const1) - VPMSUMD(v14,v22,const1) - VPMSUMD(v15,v23,const1) - - b .Lsecond_cool_down - -.Lshort: - cmpdi r5,0 - beq .Lzero - - addis r3,r2,.short_constants@toc@ha - addi r3,r3,.short_constants@toc@l - - /* Calculate where in the constant table we need to start */ - subfic r6,r5,256 - add r3,r3,r6 - - /* How many 16 byte chunks? */ - srdi r7,r5,4 - mtctr r7 - - vxor v19,v19,v19 - vxor v20,v20,v20 - - lvx v0,0,r4 - lvx v16,0,r3 - VPERM(v0,v0,v16,byteswap) - vxor v0,v0,v8 /* xor in initial value */ - VPMSUMW(v0,v0,v16) - bdz .Lv0 - - lvx v1,off16,r4 - lvx v17,off16,r3 - VPERM(v1,v1,v17,byteswap) - VPMSUMW(v1,v1,v17) - bdz .Lv1 - - lvx v2,off32,r4 - lvx v16,off32,r3 - VPERM(v2,v2,v16,byteswap) - VPMSUMW(v2,v2,v16) - bdz .Lv2 - - lvx v3,off48,r4 - lvx v17,off48,r3 - VPERM(v3,v3,v17,byteswap) - VPMSUMW(v3,v3,v17) - bdz .Lv3 - - lvx v4,off64,r4 - lvx v16,off64,r3 - VPERM(v4,v4,v16,byteswap) - VPMSUMW(v4,v4,v16) - bdz .Lv4 - - lvx v5,off80,r4 - lvx v17,off80,r3 - VPERM(v5,v5,v17,byteswap) - VPMSUMW(v5,v5,v17) - bdz .Lv5 - - lvx v6,off96,r4 - lvx v16,off96,r3 - VPERM(v6,v6,v16,byteswap) - VPMSUMW(v6,v6,v16) - bdz .Lv6 - - lvx v7,off112,r4 - lvx v17,off112,r3 - VPERM(v7,v7,v17,byteswap) - VPMSUMW(v7,v7,v17) - bdz .Lv7 - - addi r3,r3,128 - addi r4,r4,128 - - lvx v8,0,r4 - lvx v16,0,r3 - VPERM(v8,v8,v16,byteswap) - VPMSUMW(v8,v8,v16) - bdz .Lv8 - - lvx v9,off16,r4 - lvx v17,off16,r3 - VPERM(v9,v9,v17,byteswap) - VPMSUMW(v9,v9,v17) - bdz .Lv9 - - lvx v10,off32,r4 - lvx v16,off32,r3 - VPERM(v10,v10,v16,byteswap) - VPMSUMW(v10,v10,v16) - bdz .Lv10 - - lvx v11,off48,r4 - lvx v17,off48,r3 - VPERM(v11,v11,v17,byteswap) - VPMSUMW(v11,v11,v17) - bdz .Lv11 - - lvx v12,off64,r4 - lvx v16,off64,r3 - VPERM(v12,v12,v16,byteswap) - VPMSUMW(v12,v12,v16) - bdz .Lv12 - - lvx v13,off80,r4 - lvx v17,off80,r3 - VPERM(v13,v13,v17,byteswap) - VPMSUMW(v13,v13,v17) - bdz .Lv13 - - lvx v14,off96,r4 - lvx v16,off96,r3 - VPERM(v14,v14,v16,byteswap) - VPMSUMW(v14,v14,v16) - bdz .Lv14 - - lvx v15,off112,r4 - lvx v17,off112,r3 - VPERM(v15,v15,v17,byteswap) - VPMSUMW(v15,v15,v17) - -.Lv15: vxor v19,v19,v15 -.Lv14: vxor v20,v20,v14 -.Lv13: vxor v19,v19,v13 -.Lv12: vxor v20,v20,v12 -.Lv11: vxor v19,v19,v11 -.Lv10: vxor v20,v20,v10 -.Lv9: vxor v19,v19,v9 -.Lv8: vxor v20,v20,v8 -.Lv7: vxor v19,v19,v7 -.Lv6: vxor v20,v20,v6 -.Lv5: vxor v19,v19,v5 -.Lv4: vxor v20,v20,v4 -.Lv3: vxor v19,v19,v3 -.Lv2: vxor v20,v20,v2 -.Lv1: vxor v19,v19,v1 -.Lv0: vxor v20,v20,v0 - - vxor v0,v19,v20 - - b .Lbarrett_reduction - -.Lzero: - mr r3,r10 - b .Lout - -FUNC_END(__crc32_vpmsum) +#define CRC_FUNCTION_NAME __crc32c_vpmsum +#define REFLECT +#include "crc32-vpmsum_core.S" diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index 9fa046d56eba..f058e0c3e4d4 100644 --- 
a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -33,10 +33,13 @@ static u32 crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len) } if (len & ~VMX_ALIGN_MASK) { + preempt_disable(); pagefault_disable(); enable_kernel_altivec(); crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); + disable_kernel_altivec(); pagefault_enable(); + preempt_enable(); } tail = len & VMX_ALIGN_MASK; @@ -52,7 +55,7 @@ static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm) { u32 *key = crypto_tfm_ctx(tfm); - *key = 0; + *key = ~0; return 0; } diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_asm.S b/arch/powerpc/crypto/crct10dif-vpmsum_asm.S new file mode 100644 index 000000000000..5e3d81a0af1b --- /dev/null +++ b/arch/powerpc/crypto/crct10dif-vpmsum_asm.S @@ -0,0 +1,850 @@ +/* + * Calculate a CRC T10DIF with vpmsum acceleration + * + * Constants generated by crc32-vpmsum, available at + * https://github.com/antonblanchard/crc32-vpmsum + * + * crc32-vpmsum is + * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM + * and is available under the GPL v2 or later. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + .section .rodata +.balign 16 + +.byteswap_constant: + /* byte reverse permute constant */ + .octa 0x0F0E0D0C0B0A09080706050403020100 + +.constants: + + /* Reduce 262144 kbits to 1024 bits */ + /* x^261184 mod p(x), x^261120 mod p(x) */ + .octa 0x0000000056d300000000000052550000 + + /* x^260160 mod p(x), x^260096 mod p(x) */ + .octa 0x00000000ee67000000000000a1e40000 + + /* x^259136 mod p(x), x^259072 mod p(x) */ + .octa 0x0000000060830000000000004ad10000 + + /* x^258112 mod p(x), x^258048 mod p(x) */ + .octa 0x000000008cfe0000000000009ab40000 + + /* x^257088 mod p(x), x^257024 mod p(x) */ + .octa 0x000000003e93000000000000fdb50000 + + /* x^256064 mod p(x), x^256000 mod p(x) */ + .octa 0x000000003c2000000000000045480000 + + /* x^255040 mod p(x), x^254976 mod p(x) */ + .octa 0x00000000b1fc0000000000008d690000 + + /* x^254016 mod p(x), x^253952 mod p(x) */ + .octa 0x00000000f82b00000000000024ad0000 + + /* x^252992 mod p(x), x^252928 mod p(x) */ + .octa 0x0000000044420000000000009f1a0000 + + /* x^251968 mod p(x), x^251904 mod p(x) */ + .octa 0x00000000e88c00000000000066ec0000 + + /* x^250944 mod p(x), x^250880 mod p(x) */ + .octa 0x00000000385c000000000000c87d0000 + + /* x^249920 mod p(x), x^249856 mod p(x) */ + .octa 0x000000003227000000000000c8ff0000 + + /* x^248896 mod p(x), x^248832 mod p(x) */ + .octa 0x00000000a9a900000000000033440000 + + /* x^247872 mod p(x), x^247808 mod p(x) */ + .octa 0x00000000abaa00000000000066eb0000 + + /* x^246848 mod p(x), x^246784 mod p(x) */ + .octa 0x000000001ac3000000000000c4ef0000 + + /* x^245824 mod p(x), x^245760 mod p(x) */ + .octa 0x0000000063f000000000000056f30000 + + /* x^244800 mod p(x), x^244736 mod p(x) */ + .octa 0x0000000032cc00000000000002050000 + + /* x^243776 mod p(x), x^243712 mod p(x) */ + .octa 0x00000000f8b5000000000000568e0000 + + /* x^242752 mod p(x), x^242688 mod p(x) */ + .octa 0x000000008db100000000000064290000 + + /* x^241728 mod p(x), x^241664 mod p(x) */ + .octa 0x0000000059ca0000000000006b660000 + + /* x^240704 mod p(x), x^240640 mod p(x) */ + .octa 0x000000005f5c00000000000018f80000 + + /* x^239680 mod p(x), x^239616 mod p(x) */ + .octa 0x0000000061af000000000000b6090000 + + 
/* x^238656 mod p(x), x^238592 mod p(x) */ + .octa 0x00000000e29e000000000000099a0000 + + /* x^237632 mod p(x), x^237568 mod p(x) */ + .octa 0x000000000975000000000000a8360000 + + /* x^236608 mod p(x), x^236544 mod p(x) */ + .octa 0x0000000043900000000000004f570000 + + /* x^235584 mod p(x), x^235520 mod p(x) */ + .octa 0x00000000f9cd000000000000134c0000 + + /* x^234560 mod p(x), x^234496 mod p(x) */ + .octa 0x000000007c29000000000000ec380000 + + /* x^233536 mod p(x), x^233472 mod p(x) */ + .octa 0x000000004c6a000000000000b0d10000 + + /* x^232512 mod p(x), x^232448 mod p(x) */ + .octa 0x00000000e7290000000000007d3e0000 + + /* x^231488 mod p(x), x^231424 mod p(x) */ + .octa 0x00000000f1ab000000000000f0b20000 + + /* x^230464 mod p(x), x^230400 mod p(x) */ + .octa 0x0000000039db0000000000009c270000 + + /* x^229440 mod p(x), x^229376 mod p(x) */ + .octa 0x000000005e2800000000000092890000 + + /* x^228416 mod p(x), x^228352 mod p(x) */ + .octa 0x00000000d44e000000000000d5ee0000 + + /* x^227392 mod p(x), x^227328 mod p(x) */ + .octa 0x00000000cd0a00000000000041f50000 + + /* x^226368 mod p(x), x^226304 mod p(x) */ + .octa 0x00000000c5b400000000000010520000 + + /* x^225344 mod p(x), x^225280 mod p(x) */ + .octa 0x00000000fd2100000000000042170000 + + /* x^224320 mod p(x), x^224256 mod p(x) */ + .octa 0x000000002f2500000000000095c20000 + + /* x^223296 mod p(x), x^223232 mod p(x) */ + .octa 0x000000001b0100000000000001ce0000 + + /* x^222272 mod p(x), x^222208 mod p(x) */ + .octa 0x000000000d430000000000002aca0000 + + /* x^221248 mod p(x), x^221184 mod p(x) */ + .octa 0x0000000030a6000000000000385e0000 + + /* x^220224 mod p(x), x^220160 mod p(x) */ + .octa 0x00000000e37b0000000000006f7a0000 + + /* x^219200 mod p(x), x^219136 mod p(x) */ + .octa 0x00000000873600000000000024320000 + + /* x^218176 mod p(x), x^218112 mod p(x) */ + .octa 0x00000000e9fb000000000000bd9c0000 + + /* x^217152 mod p(x), x^217088 mod p(x) */ + .octa 0x000000003b9500000000000054bc0000 + + /* x^216128 mod p(x), x^216064 mod p(x) */ + .octa 0x00000000133e000000000000a4660000 + + /* x^215104 mod p(x), x^215040 mod p(x) */ + .octa 0x00000000784500000000000079930000 + + /* x^214080 mod p(x), x^214016 mod p(x) */ + .octa 0x00000000b9800000000000001bb80000 + + /* x^213056 mod p(x), x^212992 mod p(x) */ + .octa 0x00000000687600000000000024400000 + + /* x^212032 mod p(x), x^211968 mod p(x) */ + .octa 0x00000000aff300000000000029e10000 + + /* x^211008 mod p(x), x^210944 mod p(x) */ + .octa 0x0000000024b50000000000005ded0000 + + /* x^209984 mod p(x), x^209920 mod p(x) */ + .octa 0x0000000017e8000000000000b12e0000 + + /* x^208960 mod p(x), x^208896 mod p(x) */ + .octa 0x00000000128400000000000026d20000 + + /* x^207936 mod p(x), x^207872 mod p(x) */ + .octa 0x000000002115000000000000a32a0000 + + /* x^206912 mod p(x), x^206848 mod p(x) */ + .octa 0x000000009595000000000000a1210000 + + /* x^205888 mod p(x), x^205824 mod p(x) */ + .octa 0x00000000281e000000000000ee8b0000 + + /* x^204864 mod p(x), x^204800 mod p(x) */ + .octa 0x0000000006010000000000003d0d0000 + + /* x^203840 mod p(x), x^203776 mod p(x) */ + .octa 0x00000000e2b600000000000034e90000 + + /* x^202816 mod p(x), x^202752 mod p(x) */ + .octa 0x000000001bd40000000000004cdb0000 + + /* x^201792 mod p(x), x^201728 mod p(x) */ + .octa 0x00000000df2800000000000030e90000 + + /* x^200768 mod p(x), x^200704 mod p(x) */ + .octa 0x0000000049c200000000000042590000 + + /* x^199744 mod p(x), x^199680 mod p(x) */ + .octa 0x000000009b97000000000000df950000 + + /* x^198720 mod p(x), x^198656 mod p(x) */ + 
.octa 0x000000006184000000000000da7b0000 + + /* x^197696 mod p(x), x^197632 mod p(x) */ + .octa 0x00000000461700000000000012510000 + + /* x^196672 mod p(x), x^196608 mod p(x) */ + .octa 0x000000009b40000000000000f37e0000 + + /* x^195648 mod p(x), x^195584 mod p(x) */ + .octa 0x00000000eeb2000000000000ecf10000 + + /* x^194624 mod p(x), x^194560 mod p(x) */ + .octa 0x00000000b2e800000000000050f20000 + + /* x^193600 mod p(x), x^193536 mod p(x) */ + .octa 0x00000000f59a000000000000e0b30000 + + /* x^192576 mod p(x), x^192512 mod p(x) */ + .octa 0x00000000467f0000000000004d5a0000 + + /* x^191552 mod p(x), x^191488 mod p(x) */ + .octa 0x00000000da92000000000000bb010000 + + /* x^190528 mod p(x), x^190464 mod p(x) */ + .octa 0x000000001e1000000000000022a40000 + + /* x^189504 mod p(x), x^189440 mod p(x) */ + .octa 0x0000000058fe000000000000836f0000 + + /* x^188480 mod p(x), x^188416 mod p(x) */ + .octa 0x00000000b9ce000000000000d78d0000 + + /* x^187456 mod p(x), x^187392 mod p(x) */ + .octa 0x0000000022210000000000004f8d0000 + + /* x^186432 mod p(x), x^186368 mod p(x) */ + .octa 0x00000000744600000000000033760000 + + /* x^185408 mod p(x), x^185344 mod p(x) */ + .octa 0x000000001c2e000000000000a1e50000 + + /* x^184384 mod p(x), x^184320 mod p(x) */ + .octa 0x00000000dcc8000000000000a1a40000 + + /* x^183360 mod p(x), x^183296 mod p(x) */ + .octa 0x00000000910f00000000000019a20000 + + /* x^182336 mod p(x), x^182272 mod p(x) */ + .octa 0x0000000055d5000000000000f6ae0000 + + /* x^181312 mod p(x), x^181248 mod p(x) */ + .octa 0x00000000c8ba000000000000a7ac0000 + + /* x^180288 mod p(x), x^180224 mod p(x) */ + .octa 0x0000000031f8000000000000eea20000 + + /* x^179264 mod p(x), x^179200 mod p(x) */ + .octa 0x000000001966000000000000c4d90000 + + /* x^178240 mod p(x), x^178176 mod p(x) */ + .octa 0x00000000b9810000000000002b470000 + + /* x^177216 mod p(x), x^177152 mod p(x) */ + .octa 0x000000008303000000000000f7cf0000 + + /* x^176192 mod p(x), x^176128 mod p(x) */ + .octa 0x000000002ce500000000000035b30000 + + /* x^175168 mod p(x), x^175104 mod p(x) */ + .octa 0x000000002fae0000000000000c7c0000 + + /* x^174144 mod p(x), x^174080 mod p(x) */ + .octa 0x00000000f50c0000000000009edf0000 + + /* x^173120 mod p(x), x^173056 mod p(x) */ + .octa 0x00000000714f00000000000004cd0000 + + /* x^172096 mod p(x), x^172032 mod p(x) */ + .octa 0x00000000c161000000000000541b0000 + + /* x^171072 mod p(x), x^171008 mod p(x) */ + .octa 0x0000000021c8000000000000e2700000 + + /* x^170048 mod p(x), x^169984 mod p(x) */ + .octa 0x00000000b93d00000000000009a60000 + + /* x^169024 mod p(x), x^168960 mod p(x) */ + .octa 0x00000000fbcf000000000000761c0000 + + /* x^168000 mod p(x), x^167936 mod p(x) */ + .octa 0x0000000026350000000000009db30000 + + /* x^166976 mod p(x), x^166912 mod p(x) */ + .octa 0x00000000b64f0000000000003e9f0000 + + /* x^165952 mod p(x), x^165888 mod p(x) */ + .octa 0x00000000bd0e00000000000078590000 + + /* x^164928 mod p(x), x^164864 mod p(x) */ + .octa 0x00000000d9360000000000008bc80000 + + /* x^163904 mod p(x), x^163840 mod p(x) */ + .octa 0x000000002f140000000000008c9f0000 + + /* x^162880 mod p(x), x^162816 mod p(x) */ + .octa 0x000000006a270000000000006af70000 + + /* x^161856 mod p(x), x^161792 mod p(x) */ + .octa 0x000000006685000000000000e5210000 + + /* x^160832 mod p(x), x^160768 mod p(x) */ + .octa 0x0000000062da00000000000008290000 + + /* x^159808 mod p(x), x^159744 mod p(x) */ + .octa 0x00000000bb4b000000000000e4d00000 + + /* x^158784 mod p(x), x^158720 mod p(x) */ + .octa 0x00000000d2490000000000004ae10000 + + 
/* x^157760 mod p(x), x^157696 mod p(x) */ + .octa 0x00000000c85b00000000000000e70000 + + /* x^156736 mod p(x), x^156672 mod p(x) */ + .octa 0x00000000c37a00000000000015650000 + + /* x^155712 mod p(x), x^155648 mod p(x) */ + .octa 0x0000000018530000000000001c2f0000 + + /* x^154688 mod p(x), x^154624 mod p(x) */ + .octa 0x00000000b46600000000000037bd0000 + + /* x^153664 mod p(x), x^153600 mod p(x) */ + .octa 0x00000000439b00000000000012190000 + + /* x^152640 mod p(x), x^152576 mod p(x) */ + .octa 0x00000000b1260000000000005ece0000 + + /* x^151616 mod p(x), x^151552 mod p(x) */ + .octa 0x00000000d8110000000000002a5e0000 + + /* x^150592 mod p(x), x^150528 mod p(x) */ + .octa 0x00000000099f00000000000052330000 + + /* x^149568 mod p(x), x^149504 mod p(x) */ + .octa 0x00000000f9f9000000000000f9120000 + + /* x^148544 mod p(x), x^148480 mod p(x) */ + .octa 0x000000005cc00000000000000ddc0000 + + /* x^147520 mod p(x), x^147456 mod p(x) */ + .octa 0x00000000343b00000000000012200000 + + /* x^146496 mod p(x), x^146432 mod p(x) */ + .octa 0x000000009222000000000000d12b0000 + + /* x^145472 mod p(x), x^145408 mod p(x) */ + .octa 0x00000000d781000000000000eb2d0000 + + /* x^144448 mod p(x), x^144384 mod p(x) */ + .octa 0x000000000bf400000000000058970000 + + /* x^143424 mod p(x), x^143360 mod p(x) */ + .octa 0x00000000094200000000000013690000 + + /* x^142400 mod p(x), x^142336 mod p(x) */ + .octa 0x00000000d55100000000000051950000 + + /* x^141376 mod p(x), x^141312 mod p(x) */ + .octa 0x000000008f11000000000000954b0000 + + /* x^140352 mod p(x), x^140288 mod p(x) */ + .octa 0x00000000140f000000000000b29e0000 + + /* x^139328 mod p(x), x^139264 mod p(x) */ + .octa 0x00000000c6db000000000000db5d0000 + + /* x^138304 mod p(x), x^138240 mod p(x) */ + .octa 0x00000000715b000000000000dfaf0000 + + /* x^137280 mod p(x), x^137216 mod p(x) */ + .octa 0x000000000dea000000000000e3b60000 + + /* x^136256 mod p(x), x^136192 mod p(x) */ + .octa 0x000000006f94000000000000ddaf0000 + + /* x^135232 mod p(x), x^135168 mod p(x) */ + .octa 0x0000000024e1000000000000e4f70000 + + /* x^134208 mod p(x), x^134144 mod p(x) */ + .octa 0x000000008810000000000000aa110000 + + /* x^133184 mod p(x), x^133120 mod p(x) */ + .octa 0x0000000030c2000000000000a8e60000 + + /* x^132160 mod p(x), x^132096 mod p(x) */ + .octa 0x00000000e6d0000000000000ccf30000 + + /* x^131136 mod p(x), x^131072 mod p(x) */ + .octa 0x000000004da000000000000079bf0000 + + /* x^130112 mod p(x), x^130048 mod p(x) */ + .octa 0x000000007759000000000000b3a30000 + + /* x^129088 mod p(x), x^129024 mod p(x) */ + .octa 0x00000000597400000000000028790000 + + /* x^128064 mod p(x), x^128000 mod p(x) */ + .octa 0x000000007acd000000000000b5820000 + + /* x^127040 mod p(x), x^126976 mod p(x) */ + .octa 0x00000000e6e400000000000026ad0000 + + /* x^126016 mod p(x), x^125952 mod p(x) */ + .octa 0x000000006d49000000000000985b0000 + + /* x^124992 mod p(x), x^124928 mod p(x) */ + .octa 0x000000000f0800000000000011520000 + + /* x^123968 mod p(x), x^123904 mod p(x) */ + .octa 0x000000002c7f000000000000846c0000 + + /* x^122944 mod p(x), x^122880 mod p(x) */ + .octa 0x000000005ce7000000000000ae1d0000 + + /* x^121920 mod p(x), x^121856 mod p(x) */ + .octa 0x00000000d4cb000000000000e21d0000 + + /* x^120896 mod p(x), x^120832 mod p(x) */ + .octa 0x000000003a2300000000000019bb0000 + + /* x^119872 mod p(x), x^119808 mod p(x) */ + .octa 0x000000000e1700000000000095290000 + + /* x^118848 mod p(x), x^118784 mod p(x) */ + .octa 0x000000006e6400000000000050d20000 + + /* x^117824 mod p(x), x^117760 mod p(x) */ + 
.octa 0x000000008d5c0000000000000cd10000 + + /* x^116800 mod p(x), x^116736 mod p(x) */ + .octa 0x00000000ef310000000000007b570000 + + /* x^115776 mod p(x), x^115712 mod p(x) */ + .octa 0x00000000645d00000000000053d60000 + + /* x^114752 mod p(x), x^114688 mod p(x) */ + .octa 0x0000000018fc00000000000077510000 + + /* x^113728 mod p(x), x^113664 mod p(x) */ + .octa 0x000000000cb3000000000000a7b70000 + + /* x^112704 mod p(x), x^112640 mod p(x) */ + .octa 0x00000000991b000000000000d0780000 + + /* x^111680 mod p(x), x^111616 mod p(x) */ + .octa 0x00000000845a000000000000be3c0000 + + /* x^110656 mod p(x), x^110592 mod p(x) */ + .octa 0x00000000d3a9000000000000df020000 + + /* x^109632 mod p(x), x^109568 mod p(x) */ + .octa 0x0000000017d7000000000000063e0000 + + /* x^108608 mod p(x), x^108544 mod p(x) */ + .octa 0x000000007a860000000000008ab40000 + + /* x^107584 mod p(x), x^107520 mod p(x) */ + .octa 0x00000000fd7c000000000000c7bd0000 + + /* x^106560 mod p(x), x^106496 mod p(x) */ + .octa 0x00000000a56b000000000000efd60000 + + /* x^105536 mod p(x), x^105472 mod p(x) */ + .octa 0x0000000010e400000000000071380000 + + /* x^104512 mod p(x), x^104448 mod p(x) */ + .octa 0x00000000994500000000000004d30000 + + /* x^103488 mod p(x), x^103424 mod p(x) */ + .octa 0x00000000b83c0000000000003b0e0000 + + /* x^102464 mod p(x), x^102400 mod p(x) */ + .octa 0x00000000d6c10000000000008b020000 + + /* x^101440 mod p(x), x^101376 mod p(x) */ + .octa 0x000000009efc000000000000da940000 + + /* x^100416 mod p(x), x^100352 mod p(x) */ + .octa 0x000000005e87000000000000f9f70000 + + /* x^99392 mod p(x), x^99328 mod p(x) */ + .octa 0x000000006c9b00000000000045e40000 + + /* x^98368 mod p(x), x^98304 mod p(x) */ + .octa 0x00000000178a00000000000083940000 + + /* x^97344 mod p(x), x^97280 mod p(x) */ + .octa 0x00000000f0c8000000000000f0a00000 + + /* x^96320 mod p(x), x^96256 mod p(x) */ + .octa 0x00000000f699000000000000b74b0000 + + /* x^95296 mod p(x), x^95232 mod p(x) */ + .octa 0x00000000316d000000000000c1cf0000 + + /* x^94272 mod p(x), x^94208 mod p(x) */ + .octa 0x00000000987e00000000000072680000 + + /* x^93248 mod p(x), x^93184 mod p(x) */ + .octa 0x00000000acff000000000000e0ab0000 + + /* x^92224 mod p(x), x^92160 mod p(x) */ + .octa 0x00000000a1f6000000000000c5a80000 + + /* x^91200 mod p(x), x^91136 mod p(x) */ + .octa 0x0000000061bd000000000000cf690000 + + /* x^90176 mod p(x), x^90112 mod p(x) */ + .octa 0x00000000c9f2000000000000cbcc0000 + + /* x^89152 mod p(x), x^89088 mod p(x) */ + .octa 0x000000005a33000000000000de050000 + + /* x^88128 mod p(x), x^88064 mod p(x) */ + .octa 0x00000000e416000000000000ccd70000 + + /* x^87104 mod p(x), x^87040 mod p(x) */ + .octa 0x0000000058930000000000002f670000 + + /* x^86080 mod p(x), x^86016 mod p(x) */ + .octa 0x00000000a9d3000000000000152f0000 + + /* x^85056 mod p(x), x^84992 mod p(x) */ + .octa 0x00000000c114000000000000ecc20000 + + /* x^84032 mod p(x), x^83968 mod p(x) */ + .octa 0x00000000b9270000000000007c890000 + + /* x^83008 mod p(x), x^82944 mod p(x) */ + .octa 0x000000002e6000000000000006ee0000 + + /* x^81984 mod p(x), x^81920 mod p(x) */ + .octa 0x00000000dfc600000000000009100000 + + /* x^80960 mod p(x), x^80896 mod p(x) */ + .octa 0x000000004911000000000000ad4e0000 + + /* x^79936 mod p(x), x^79872 mod p(x) */ + .octa 0x00000000ae1b000000000000b04d0000 + + /* x^78912 mod p(x), x^78848 mod p(x) */ + .octa 0x0000000005fa000000000000e9900000 + + /* x^77888 mod p(x), x^77824 mod p(x) */ + .octa 0x0000000004a1000000000000cc6f0000 + + /* x^76864 mod p(x), x^76800 mod p(x) */ + 
.octa 0x00000000af73000000000000ed110000 + + /* x^75840 mod p(x), x^75776 mod p(x) */ + .octa 0x0000000082530000000000008f7e0000 + + /* x^74816 mod p(x), x^74752 mod p(x) */ + .octa 0x00000000cfdc000000000000594f0000 + + /* x^73792 mod p(x), x^73728 mod p(x) */ + .octa 0x00000000a6b6000000000000a8750000 + + /* x^72768 mod p(x), x^72704 mod p(x) */ + .octa 0x00000000fd76000000000000aa0c0000 + + /* x^71744 mod p(x), x^71680 mod p(x) */ + .octa 0x0000000006f500000000000071db0000 + + /* x^70720 mod p(x), x^70656 mod p(x) */ + .octa 0x0000000037ca000000000000ab0c0000 + + /* x^69696 mod p(x), x^69632 mod p(x) */ + .octa 0x00000000d7ab000000000000b7a00000 + + /* x^68672 mod p(x), x^68608 mod p(x) */ + .octa 0x00000000440800000000000090d30000 + + /* x^67648 mod p(x), x^67584 mod p(x) */ + .octa 0x00000000186100000000000054730000 + + /* x^66624 mod p(x), x^66560 mod p(x) */ + .octa 0x000000007368000000000000a3a20000 + + /* x^65600 mod p(x), x^65536 mod p(x) */ + .octa 0x0000000026d0000000000000f9040000 + + /* x^64576 mod p(x), x^64512 mod p(x) */ + .octa 0x00000000fe770000000000009c0a0000 + + /* x^63552 mod p(x), x^63488 mod p(x) */ + .octa 0x000000002cba000000000000d1e70000 + + /* x^62528 mod p(x), x^62464 mod p(x) */ + .octa 0x00000000f8bd0000000000005ac10000 + + /* x^61504 mod p(x), x^61440 mod p(x) */ + .octa 0x000000007372000000000000d68d0000 + + /* x^60480 mod p(x), x^60416 mod p(x) */ + .octa 0x00000000f37f00000000000089f60000 + + /* x^59456 mod p(x), x^59392 mod p(x) */ + .octa 0x00000000078400000000000008a90000 + + /* x^58432 mod p(x), x^58368 mod p(x) */ + .octa 0x00000000d3e400000000000042360000 + + /* x^57408 mod p(x), x^57344 mod p(x) */ + .octa 0x00000000eba800000000000092d50000 + + /* x^56384 mod p(x), x^56320 mod p(x) */ + .octa 0x00000000afbe000000000000b4d50000 + + /* x^55360 mod p(x), x^55296 mod p(x) */ + .octa 0x00000000d8ca000000000000c9060000 + + /* x^54336 mod p(x), x^54272 mod p(x) */ + .octa 0x00000000c2d00000000000008f4f0000 + + /* x^53312 mod p(x), x^53248 mod p(x) */ + .octa 0x00000000373200000000000028690000 + + /* x^52288 mod p(x), x^52224 mod p(x) */ + .octa 0x0000000046ae000000000000c3b30000 + + /* x^51264 mod p(x), x^51200 mod p(x) */ + .octa 0x00000000b243000000000000f8700000 + + /* x^50240 mod p(x), x^50176 mod p(x) */ + .octa 0x00000000f7f500000000000029eb0000 + + /* x^49216 mod p(x), x^49152 mod p(x) */ + .octa 0x000000000c7e000000000000fe730000 + + /* x^48192 mod p(x), x^48128 mod p(x) */ + .octa 0x00000000c38200000000000096000000 + + /* x^47168 mod p(x), x^47104 mod p(x) */ + .octa 0x000000008956000000000000683c0000 + + /* x^46144 mod p(x), x^46080 mod p(x) */ + .octa 0x00000000422d0000000000005f1e0000 + + /* x^45120 mod p(x), x^45056 mod p(x) */ + .octa 0x00000000ac0f0000000000006f810000 + + /* x^44096 mod p(x), x^44032 mod p(x) */ + .octa 0x00000000ce30000000000000031f0000 + + /* x^43072 mod p(x), x^43008 mod p(x) */ + .octa 0x000000003d43000000000000455a0000 + + /* x^42048 mod p(x), x^41984 mod p(x) */ + .octa 0x000000007ebe000000000000a6050000 + + /* x^41024 mod p(x), x^40960 mod p(x) */ + .octa 0x00000000976e00000000000077eb0000 + + /* x^40000 mod p(x), x^39936 mod p(x) */ + .octa 0x000000000872000000000000389c0000 + + /* x^38976 mod p(x), x^38912 mod p(x) */ + .octa 0x000000008979000000000000c7b20000 + + /* x^37952 mod p(x), x^37888 mod p(x) */ + .octa 0x000000005c1e0000000000001d870000 + + /* x^36928 mod p(x), x^36864 mod p(x) */ + .octa 0x00000000aebb00000000000045810000 + + /* x^35904 mod p(x), x^35840 mod p(x) */ + .octa 
0x000000004f7e0000000000006d4a0000 + + /* x^34880 mod p(x), x^34816 mod p(x) */ + .octa 0x00000000ea98000000000000b9200000 + + /* x^33856 mod p(x), x^33792 mod p(x) */ + .octa 0x00000000f39600000000000022f20000 + + /* x^32832 mod p(x), x^32768 mod p(x) */ + .octa 0x000000000bc500000000000041ca0000 + + /* x^31808 mod p(x), x^31744 mod p(x) */ + .octa 0x00000000786400000000000078500000 + + /* x^30784 mod p(x), x^30720 mod p(x) */ + .octa 0x00000000be970000000000009e7e0000 + + /* x^29760 mod p(x), x^29696 mod p(x) */ + .octa 0x00000000dd6d000000000000a53c0000 + + /* x^28736 mod p(x), x^28672 mod p(x) */ + .octa 0x000000004c3f00000000000039340000 + + /* x^27712 mod p(x), x^27648 mod p(x) */ + .octa 0x0000000093a4000000000000b58e0000 + + /* x^26688 mod p(x), x^26624 mod p(x) */ + .octa 0x0000000050fb00000000000062d40000 + + /* x^25664 mod p(x), x^25600 mod p(x) */ + .octa 0x00000000f505000000000000a26f0000 + + /* x^24640 mod p(x), x^24576 mod p(x) */ + .octa 0x0000000064f900000000000065e60000 + + /* x^23616 mod p(x), x^23552 mod p(x) */ + .octa 0x00000000e8c2000000000000aad90000 + + /* x^22592 mod p(x), x^22528 mod p(x) */ + .octa 0x00000000720b000000000000a3b00000 + + /* x^21568 mod p(x), x^21504 mod p(x) */ + .octa 0x00000000e992000000000000d2680000 + + /* x^20544 mod p(x), x^20480 mod p(x) */ + .octa 0x000000009132000000000000cf4c0000 + + /* x^19520 mod p(x), x^19456 mod p(x) */ + .octa 0x00000000608a00000000000076610000 + + /* x^18496 mod p(x), x^18432 mod p(x) */ + .octa 0x000000009948000000000000fb9f0000 + + /* x^17472 mod p(x), x^17408 mod p(x) */ + .octa 0x00000000173000000000000003770000 + + /* x^16448 mod p(x), x^16384 mod p(x) */ + .octa 0x000000006fe300000000000004880000 + + /* x^15424 mod p(x), x^15360 mod p(x) */ + .octa 0x00000000e15300000000000056a70000 + + /* x^14400 mod p(x), x^14336 mod p(x) */ + .octa 0x0000000092d60000000000009dfd0000 + + /* x^13376 mod p(x), x^13312 mod p(x) */ + .octa 0x0000000002fd00000000000074c80000 + + /* x^12352 mod p(x), x^12288 mod p(x) */ + .octa 0x00000000c78b000000000000a3ec0000 + + /* x^11328 mod p(x), x^11264 mod p(x) */ + .octa 0x000000009262000000000000b3530000 + + /* x^10304 mod p(x), x^10240 mod p(x) */ + .octa 0x0000000084f200000000000047bf0000 + + /* x^9280 mod p(x), x^9216 mod p(x) */ + .octa 0x0000000067ee000000000000e97c0000 + + /* x^8256 mod p(x), x^8192 mod p(x) */ + .octa 0x00000000535b00000000000091e10000 + + /* x^7232 mod p(x), x^7168 mod p(x) */ + .octa 0x000000007ebb00000000000055060000 + + /* x^6208 mod p(x), x^6144 mod p(x) */ + .octa 0x00000000c6a1000000000000fd360000 + + /* x^5184 mod p(x), x^5120 mod p(x) */ + .octa 0x000000001be500000000000055860000 + + /* x^4160 mod p(x), x^4096 mod p(x) */ + .octa 0x00000000ae0e0000000000005bd00000 + + /* x^3136 mod p(x), x^3072 mod p(x) */ + .octa 0x0000000022040000000000008db20000 + + /* x^2112 mod p(x), x^2048 mod p(x) */ + .octa 0x00000000c9eb000000000000efe20000 + + /* x^1088 mod p(x), x^1024 mod p(x) */ + .octa 0x0000000039b400000000000051d10000 + +.short_constants: + + /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ + /* x^2048 mod p(x), x^2016 mod p(x), x^1984 mod p(x), x^1952 mod p(x) */ + .octa 0xefe20000dccf00009440000033590000 + + /* x^1920 mod p(x), x^1888 mod p(x), x^1856 mod p(x), x^1824 mod p(x) */ + .octa 0xee6300002f3f000062180000e0ed0000 + + /* x^1792 mod p(x), x^1760 mod p(x), x^1728 mod p(x), x^1696 mod p(x) */ + .octa 0xcf5f000017ef0000ccbe000023d30000 + + /* x^1664 mod p(x), x^1632 mod p(x), x^1600 mod p(x), 
x^1568 mod p(x) */ + .octa 0x6d0c0000a30e00000920000042630000 + + /* x^1536 mod p(x), x^1504 mod p(x), x^1472 mod p(x), x^1440 mod p(x) */ + .octa 0x21d30000932b0000a7a00000efcc0000 + + /* x^1408 mod p(x), x^1376 mod p(x), x^1344 mod p(x), x^1312 mod p(x) */ + .octa 0x10be00000b310000666f00000d1c0000 + + /* x^1280 mod p(x), x^1248 mod p(x), x^1216 mod p(x), x^1184 mod p(x) */ + .octa 0x1f240000ce9e0000caad0000589e0000 + + /* x^1152 mod p(x), x^1120 mod p(x), x^1088 mod p(x), x^1056 mod p(x) */ + .octa 0x29610000d02b000039b400007cf50000 + + /* x^1024 mod p(x), x^992 mod p(x), x^960 mod p(x), x^928 mod p(x) */ + .octa 0x51d100009d9d00003c0e0000bfd60000 + + /* x^896 mod p(x), x^864 mod p(x), x^832 mod p(x), x^800 mod p(x) */ + .octa 0xda390000ceae000013830000713c0000 + + /* x^768 mod p(x), x^736 mod p(x), x^704 mod p(x), x^672 mod p(x) */ + .octa 0xb67800001e16000085c0000080a60000 + + /* x^640 mod p(x), x^608 mod p(x), x^576 mod p(x), x^544 mod p(x) */ + .octa 0x0db40000f7f90000371d0000e6580000 + + /* x^512 mod p(x), x^480 mod p(x), x^448 mod p(x), x^416 mod p(x) */ + .octa 0x87e70000044c0000aadb0000a4970000 + + /* x^384 mod p(x), x^352 mod p(x), x^320 mod p(x), x^288 mod p(x) */ + .octa 0x1f990000ad180000d8b30000e7b50000 + + /* x^256 mod p(x), x^224 mod p(x), x^192 mod p(x), x^160 mod p(x) */ + .octa 0xbe6c00006ee300004c1a000006df0000 + + /* x^128 mod p(x), x^96 mod p(x), x^64 mod p(x), x^32 mod p(x) */ + .octa 0xfb0b00002d560000136800008bb70000 + + +.barrett_constants: + /* Barrett constant m - (4^32)/n */ + .octa 0x000000000000000000000001f65a57f8 /* x^64 div p(x) */ + /* Barrett constant n */ + .octa 0x0000000000000000000000018bb70000 + +#define CRC_FUNCTION_NAME __crct10dif_vpmsum +#include "crc32-vpmsum_core.S" diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_glue.c b/arch/powerpc/crypto/crct10dif-vpmsum_glue.c new file mode 100644 index 000000000000..02ea277863d1 --- /dev/null +++ b/arch/powerpc/crypto/crct10dif-vpmsum_glue.c @@ -0,0 +1,128 @@ +/* + * Calculate a CRC T10-DIF with vpmsum acceleration + * + * Copyright 2017, Daniel Axtens, IBM Corporation. + * [based on crc32c-vpmsum_glue.c] + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ */ + +#include <linux/crc-t10dif.h> +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/cpufeature.h> +#include <asm/switch_to.h> + +#define VMX_ALIGN 16 +#define VMX_ALIGN_MASK (VMX_ALIGN-1) + +#define VECTOR_BREAKPOINT 64 + +u32 __crct10dif_vpmsum(u32 crc, unsigned char const *p, size_t len); + +static u16 crct10dif_vpmsum(u16 crci, unsigned char const *p, size_t len) +{ + unsigned int prealign; + unsigned int tail; + u32 crc = crci; + + if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || in_interrupt()) + return crc_t10dif_generic(crc, p, len); + + if ((unsigned long)p & VMX_ALIGN_MASK) { + prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); + crc = crc_t10dif_generic(crc, p, prealign); + len -= prealign; + p += prealign; + } + + if (len & ~VMX_ALIGN_MASK) { + crc <<= 16; + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + crc = __crct10dif_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); + disable_kernel_altivec(); + pagefault_enable(); + preempt_enable(); + crc >>= 16; + } + + tail = len & VMX_ALIGN_MASK; + if (tail) { + p += len & ~VMX_ALIGN_MASK; + crc = crc_t10dif_generic(crc, p, tail); + } + + return crc & 0xffff; +} + +static int crct10dif_vpmsum_init(struct shash_desc *desc) +{ + u16 *crc = shash_desc_ctx(desc); + + *crc = 0; + return 0; +} + +static int crct10dif_vpmsum_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u16 *crc = shash_desc_ctx(desc); + + *crc = crct10dif_vpmsum(*crc, data, length); + + return 0; +} + + +static int crct10dif_vpmsum_final(struct shash_desc *desc, u8 *out) +{ + u16 *crcp = shash_desc_ctx(desc); + + *(u16 *)out = *crcp; + return 0; +} + +static struct shash_alg alg = { + .init = crct10dif_vpmsum_init, + .update = crct10dif_vpmsum_update, + .final = crct10dif_vpmsum_final, + .descsize = CRC_T10DIF_DIGEST_SIZE, + .digestsize = CRC_T10DIF_DIGEST_SIZE, + .base = { + .cra_name = "crct10dif", + .cra_driver_name = "crct10dif-vpmsum", + .cra_priority = 200, + .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init crct10dif_vpmsum_mod_init(void) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + return crypto_register_shash(&alg); +} + +static void __exit crct10dif_vpmsum_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crct10dif_vpmsum_mod_init); +module_exit(crct10dif_vpmsum_mod_fini); + +MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>"); +MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("crct10dif"); +MODULE_ALIAS_CRYPTO("crct10dif-vpmsum"); diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 73eb794d6163..bc5fdfd22788 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -51,6 +51,10 @@ #define PPC_BIT(bit) (1UL << PPC_BITLSHIFT(bit)) #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) +/* Put a PPC bit into a "normal" bit position */ +#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit) \ + ((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit)) + #include <asm/barrier.h> /* Macro for generating the ***_bits() functions */ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 012223638815..26ed228d4dc6 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ 
b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/book3s/32/hash.h> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 1eeeb72c7015..8f4d41936e5a 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1,9 +1,12 @@ #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ +#include <asm-generic/5level-fixup.h> + #ifndef __ASSEMBLY__ #include <linux/mmdebug.h> #endif + /* * Common bits between hash and Radix page table */ @@ -347,23 +350,58 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, __r; \ }) +static inline int __pte_write(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); +} + +#ifdef CONFIG_NUMA_BALANCING +#define pte_savedwrite pte_savedwrite +static inline bool pte_savedwrite(pte_t pte) +{ + /* + * Saved write ptes are prot none ptes that don't have + * the privileged bit set. We mark prot none as one which has + * present and privileged bits set and RWX cleared. To mark + * protnone which used to have _PAGE_WRITE set we clear + * the privileged bit. + */ + return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED)); +} +#else +#define pte_savedwrite pte_savedwrite +static inline bool pte_savedwrite(pte_t pte) +{ + return false; +} +#endif + +static inline int pte_write(pte_t pte) +{ + return __pte_write(pte) || pte_savedwrite(pte); +} + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); + if (__pte_write(*ptep)) + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); + else if (unlikely(pte_savedwrite(*ptep))) + pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0); } static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); + /* + * We should not find protnone for hugetlb, but this completes the + * interface. + */ + if (__pte_write(*ptep)) + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); + else if (unlikely(pte_savedwrite(*ptep))) + pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1); } #define __HAVE_ARCH_PTEP_GET_AND_CLEAR @@ -397,11 +435,6 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_update(mm, addr, ptep, ~0UL, 0, 0); } -static inline int pte_write(pte_t pte) -{ - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); -} - static inline int pte_dirty(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY)); @@ -465,19 +498,12 @@ static inline pte_t pte_clear_savedwrite(pte_t pte) VM_BUG_ON(!pte_protnone(pte)); return __pte(pte_val(pte) | _PAGE_PRIVILEGED); } - -#define pte_savedwrite pte_savedwrite -static inline bool pte_savedwrite(pte_t pte) +#else +#define pte_clear_savedwrite pte_clear_savedwrite +static inline pte_t pte_clear_savedwrite(pte_t pte) { - /* - * Saved write ptes are prot none ptes that doesn't have - * privileged bit sit. We mark prot none as one which has - * present and pviliged bit set and RWX cleared. To mark - * protnone which used to have _PAGE_WRITE set we clear - * the privileged bit. 
- */ - VM_BUG_ON(!pte_protnone(pte)); - return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED)); + VM_WARN_ON(1); + return __pte(pte_val(pte) & ~_PAGE_WRITE); } #endif /* CONFIG_NUMA_BALANCING */ @@ -506,6 +532,8 @@ static inline unsigned long pte_pfn(pte_t pte) /* Generic modifiers for PTE bits */ static inline pte_t pte_wrprotect(pte_t pte) { + if (unlikely(pte_savedwrite(pte))) + return pte_clear_savedwrite(pte); return __pte(pte_val(pte) & ~_PAGE_WRITE); } @@ -926,6 +954,7 @@ static inline int pmd_protnone(pmd_t pmd) #define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) #define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -982,11 +1011,10 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - - if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); + if (__pmd_write((*pmdp))) + pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); + else if (unlikely(pmd_savedwrite(*pmdp))) + pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED); } static inline int pmd_trans_huge(pmd_t pmd) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 3a39283333c3..f2c562a0a427 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -85,12 +85,12 @@ } \ } while (0) -#define __WARN_TAINT(taint) do { \ +#define __WARN_FLAGS(flags) do { \ __asm__ __volatile__( \ "1: twi 31,0,0\n" \ _EMIT_BUG_ENTRY \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (BUGFLAG_TAINT(taint)), \ + "i" (BUGFLAG_WARNING|(flags)), \ "i" (sizeof(struct bug_entry))); \ } while (0) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 4e63787dc3be..842124b199b5 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -112,7 +112,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #ifdef __powerpc64__ res += (__force u64)addend; - return (__force __wsum)((u32)res + (res >> 32)); + return (__force __wsum) from64to32(res); #else asm("addc %0,%0,%1;" "addze %0,%0;" diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index fd321eb423cb..155731557c9b 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -70,8 +70,8 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err) std r0,0(r1); \ ptesync; \ ld r0,0(r1); \ -1: cmpd cr0,r0,r0; \ - bne 1b; \ +236: cmpd cr0,r0,r0; \ + bne 236b; \ IDLE_INST; \ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 93b9b84568e8..09bde6e34f5d 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -144,8 +144,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define ARCH_DLINFO_CACHE_GEOMETRY \ NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size); \ NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i)); \ - NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1i.size); \ - NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1i)); \ + NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size); \ + NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d)); \ NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size); \ NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2)); \ 
NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size); \ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 14752eee3d0c..ed3beadd2cc5 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -236,9 +236,9 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtctr reg; \ bctr -#define BRANCH_LINK_TO_FAR(reg, label) \ - __LOAD_FAR_HANDLER(reg, label); \ - mtctr reg; \ +#define BRANCH_LINK_TO_FAR(label) \ + __LOAD_FAR_HANDLER(r12, label); \ + mtctr r12; \ bctrl /* @@ -265,7 +265,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define BRANCH_TO_COMMON(reg, label) \ b label -#define BRANCH_LINK_TO_FAR(reg, label) \ +#define BRANCH_LINK_TO_FAR(label) \ bl label #define BRANCH_TO_KVM(reg, label) \ diff --git a/arch/powerpc/include/asm/extable.h b/arch/powerpc/include/asm/extable.h new file mode 100644 index 000000000000..07cc45cd86d9 --- /dev/null +++ b/arch/powerpc/include/asm/extable.h @@ -0,0 +1,29 @@ +#ifndef _ARCH_POWERPC_EXTABLE_H +#define _ARCH_POWERPC_EXTABLE_H + +/* + * The exception table consists of pairs of relative addresses: the first is + * the address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out what + * to do. + * + * All the routines below use bits of fixup code that are out of line with the + * main instruction path. This means when everything is well, we don't even + * have to jump over them. Further, they do not intrude on our cache or tlb + * entries. + */ + +#define ARCH_HAS_RELATIVE_EXTABLE + +struct exception_table_entry { + int insn; + int fixup; +}; + +static inline unsigned long extable_fixup(const struct exception_table_entry *x) +{ + return (unsigned long)&x->fixup + x->fixup; +} + +#endif diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index f97d8cb6bdf6..ed62efe01e49 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -66,6 +66,55 @@ #define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \ P8_DSISR_MC_ERAT_MULTIHIT_SEC) + +/* + * Machine Check bits on power9 + */ +#define P9_SRR1_MC_LOADSTORE(srr1) (((srr1) >> PPC_BITLSHIFT(42)) & 1) + +#define P9_SRR1_MC_IFETCH(srr1) ( \ + PPC_BITEXTRACT(srr1, 45, 0) | \ + PPC_BITEXTRACT(srr1, 44, 1) | \ + PPC_BITEXTRACT(srr1, 43, 2) | \ + PPC_BITEXTRACT(srr1, 36, 3) ) + +/* 0 is reserved */ +#define P9_SRR1_MC_IFETCH_UE 1 +#define P9_SRR1_MC_IFETCH_SLB_PARITY 2 +#define P9_SRR1_MC_IFETCH_SLB_MULTIHIT 3 +#define P9_SRR1_MC_IFETCH_ERAT_MULTIHIT 4 +#define P9_SRR1_MC_IFETCH_TLB_MULTIHIT 5 +#define P9_SRR1_MC_IFETCH_UE_TLB_RELOAD 6 +/* 7 is reserved */ +#define P9_SRR1_MC_IFETCH_LINK_TIMEOUT 8 +#define P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT 9 +/* 10 ? 
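presumably unassigned: no P9_SRR1_MC_IFETCH_* value below decodes to 10. As a worked example of the decode, P9_SRR1_MC_IFETCH() above gathers SRR1 bits 45, 44, 43 and 36 (IBM numbering, bit 0 is the MSB) into a 4-bit code, so bit 45 alone yields 1 (UE) and bit 36 alone yields 8 (LINK_TIMEOUT).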
*/ +#define P9_SRR1_MC_IFETCH_RA 11 +#define P9_SRR1_MC_IFETCH_RA_TABLEWALK 12 +#define P9_SRR1_MC_IFETCH_RA_ASYNC_STORE 13 +#define P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT 14 +#define P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN 15 + +/* DSISR bits for machine check (On Power9) */ +#define P9_DSISR_MC_UE (PPC_BIT(48)) +#define P9_DSISR_MC_UE_TABLEWALK (PPC_BIT(49)) +#define P9_DSISR_MC_LINK_LOAD_TIMEOUT (PPC_BIT(50)) +#define P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT (PPC_BIT(51)) +#define P9_DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52)) +#define P9_DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53)) +#define P9_DSISR_MC_USER_TLBIE (PPC_BIT(54)) +#define P9_DSISR_MC_SLB_PARITY_MFSLB (PPC_BIT(55)) +#define P9_DSISR_MC_SLB_MULTIHIT_MFSLB (PPC_BIT(56)) +#define P9_DSISR_MC_RA_LOAD (PPC_BIT(57)) +#define P9_DSISR_MC_RA_TABLEWALK (PPC_BIT(58)) +#define P9_DSISR_MC_RA_TABLEWALK_FOREIGN (PPC_BIT(59)) +#define P9_DSISR_MC_RA_FOREIGN (PPC_BIT(60)) + +/* SLB error bits */ +#define P9_DSISR_MC_SLB_ERRORS (P9_DSISR_MC_ERAT_MULTIHIT | \ + P9_DSISR_MC_SLB_PARITY_MFSLB | \ + P9_DSISR_MC_SLB_MULTIHIT_MFSLB) + enum MCE_Version { MCE_V1 = 1, }; @@ -93,6 +142,9 @@ enum MCE_ErrorType { MCE_ERROR_TYPE_SLB = 2, MCE_ERROR_TYPE_ERAT = 3, MCE_ERROR_TYPE_TLB = 4, + MCE_ERROR_TYPE_USER = 5, + MCE_ERROR_TYPE_RA = 6, + MCE_ERROR_TYPE_LINK = 7, }; enum MCE_UeErrorType { @@ -121,6 +173,32 @@ enum MCE_TlbErrorType { MCE_TLB_ERROR_MULTIHIT = 2, }; +enum MCE_UserErrorType { + MCE_USER_ERROR_INDETERMINATE = 0, + MCE_USER_ERROR_TLBIE = 1, +}; + +enum MCE_RaErrorType { + MCE_RA_ERROR_INDETERMINATE = 0, + MCE_RA_ERROR_IFETCH = 1, + MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 2, + MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 3, + MCE_RA_ERROR_LOAD = 4, + MCE_RA_ERROR_STORE = 5, + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 6, + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 7, + MCE_RA_ERROR_LOAD_STORE_FOREIGN = 8, +}; + +enum MCE_LinkErrorType { + MCE_LINK_ERROR_INDETERMINATE = 0, + MCE_LINK_ERROR_IFETCH_TIMEOUT = 1, + MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT = 2, + MCE_LINK_ERROR_LOAD_TIMEOUT = 3, + MCE_LINK_ERROR_STORE_TIMEOUT = 4, + MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT = 5, +}; + struct machine_check_event { enum MCE_Version version:8; /* 0x00 */ uint8_t in_use; /* 0x01 */ @@ -166,6 +244,30 @@ struct machine_check_event { uint64_t effective_address; uint8_t reserved_2[16]; } tlb_error; + + struct { + enum MCE_UserErrorType user_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } user_error; + + struct { + enum MCE_RaErrorType ra_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } ra_error; + + struct { + enum MCE_LinkErrorType link_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } link_error; } u; }; @@ -176,8 +278,12 @@ struct mce_error_info { enum MCE_SlbErrorType slb_error_type:8; enum MCE_EratErrorType erat_error_type:8; enum MCE_TlbErrorType tlb_error_type:8; + enum MCE_UserErrorType user_error_type:8; + enum MCE_RaErrorType ra_error_type:8; + enum MCE_LinkErrorType link_error_type:8; } u; - uint8_t reserved[2]; + enum MCE_Severity severity:8; + enum MCE_Initiator initiator:8; }; #define MAX_MC_EVT 100 diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index b9e3f0aca261..ecf9885ab660 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ 
b/arch/powerpc/include/asm/mmu_context.h @@ -163,11 +163,5 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, /* by default, allow everything */ return true; } - -static inline bool arch_pte_access_permitted(pte_t pte, bool write) -{ - /* by default, allow everything */ - return true; -} #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_MMU_CONTEXT_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index ba9921bf202e..5134ade2e850 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H #define _ASM_POWERPC_NOHASH_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index d0db98793dd8..9f4de0a1035e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -1,5 +1,8 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H + +#include <asm-generic/5level-fixup.h> + /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h index 55b28ef3409a..1facb584dd29 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 0cd8a3852763..e5805ad78e12 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -230,7 +230,7 @@ static inline int hugepd_ok(hugepd_t hpd) return ((hpd_val(hpd) & 0x4) != 0); #else /* We clear the top bit to indicate hugepd */ - return ((hpd_val(hpd) & PD_HUGE) == 0); + return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0); #endif } diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index d99bd442aacb..e7d6d86563ee 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -284,6 +284,13 @@ #define PPC_INST_BRANCH_COND 0x40800000 #define PPC_INST_LBZCIX 0x7c0006aa #define PPC_INST_STBCIX 0x7c0007aa +#define PPC_INST_LWZX 0x7c00002e +#define PPC_INST_LFSX 0x7c00042e +#define PPC_INST_STFSX 0x7c00052e +#define PPC_INST_LFDX 0x7c0004ae +#define PPC_INST_STFDX 0x7c0005ae +#define PPC_INST_LVX 0x7c0000ce +#define PPC_INST_STVX 0x7c0001ce /* macros to insert fields into opcodes */ #define ___PPC_RA(a) (((a) & 0x1f) << 16) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 4a90634e8322..35c00d7a0cf8 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -160,12 +160,18 @@ struct of_drconf_cell { #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ #define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */ -#define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix MMU supported */ -#define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash MMU 
supported */ -#define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no segmentation) */ -#define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB or proc table */ -#define OV5_MMU_SLB 0x1800 /* always use SLB */ -#define OV5_MMU_GTSE 0x1808 /* Guest translation shootdown */ +/* MMU Base Architecture */ +#define OV5_MMU_SUPPORT 0x18C0 /* MMU Mode Support Mask */ +#define OV5_MMU_HASH 0x1800 /* Hash MMU Only */ +#define OV5_MMU_RADIX 0x1840 /* Radix MMU Only */ +#define OV5_MMU_EITHER 0x1880 /* Hash or Radix Supported */ +#define OV5_MMU_DYNAMIC 0x18C0 /* Hash or Radix Can Switch Later */ +#define OV5_NMMU 0x1820 /* Nest MMU Available */ +/* Hash Table Extensions */ +#define OV5_HASH_SEG_TBL 0x1980 /* In Memory Segment Tables Available */ +#define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */ +/* Radix Table Extensions */ +#define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 4b369d83fe9c..1c9470881c4a 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -387,3 +387,4 @@ SYSCALL(copy_file_range) COMPAT_SYS_SPU(preadv2) COMPAT_SYS_SPU(pwritev2) SYSCALL(kexec_file_load) +SYSCALL(statx) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 87e4b2d8dcd4..6fc6464f7421 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -92,6 +92,7 @@ static inline struct thread_info *current_thread_info(void) TIF_NEED_RESCHED */ #define TIF_32BIT 4 /* 32 bit binary */ #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ +#define TIF_PATCH_PENDING 6 /* pending live patching update */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ #define TIF_NOHZ 9 /* in adaptive nohz mode */ @@ -115,6 +116,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_32BIT (1<<TIF_32BIT) #define _TIF_RESTORE_TM (1<<TIF_RESTORE_TM) +#define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) #define _TIF_SECCOMP (1<<TIF_SECCOMP) @@ -131,7 +133,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_RESTORE_TM) + _TIF_RESTORE_TM | _TIF_PATCH_PENDING) #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) /* Bits in local_flags */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 0e6add3187bc..5c0d8a8cdae5 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -1,18 +1,11 @@ #ifndef _ARCH_POWERPC_UACCESS_H #define _ARCH_POWERPC_UACCESS_H -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ - -#include <linux/sched.h> -#include <linux/errno.h> #include <asm/asm-compat.h> #include <asm/ppc_asm.h> #include <asm/processor.h> #include <asm/page.h> - -#define VERIFY_READ 0 -#define VERIFY_WRITE 1 +#include <asm/extable.h> /* * The fs value determines whether argument validity checking should be @@ -64,31 +57,6 @@ __access_ok((__force unsigned long)(addr), (size), get_fs())) /* - * The exception table consists of pairs of relative addresses: the first is - * the address of an instruction that is allowed to 
fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out what - * to do. - * - * All the routines below use bits of fixup code that are out of line with the - * main instruction path. This means when everything is well, we don't even - * have to jump over them. Further, they do not intrude on our cache or tlb - * entries. - */ - -#define ARCH_HAS_RELATIVE_EXTABLE - -struct exception_table_entry { - int insn; - int fixup; -}; - -static inline unsigned long extable_fixup(const struct exception_table_entry *x) -{ - return (unsigned long)&x->fixup + x->fixup; -} - -/* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. * @@ -301,42 +269,19 @@ extern unsigned long __copy_tofrom_user(void __user *to, #ifndef __powerpc64__ -static inline unsigned long copy_from_user(void *to, - const void __user *from, unsigned long n) -{ - if (likely(access_ok(VERIFY_READ, from, n))) { - check_object_size(to, n, false); - return __copy_tofrom_user((__force void __user *)to, from, n); - } - memset(to, 0, n); - return n; -} - -static inline unsigned long copy_to_user(void __user *to, - const void *from, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) { - check_object_size(from, n, true); - return __copy_tofrom_user(to, (__force void __user *)from, n); - } - return n; -} +#define INLINE_COPY_FROM_USER +#define INLINE_COPY_TO_USER #else /* __powerpc64__ */ -#define __copy_in_user(to, from, size) \ - __copy_tofrom_user((to), (from), (size)) - -extern unsigned long copy_from_user(void *to, const void __user *from, - unsigned long n); -extern unsigned long copy_to_user(void __user *to, const void *from, - unsigned long n); -extern unsigned long copy_in_user(void __user *to, const void __user *from, - unsigned long n); - +static inline unsigned long +raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) +{ + return __copy_tofrom_user(to, from, n); +} #endif /* __powerpc64__ */ -static inline unsigned long __copy_from_user_inatomic(void *to, +static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { if (__builtin_constant_p(n) && (n <= 8)) { @@ -360,12 +305,10 @@ static inline unsigned long __copy_from_user_inatomic(void *to, return 0; } - check_object_size(to, n, false); - return __copy_tofrom_user((__force void __user *)to, from, n); } -static inline unsigned long __copy_to_user_inatomic(void __user *to, +static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { if (__builtin_constant_p(n) && (n <= 8)) { @@ -389,25 +332,9 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to, return 0; } - check_object_size(from, n, true); - return __copy_tofrom_user(to, (__force const void __user *)from, n); } -static inline unsigned long __copy_from_user(void *to, - const void __user *from, unsigned long size) -{ - might_fault(); - return __copy_from_user_inatomic(to, from, size); -} - -static inline unsigned long __copy_to_user(void __user *to, - const void *from, unsigned long size) -{ - might_fault(); - return __copy_to_user_inatomic(to, from, size); -} - extern unsigned long __clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) @@ -422,7 +349,4 @@ extern long strncpy_from_user(char *dst, const char __user *src, long count); extern 
__must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); -#endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ - #endif /* _ARCH_POWERPC_UACCESS_H */ diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index eb1acee91a20..9ba11dbcaca9 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include <uapi/asm/unistd.h> -#define NR_syscalls 383 +#define NR_syscalls 384 #define __NR__exit __NR_exit diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index 44583a52f882..58e2ec0310fc 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -99,4 +99,10 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + +#define SO_INCOMING_NAPI_ID 56 + +#define SO_COOKIE 57 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 2f26335a3c42..b85f14228857 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -393,5 +393,6 @@ #define __NR_preadv2 380 #define __NR_pwritev2 381 #define __NR_kexec_file_load 382 +#define __NR_statx 383 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index cbc7c42cdb74..ec7a8b099dd9 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -807,14 +807,25 @@ int fix_alignment(struct pt_regs *regs) nb = aligninfo[instr].len; flags = aligninfo[instr].flags; - /* ldbrx/stdbrx overlap lfs/stfs in the DSISR unfortunately */ - if (IS_XFORM(instruction) && ((instruction >> 1) & 0x3ff) == 532) { - nb = 8; - flags = LD+SW; - } else if (IS_XFORM(instruction) && - ((instruction >> 1) & 0x3ff) == 660) { - nb = 8; - flags = ST+SW; + /* + * Handle some cases which give overlaps in the DSISR values. 
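+ * ldbrx/stdbrx share DSISR values with lfs/stfs, so the X-form + * extended opcode (the old open-coded check was + * ((instruction >> 1) & 0x3ff) == 532 or 660) is used to tell them + * apart, and the load-and-reserve instructions (lwarx/ldarx/lharx/ + * lqarx) are refused outright: emulating an unaligned access for + * them could not preserve the atomicity of the reservation.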
+ */ + if (IS_XFORM(instruction)) { + switch (get_xop(instruction)) { + case 532: /* ldbrx */ + nb = 8; + flags = LD+SW; + break; + case 660: /* stdbrx */ + nb = 8; + flags = ST+SW; + break; + case 20: /* lwarx */ + case 84: /* ldarx */ + case 116: /* lharx */ + case 276: /* lqarx */ + return 0; /* not emulated ever */ + } } /* Byteswap little endian loads and stores */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index bb7a1890aeb7..e79b9daa873c 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -77,6 +77,7 @@ extern void __flush_tlb_power8(unsigned int action); extern void __flush_tlb_power9(unsigned int action); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); +extern long __machine_check_early_realmode_p9(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -540,6 +541,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .flush_tlb = __flush_tlb_power9, + .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, { /* Power9 */ @@ -559,6 +561,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .flush_tlb = __flush_tlb_power9, + .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, { /* Cell Broadband Engine */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6432d4bf08c8..767ef6d68c9e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -689,7 +689,7 @@ resume_kernel: addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ - lwz r3,GPR1(r1) + ld r3,GPR1(r1) subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ mr r4,r1 /* src: current exception frame */ mr r1,r3 /* Reroute the trampoline frame to r1 */ @@ -703,8 +703,8 @@ resume_kernel: addi r6,r6,8 bdnz 2b - /* Do real store operation to complete stwu */ - lwz r5,GPR1(r1) + /* Do real store operation to complete stdu */ + ld r5,GPR1(r1) std r8,0(r5) /* Clear _TIF_EMULATE_STACK_STORE flag */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 857bf7c5b946..6353019966e6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -982,7 +982,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early) EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) EXCEPTION_PROLOG_COMMON_3(0xe60) addi r3,r1,STACK_FRAME_OVERHEAD - BRANCH_LINK_TO_FAR(r4, hmi_exception_realmode) + BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */ /* Windup the stack. */ /* Move original HSRR0 and HSRR1 into the respective regs */ ld r9,_MSR(r1) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 5f61cc0349c0..6fd08219248d 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -276,19 +276,21 @@ power_enter_stop: */ andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ - bne 1f + bne .Lhandle_esl_ec_set IDLE_STATE_ENTER_SEQ(PPC_STOP) li r3,0 /* Since we didn't lose state, return 0 */ b pnv_wakeup_noloss + +.Lhandle_esl_ec_set: /* * Check if the requested state is a deep idle state. 
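* A state is deep when the requested level (RL, in r3) is at or above * pnv_first_deep_stop_state; the comparison follows just below.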
*/ -1: LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) cmpd r3,r4 - bge 2f + bge .Lhandle_deep_stop IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) -2: +.Lhandle_deep_stop: /* * Entering deep idle state. * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to @@ -447,9 +449,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) _GLOBAL(pnv_wakeup_tb_loss) ld r1,PACAR1(r13) /* - * Before entering any idle state, the NVGPRs are saved in the stack - * and they are restored before switching to the process context. Hence - * until they are restored, they are free to be used. + * Before entering any idle state, the NVGPRs are saved in the stack. + * If there was a state loss, or PACA_NAPSTATELOST was set, then the + * NVGPRs are restored. If we are here, it is likely that state is lost, + * but not guaranteed -- neither ISA207 nor ISA300 tests to reach + * here are the same as the test to restore NVGPRS: + * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300, + * and SRR1 test for restoring NVGPRs. + * + * We are about to clobber NVGPRs now, so set NAPSTATELOST to + * guarantee they will always be restored. This might be tightened + * with careful reading of specs (particularly for ISA300) but this + * is already a slow wakeup path and it's simpler to be safe. + */ + li r0,1 + stb r0,PACA_NAPSTATELOST(r13) + + /* * * Save SRR1 and LR in NVGPRs as they might be clobbered in * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index c6923ff45131..a1475e6aef3a 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -58,6 +58,15 @@ static void mce_set_error_info(struct machine_check_event *mce, case MCE_ERROR_TYPE_TLB: mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; break; + case MCE_ERROR_TYPE_USER: + mce->u.user_error.user_error_type = mce_err->u.user_error_type; + break; + case MCE_ERROR_TYPE_RA: + mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type; + break; + case MCE_ERROR_TYPE_LINK: + mce->u.link_error.link_error_type = mce_err->u.link_error_type; + break; case MCE_ERROR_TYPE_UNKNOWN: default: break; @@ -90,13 +99,14 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->gpr3 = regs->gpr[3]; mce->in_use = 1; - mce->initiator = MCE_INITIATOR_CPU; /* Mark it recovered if we have handled it and MSR(RI=1). */ if (handled && (regs->msr & MSR_RI)) mce->disposition = MCE_DISPOSITION_RECOVERED; else mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; - mce->severity = MCE_SEV_ERROR_SYNC; + + mce->initiator = mce_err->initiator; + mce->severity = mce_err->severity; /* * Populate the mce error_type and type-specific error_type. 
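* mce_set_error_info() above does this, now covering the new USER, RA * and LINK error classes as well.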
@@ -115,6 +125,15 @@ void save_mce_event(struct pt_regs *regs, long handled, } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { mce->u.erat_error.effective_address_provided = true; mce->u.erat_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_USER) { + mce->u.user_error.effective_address_provided = true; + mce->u.user_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_RA) { + mce->u.ra_error.effective_address_provided = true; + mce->u.ra_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_LINK) { + mce->u.link_error.effective_address_provided = true; + mce->u.link_error.effective_address = addr; } else if (mce->error_type == MCE_ERROR_TYPE_UE) { mce->u.ue_error.effective_address_provided = true; mce->u.ue_error.effective_address = addr; @@ -239,6 +258,29 @@ void machine_check_print_event_info(struct machine_check_event *evt) "Parity", "Multihit", }; + static const char *mc_user_types[] = { + "Indeterminate", + "tlbie(l) invalid", + }; + static const char *mc_ra_types[] = { + "Indeterminate", + "Instruction fetch (bad)", + "Page table walk ifetch (bad)", + "Page table walk ifetch (foreign)", + "Load (bad)", + "Store (bad)", + "Page table walk Load/Store (bad)", + "Page table walk Load/Store (foreign)", + "Load/Store (foreign)", + }; + static const char *mc_link_types[] = { + "Indeterminate", + "Instruction fetch (timeout)", + "Page table walk ifetch (timeout)", + "Load (timeout)", + "Store (timeout)", + "Page table walk Load/Store (timeout)", + }; /* Print things out */ if (evt->version != MCE_V1) { @@ -315,6 +357,36 @@ void machine_check_print_event_info(struct machine_check_event *evt) printk("%s Effective address: %016llx\n", level, evt->u.tlb_error.effective_address); break; + case MCE_ERROR_TYPE_USER: + subtype = evt->u.user_error.user_error_type < + ARRAY_SIZE(mc_user_types) ? + mc_user_types[evt->u.user_error.user_error_type] + : "Unknown"; + printk("%s Error type: User [%s]\n", level, subtype); + if (evt->u.user_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.user_error.effective_address); + break; + case MCE_ERROR_TYPE_RA: + subtype = evt->u.ra_error.ra_error_type < + ARRAY_SIZE(mc_ra_types) ? + mc_ra_types[evt->u.ra_error.ra_error_type] + : "Unknown"; + printk("%s Error type: Real address [%s]\n", level, subtype); + if (evt->u.ra_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.ra_error.effective_address); + break; + case MCE_ERROR_TYPE_LINK: + subtype = evt->u.link_error.link_error_type < + ARRAY_SIZE(mc_link_types) ? 
+ mc_link_types[evt->u.link_error.link_error_type] + : "Unknown"; + printk("%s Error type: Link [%s]\n", level, subtype); + if (evt->u.link_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.link_error.effective_address); + break; default: case MCE_ERROR_TYPE_UNKNOWN: printk("%s Error type: Unknown\n", level); @@ -341,6 +413,18 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt) if (evt->u.tlb_error.effective_address_provided) return evt->u.tlb_error.effective_address; break; + case MCE_ERROR_TYPE_USER: + if (evt->u.user_error.effective_address_provided) + return evt->u.user_error.effective_address; + break; + case MCE_ERROR_TYPE_RA: + if (evt->u.ra_error.effective_address_provided) + return evt->u.ra_error.effective_address; + break; + case MCE_ERROR_TYPE_LINK: + if (evt->u.link_error.effective_address_provided) + return evt->u.link_error.effective_address; + break; default: case MCE_ERROR_TYPE_UNKNOWN: break; diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 7353991c4ece..763d6f58caa8 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -116,6 +116,51 @@ static void flush_and_reload_slb(void) } #endif +static void flush_erat(void) +{ + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); +} + +#define MCE_FLUSH_SLB 1 +#define MCE_FLUSH_TLB 2 +#define MCE_FLUSH_ERAT 3 + +static int mce_flush(int what) +{ +#ifdef CONFIG_PPC_STD_MMU_64 + if (what == MCE_FLUSH_SLB) { + flush_and_reload_slb(); + return 1; + } +#endif + if (what == MCE_FLUSH_ERAT) { + flush_erat(); + return 1; + } + if (what == MCE_FLUSH_TLB) { + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { + cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); + return 1; + } + } + + return 0; +} + +static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat) +{ + if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB)) + dsisr &= ~slb; + if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT)) + dsisr &= ~erat; + if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB)) + dsisr &= ~tlb; + /* Any other errors we don't understand? 
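Any DSISR bits that survive the three flushes belong to a class this routine cannot clear, so the event is reported as unhandled (return 0); if every set bit was flushed -- e.g. SLB parity via flush_and_reload_slb() and ERAT multihit via flush_erat() -- it is reported as handled (return 1).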
*/ + if (dsisr) + return 0; + return 1; +} + static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) { long handled = 1; @@ -281,6 +326,9 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs) long handled = 1; struct mce_error_info mce_error_info = { 0 }; + mce_error_info.severity = MCE_SEV_ERROR_SYNC; + mce_error_info.initiator = MCE_INITIATOR_CPU; + srr1 = regs->msr; nip = regs->nip; @@ -352,6 +400,9 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs) long handled = 1; struct mce_error_info mce_error_info = { 0 }; + mce_error_info.severity = MCE_SEV_ERROR_SYNC; + mce_error_info.initiator = MCE_INITIATOR_CPU; + srr1 = regs->msr; nip = regs->nip; @@ -372,3 +423,189 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs) save_mce_event(regs, handled, &mce_error_info, nip, addr); return handled; } + +static int mce_handle_derror_p9(struct pt_regs *regs) +{ + uint64_t dsisr = regs->dsisr; + + return mce_handle_flush_derrors(dsisr, + P9_DSISR_MC_SLB_PARITY_MFSLB | + P9_DSISR_MC_SLB_MULTIHIT_MFSLB, + + P9_DSISR_MC_TLB_MULTIHIT_MFTLB, + + P9_DSISR_MC_ERAT_MULTIHIT); +} + +static int mce_handle_ierror_p9(struct pt_regs *regs) +{ + uint64_t srr1 = regs->msr; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_SLB_PARITY: + case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: + return mce_flush(MCE_FLUSH_SLB); + case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: + return mce_flush(MCE_FLUSH_TLB); + case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: + return mce_flush(MCE_FLUSH_ERAT); + default: + return 0; + } +} + +static void mce_get_derror_p9(struct pt_regs *regs, + struct mce_error_info *mce_err, uint64_t *addr) +{ + uint64_t dsisr = regs->dsisr; + + mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->initiator = MCE_INITIATOR_CPU; + + if (dsisr & P9_DSISR_MC_USER_TLBIE) + *addr = regs->nip; + else + *addr = regs->dar; + + if (dsisr & P9_DSISR_MC_UE) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) { + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT; + } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) { + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT; + } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) { + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_USER_TLBIE) { + mce_err->error_type = MCE_ERROR_TYPE_USER; + mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE; + } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_RA_LOAD) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD; + } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + } else 
if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; + } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN; + } +} + +static void mce_get_ierror_p9(struct pt_regs *regs, + struct mce_error_info *mce_err, uint64_t *addr) +{ + uint64_t srr1 = regs->msr; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: + case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: + mce_err->severity = MCE_SEV_FATAL; + break; + default: + mce_err->severity = MCE_SEV_ERROR_SYNC; + break; + } + + mce_err->initiator = MCE_INITIATOR_CPU; + + *addr = regs->nip; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_UE: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; + break; + case P9_SRR1_MC_IFETCH_SLB_PARITY: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + break; + case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + case P9_SRR1_MC_IFETCH_LINK_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_RA: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH; + break; + case P9_SRR1_MC_IFETCH_RA_TABLEWALK: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_STORE; + break; + case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN; + break; + default: + break; + } +} + +long __machine_check_early_realmode_p9(struct pt_regs *regs) +{ + uint64_t nip, addr; + long handled; + struct mce_error_info mce_error_info = { 0 }; + + nip = regs->nip; + + if (P9_SRR1_MC_LOADSTORE(regs->msr)) { + handled = mce_handle_derror_p9(regs); + mce_get_derror_p9(regs, &mce_error_info, &addr); + } else { + handled = mce_handle_ierror_p9(regs); + mce_get_ierror_p9(regs, &mce_error_info, &addr); + } + + /* Handle UE error. 
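A note on the split above: DSISR classifies the data-side (derror) causes while SRR1 classifies the instruction-side (ierror) causes. A minimal sketch of how the data-side classifier could be exercised with a synthetic register state, using only the names defined earlier in this file (illustrative, not part of the patch):

	/* Sketch: feed a synthetic DSISR to the P9 data-side classifier. */
	static void __init sketch_classify_erat_multihit(void)
	{
		struct pt_regs regs = { 0 };
		struct mce_error_info err = { 0 };
		uint64_t addr = 0;

		regs.dsisr = P9_DSISR_MC_ERAT_MULTIHIT;
		regs.dar = 0x1000;	/* reported back as the faulting address */
		mce_get_derror_p9(&regs, &err, &addr);
		/* expect: err.error_type == MCE_ERROR_TYPE_ERAT, addr == 0x1000 */
	}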
*/ + if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) + handled = mce_handle_ue_error(regs); + + save_mce_event(regs, handled, &mce_error_info, nip, addr); + return handled; +} diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index ae179cb1bb3c..c119044cad0d 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -67,7 +67,7 @@ PPC64_CACHES: * flush all bytes from start through stop-1 inclusive */ -_GLOBAL(flush_icache_range) +_GLOBAL_TOC(flush_icache_range) BEGIN_FTR_SECTION PURGE_PREFETCHED_INS blr @@ -120,7 +120,7 @@ EXPORT_SYMBOL(flush_icache_range) * * flush all bytes from start to stop-1 inclusive */ -_GLOBAL(flush_dcache_range) +_GLOBAL_TOC(flush_dcache_range) /* * Flush the data cache to memory diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index d5e2b8309939..eae61b044e9e 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -389,51 +389,40 @@ static int nvram_pstore_open(struct pstore_info *psi) /** * nvram_pstore_write - pstore write callback for nvram - * @type: Type of message logged - * @reason: reason behind dump (oops/panic) - * @id: identifier to indicate the write performed - * @part: pstore writes data to registered buffer in parts, - * part number will indicate the same. - * @count: Indicates oops count - * @compressed: Flag to indicate the log is compressed - * @size: number of bytes written to the registered buffer - * @psi: registered pstore_info structure + * @record: pstore record to write, with @id to be set * * Called by pstore_dump() when an oops or panic report is logged in the * printk buffer. * Returns 0 on successful write. */ -static int nvram_pstore_write(enum pstore_type_id type, - enum kmsg_dump_reason reason, - u64 *id, unsigned int part, int count, - bool compressed, size_t size, - struct pstore_info *psi) +static int nvram_pstore_write(struct pstore_record *record) { int rc; unsigned int err_type = ERR_TYPE_KERNEL_PANIC; struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf; /* part 1 has the recent messages from printk buffer */ - if (part > 1 || (type != PSTORE_TYPE_DMESG)) + if (record->part > 1 || (record->type != PSTORE_TYPE_DMESG)) return -1; if (clobbering_unread_rtas_event()) return -1; oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION); - oops_hdr->report_length = cpu_to_be16(size); + oops_hdr->report_length = cpu_to_be16(record->size); oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds()); - if (compressed) + if (record->compressed) err_type = ERR_TYPE_KERNEL_PANIC_GZ; rc = nvram_write_os_partition(&oops_log_partition, oops_buf, - (int) (sizeof(*oops_hdr) + size), err_type, count); + (int) (sizeof(*oops_hdr) + record->size), err_type, + record->count); if (rc != 0) return rc; - *id = part; + record->id = record->part; return 0; } @@ -442,10 +431,7 @@ static int nvram_pstore_write(enum pstore_type_id type, * Returns the length of the data we read from each partition. * Returns 0 if we've been called before. 
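The nvram conversion below collapses pstore's long callback argument lists into a single struct pstore_record. A minimal sketch of a write callback in the new style, using only the record fields touched in the hunk (the backend name is hypothetical):

	static int sketch_backend_write(struct pstore_record *record)
	{
		/* Only dmesg dumps, and only the most recent part. */
		if (record->type != PSTORE_TYPE_DMESG || record->part > 1)
			return -1;

		/* ... write record->size bytes from record->buf ... */
		record->id = record->part;	/* report back which slot was used */
		return 0;
	}

The payoff of the consolidated API is that adding a new record field no longer changes every backend's callback signature.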
*/ -static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, - int *count, struct timespec *time, char **buf, - bool *compressed, ssize_t *ecc_notice_size, - struct pstore_info *psi) +static ssize_t nvram_pstore_read(struct pstore_record *record) { struct oops_log_info *oops_hdr; unsigned int err_type, id_no, size = 0; @@ -459,40 +445,40 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, switch (nvram_type_ids[read_type]) { case PSTORE_TYPE_DMESG: part = &oops_log_partition; - *type = PSTORE_TYPE_DMESG; + record->type = PSTORE_TYPE_DMESG; break; case PSTORE_TYPE_PPC_COMMON: sig = NVRAM_SIG_SYS; part = &common_partition; - *type = PSTORE_TYPE_PPC_COMMON; - *id = PSTORE_TYPE_PPC_COMMON; - time->tv_sec = 0; - time->tv_nsec = 0; + record->type = PSTORE_TYPE_PPC_COMMON; + record->id = PSTORE_TYPE_PPC_COMMON; + record->time.tv_sec = 0; + record->time.tv_nsec = 0; break; #ifdef CONFIG_PPC_PSERIES case PSTORE_TYPE_PPC_RTAS: part = &rtas_log_partition; - *type = PSTORE_TYPE_PPC_RTAS; - time->tv_sec = last_rtas_event; - time->tv_nsec = 0; + record->type = PSTORE_TYPE_PPC_RTAS; + record->time.tv_sec = last_rtas_event; + record->time.tv_nsec = 0; break; case PSTORE_TYPE_PPC_OF: sig = NVRAM_SIG_OF; part = &of_config_partition; - *type = PSTORE_TYPE_PPC_OF; - *id = PSTORE_TYPE_PPC_OF; - time->tv_sec = 0; - time->tv_nsec = 0; + record->type = PSTORE_TYPE_PPC_OF; + record->id = PSTORE_TYPE_PPC_OF; + record->time.tv_sec = 0; + record->time.tv_nsec = 0; break; #endif #ifdef CONFIG_PPC_POWERNV case PSTORE_TYPE_PPC_OPAL: sig = NVRAM_SIG_FW; part = &skiboot_partition; - *type = PSTORE_TYPE_PPC_OPAL; - *id = PSTORE_TYPE_PPC_OPAL; - time->tv_sec = 0; - time->tv_nsec = 0; + record->type = PSTORE_TYPE_PPC_OPAL; + record->id = PSTORE_TYPE_PPC_OPAL; + record->time.tv_sec = 0; + record->time.tv_nsec = 0; break; #endif default: @@ -520,10 +506,10 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, return 0; } - *count = 0; + record->count = 0; if (part->os_partition) - *id = id_no; + record->id = id_no; if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) { size_t length, hdr_size; @@ -533,34 +519,35 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, /* Old format oops header had 2-byte record size */ hdr_size = sizeof(u16); length = be16_to_cpu(oops_hdr->version); - time->tv_sec = 0; - time->tv_nsec = 0; + record->time.tv_sec = 0; + record->time.tv_nsec = 0; } else { hdr_size = sizeof(*oops_hdr); length = be16_to_cpu(oops_hdr->report_length); - time->tv_sec = be64_to_cpu(oops_hdr->timestamp); - time->tv_nsec = 0; + record->time.tv_sec = be64_to_cpu(oops_hdr->timestamp); + record->time.tv_nsec = 0; } - *buf = kmemdup(buff + hdr_size, length, GFP_KERNEL); + record->buf = kmemdup(buff + hdr_size, length, GFP_KERNEL); kfree(buff); - if (*buf == NULL) + if (record->buf == NULL) return -ENOMEM; - *ecc_notice_size = 0; + record->ecc_notice_size = 0; if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) - *compressed = true; + record->compressed = true; else - *compressed = false; + record->compressed = false; return length; } - *buf = buff; + record->buf = buff; return part->size; } static struct pstore_info nvram_pstore_info = { .owner = THIS_MODULE, .name = "nvram", + .flags = PSTORE_FLAGS_DMESG, .open = nvram_pstore_open, .read = nvram_pstore_read, .write = nvram_pstore_write, diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a3944540fe0d..1c1b44ec7642 100644 --- a/arch/powerpc/kernel/prom_init.c +++ 
b/arch/powerpc/kernel/prom_init.c @@ -168,6 +168,14 @@ static unsigned long __initdata prom_tce_alloc_start; static unsigned long __initdata prom_tce_alloc_end; #endif +static bool __initdata prom_radix_disable; + +struct platform_support { + bool hash_mmu; + bool radix_mmu; + bool radix_gtse; +}; + /* Platforms codes are now obsolete in the kernel. Now only used within this * file and ultimately gone too. Feel free to change them if you need, they * are not shared with anything outside of this file anymore @@ -626,6 +634,12 @@ static void __init early_cmdline_parse(void) prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); #endif } + + opt = strstr(prom_cmd_line, "disable_radix"); + if (opt) { + prom_debug("Radix disabled from cmdline\n"); + prom_radix_disable = true; + } } #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) @@ -695,6 +709,8 @@ struct option_vector5 { u8 byte22; u8 intarch; u8 mmu; + u8 hash_ext; + u8 radix_ext; } __packed; struct option_vector6 { @@ -850,8 +866,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .reserved3 = 0, .subprocessors = 1, .intarch = 0, - .mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) | - OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE), + .mmu = 0, + .hash_ext = 0, + .radix_ext = 0, }, /* option vector 6: IBM PAPR hints */ @@ -990,6 +1007,92 @@ static int __init prom_count_smt_threads(void) } +static void __init prom_parse_mmu_model(u8 val, + struct platform_support *support) +{ + switch (val) { + case OV5_FEAT(OV5_MMU_DYNAMIC): + case OV5_FEAT(OV5_MMU_EITHER): /* Either Available */ + prom_debug("MMU - either supported\n"); + support->radix_mmu = !prom_radix_disable; + support->hash_mmu = true; + break; + case OV5_FEAT(OV5_MMU_RADIX): /* Only Radix */ + prom_debug("MMU - radix only\n"); + if (prom_radix_disable) { + /* + * If we __have__ to do radix, we're better off ignoring + * the command line rather than not booting. 
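For the vec5 changes above: each OV5_* constant packs a byte index together with a feature bit, so one name can address both the option-vector slot and the bit inside it. A sketch of the decode, assuming the usual definitions from asm/prom.h (OV5_INDX(x) as (x) >> 8 and OV5_FEAT(x) as (x) & 0xff; treat the exact macro bodies as an assumption here):

	/* Assumed encoding: 0xIIFF = feature bit 0xFF in byte 0xII of vector 5. */
	#define SKETCH_OV5_INDX(x)	((x) >> 8)
	#define SKETCH_OV5_FEAT(x)	((x) & 0xff)

	/* Set a feature bit in a local copy of option vector 5: */
	static void sketch_set_ov5(unsigned char *vec5, unsigned int feat)
	{
		vec5[SKETCH_OV5_INDX(feat)] |= SKETCH_OV5_FEAT(feat);
	}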
+ */ + prom_printf("WARNING: Ignoring cmdline option disable_radix\n"); + } + support->radix_mmu = true; + break; + case OV5_FEAT(OV5_MMU_HASH): + prom_debug("MMU - hash only\n"); + support->hash_mmu = true; + break; + default: + prom_debug("Unknown mmu support option: 0x%x\n", val); + break; + } +} + +static void __init prom_parse_platform_support(u8 index, u8 val, + struct platform_support *support) +{ + switch (index) { + case OV5_INDX(OV5_MMU_SUPPORT): /* MMU Model */ + prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support); + break; + case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */ + if (val & OV5_FEAT(OV5_RADIX_GTSE)) { + prom_debug("Radix - GTSE supported\n"); + support->radix_gtse = true; + } + break; + } +} + +static void __init prom_check_platform_support(void) +{ + struct platform_support supported = { + .hash_mmu = false, + .radix_mmu = false, + .radix_gtse = false + }; + int prop_len = prom_getproplen(prom.chosen, + "ibm,arch-vec-5-platform-support"); + if (prop_len > 1) { + int i; + u8 vec[prop_len]; + prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n", + prop_len); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", + &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 + , vec[i] + , vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], + &supported); + } + } + + if (supported.radix_mmu && supported.radix_gtse) { + /* Radix preferred - but we require GTSE for now */ + prom_debug("Asking for radix with GTSE\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); + ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE); + } else if (supported.hash_mmu) { + /* Default to hash mmu (if we can) */ + prom_debug("Asking for hash\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH); + } else { + /* We're probably on a legacy hypervisor */ + prom_debug("Assuming legacy hash support\n"); + } +} static void __init prom_send_capabilities(void) { @@ -997,6 +1100,9 @@ static void __init prom_send_capabilities(void) prom_arg_t ret; u32 cores; + /* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */ + prom_check_platform_support(); + root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { /* We need to tell the FW about the number of cores we support. @@ -2993,6 +3099,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_check_initrd(r3, r4); + /* + * Do early parsing of command line + */ + early_cmdline_parse(); + #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * On pSeries, inform the firmware about our capabilities @@ -3009,11 +3120,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, copy_and_flush(0, kbase, 0x100, 0); /* - * Do early parsing of command line - */ - early_cmdline_parse(); - - /* * Initialize memory management within prom_init */ prom_init_mem(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index adf2084f214b..f997154dfc41 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -236,6 +236,15 @@ static void cpu_ready_for_interrupts(void) mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); } + /* + * Fixup HFSCR:TM based on CPU features. The bit is set by our + * early asm init because at that point we haven't updated our + * CPU features from firmware and device-tree. Here we have, + * so let's do it. 
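One design note on prom_check_platform_support() above: u8 vec[prop_len] is a variable-length array on the early boot stack. A bounded read is the more defensive pattern; a sketch under that assumption (the cap is illustrative, the property is a short list of (index, value) byte pairs):

	u8 vec[8];	/* illustrative cap, not from the patch */
	int len = prom_getproplen(prom.chosen,
				  "ibm,arch-vec-5-platform-support");

	if (len > (int)sizeof(vec))
		len = sizeof(vec);
	if (len > 1)
		prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support",
			     vec, len);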
+ */ + if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP)) + mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM); + /* Set IR and DR in PACA MSR */ get_paca()->kernel_msr = MSR_KERNEL; } @@ -408,7 +417,10 @@ static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, info->line_size = lsize; info->block_size = bsize; info->log_block_size = __ilog2(bsize); - info->blocks_per_page = PAGE_SIZE / bsize; + if (bsize) + info->blocks_per_page = PAGE_SIZE / bsize; + else + info->blocks_per_page = 0; if (sets == 0) info->assoc = 0xffff; diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 3a3671172436..e9436c5e1e09 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -14,6 +14,7 @@ #include <linux/uprobes.h> #include <linux/key.h> #include <linux/context_tracking.h> +#include <linux/livepatch.h> #include <asm/hw_breakpoint.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -162,6 +163,9 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) tracehook_notify_resume(regs); } + if (thread_info_flags & _TIF_PATCH_PENDING) + klp_update_patch_state(current); + user_enter(); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 46f89e66a273..d68ed1f004a3 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -787,24 +787,21 @@ static struct sched_domain_topology_level powerpc_topology[] = { { NULL, }, }; -void __init smp_cpus_done(unsigned int max_cpus) +static __init long smp_setup_cpu_workfn(void *data __always_unused) { - cpumask_var_t old_mask; + smp_ops->setup_cpu(boot_cpuid); + return 0; +} - /* We want the setup_cpu() here to be called from CPU 0, but our * init thread may have been "borrowed" by another CPU in the meantime * se we pin us down to CPU 0 for a short while +void __init smp_cpus_done(unsigned int max_cpus) +{ + /* + * We want the setup_cpu() here to be called on the boot CPU, but + * init might run on any CPU, so make sure it's invoked on the boot + * CPU. */ - alloc_cpumask_var(&old_mask, GFP_NOWAIT); - cpumask_copy(old_mask, &current->cpus_allowed); - set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); - if (smp_ops && smp_ops->setup_cpu) - smp_ops->setup_cpu(boot_cpuid); - - set_cpus_allowed_ptr(current, old_mask); - - free_cpumask_var(old_mask); + work_on_cpu_safe(boot_cpuid, smp_setup_cpu_workfn, NULL); if (smp_ops && smp_ops->bringup_done) smp_ops->bringup_done(); @@ -812,7 +809,6 @@ void __init smp_cpus_done(unsigned int max_cpus) dump_numa_cpu_topology(); set_sched_topology(powerpc_topology); - } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 07b90725855e..2b33cfaac7b8 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -995,8 +995,10 @@ static void __init init_decrementer_clockevent(void) decrementer_clockevent.max_delta_ns = clockevent_delta2ns(decrementer_max, &decrementer_clockevent); + decrementer_clockevent.max_delta_ticks = decrementer_max; decrementer_clockevent.min_delta_ns = clockevent_delta2ns(2, &decrementer_clockevent); + decrementer_clockevent.min_delta_ticks = 2; register_decrementer_clockevent(cpu); } diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7394b770ae1f..1c24c894c908 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -312,6 +312,8 @@ SECTIONS NOSAVE_DATA } + BUG_TABLE + .
= ALIGN(PAGE_SIZE); _edata = .; PROVIDE32 (edata = .); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f3158fb16de3..710e491206ed 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -601,7 +601,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); - if (pte_write(pte)) + if (__pte_write(pte)) write_ok = 1; } local_irq_restore(flags); @@ -1487,6 +1487,10 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, /* start new resize */ resize = kzalloc(sizeof(*resize), GFP_KERNEL); + if (!resize) { + ret = -ENOMEM; + goto out; + } resize->order = shift; resize->kvm = kvm; INIT_WORK(&resize->work, resize_hpt_prepare_work); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6fca970373ee..ce6f2121fffe 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -256,7 +256,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } pte = kvmppc_read_update_linux_pte(ptep, writing); if (pte_present(pte) && !pte_protnone(pte)) { - if (writing && !pte_write(pte)) + if (writing && !__pte_write(pte)) /* make the actual HPTE be read-only */ ptel = hpte_make_readonly(ptel); is_ci = pte_ci(pte); diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 0e649d72fe8d..ed7dfce331e0 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -14,12 +14,13 @@ obj-y += string.o alloc.o crtsavres.o code-patching.o \ obj-$(CONFIG_PPC32) += div64.o copy_32.o -obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \ +obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ copyuser_power7.o string_64.o copypage_power7.o memcpy_power7.o \ memcpy_64.o memcmp_64.o obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o +obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o obj-y += checksum_$(BITS).o checksum_wrappers.o diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index ff0d894d7ff9..8aedbb5f4b86 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -477,18 +477,6 @@ _GLOBAL(__copy_tofrom_user) bdnz 130b /* then clear out the destination: r3 bytes starting at 4(r6) */ 132: mfctr r3 - srwi. r0,r3,2 - li r9,0 - mtctr r0 - beq 113f -112: stwu r9,4(r6) - bdnz 112b -113: andi. r0,r3,3 - mtctr r0 - beq 120f -114: stb r9,4(r6) - addi r6,r6,1 - bdnz 114b 120: blr EX_TABLE(30b,108b) @@ -497,7 +485,5 @@ _GLOBAL(__copy_tofrom_user) EX_TABLE(41b,111b) EX_TABLE(130b,132b) EX_TABLE(131b,120b) - EX_TABLE(112b,120b) - EX_TABLE(114b,120b) EXPORT_SYMBOL(__copy_tofrom_user) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index aee6e24e81ab..08da06e1bd72 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -319,32 +319,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) blr /* - * here we have trapped again, need to clear ctr bytes starting at r3 + * here we have trapped again, amount remaining is in ctr. */ -143: mfctr r5 - li r0,0 - mr r4,r3 - mr r3,r5 /* return the number of bytes not copied */ -1: andi. r9,r4,7 - beq 3f -90: stb r0,0(r4) - addic. r5,r5,-1 - addi r4,r4,1 - bne 1b - blr -3: cmpldi cr1,r5,8 - srdi r9,r5,3 - andi. 
r5,r5,7 - blt cr1,93f - mtctr r9 -91: std r0,0(r4) - addi r4,r4,8 - bdnz 91b -93: beqlr - mtctr r5 -92: stb r0,0(r4) - addi r4,r4,1 - bdnz 92b +143: mfctr r3 blr /* @@ -389,10 +366,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) ld r5,-8(r1) add r6,r6,r5 subf r3,r3,r6 /* #bytes not copied */ -190: -191: -192: - blr /* #bytes not copied in r3 */ + blr EX_TABLE(20b,120b) EX_TABLE(220b,320b) @@ -451,9 +425,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) EX_TABLE(88b,188b) EX_TABLE(43b,143b) EX_TABLE(89b,189b) - EX_TABLE(90b,190b) - EX_TABLE(91b,191b) - EX_TABLE(92b,192b) /* * Routine to copy a whole page of data, optimized for POWER4. diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 846dba2c6360..9c542ec70c5b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case LARX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1823,8 +1821,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case STCX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1849,8 +1845,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case LOAD: - if (regs->msr & MSR_LE) - return 0; err = read_mem(®s->gpr[op.reg], op.ea, size, regs); if (!err) { if (op.type & SIGNEXT) @@ -1862,8 +1856,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case LOAD_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); else @@ -1872,15 +1864,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case LOAD_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); goto ldst_done; #endif @@ -1903,8 +1891,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case STORE: - if (regs->msr & MSR_LE) - return 0; if ((op.type & UPDATE) && size == sizeof(long) && op.reg == 1 && op.update_reg == 1 && !(regs->msr & MSR_PR) && @@ -1917,8 +1903,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case STORE_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); else @@ -1927,15 +1911,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case STORE_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); goto ldst_done; #endif diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c new file mode 100644 index 000000000000..2534c1447554 --- /dev/null +++ b/arch/powerpc/lib/test_emulate_step.c @@ -0,0 +1,434 @@ +/* + * Simple sanity test for emulate_step load/store instructions. + * + * Copyright IBM Corp. 
2016 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) "emulate_step_test: " fmt + +#include <linux/ptrace.h> +#include <asm/sstep.h> +#include <asm/ppc-opcode.h> + +#define IMM_L(i) ((uintptr_t)(i) & 0xffff) + +/* + * Defined with TEST_ prefix so it does not conflict with other + * definitions. + */ +#define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ + ___PPC_RA(base) | ((i) & 0xfffc)) +#define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b) | \ + __PPC_EH(eh)) +#define TEST_STDCX(s, a, b) (PPC_INST_STDCX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFSX(s, a, b) (PPC_INST_STFSX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFDX(t, a, b) (PPC_INST_LFDX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFDX(s, a, b) (PPC_INST_STFDX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LVX(t, a, b) (PPC_INST_LVX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STVX(s, a, b) (PPC_INST_STVX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LXVD2X(s, a, b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) +#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) + + +static void __init init_pt_regs(struct pt_regs *regs) +{ + static unsigned long msr; + static bool msr_cached; + + memset(regs, 0, sizeof(struct pt_regs)); + + if (likely(msr_cached)) { + regs->msr = msr; + return; + } + + asm volatile("mfmsr %0" : "=r"(regs->msr)); + + regs->msr |= MSR_FP; + regs->msr |= MSR_VEC; + regs->msr |= MSR_VSX; + + msr = regs->msr; + msr_cached = true; +} + +static void __init show_result(char *ins, char *result) +{ + pr_info("%-14s : %s\n", ins, result); +} + +static void __init test_ld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + init_pt_regs(&regs); + regs.gpr[3] = (unsigned long) &a; + + /* ld r5, 0(r3) */ + stepped = emulate_step(&regs, TEST_LD(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("ld", "PASS"); + else + show_result("ld", "FAIL"); +} + +static void __init test_lwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + init_pt_regs(&regs); + regs.gpr[3] = (unsigned long) &a; + + /* lwz r5, 0(r3) */ + stepped = emulate_step(&regs, TEST_LWZ(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("lwz", "PASS"); + else + show_result("lwz", "FAIL"); +} + +static void __init test_lwzx(void) +{ + struct pt_regs regs; + unsigned int a[3] = {0x0, 0x0, 0x1234}; + int stepped = -1; + + init_pt_regs(&regs); + regs.gpr[3] = (unsigned long) a; + regs.gpr[4] = 8; + regs.gpr[5] = 0x8765; + + /* lwzx r5, r3, r4 */ + stepped = emulate_step(&regs, TEST_LWZX(5, 3, 4)); + if (stepped == 1 && regs.gpr[5] == a[2]) + show_result("lwzx", "PASS"); + else + show_result("lwzx", "FAIL"); +} + +static void __init test_std(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int
stepped = -1; + + init_pt_regs(&regs); + regs.gpr[3] = (unsigned long) &a; + regs.gpr[5] = 0x5678; + + /* std r5, 0(r3) */ + stepped = emulate_step(&regs, TEST_STD(5, 3, 0)); + if (stepped == 1 && regs.gpr[5] == a) + show_result("std", "PASS"); + else + show_result("std", "FAIL"); +} + +static void __init test_ldarx_stdcx(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */ + + init_pt_regs(&regs); + asm volatile("mfcr %0" : "=r"(regs.ccr)); + + + /*** ldarx ***/ + + regs.gpr[3] = (unsigned long) &a; + regs.gpr[4] = 0; + regs.gpr[5] = 0x5678; + + /* ldarx r5, r3, r4, 0 */ + stepped = emulate_step(&regs, TEST_LDARX(5, 3, 4, 0)); + + /* + * Don't touch 'a' here. Touching 'a' can cause a load/store + * of 'a', which would result in failure of the subsequent stdcx. + * Instead, use a hardcoded value for comparison. + */ + if (stepped <= 0 || regs.gpr[5] != 0x1234) { + show_result("ldarx / stdcx.", "FAIL (ldarx)"); + return; + } + + + /*** stdcx. ***/ + + regs.gpr[5] = 0x9ABC; + + /* stdcx. r5, r3, r4 */ + stepped = emulate_step(&regs, TEST_STDCX(5, 3, 4)); + + /* + * Two possible scenarios indicate successful emulation + * of stdcx.: + * 1. The reservation is active and the store is performed. In this + * case the cr0.eq bit will be set to 1. + * 2. The reservation is not active and the store is not performed. + * In this case the cr0.eq bit will be set to 0. + */ + if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq)) + || (regs.gpr[5] != a && !(regs.ccr & cr0_eq)))) + show_result("ldarx / stdcx.", "PASS"); + else + show_result("ldarx / stdcx.", "FAIL (stdcx.)"); +} + +#ifdef CONFIG_PPC_FPU +static void __init test_lfsx_stfsx(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + init_pt_regs(&regs); + + + /*** lfsx ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfsx frt10, r3, r4 */ + stepped = emulate_step(&regs, TEST_LFSX(10, 3, 4)); + + if (stepped == 1) + show_result("lfsx", "PASS"); + else + show_result("lfsx", "FAIL"); + + + /*** stfsx ***/ + + c.a = 678.91; + + /* stfsx frs10, r3, r4 */ + stepped = emulate_step(&regs, TEST_STFSX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfsx", "PASS"); + else + show_result("stfsx", "FAIL"); +} + +static void __init test_lfdx_stfdx(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + init_pt_regs(&regs); + + + /*** lfdx ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfdx frt10, r3, r4 */ + stepped = emulate_step(&regs, TEST_LFDX(10, 3, 4)); + + if (stepped == 1) + show_result("lfdx", "PASS"); + else + show_result("lfdx", "FAIL"); + + + /*** stfdx ***/ + + c.a = 987654.32; + + /* stfdx frs10, r3, r4 */ + stepped = emulate_step(&regs, TEST_STFDX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfdx", "PASS"); + else + show_result("stfdx", "FAIL"); +} +#else +static void __init test_lfsx_stfsx(void) +{ + show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); +} + +static void __init test_lfdx_stfdx(void) +{ + show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)"); +} +#endif /* CONFIG_PPC_FPU */ + +#ifdef CONFIG_ALTIVEC +static void __init test_lvx_stvx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32
cached_b[4]; + int stepped = -1; + + init_pt_regs(&regs); + + + /*** lvx ***/ + + cached_b[0] = c.b[0] = 923745; + cached_b[1] = c.b[1] = 2139478; + cached_b[2] = c.b[2] = 9012; + cached_b[3] = c.b[3] = 982134; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lvx vrt10, r3, r4 */ + stepped = emulate_step(&regs, TEST_LVX(10, 3, 4)); + + if (stepped == 1) + show_result("lvx", "PASS"); + else + show_result("lvx", "FAIL"); + + + /*** stvx ***/ + + c.b[0] = 4987513; + c.b[1] = 84313948; + c.b[2] = 71; + c.b[3] = 498532; + + /* stvx vrs10, r3, r4 */ + stepped = emulate_step(&regs, TEST_STVX(10, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stvx", "PASS"); + else + show_result("stvx", "FAIL"); +} +#else +static void __init test_lvx_stvx(void) +{ + show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)"); + show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)"); +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +static void __init test_lxvd2x_stxvd2x(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(&regs); + + + /*** lxvd2x ***/ + + cached_b[0] = c.b[0] = 18233; + cached_b[1] = c.b[1] = 34863571; + cached_b[2] = c.b[2] = 834; + cached_b[3] = c.b[3] = 6138911; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lxvd2x vsr39, r3, r4 */ + stepped = emulate_step(&regs, TEST_LXVD2X(39, 3, 4)); + + if (stepped == 1) + show_result("lxvd2x", "PASS"); + else + show_result("lxvd2x", "FAIL"); + + + /*** stxvd2x ***/ + + c.b[0] = 21379463; + c.b[1] = 87; + c.b[2] = 374234; + c.b[3] = 4; + + /* stxvd2x vsr39, r3, r4 */ + stepped = emulate_step(&regs, TEST_STXVD2X(39, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stxvd2x", "PASS"); + else + show_result("stxvd2x", "FAIL"); +} +#else +static void __init test_lxvd2x_stxvd2x(void) +{ + show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +static int __init test_emulate_step(void) +{ + test_ld(); + test_lwz(); + test_lwzx(); + test_std(); + test_ldarx_stdcx(); + test_lfsx_stfsx(); + test_lfdx_stfdx(); + test_lvx_stvx(); + test_lxvd2x_stxvd2x(); + + return 0; +} +late_initcall(test_emulate_step); diff --git a/arch/powerpc/lib/usercopy_64.c b/arch/powerpc/lib/usercopy_64.c deleted file mode 100644 index 9bd3a3dad78d..000000000000 --- a/arch/powerpc/lib/usercopy_64.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Functions which are too large to be inlined. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version.
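Every test above ends with the same stepped/state check before show_result(). If more tests get added, the pattern factors naturally into a helper; an illustrative refactor, not part of the patch:

	static void __init check_step(char *ins, int stepped, bool state_ok)
	{
		show_result(ins, (stepped == 1 && state_ok) ? "PASS" : "FAIL");
	}

	/* usage, mirroring test_lwz() above:
	 *	stepped = emulate_step(&regs, TEST_LWZ(5, 3, 0));
	 *	check_step("lwz", stepped, regs.gpr[5] == a);
	 */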
- */ -#include <linux/module.h> -#include <linux/uaccess.h> - -unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) -{ - if (likely(access_ok(VERIFY_READ, from, n))) - n = __copy_from_user(to, from, n); - else - memset(to, 0, n); - return n; -} - -unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) -{ - if (likely(access_ok(VERIFY_WRITE, to, n))) - n = __copy_to_user(to, from, n); - return n; -} - -unsigned long copy_in_user(void __user *to, const void __user *from, - unsigned long n) -{ - might_sleep(); - if (likely(access_ok(VERIFY_READ, from, n) && - access_ok(VERIFY_WRITE, to, n))) - n =__copy_tofrom_user(to, from, n); - return n; -} - -EXPORT_SYMBOL(copy_from_user); -EXPORT_SYMBOL(copy_to_user); -EXPORT_SYMBOL(copy_in_user); - diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index cc332608e656..65bb8f33b399 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -638,6 +638,10 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long psize = batch->psize; int ssize = batch->ssize; int i; + unsigned int use_local; + + use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && + mmu_psize_defs[psize].tlbiel && !cxl_ctx_in_use(); local_irq_save(flags); @@ -667,8 +671,7 @@ static void native_flush_hash_range(unsigned long number, int local) } pte_iterate_hashed_end(); } - if (mmu_has_feature(MMU_FTR_TLBIEL) && - mmu_psize_defs[psize].tlbiel && local) { + if (use_local) { asm volatile("ptesync":::"memory"); for (i = 0; i < number; i++) { vpn = batch->vpn[i]; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 6aa3b76aa0d6..c22f207aa656 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -356,25 +356,48 @@ static void early_check_vec5(void) unsigned long root, chosen; int size; const u8 *vec5; + u8 mmu_supported; root = of_get_flat_dt_root(); chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); - if (chosen == -FDT_ERR_NOTFOUND) + if (chosen == -FDT_ERR_NOTFOUND) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; return; + } vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); - if (!vec5) + if (!vec5) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; return; - if (size <= OV5_INDX(OV5_MMU_RADIX_300) || - !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300))) - /* Hypervisor doesn't support radix */ + } + if (size <= OV5_INDX(OV5_MMU_SUPPORT)) { cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + + /* Check for supported configuration */ + mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] & + OV5_FEAT(OV5_MMU_SUPPORT); + if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) { + /* Hypervisor only supports radix - check enabled && GTSE */ + if (!early_radix_enabled()) { + pr_warn("WARNING: Ignoring cmdline option disable_radix\n"); + } + if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] & + OV5_FEAT(OV5_RADIX_GTSE))) { + pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n"); + } + /* Do radix anyway - the hypervisor said we had to */ + cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; + } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) { + /* Hypervisor only supports hash - disable radix */ + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + } } void __init mmu_early_init_devtree(void) { /* Disable radix mode based on kernel command line. */ - /* We don't yet have the machinery to do radix as a guest. 
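Context for the usercopy_64.c removal above: the out-of-line copy_from_user() enforced the rule that a failed access_ok() must still zero the kernel destination so stale data cannot leak to the caller. A sketch of that contract, reconstructed from the deleted code:

	unsigned long sketch_copy_from_user(void *to, const void __user *from,
					    unsigned long n)
	{
		if (likely(access_ok(VERIFY_READ, from, n)))
			return __copy_from_user(to, from, n);
		memset(to, 0, n);	/* never expose uninitialized bytes */
		return n;		/* everything counts as not copied */
	}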
*/ - if (disable_radix || !(mfmsr() & MSR_HV)) + if (disable_radix) cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; /* @@ -383,7 +406,7 @@ void __init mmu_early_init_devtree(void) * even though the ibm,architecture-vec-5 property created by * skiboot doesn't have the necessary bits set. */ - if (early_radix_enabled() && !(mfmsr() & MSR_HV)) + if (!(mfmsr() & MSR_HV)) early_check_vec5(); if (early_radix_enabled()) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 2a590a98e652..c28165d8970b 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -186,6 +186,10 @@ static void __init radix_init_pgtable(void) */ register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } static void __init radix_init_partition_table(void) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 595dd718ea87..2ff13249f87a 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -188,6 +188,8 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) sdsync = POWER7P_MMCRA_SDAR_VALID; else if (ppmu->flags & PPMU_ALT_SIPR) sdsync = POWER6_MMCRA_SDSYNC; + else if (ppmu->flags & PPMU_NO_SIAR) + sdsync = MMCRA_SAMPLE_ENABLE; else sdsync = MMCRA_SDSYNC; diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index e79fb5fb817d..cd951fd231c4 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -65,12 +65,41 @@ static bool is_event_valid(u64 event) return !(event & ~valid_mask); } -static u64 mmcra_sdar_mode(u64 event) +static inline bool is_event_marked(u64 event) { - if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) - return p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + if (event & EVENT_IS_MARKED) + return true; + + return false; +} - return MMCRA_SDAR_MODE_TLB; +static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) +{ + /* + * MMCRA[SDAR_MODE] specifies how the SDAR should be updated in + * continuous sampling mode. + * + * In case of Power8: + * MMCRA[SDAR_MODE] will be programmed as "0b01" for continuous sampling + * mode and will be unchanged when setting MMCRA[63] (marked events). + * + * In case of Power9: + * Marked events: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'), + * likewise if the group already has any marked events. + * Non-marked events (on DD1): + * MMCRA[SDAR_MODE] will be set to 0b01. + * Otherwise: + * MMCRA[SDAR_MODE] will be set from the event code.
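The MMCRA_SDAR_MODE_NO_UPDATES define added below is the complement of the two-bit SDAR_MODE field, so "&=" clears the field in place and "|=" then selects a mode. The idiom in isolation, using the shift and mode values from isa207-common.h below:

	unsigned long mmcra = 0;

	mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;	/* 0b00: no SDAR updates */
	mmcra |= MMCRA_SDAR_MODE_TLB;		/* 0b01: update SDAR on TLB miss */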
+ */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE)) + *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES; + else if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) + *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + else if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + *mmcra |= MMCRA_SDAR_MODE_TLB; + } else + *mmcra |= MMCRA_SDAR_MODE_TLB; } static u64 thresh_cmp_val(u64 value) @@ -180,7 +209,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) value |= CNST_L1_QUAL_VAL(cache); } - if (event & EVENT_IS_MARKED) { + if (is_event_marked(event)) { mask |= CNST_SAMPLE_MASK; value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT); } @@ -276,7 +305,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev, } /* In continuous sampling mode, update SDAR on TLB miss */ - mmcra |= mmcra_sdar_mode(event[i]); + mmcra_sdar_mode(event[i], &mmcra); if (event[i] & EVENT_IS_L1) { cache = event[i] >> EVENT_CACHE_SEL_SHIFT; @@ -285,7 +314,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT; } - if (event[i] & EVENT_IS_MARKED) { + if (is_event_marked(event[i])) { mmcra |= MMCRA_SAMPLE_ENABLE; val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index cf9bd8990159..899210f14ee4 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -246,6 +246,7 @@ #define MMCRA_THR_CMP_SHIFT 32 #define MMCRA_SDAR_MODE_SHIFT 42 #define MMCRA_SDAR_MODE_TLB (1ull << MMCRA_SDAR_MODE_SHIFT) +#define MMCRA_SDAR_MODE_NO_UPDATES ~(0x3ull << MMCRA_SDAR_MODE_SHIFT) #define MMCRA_IFM_SHIFT 30 /* MMCR1 Threshold Compare bit constant for power9 */ diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6693f75e93d1..da8a0f7a035c 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -39,8 +39,8 @@ opal_tracepoint_refcount: BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r12,opal_tracepoint_refcount@toc(r2); \ - cmpdi r12,0; \ + ld r11,opal_tracepoint_refcount@toc(r2); \ + cmpdi r11,0; \ bne- LABEL; \ 1: diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 86d9fde93c17..e0f856bfbfe8 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -395,7 +395,6 @@ static int opal_recover_mce(struct pt_regs *regs, struct machine_check_event *evt) { int recovered = 0; - uint64_t ea = get_mce_fault_addr(evt); if (!(regs->msr & MSR_RI)) { /* If MSR_RI isn't set, we cannot recover */ @@ -404,26 +403,18 @@ } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { /* Platform corrected itself */ recovered = 1; - } else if (ea && !is_kernel_addr(ea)) { + } else if (evt->severity == MCE_SEV_FATAL) { + /* Fatal machine check */ + pr_err("Machine check interrupt is fatal\n"); + recovered = 0; + } else if ((evt->severity == MCE_SEV_ERROR_SYNC) && + (user_mode(regs) && !is_global_init(current))) { /* - * Faulting address is not in kernel text. We should be fine. - * We need to find which process uses this address. * For now, kill the task if we have received an exception when * in userspace. * * TODO: Queue up this address for hwpoisoning later.
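The rewritten opal_recover_mce() above reduces to a short decision ladder; summarized here in evaluation order, with the conditions taken from the hunk:

	/*
	 * 1. !(regs->msr & MSR_RI)          -> not recoverable (state unsafe)
	 * 2. disposition == RECOVERED       -> recovered (platform fixed it)
	 * 3. severity == MCE_SEV_FATAL      -> not recoverable, log it
	 * 4. MCE_SEV_ERROR_SYNC hitting a user task (and not global init)
	 *                                   -> SIGBUS the task, recovered
	 * 5. anything else                  -> not recovered
	 */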
*/ - if (user_mode(regs) && !is_global_init(current)) { - _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); - recovered = 1; - } else - recovered = 0; - } else if (user_mode(regs) && !is_global_init(current) && - evt->severity == MCE_SEV_ERROR_SYNC) { - /* - * If we have received a synchronous error when in userspace - * kill the task. - */ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); recovered = 1; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6901a06da2f9..e36738291c32 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1775,17 +1775,20 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev) } static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, - struct pci_bus *bus) + struct pci_bus *bus, + bool add_to_group) { struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); set_dma_offset(&dev->dev, pe->tce_bypass_base); - iommu_add_device(&dev->dev); + if (add_to_group) + iommu_add_device(&dev->dev); if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) - pnv_ioda_setup_bus_dma(pe, dev->subordinate); + pnv_ioda_setup_bus_dma(pe, dev->subordinate, + add_to_group); } } @@ -2191,7 +2194,7 @@ found: set_iommu_table_base(&pe->pdev->dev, tbl); iommu_add_device(&pe->pdev->dev); } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); return; fail: @@ -2426,6 +2429,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) pnv_pci_ioda2_set_bypass(pe, false); pnv_pci_ioda2_unset_window(&pe->table_group, 0); + if (pe->pbus) + pnv_ioda_setup_bus_dma(pe, pe->pbus, false); pnv_ioda2_table_free(tbl); } @@ -2435,6 +2440,8 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) table_group); pnv_pci_ioda2_setup_default_config(pe); + if (pe->pbus) + pnv_ioda_setup_bus_dma(pe, pe->pbus, false); } static struct iommu_table_group_ops pnv_pci_ioda2_ops = { @@ -2624,6 +2631,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); + if ((level_shift - 3) * levels + page_shift >= 60) + return -EINVAL; + /* Allocate TCE table */ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, levels, tce_table_size, &offset, &total_allocated); @@ -2728,7 +2738,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (pe->flags & PNV_IODA_PE_DEV) iommu_add_device(&pe->pdev->dev); else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); } #ifdef CONFIG_PCI_MSI diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 251060cf1713..8b1fe895daa3 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -751,7 +751,9 @@ void __init hpte_init_pseries(void) mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; - mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; + + if (firmware_has_feature(FW_FEATURE_HPT_RESIZE)) + mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; } void radix_init_pseries(void) diff --git a/arch/powerpc/purgatory/trampoline.S 
b/arch/powerpc/purgatory/trampoline.S index f9760ccf4032..3696ea6c4826 100644 --- a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline.S @@ -116,13 +116,13 @@ dt_offset: .data .balign 8 -.globl sha256_digest -sha256_digest: +.globl purgatory_sha256_digest +purgatory_sha256_digest: .skip 32 - .size sha256_digest, . - sha256_digest + .size purgatory_sha256_digest, . - purgatory_sha256_digest .balign 8 -.globl sha_regions -sha_regions: +.globl purgatory_sha_regions +purgatory_sha_regions: .skip 8 * 2 * 16 - .size sha_regions, . - sha_regions + .size purgatory_sha_regions, . - purgatory_sha_regions diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ada29eaed6e2..f523ac883150 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -274,7 +274,9 @@ failed: if (bank->disk->major > 0) unregister_blkdev(bank->disk->major, bank->disk->disk_name); - del_gendisk(bank->disk); + if (bank->disk->flags & GENHD_FL_UP) + del_gendisk(bank->disk); + put_disk(bank->disk); } device->dev.platform_data = NULL; if (bank->io_addr != 0) @@ -299,6 +301,7 @@ axon_ram_remove(struct platform_device *device) device_remove_file(&device->dev, &dev_attr_ecc); free_irq(bank->irq_id, device); del_gendisk(bank->disk); + put_disk(bank->disk); iounmap((void __iomem *) bank->io_addr); kfree(bank); diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index f9670eabfcfa..b53f80f0b4d8 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -91,6 +91,16 @@ static unsigned int icp_opal_get_irq(void) static void icp_opal_set_cpu_priority(unsigned char cppr) { + /* + * Here be dragons. The caller has asked to allow only IPIs and not + * external interrupts. But OPAL XIVE doesn't support that. So instead + * of allowing no interrupts, allow all. That's still not right, but + * currently the only caller who does this is xics_migrate_irqs_away() + * and it works in that case. + */ + if (cppr >= DEFAULT_PRIORITY) + cppr = LOWEST_PRIORITY; + xics_set_base_cppr(cppr); opal_int_set_cppr(cppr); iosync(); diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 69d858e51ac7..23efe4e42172 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -20,6 +20,7 @@ #include <linux/of.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/delay.h> #include <asm/prom.h> #include <asm/io.h> @@ -198,9 +199,6 @@ void xics_migrate_irqs_away(void) /* Remove ourselves from the global interrupt queue */ xics_set_cpu_giq(xics_default_distrib_server, 0); - /* Allow IPIs again... */ - icp_ops->set_priority(DEFAULT_PRIORITY); - for_each_irq_desc(virq, desc) { struct irq_chip *chip; long server; @@ -255,6 +253,19 @@ void xics_migrate_irqs_away(void) unlock: raw_spin_unlock_irqrestore(&desc->lock, flags); } + + /* Allow "sufficient" time to drop any in-flight IRQs */ + mdelay(5); + + /* + * Allow IPIs again. This is done at the very end, after migrating all + * interrupts; the expectation is that we'll only get woken up by an IPI + * beyond this point, but leave externals masked just to be + * safe. If we're using icp-opal this may actually allow all + * interrupts anyway, but that should be OK. + */ + icp_ops->set_priority(DEFAULT_PRIORITY); + } #endif /* CONFIG_HOTPLUG_CPU */
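On the icp-opal change above: any request to mask externals while keeping IPIs (a cppr at or above DEFAULT_PRIORITY) gets widened to "allow everything", since OPAL cannot express the intermediate state. The clamp in isolation (in this tree DEFAULT_PRIORITY and LOWEST_PRIORITY come from asm/xics.h, as 5 and 0xFF respectively; treat those values as an assumption here):

	static unsigned char sketch_clamp_cppr(unsigned char cppr)
	{
		/* 0xFF is the least favored level, i.e. all interrupts open */
		return (cppr >= DEFAULT_PRIORITY) ? LOWEST_PRIORITY : cppr;
	}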