Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--  arch/powerpc/kernel/.gitignore | 1
-rw-r--r--  arch/powerpc/kernel/Makefile | 46
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 32
-rw-r--r--  arch/powerpc/kernel/cpu_setup_fsl_booke.S | 2
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.S | 6
-rw-r--r--  arch/powerpc/kernel/cputable.c | 6
-rw-r--r--  arch/powerpc/kernel/crash.c | 374
-rw-r--r--  arch/powerpc/kernel/dawr.c | 6
-rw-r--r--  arch/powerpc/kernel/dbell.c | 6
-rw-r--r--  arch/powerpc/kernel/dma-iommu.c | 13
-rw-r--r--  arch/powerpc/kernel/dt_cpu_ftrs.c | 48
-rw-r--r--  arch/powerpc/kernel/early_32.c | 9
-rw-r--r--  arch/powerpc/kernel/eeh.c | 313
-rw-r--r--  arch/powerpc/kernel/eeh_cache.c | 47
-rw-r--r--  arch/powerpc/kernel/eeh_dev.c | 2
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c | 304
-rw-r--r--  arch/powerpc/kernel/eeh_event.c | 34
-rw-r--r--  arch/powerpc/kernel/eeh_pe.c | 145
-rw-r--r--  arch/powerpc/kernel/eeh_sysfs.c | 40
-rw-r--r--  arch/powerpc/kernel/entry_32.S | 78
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 49
-rw-r--r--  arch/powerpc/kernel/exceptions-64e.S | 34
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 1666
-rw-r--r--  arch/powerpc/kernel/fadump.c | 1353
-rw-r--r--  arch/powerpc/kernel/fpu.S | 3
-rw-r--r--  arch/powerpc/kernel/fsl_booke_entry_mapping.S | 25
-rw-r--r--  arch/powerpc/kernel/head_32.S | 117
-rw-r--r--  arch/powerpc/kernel/head_32.h | 201
-rw-r--r--  arch/powerpc/kernel/head_40x.S | 2
-rw-r--r--  arch/powerpc/kernel/head_64.S | 8
-rw-r--r--  arch/powerpc/kernel/head_8xx.S | 217
-rw-r--r--  arch/powerpc/kernel/head_booke.h | 2
-rw-r--r--  arch/powerpc/kernel/head_fsl_booke.S | 68
-rw-r--r--  arch/powerpc/kernel/hw_breakpoint.c | 173
-rw-r--r--  arch/powerpc/kernel/idle.c | 25
-rw-r--r--  arch/powerpc/kernel/idle_book3s.S | 20
-rw-r--r--  arch/powerpc/kernel/idle_power4.S | 83
-rw-r--r--  arch/powerpc/kernel/ima_arch.c | 78
-rw-r--r--  arch/powerpc/kernel/ima_kexec.c | 219
-rw-r--r--  arch/powerpc/kernel/io-workarounds.c | 13
-rw-r--r--  arch/powerpc/kernel/iommu.c | 97
-rw-r--r--  arch/powerpc/kernel/irq.c | 26
-rw-r--r--  arch/powerpc/kernel/kexec_elf_64.c | 660
-rw-r--r--  arch/powerpc/kernel/kvm.c | 58
-rw-r--r--  arch/powerpc/kernel/kvm_emul.S | 16
-rw-r--r--  arch/powerpc/kernel/legacy_serial.c | 4
-rw-r--r--  arch/powerpc/kernel/machine_kexec.c | 279
-rw-r--r--  arch/powerpc/kernel/machine_kexec_32.c | 69
-rw-r--r--  arch/powerpc/kernel/machine_kexec_64.c | 408
-rw-r--r--  arch/powerpc/kernel/machine_kexec_file_64.c | 254
-rw-r--r--  arch/powerpc/kernel/mce.c | 71
-rw-r--r--  arch/powerpc/kernel/mce_power.c | 50
-rw-r--r--  arch/powerpc/kernel/misc_32.S | 623
-rw-r--r--  arch/powerpc/kernel/misc_64.S | 109
-rw-r--r--  arch/powerpc/kernel/note.S | 40
-rw-r--r--  arch/powerpc/kernel/paca.c | 52
-rw-r--r--  arch/powerpc/kernel/pci-common.c | 50
-rw-r--r--  arch/powerpc/kernel/pci-hotplug.c | 8
-rw-r--r--  arch/powerpc/kernel/pci_32.c | 4
-rw-r--r--  arch/powerpc/kernel/pci_64.c | 12
-rw-r--r--  arch/powerpc/kernel/pci_dn.c | 68
-rw-r--r--  arch/powerpc/kernel/pci_of_scan.c | 67
-rw-r--r--  arch/powerpc/kernel/proc_powerpc.c | 10
-rw-r--r--  arch/powerpc/kernel/process.c | 121
-rw-r--r--  arch/powerpc/kernel/prom.c | 8
-rw-r--r--  arch/powerpc/kernel/prom_init.c | 147
-rw-r--r--  arch/powerpc/kernel/prom_init_check.sh | 5
-rw-r--r--  arch/powerpc/kernel/ptrace.c | 85
-rw-r--r--  arch/powerpc/kernel/rtas-proc.c | 70
-rw-r--r--  arch/powerpc/kernel/rtas.c | 15
-rw-r--r--  arch/powerpc/kernel/rtas_flash.c | 34
-rw-r--r--  arch/powerpc/kernel/rtasd.c | 14
-rw-r--r--  arch/powerpc/kernel/secure_boot.c | 50
-rw-r--r--  arch/powerpc/kernel/security.c | 123
-rw-r--r--  arch/powerpc/kernel/secvar-ops.c | 17
-rw-r--r--  arch/powerpc/kernel/secvar-sysfs.c | 248
-rw-r--r--  arch/powerpc/kernel/setup-common.c | 37
-rw-r--r--  arch/powerpc/kernel/setup.h | 2
-rw-r--r--  arch/powerpc/kernel/setup_32.c | 22
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 31
-rw-r--r--  arch/powerpc/kernel/stacktrace.c | 2
-rw-r--r--  arch/powerpc/kernel/syscalls.c | 4
-rw-r--r--  arch/powerpc/kernel/syscalls/syscall.tbl | 2
-rw-r--r--  arch/powerpc/kernel/sysfs.c | 20
-rw-r--r--  arch/powerpc/kernel/time.c | 14
-rw-r--r--  arch/powerpc/kernel/trace/ftrace.c | 5
-rw-r--r--  arch/powerpc/kernel/trace/ftrace_32.S | 1
-rw-r--r--  arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 1
-rw-r--r--  arch/powerpc/kernel/trace/ftrace_64_pg.S | 1
-rw-r--r--  arch/powerpc/kernel/traps.c | 25
-rw-r--r--  arch/powerpc/kernel/ucall.S | 14
-rw-r--r--  arch/powerpc/kernel/udbg.c | 14
-rw-r--r--  arch/powerpc/kernel/vdso.c | 27
-rw-r--r--  arch/powerpc/kernel/vdso32/Makefile | 4
-rw-r--r--  arch/powerpc/kernel/vdso32/cacheflush.S | 32
-rw-r--r--  arch/powerpc/kernel/vdso32/datapage.S | 33
-rw-r--r--  arch/powerpc/kernel/vdso32/getcpu.S | 23
-rw-r--r--  arch/powerpc/kernel/vdso32/gettimeofday.S | 130
-rw-r--r--  arch/powerpc/kernel/vdso32/vdso32.lds.S | 6
-rw-r--r--  arch/powerpc/kernel/vdso64/cacheflush.S | 4
-rw-r--r--  arch/powerpc/kernel/vdso64/gettimeofday.S | 15
-rw-r--r--  arch/powerpc/kernel/vector.S | 3
-rw-r--r--  arch/powerpc/kernel/vmlinux.lds.S | 39
103 files changed, 4345 insertions(+), 6006 deletions(-)
diff --git a/arch/powerpc/kernel/.gitignore b/arch/powerpc/kernel/.gitignore
index c5f676c3c224..67ebd3003c05 100644
--- a/arch/powerpc/kernel/.gitignore
+++ b/arch/powerpc/kernel/.gitignore
@@ -1 +1,2 @@
+prom_init_check
vmlinux.lds
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 56dfa7a2a6f2..78a1b22d4fd8 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -5,9 +5,6 @@
CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-# Disable clang warning for using setjmp without setjmp.h header
-CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header)
-
ifdef CONFIG_PPC64
CFLAGS_prom_init.o += $(NO_MINIMAL_TOC)
endif
@@ -22,6 +19,8 @@ CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
+CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_prom_init.o += -ffreestanding
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
@@ -39,7 +38,6 @@ KASAN_SANITIZE_btext.o := n
ifdef CONFIG_KASAN
CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
-CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
endif
@@ -52,7 +50,7 @@ obj-y := cputable.o ptrace.o syscalls.o \
of_platform.o prom_parse.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
- paca.o nvram_64.o firmware.o
+ paca.o nvram_64.o firmware.o note.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
@@ -64,8 +62,7 @@ obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o
obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o
-obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
-obj-$(CONFIG_PPC_P7_NAP) += idle_book3s.o
+obj-$(CONFIG_PPC_BOOK3S_IDLE) += idle_book3s.o
procfs-y := proc_powerpc.o
obj-$(CONFIG_PROC_FS) += $(procfs-y)
rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o
@@ -79,6 +76,7 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_FA_DUMP) += fadump.o
+obj-$(CONFIG_PRESERVE_FA_DUMP) += fadump.o
ifdef CONFIG_PPC32
obj-$(CONFIG_E500) += idle_e500.o
endif
@@ -124,14 +122,6 @@ pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o
obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \
pci-common.o pci_of_scan.o
obj-$(CONFIG_PCI_MSI) += msi.o
-obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \
- machine_kexec_$(BITS).o
-obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o
-ifdef CONFIG_HAVE_IMA_KEXEC
-ifdef CONFIG_IMA
-obj-y += ima_kexec.o
-endif
-endif
obj-$(CONFIG_AUDIT) += audit.o
obj64-$(CONFIG_AUDIT) += compat_audit.o
@@ -155,17 +145,17 @@ endif
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
+ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),)
+obj-y += ucall.o
+endif
+
+obj-$(CONFIG_PPC_SECURE_BOOT) += secure_boot.o ima_arch.o secvar-ops.o
+obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o
# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
KCOV_INSTRUMENT_prom_init.o := n
UBSAN_SANITIZE_prom_init.o := n
-GCOV_PROFILE_machine_kexec_64.o := n
-KCOV_INSTRUMENT_machine_kexec_64.o := n
-UBSAN_SANITIZE_machine_kexec_64.o := n
-GCOV_PROFILE_machine_kexec_32.o := n
-KCOV_INSTRUMENT_machine_kexec_32.o := n
-UBSAN_SANITIZE_machine_kexec_32.o := n
GCOV_PROFILE_kprobes.o := n
KCOV_INSTRUMENT_kprobes.o := n
UBSAN_SANITIZE_kprobes.o := n
@@ -184,15 +174,13 @@ extra-$(CONFIG_ALTIVEC) += vector.o
extra-$(CONFIG_PPC64) += entry_64.o
extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
-ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
-$(obj)/built-in.a: prom_init_check
+extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check
-quiet_cmd_prom_init_check = CALL $<
- cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o"
+quiet_cmd_prom_init_check = PROMCHK $@
+ cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" $(obj)/prom_init.o; touch $@
-PHONY += prom_init_check
-prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
- $(call cmd,prom_init_check)
-endif
+$(obj)/prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o FORCE
+ $(call if_changed,prom_init_check)
+targets += prom_init_check
clean-files := vmlinux.lds
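
A brief note on the prom_init_check conversion above: with if_changed, the check script re-runs only when prom_init.o, prom_init_check.sh, or the command line itself changes, and the "touch $@" records success in a stamp file under $(obj), so untouched incremental builds skip the check entirely. The old PHONY rule ran on every build of built-in.a.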
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4ccb6b3a7fbd..c25e562f1cd9 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -127,6 +127,12 @@ int main(void)
OFFSET(KSP_VSID, thread_struct, ksp_vsid);
#else /* CONFIG_PPC64 */
OFFSET(PGDIR, thread_struct, pgdir);
+#ifdef CONFIG_VMAP_STACK
+ OFFSET(SRR0, thread_struct, srr0);
+ OFFSET(SRR1, thread_struct, srr1);
+ OFFSET(DAR, thread_struct, dar);
+ OFFSET(DSISR, thread_struct, dsisr);
+#endif
#ifdef CONFIG_SPE
OFFSET(THREAD_EVR0, thread_struct, evr[0]);
OFFSET(THREAD_ACC, thread_struct, acc);
@@ -385,28 +391,25 @@ int main(void)
OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32);
OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec);
OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec);
- OFFSET(STAMP_XTIME, vdso_data, stamp_xtime);
+ OFFSET(STAMP_XTIME_SEC, vdso_data, stamp_xtime_sec);
+ OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec);
OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
+ OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res);
+#ifdef CONFIG_PPC64
OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size);
-#ifdef CONFIG_PPC64
OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
- OFFSET(TVAL64_TV_SEC, timeval, tv_sec);
- OFFSET(TVAL64_TV_USEC, timeval, tv_usec);
+ OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec);
+ OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec);
+#endif
+ OFFSET(TSPC64_TV_SEC, __kernel_timespec, tv_sec);
+ OFFSET(TSPC64_TV_NSEC, __kernel_timespec, tv_nsec);
OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
- OFFSET(TSPC64_TV_SEC, timespec, tv_sec);
- OFFSET(TSPC64_TV_NSEC, timespec, tv_nsec);
OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
-#else
- OFFSET(TVAL32_TV_SEC, timeval, tv_sec);
- OFFSET(TVAL32_TV_USEC, timeval, tv_usec);
- OFFSET(TSPC32_TV_SEC, timespec, tv_sec);
- OFFSET(TSPC32_TV_NSEC, timespec, tv_nsec);
-#endif
/* timeval/timezone offsets for use by vdso */
OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest);
OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime);
@@ -416,8 +419,10 @@ int main(void)
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
+ DEFINE(CLOCK_MAX, CLOCK_TAI);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
+ DEFINE(EINVAL, EINVAL);
+ DEFINE(KTIME_LOW_RES, KTIME_LOW_RES);
#ifdef CONFIG_BUG
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
@@ -506,6 +511,7 @@ int main(void)
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
+ OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
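
For readers unfamiliar with asm-offsets.c: the OFFSET()/DEFINE() macros above come from the kbuild asm-offsets machinery, which compiles this file to assembly and scrapes the emitted marker strings into asm-offsets.h. A minimal sketch of the mechanism, paraphrased from include/linux/kbuild.h (treat details as approximate):

#include <linux/stddef.h>	/* offsetof() */

/* Emits "->SYM <value> <origin>" into the generated .s file; the build
 * then seds each marker into "#define SYM <value>" in asm-offsets.h. */
#define DEFINE(sym, val) \
	asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))

#define OFFSET(sym, str, mem) \
	DEFINE(sym, offsetof(struct str, mem))

/* So OFFSET(SRR0, thread_struct, srr0) above lets assembly address the
 * new thread_struct::srr0 field as SRR0(rN) without knowing the layout. */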
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 2b4f3ec0acf7..1d308780e0d3 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -231,7 +231,7 @@ _GLOBAL(__setup_cpu_e5500)
blr
#endif
-/* flush L1 date cache, it can apply to e500v2, e500mc and e5500 */
+/* flush L1 data cache, it can apply to e500v2, e500mc and e5500 */
_GLOBAL(flush_dcache_L1)
mfmsr r10
wrteei 0
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 3239a9fe6c1c..a460298c7ddb 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -23,6 +23,7 @@ _GLOBAL(__setup_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -37,6 +38,7 @@ _GLOBAL(__restore_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -54,6 +56,7 @@ _GLOBAL(__setup_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
@@ -76,6 +79,7 @@ _GLOBAL(__restore_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
@@ -98,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
@@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
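
The recurring LOAD_REG_IMMEDIATE(r0, PCR_MASK) additions above all serve one purpose: PCR bits, when set, disable the corresponding feature for compatibility modes, so reserved bits must be written as ones to keep features unknown to this kernel disabled; writing a plain zero would leave them enabled. A hedged sketch of the mask (the real definitions live in asm/reg.h; the exact bit grouping here is an assumption):

/* Feature-disable bits at the top of the register ... */
#define PCR_HIGH_BITS	(PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS)
/* ... architecture compat-level bits at the bottom ... */
#define PCR_LOW_BITS	(PCR_ARCH_207 | PCR_ARCH_206 | PCR_ARCH_205)
/* ... and every bit we don't know about stays set when writing the PCR. */
#define PCR_MASK	~(PCR_HIGH_BITS | PCR_LOW_BITS)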
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index bfe5f4a2886b..e745abc5457a 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -569,7 +569,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC32
-#ifdef CONFIG_PPC_BOOK3S_32
+#ifdef CONFIG_PPC_BOOK3S_601
{ /* 601 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00010000,
@@ -583,6 +583,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc601",
},
+#endif /* CONFIG_PPC_BOOK3S_601 */
+#ifdef CONFIG_PPC_BOOK3S_6xx
{ /* 603 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00030000,
@@ -1212,7 +1214,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc603",
},
-#endif /* CONFIG_PPC_BOOK3S_32 */
+#endif /* CONFIG_PPC_BOOK3S_6xx */
#ifdef CONFIG_PPC_8xx
{ /* 8xx */
.pvr_mask = 0xffff0000,
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
deleted file mode 100644
index d488311efab1..000000000000
--- a/arch/powerpc/kernel/crash.c
+++ /dev/null
@@ -1,374 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Architecture specific (PPC64) functions for kexec based crash dumps.
- *
- * Copyright (C) 2005, IBM Corp.
- *
- * Created by: Haren Myneni
- */
-
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/reboot.h>
-#include <linux/kexec.h>
-#include <linux/export.h>
-#include <linux/crash_dump.h>
-#include <linux/delay.h>
-#include <linux/irq.h>
-#include <linux/types.h>
-
-#include <asm/processor.h>
-#include <asm/machdep.h>
-#include <asm/kexec.h>
-#include <asm/prom.h>
-#include <asm/smp.h>
-#include <asm/setjmp.h>
-#include <asm/debug.h>
-
-/*
- * The primary CPU waits a while for all secondary CPUs to enter. This is to
- * avoid sending an IPI if the secondary CPUs are entering
- * crash_kexec_secondary on their own (eg via a system reset).
- *
- * The secondary timeout has to be longer than the primary. Both timeouts are
- * in milliseconds.
- */
-#define PRIMARY_TIMEOUT 500
-#define SECONDARY_TIMEOUT 1000
-
-#define IPI_TIMEOUT 10000
-#define REAL_MODE_TIMEOUT 10000
-
-static int time_to_dump;
-/*
- * crash_wake_offline should be set to 1 by platforms that intend to wake
- * up offline cpus prior to jumping to a kdump kernel. Currently powernv
- * sets it to 1, since we want to avoid things from happening when an
- * offline CPU wakes up due to something like an HMI (malfunction error),
- * which propagates to all threads.
- */
-int crash_wake_offline;
-
-#define CRASH_HANDLER_MAX 3
-/* List of shutdown handles */
-static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX];
-static DEFINE_SPINLOCK(crash_handlers_lock);
-
-static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
-static int crash_shutdown_cpu = -1;
-
-static int handle_fault(struct pt_regs *regs)
-{
- if (crash_shutdown_cpu == smp_processor_id())
- longjmp(crash_shutdown_buf, 1);
- return 0;
-}
-
-#ifdef CONFIG_SMP
-
-static atomic_t cpus_in_crash;
-void crash_ipi_callback(struct pt_regs *regs)
-{
- static cpumask_t cpus_state_saved = CPU_MASK_NONE;
-
- int cpu = smp_processor_id();
-
- hard_irq_disable();
- if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
- crash_save_cpu(regs, cpu);
- cpumask_set_cpu(cpu, &cpus_state_saved);
- }
-
- atomic_inc(&cpus_in_crash);
- smp_mb__after_atomic();
-
- /*
- * Starting the kdump boot.
- * This barrier is needed to make sure that all CPUs are stopped.
- */
- while (!time_to_dump)
- cpu_relax();
-
- if (ppc_md.kexec_cpu_down)
- ppc_md.kexec_cpu_down(1, 1);
-
-#ifdef CONFIG_PPC64
- kexec_smp_wait();
-#else
- for (;;); /* FIXME */
-#endif
-
- /* NOTREACHED */
-}
-
-static void crash_kexec_prepare_cpus(int cpu)
-{
- unsigned int msecs;
- unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
- int tries = 0;
- int (*old_handler)(struct pt_regs *regs);
-
- printk(KERN_EMERG "Sending IPI to other CPUs\n");
-
- if (crash_wake_offline)
- ncpus = num_present_cpus() - 1;
-
- crash_send_ipi(crash_ipi_callback);
- smp_wmb();
-
-again:
- /*
- * FIXME: Until we will have the way to stop other CPUs reliably,
- * the crash CPU will send an IPI and wait for other CPUs to
- * respond.
- */
- msecs = IPI_TIMEOUT;
- while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
- mdelay(1);
-
- /* Would it be better to replace the trap vector here? */
-
- if (atomic_read(&cpus_in_crash) >= ncpus) {
- printk(KERN_EMERG "IPI complete\n");
- return;
- }
-
- printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
- ncpus - atomic_read(&cpus_in_crash));
-
- /*
- * If we have a panic timeout set then we can't wait indefinitely
- * for someone to activate system reset. We also give up on the
- * second time through if system reset fail to work.
- */
- if ((panic_timeout > 0) || (tries > 0))
- return;
-
- /*
- * A system reset will cause all CPUs to take an 0x100 exception.
- * The primary CPU returns here via setjmp, and the secondary
- * CPUs reexecute the crash_kexec_secondary path.
- */
- old_handler = __debugger;
- __debugger = handle_fault;
- crash_shutdown_cpu = smp_processor_id();
-
- if (setjmp(crash_shutdown_buf) == 0) {
- printk(KERN_EMERG "Activate system reset (dumprestart) "
- "to stop other cpu(s)\n");
-
- /*
- * A system reset will force all CPUs to execute the
- * crash code again. We need to reset cpus_in_crash so we
- * wait for everyone to do this.
- */
- atomic_set(&cpus_in_crash, 0);
- smp_mb();
-
- while (atomic_read(&cpus_in_crash) < ncpus)
- cpu_relax();
- }
-
- crash_shutdown_cpu = -1;
- __debugger = old_handler;
-
- tries++;
- goto again;
-}
-
-/*
- * This function will be called by secondary cpus.
- */
-void crash_kexec_secondary(struct pt_regs *regs)
-{
- unsigned long flags;
- int msecs = SECONDARY_TIMEOUT;
-
- local_irq_save(flags);
-
- /* Wait for the primary crash CPU to signal its progress */
- while (crashing_cpu < 0) {
- if (--msecs < 0) {
- /* No response, kdump image may not have been loaded */
- local_irq_restore(flags);
- return;
- }
-
- mdelay(1);
- }
-
- crash_ipi_callback(regs);
-}
-
-#else /* ! CONFIG_SMP */
-
-static void crash_kexec_prepare_cpus(int cpu)
-{
- /*
- * move the secondaries to us so that we can copy
- * the new kernel 0-0x100 safely
- *
- * do this if kexec in setup.c ?
- */
-#ifdef CONFIG_PPC64
- smp_release_cpus();
-#else
- /* FIXME */
-#endif
-}
-
-void crash_kexec_secondary(struct pt_regs *regs)
-{
-}
-#endif /* CONFIG_SMP */
-
-/* wait for all the CPUs to hit real mode but timeout if they don't come in */
-#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
-static void __maybe_unused crash_kexec_wait_realmode(int cpu)
-{
- unsigned int msecs;
- int i;
-
- msecs = REAL_MODE_TIMEOUT;
- for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
- if (i == cpu)
- continue;
-
- while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
- barrier();
- if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
- break;
- msecs--;
- mdelay(1);
- }
- }
- mb();
-}
-#else
-static inline void crash_kexec_wait_realmode(int cpu) {}
-#endif /* CONFIG_SMP && CONFIG_PPC64 */
-
-/*
- * Register a function to be called on shutdown. Only use this if you
- * can't reset your device in the second kernel.
- */
-int crash_shutdown_register(crash_shutdown_t handler)
-{
- unsigned int i, rc;
-
- spin_lock(&crash_handlers_lock);
- for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
- if (!crash_shutdown_handles[i]) {
- /* Insert handle at first empty entry */
- crash_shutdown_handles[i] = handler;
- rc = 0;
- break;
- }
-
- if (i == CRASH_HANDLER_MAX) {
- printk(KERN_ERR "Crash shutdown handles full, "
- "not registered.\n");
- rc = 1;
- }
-
- spin_unlock(&crash_handlers_lock);
- return rc;
-}
-EXPORT_SYMBOL(crash_shutdown_register);
-
-int crash_shutdown_unregister(crash_shutdown_t handler)
-{
- unsigned int i, rc;
-
- spin_lock(&crash_handlers_lock);
- for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
- if (crash_shutdown_handles[i] == handler)
- break;
-
- if (i == CRASH_HANDLER_MAX) {
- printk(KERN_ERR "Crash shutdown handle not found\n");
- rc = 1;
- } else {
- /* Shift handles down */
- for (; i < (CRASH_HANDLER_MAX - 1); i++)
- crash_shutdown_handles[i] =
- crash_shutdown_handles[i+1];
- /*
- * Reset last entry to NULL now that it has been shifted down,
- * this will allow new handles to be added here.
- */
- crash_shutdown_handles[i] = NULL;
- rc = 0;
- }
-
- spin_unlock(&crash_handlers_lock);
- return rc;
-}
-EXPORT_SYMBOL(crash_shutdown_unregister);
-
-void default_machine_crash_shutdown(struct pt_regs *regs)
-{
- unsigned int i;
- int (*old_handler)(struct pt_regs *regs);
-
- /*
- * This function is only called after the system
- * has panicked or is otherwise in a critical state.
- * The minimum amount of code to allow a kexec'd kernel
- * to run successfully needs to happen here.
- *
- * In practice this means stopping other cpus in
- * an SMP system.
- * The kernel is broken so disable interrupts.
- */
- hard_irq_disable();
-
- /*
- * Make a note of crashing cpu. Will be used in machine_kexec
- * such that another IPI will not be sent.
- */
- crashing_cpu = smp_processor_id();
-
- /*
- * If we came in via system reset, wait a while for the secondary
- * CPUs to enter.
- */
- if (TRAP(regs) == 0x100)
- mdelay(PRIMARY_TIMEOUT);
-
- crash_kexec_prepare_cpus(crashing_cpu);
-
- crash_save_cpu(regs, crashing_cpu);
-
- time_to_dump = 1;
-
- crash_kexec_wait_realmode(crashing_cpu);
-
- machine_kexec_mask_interrupts();
-
- /*
- * Call registered shutdown routines safely. Swap out
- * __debugger_fault_handler, and replace on exit.
- */
- old_handler = __debugger_fault_handler;
- __debugger_fault_handler = handle_fault;
- crash_shutdown_cpu = smp_processor_id();
- for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
- if (setjmp(crash_shutdown_buf) == 0) {
- /*
- * Insert syncs and delay to ensure
- * instructions in the dangerous region don't
- * leak away from this protected region.
- */
- asm volatile("sync; isync");
- /* dangerous region */
- crash_shutdown_handles[i]();
- asm volatile("sync; isync");
- }
- }
- crash_shutdown_cpu = -1;
- __debugger_fault_handler = old_handler;
-
- if (ppc_md.kexec_cpu_down)
- ppc_md.kexec_cpu_down(1, 0);
-}
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index 5f66b95b6858..cc14aa6c4a1b 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -30,10 +30,10 @@ int set_dawr(struct arch_hw_breakpoint *brk)
* DAWR length is stored in field MDR bits 48:53. Matches range in
* doublewords (64 bits) biased by -1, e.g. 0b000000=1DW and
* 0b111111=64DW.
- * brk->len is in bytes.
+ * brk->hw_len is in bytes.
* This aligns up to double word size, shifts and does the bias.
*/
- mrd = ((brk->len + 7) >> 3) - 1;
+ mrd = ((brk->hw_len + 7) >> 3) - 1;
dawrx |= (mrd & 0x3f) << (63 - 53);
if (ppc_md.set_dawr)
@@ -54,7 +54,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
const char __user *user_buf,
size_t count, loff_t *ppos)
{
- struct arch_hw_breakpoint null_brk = {0, 0, 0};
+ struct arch_hw_breakpoint null_brk = {0};
size_t rc;
/* Send error to user if the hypervisor won't allow us to write DAWR */
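
A worked example of the corrected length encoding, following directly from the comment quoted above:

/* hw_len is in bytes; MDR wants (doublewords - 1), 6 bits wide:
 *   hw_len = 1:   ((1 + 7) >> 3) - 1 = 0    -> 0b000000, 1 doubleword
 *   hw_len = 16:  ((16 + 7) >> 3) - 1 = 1   -> 0b000001, 2 doublewords
 *   hw_len = 512: ((512 + 7) >> 3) - 1 = 63 -> 0b111111, 64 doublewords
 */
mrd = ((brk->hw_len + 7) >> 3) - 1;
dawrx |= (mrd & 0x3f) << (63 - 53);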
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 804b1a6196fa..f17ff1200eaa 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -33,7 +33,7 @@ void doorbell_global_ipi(int cpu)
{
u32 tag = get_hard_smp_processor_id(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -48,7 +48,7 @@ void doorbell_core_ipi(int cpu)
{
u32 tag = cpu_thread_in_core(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -84,7 +84,7 @@ void doorbell_exception(struct pt_regs *regs)
may_hard_irq_enable();
- kvmppc_set_host_ipi(smp_processor_id(), 0);
+ kvmppc_clear_host_ipi(smp_processor_id());
__this_cpu_inc(irq_stat.doorbell_irqs);
smp_ipi_demux_relaxed(); /* already performed the barrier */
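
The kvmppc_set_host_ipi()/kvmppc_clear_host_ipi() split above replaces the old boolean argument. A hedged sketch of what such helpers look like (the real ones live in asm/kvm_ppc.h, and the exact barrier placement there is an assumption of this sketch):

static inline void kvmppc_set_host_ipi(int cpu)
{
	/* Order the stores that raised the IPI before the flag becomes
	 * visible to the target CPU. */
	smp_mb();
	paca_ptrs[cpu]->kvm_hstate.host_ipi = 1;
}

static inline void kvmppc_clear_host_ipi(int cpu)
{
	paca_ptrs[cpu]->kvm_hstate.host_ipi = 0;
	/* Order the clear before any subsequent scan for IPI reasons. */
	smp_mb();
}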
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index a0879674a9c8..e486d1d78de2 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -122,18 +122,17 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
{
struct iommu_table *tbl = get_iommu_table_base(dev);
- if (!tbl) {
- dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx"
- ", table unavailable\n", mask);
- return 0;
- }
-
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
dev->archdata.iommu_bypass = true;
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
return 1;
}
+ if (!tbl) {
+ dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
+ return 0;
+ }
+
if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
@@ -208,4 +207,6 @@ const struct dma_map_ops dma_iommu_ops = {
.sync_single_for_device = dma_iommu_sync_for_device,
.sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu,
.sync_sg_for_device = dma_iommu_sync_sg_for_device,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index bd95318d2202..182b4047c1ef 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -101,7 +101,7 @@ static void __restore_cpu_cpufeatures(void)
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
- mtspr(SPRN_PCR, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
}
mtspr(SPRN_FSCR, system_registers.fscr);
@@ -144,6 +144,7 @@ static void __init cpufeatures_setup_cpu(void)
mtspr(SPRN_HFSCR, 0);
}
mtspr(SPRN_FSCR, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
/*
* LPCR does not get cleared, to match behaviour with secondaries
@@ -691,31 +692,62 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
return true;
}
+/*
+ * Handle POWER9 broadcast tlbie invalidation issue using
+ * cpu feature flag.
+ */
+static __init void update_tlbie_feature_flag(unsigned long pvr)
+{
+ if (PVR_VER(pvr) == PVR_POWER9) {
+ /*
+ * Set the tlbie feature flag for anything below
+ * Nimbus DD 2.3 and Cumulus DD 1.3
+ */
+ if ((pvr & 0xe000) == 0) {
+ /* Nimbus */
+ if ((pvr & 0xfff) < 0x203)
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ } else if ((pvr & 0xc000) == 0) {
+ /* Cumulus */
+ if ((pvr & 0xfff) < 0x103)
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ } else {
+ WARN_ONCE(1, "Unknown PVR");
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ }
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
+ }
+}
+
static __init void cpufeatures_cpu_quirks(void)
{
- int version = mfspr(SPRN_PVR);
+ unsigned long version = mfspr(SPRN_PVR);
/*
* Not all quirks can be derived from the cpufeatures device tree.
*/
- if ((version & 0xffffefff) == 0x004e0200)
- ; /* DD2.0 has no feature flag */
- else if ((version & 0xffffefff) == 0x004e0201)
+ if ((version & 0xffffefff) == 0x004e0200) {
+ /* DD2.0 has no feature flag */
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG;
+ } else if ((version & 0xffffefff) == 0x004e0201) {
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
- else if ((version & 0xffffefff) == 0x004e0202) {
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG;
+ } else if ((version & 0xffffefff) == 0x004e0202) {
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
- } else if ((version & 0xffff0000) == 0x004e0000)
+ } else if ((version & 0xffff0000) == 0x004e0000) {
/* DD2.1 and up have DD2_1 */
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+ }
if ((version & 0xffff0000) == 0x004e0000) {
cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
}
+ update_tlbie_feature_flag(version);
/*
* PKEY was not in the initial base or feature node
* specification, but it should become optional in the next
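
Stepping back to update_tlbie_feature_flag() above, a worked decode of one PVR may help (the value is illustrative):

/* pvr = 0x004e1202, i.e. a POWER9 Nimbus DD2.2 part (illustrative):
 *   PVR_VER(pvr) == PVR_POWER9            -> quirk path taken
 *   (pvr & 0xe000) == 0                   -> Nimbus
 *   (pvr & 0xfff) == 0x202, below 0x203   -> CPU_FTR_P9_TLBIE_STQ_BUG set
 * CPU_FTR_P9_TLBIE_ERAT_BUG is then set unconditionally for all POWER9. */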
diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c
index 3482118ffe76..ef2ad4945904 100644
--- a/arch/powerpc/kernel/early_32.c
+++ b/arch/powerpc/kernel/early_32.c
@@ -19,10 +19,13 @@
*/
notrace unsigned long __init early_init(unsigned long dt_ptr)
{
- unsigned long offset = reloc_offset();
+ unsigned long kva, offset = reloc_offset();
+
+ kva = *PTRRELOC(&kernstart_virt_addr);
/* First zero the BSS */
- memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
+ if (kva == KERNELBASE)
+ memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
/*
* Identify the CPU type and fix up code sections
@@ -32,5 +35,5 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)
apply_feature_fixups();
- return KERNELBASE + offset;
+ return kva + offset;
}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index c0e4b73191f3..17cb3e9b5697 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -150,6 +150,16 @@ static int __init eeh_setup(char *str)
}
__setup("eeh=", eeh_setup);
+void eeh_show_enabled(void)
+{
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
+ pr_info("EEH: Recovery disabled by kernel parameter.\n");
+ else if (eeh_has_flag(EEH_ENABLED))
+ pr_info("EEH: Capable adapter found: recovery enabled.\n");
+ else
+ pr_info("EEH: No capable adapters found: recovery disabled.\n");
+}
+
/*
* This routine captures assorted PCI configuration space data
* for the indicated PCI device, and puts them into a buffer
@@ -410,11 +420,9 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
eeh_pe_mark_isolated(phb_pe);
eeh_serialize_unlock(flags);
- pr_err("EEH: PHB#%x failure detected, location: %s\n",
+ pr_debug("EEH: PHB#%x failure detected, location: %s\n",
phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
- dump_stack();
eeh_send_failure_event(phb_pe);
-
return 1;
out:
eeh_serialize_unlock(flags);
@@ -441,7 +449,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
- struct eeh_pe *pe, *parent_pe, *phb_pe;
+ struct eeh_pe *pe, *parent_pe;
int rc = 0;
const char *location = NULL;
@@ -460,8 +468,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
/* Access to IO BARs might get this far and still not want checking. */
if (!pe) {
eeh_stats.ignored_check++;
- pr_debug("EEH: Ignored check for %s\n",
- eeh_pci_name(dev));
+ eeh_edev_dbg(edev, "Ignored check\n");
return 0;
}
@@ -496,17 +503,16 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
rc = 1;
if (pe->state & EEH_PE_ISOLATED) {
pe->check_count++;
- if (pe->check_count % EEH_MAX_FAILS == 0) {
+ if (pe->check_count == EEH_MAX_FAILS) {
dn = pci_device_to_OF_node(dev);
if (dn)
location = of_get_property(dn, "ibm,loc-code",
NULL);
- printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
- "location=%s driver=%s pci addr=%s\n",
+ eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n",
pe->check_count,
location ? location : "unknown",
- eeh_driver_name(dev), eeh_pci_name(dev));
- printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
+ eeh_driver_name(dev));
+ eeh_edev_err(edev, "Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
dump_stack();
}
@@ -573,13 +579,8 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* a stack trace will help the device-driver authors figure
* out what happened. So print that out.
*/
- phb_pe = eeh_phb_pe_get(pe->phb);
- pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
- pe->phb->global_number, pe->addr);
- pr_err("EEH: PE location: %s, PHB location: %s\n",
- eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
- dump_stack();
-
+ pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
+ __func__, pe->phb->global_number, pe->addr);
eeh_send_failure_event(pe);
return 1;
@@ -697,7 +698,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
return rc;
}
-static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
+static void eeh_disable_and_save_dev_state(struct eeh_dev *edev,
void *userdata)
{
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
@@ -708,7 +709,7 @@ static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
* state for the specified device
*/
if (!pdev || pdev == dev)
- return NULL;
+ return;
/* Ensure we have D0 power state */
pci_set_power_state(pdev, PCI_D0);
@@ -721,18 +722,16 @@ static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
* interrupt from the device
*/
pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
-
- return NULL;
}
-static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
+static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
struct pci_dev *dev = userdata;
if (!pdev)
- return NULL;
+ return;
/* Apply customization from firmware */
if (pdn && eeh_ops->restore_config)
@@ -741,8 +740,6 @@ static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
/* The caller should restore state for the specified device */
if (pdev != dev)
pci_restore_state(pdev);
-
- return NULL;
}
int eeh_restore_vf_config(struct pci_dn *pdn)
@@ -868,7 +865,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
* the indicated device and its children so that the bunch of the
* devices could be reset properly.
*/
-static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
+static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
{
struct pci_dev *dev;
unsigned int *freset = (unsigned int *)flag;
@@ -876,8 +873,6 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
dev = eeh_dev_to_pci_dev(edev);
if (dev)
*freset |= dev->needs_freset;
-
- return NULL;
}
static void eeh_pe_refreeze_passed(struct eeh_pe *root)
@@ -1063,23 +1058,6 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
-void eeh_probe_devices(void)
-{
- struct pci_controller *hose, *tmp;
- struct pci_dn *pdn;
-
- /* Enable EEH for all adapters */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- pdn = hose->pci_data;
- traverse_pci_dn(pdn, eeh_ops->probe, NULL);
- }
- if (eeh_enabled())
- pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
- else
- pr_info("EEH: No capable adapters found\n");
-
-}
-
/**
* eeh_init - EEH initialization
*
@@ -1120,6 +1098,8 @@ static int eeh_init(void)
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
eeh_dev_phb_init_dynamic(hose);
+ eeh_addr_cache_init();
+
/* Initialize EEH event */
return eeh_event_init();
}
@@ -1190,15 +1170,14 @@ void eeh_add_device_late(struct pci_dev *dev)
struct pci_dn *pdn;
struct eeh_dev *edev;
- if (!dev || !eeh_enabled())
+ if (!dev)
return;
- pr_debug("EEH: Adding device %s\n", pci_name(dev));
-
pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
edev = pdn_to_eeh_dev(pdn);
+ eeh_edev_dbg(edev, "Adding device\n");
if (edev->pdev == dev) {
- pr_debug("EEH: Already referenced !\n");
+ eeh_edev_dbg(edev, "Device already referenced!\n");
return;
}
@@ -1212,7 +1191,6 @@ void eeh_add_device_late(struct pci_dev *dev)
eeh_rmv_from_parent_pe(edev);
eeh_addr_cache_rmv_dev(edev->pdev);
eeh_sysfs_remove_device(edev->pdev);
- edev->mode &= ~EEH_DEV_SYSFS;
/*
* We definitely should have the PCI device removed
@@ -1246,6 +1224,8 @@ void eeh_add_device_tree_late(struct pci_bus *bus)
{
struct pci_dev *dev;
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
+ return;
list_for_each_entry(dev, &bus->devices, bus_list) {
eeh_add_device_late(dev);
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
@@ -1299,10 +1279,10 @@ void eeh_remove_device(struct pci_dev *dev)
edev = pci_dev_to_eeh_dev(dev);
/* Unregister the device with the EEH/PCI address search system */
- pr_debug("EEH: Removing device %s\n", pci_name(dev));
+ dev_dbg(&dev->dev, "EEH: Removing device\n");
if (!edev || !edev->pdev || !edev->pe) {
- pr_debug("EEH: Not referenced !\n");
+ dev_dbg(&dev->dev, "EEH: Device not referenced!\n");
return;
}
@@ -1315,17 +1295,11 @@ void eeh_remove_device(struct pci_dev *dev)
edev->pdev = NULL;
/*
- * The flag "in_error" is used to trace EEH devices for VFs
- * in error state or not. It's set in eeh_report_error(). If
- * it's not set, eeh_report_{reset,resume}() won't be called
- * for the VF EEH device.
+ * eeh_sysfs_remove_device() uses pci_dev_to_eeh_dev() so we need to
+ * remove the sysfs files before clearing dev.archdata.edev
*/
- edev->in_error = false;
- dev->dev.archdata.edev = NULL;
- if (!(edev->pe->state & EEH_PE_KEEP))
- eeh_rmv_from_parent_pe(edev);
- else
- edev->mode |= EEH_DEV_DISCONNECTED;
+ if (edev->mode & EEH_DEV_SYSFS)
+ eeh_sysfs_remove_device(dev);
/*
* We're removing from the PCI subsystem, that means
@@ -1336,8 +1310,19 @@ void eeh_remove_device(struct pci_dev *dev)
edev->mode |= EEH_DEV_NO_HANDLER;
eeh_addr_cache_rmv_dev(dev);
- eeh_sysfs_remove_device(dev);
- edev->mode &= ~EEH_DEV_SYSFS;
+
+ /*
+ * The flag "in_error" is used to trace EEH devices for VFs
+ * in error state or not. It's set in eeh_report_error(). If
+ * it's not set, eeh_report_{reset,resume}() won't be called
+ * for the VF EEH device.
+ */
+ edev->in_error = false;
+ dev->dev.archdata.edev = NULL;
+ if (!(edev->pe->state & EEH_PE_KEEP))
+ eeh_rmv_from_parent_pe(edev);
+ else
+ edev->mode |= EEH_DEV_DISCONNECTED;
}
int eeh_unfreeze_pe(struct eeh_pe *pe)
@@ -1890,6 +1875,198 @@ static const struct file_operations eeh_force_recover_fops = {
.llseek = no_llseek,
.write = eeh_force_recover_write,
};
+
+static ssize_t eeh_debugfs_dev_usage(struct file *filp,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ static const char usage[] = "input format: <domain>:<bus>:<dev>.<fn>\n";
+
+ return simple_read_from_buffer(user_buf, count, ppos,
+ usage, sizeof(usage) - 1);
+}
+
+static ssize_t eeh_dev_check_write(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ uint32_t domain, bus, dev, fn;
+ struct pci_dev *pdev;
+ struct eeh_dev *edev;
+ char buf[20];
+ int ret;
+
+ memset(buf, 0, sizeof(buf));
+ ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+ if (!ret)
+ return -EFAULT;
+
+ ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+ if (ret != 4) {
+ pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+ return -EINVAL;
+ }
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+ if (!pdev)
+ return -ENODEV;
+
+ edev = pci_dev_to_eeh_dev(pdev);
+ if (!edev) {
+ pci_err(pdev, "No eeh_dev for this device!\n");
+ pci_dev_put(pdev);
+ return -ENODEV;
+ }
+
+ ret = eeh_dev_check_failure(edev);
+ pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n",
+ domain, bus, dev, fn, ret);
+
+ pci_dev_put(pdev);
+
+ return count;
+}
+
+static const struct file_operations eeh_dev_check_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_dev_check_write,
+ .read = eeh_debugfs_dev_usage,
+};
+
+static int eeh_debugfs_break_device(struct pci_dev *pdev)
+{
+ struct resource *bar = NULL;
+ void __iomem *mapped;
+ u16 old, bit;
+ int i, pos;
+
+ /* Do we have an MMIO BAR to disable? */
+ for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+ struct resource *r = &pdev->resource[i];
+
+ if (!r->flags || !r->start)
+ continue;
+ if (r->flags & IORESOURCE_IO)
+ continue;
+ if (r->flags & IORESOURCE_UNSET)
+ continue;
+
+ bar = r;
+ break;
+ }
+
+ if (!bar) {
+ pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
+ return -ENXIO;
+ }
+
+ pci_err(pdev, "Going to break: %pR\n", bar);
+
+ if (pdev->is_virtfn) {
+#ifndef CONFIG_PCI_IOV
+ return -ENXIO;
+#else
+ /*
+ * VFs don't have a per-function COMMAND register, so the best
+ * we can do is clear the Memory Space Enable bit in the PF's
+ * SRIOV control reg.
+ *
+ * Unfortunately, this requires that we have a PF (i.e. doesn't
+ * work for a passed-through VF) and it has the potential side
+ * effect of also causing an EEH on every other VF under the
+ * PF. Oh well.
+ */
+ pdev = pdev->physfn;
+ if (!pdev)
+ return -ENXIO; /* passed through VFs have no PF */
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+ pos += PCI_SRIOV_CTRL;
+ bit = PCI_SRIOV_CTRL_MSE;
+#endif /* !CONFIG_PCI_IOV */
+ } else {
+ bit = PCI_COMMAND_MEMORY;
+ pos = PCI_COMMAND;
+ }
+
+ /*
+ * Process here is:
+ *
+ * 1. Disable Memory space.
+ *
+ * 2. Perform an MMIO to the device. This should result in an error
+ * (CA / UR) being raised by the device which results in an EEH
+ * PE freeze. Using the in_8() accessor skips the EEH detection
+ * hook, so the EEH detection machinery won't be triggered here.
+ * This is to match the usual behaviour of EEH, where the HW will
+ * asynchronously freeze a PE and it's up to the kernel to notice
+ * and deal with it.
+ *
+ * 3. Turn Memory space back on. This is more important for VFs
+ * since recovery will probably fail if we don't. For normal devices
+ * the COMMAND register is reset as a part of re-initialising
+ * the device.
+ *
+ * Breaking stuff is the point so who cares if it's racy ;)
+ */
+ pci_read_config_word(pdev, pos, &old);
+
+ mapped = ioremap(bar->start, PAGE_SIZE);
+ if (!mapped) {
+ pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
+ return -ENXIO;
+ }
+
+ pci_write_config_word(pdev, pos, old & ~bit);
+ in_8(mapped);
+ pci_write_config_word(pdev, pos, old);
+
+ iounmap(mapped);
+
+ return 0;
+}
+
+static ssize_t eeh_dev_break_write(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ uint32_t domain, bus, dev, fn;
+ struct pci_dev *pdev;
+ char buf[20];
+ int ret;
+
+ memset(buf, 0, sizeof(buf));
+ ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+ if (!ret)
+ return -EFAULT;
+
+ ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+ if (ret != 4) {
+ pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+ return -EINVAL;
+ }
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+ if (!pdev)
+ return -ENODEV;
+
+ ret = eeh_debugfs_break_device(pdev);
+ pci_dev_put(pdev);
+
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static const struct file_operations eeh_dev_break_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_dev_break_write,
+ .read = eeh_debugfs_dev_usage,
+};
+
#endif
static int __init eeh_init_proc(void)
@@ -1905,6 +2082,12 @@ static int __init eeh_init_proc(void)
debugfs_create_bool("eeh_disable_recovery", 0600,
powerpc_debugfs_root,
&eeh_debugfs_no_recover);
+ debugfs_create_file_unsafe("eeh_dev_check", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_dev_check_fops);
+ debugfs_create_file_unsafe("eeh_dev_break", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_dev_break_fops);
debugfs_create_file_unsafe("eeh_force_recover", 0600,
powerpc_debugfs_root, NULL,
&eeh_force_recover_fops);
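
As a usage note for the two new debugfs files registered above (the path assumes debugfs is mounted at /sys/kernel/debug, and the PCI address is a hypothetical example): writing "0001:0a:00.0" to /sys/kernel/debug/powerpc/eeh_dev_check runs eeh_dev_check_failure() against that device, while writing the same address to eeh_dev_break clears its Memory Space Enable bit and performs an MMIO read, provoking a genuine PE freeze for the recovery machinery to handle.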
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 05ffd32b3416..6b50bf15d8c1 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -148,8 +148,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
piar->pcidev = dev;
piar->flags = flags;
- pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n",
- &alo, &ahi, pci_name(dev));
+ eeh_edev_dbg(piar->edev, "PIAR: insert range=[%pap:%pap]\n",
+ &alo, &ahi);
rb_link_node(&piar->rb_node, parent, p);
rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
@@ -159,18 +159,10 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
{
- struct pci_dn *pdn;
struct eeh_dev *edev;
int i;
- pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
- if (!pdn) {
- pr_warn("PCI: no pci dn found for dev=%s\n",
- pci_name(dev));
- return;
- }
-
- edev = pdn_to_eeh_dev(pdn);
+ edev = pci_dev_to_eeh_dev(dev);
if (!edev) {
pr_warn("PCI: no EEH dev found for %s\n",
pci_name(dev));
@@ -229,8 +221,8 @@ restart:
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
if (piar->pcidev == dev) {
- pr_debug("PIAR: remove range=[%pap:%pap] dev=%s\n",
- &piar->addr_lo, &piar->addr_hi, pci_name(dev));
+ eeh_edev_dbg(piar->edev, "PIAR: remove range=[%pap:%pap]\n",
+ &piar->addr_lo, &piar->addr_hi);
rb_erase(n, &pci_io_addr_cache_root.rb_root);
kfree(piar);
goto restart;
@@ -258,37 +250,14 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
}
/**
- * eeh_addr_cache_build - Build a cache of I/O addresses
+ * eeh_addr_cache_init - Initialize a cache of I/O addresses
*
- * Build a cache of pci i/o addresses. This cache will be used to
+ * Initialize a cache of pci i/o addresses. This cache will be used to
* find the pci device that corresponds to a given address.
- * This routine scans all pci busses to build the cache.
- * Must be run late in boot process, after the pci controllers
- * have been scanned for devices (after all device resources are known).
*/
-void eeh_addr_cache_build(void)
+void eeh_addr_cache_init(void)
{
- struct pci_dn *pdn;
- struct eeh_dev *edev;
- struct pci_dev *dev = NULL;
-
spin_lock_init(&pci_io_addr_cache_root.piar_lock);
-
- for_each_pci_dev(dev) {
- pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
- if (!pdn)
- continue;
-
- edev = pdn_to_eeh_dev(pdn);
- if (!edev)
- continue;
-
- dev->dev.archdata.edev = edev;
- edev->pdev = dev;
-
- eeh_addr_cache_insert_dev(dev);
- eeh_sysfs_add_device(dev);
- }
}
static int eeh_addr_cache_show(struct seq_file *s, void *v)
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index c4317c452d98..7370185c7a05 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -47,6 +47,8 @@ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
/* Associate EEH device with OF node */
pdn->edev = edev;
edev->pdn = pdn;
+ edev->bdfn = (pdn->busno << 8) | pdn->devfn;
+ edev->controller = pdn->phb;
return edev;
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 89623962c727..a1eaffe868de 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -1,25 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PCI Error Recovery Driver for RPA-compliant PPC64 platform.
* Copyright IBM Corp. 2004 2005
* Copyright Linas Vepstas <linas@linas.org> 2004, 2005
*
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
* Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
*/
#include <linux/delay.h>
@@ -27,6 +11,7 @@
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/ppc-pci.h>
@@ -81,23 +66,6 @@ static const char *pci_ers_result_name(enum pci_ers_result result)
}
};
-static __printf(2, 3) void eeh_edev_info(const struct eeh_dev *edev,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- printk(KERN_INFO "EEH: PE#%x (PCI %s): %pV\n", edev->pe_config_addr,
- edev->pdev ? dev_name(&edev->pdev->dev) : "none", &vaf);
-
- va_end(args);
-}
-
static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
enum pci_ers_result new)
{
@@ -113,8 +81,16 @@ static bool eeh_dev_removed(struct eeh_dev *edev)
static bool eeh_edev_actionable(struct eeh_dev *edev)
{
- return (edev->pdev && !eeh_dev_removed(edev) &&
- !eeh_pe_passed(edev->pe));
+ if (!edev->pdev)
+ return false;
+ if (edev->pdev->error_state == pci_channel_io_perm_failure)
+ return false;
+ if (eeh_dev_removed(edev))
+ return false;
+ if (eeh_pe_passed(edev->pe))
+ return false;
+
+ return true;
}
/**
@@ -214,12 +190,12 @@ static void eeh_enable_irq(struct eeh_dev *edev)
}
}
-static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
+static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
- return NULL;
+ return;
/*
* We cannot access the config space on some adapters.
@@ -229,14 +205,13 @@ static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
* device is created.
*/
if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
- return NULL;
+ return;
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
- return NULL;
+ return;
pci_save_state(pdev);
- return NULL;
}
static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
@@ -274,20 +249,27 @@ static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
}
typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
+ struct pci_dev *,
struct pci_driver *);
static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
enum pci_ers_result *result)
{
+ struct pci_dev *pdev;
struct pci_driver *driver;
enum pci_ers_result new_result;
- if (!edev->pdev) {
+ pci_lock_rescan_remove();
+ pdev = edev->pdev;
+ if (pdev)
+ get_device(&pdev->dev);
+ pci_unlock_rescan_remove();
+ if (!pdev) {
eeh_edev_info(edev, "no device");
return;
}
- device_lock(&edev->pdev->dev);
+ device_lock(&pdev->dev);
if (eeh_edev_actionable(edev)) {
- driver = eeh_pcid_get(edev->pdev);
+ driver = eeh_pcid_get(pdev);
if (!driver)
eeh_edev_info(edev, "no driver");
@@ -296,7 +278,7 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
else if (edev->mode & EEH_DEV_NO_HANDLER)
eeh_edev_info(edev, "driver bound too late");
else {
- new_result = fn(edev, driver);
+ new_result = fn(edev, pdev, driver);
eeh_edev_info(edev, "%s driver reports: '%s'",
driver->name,
pci_ers_result_name(new_result));
@@ -305,12 +287,15 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
new_result);
}
if (driver)
- eeh_pcid_put(edev->pdev);
+ eeh_pcid_put(pdev);
} else {
- eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!edev->pdev,
+ eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!pdev,
!eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
}
- device_unlock(&edev->pdev->dev);
+ device_unlock(&pdev->dev);
+ if (edev->pdev != pdev)
+ eeh_edev_warn(edev, "Device changed during processing!\n");
+ put_device(&pdev->dev);
}
static void eeh_pe_report(const char *name, struct eeh_pe *root,
@@ -337,20 +322,20 @@ static void eeh_pe_report(const char *name, struct eeh_pe *root,
* Report an EEH error to each device driver.
*/
static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
- struct pci_dev *dev = edev->pdev;
if (!driver->err_handler->error_detected)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
driver->name);
- rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
+ rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen);
edev->in_error = true;
- pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE);
return rc;
}
@@ -363,12 +348,13 @@ static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
* are now enabled.
*/
static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->mmio_enabled)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
- return driver->err_handler->mmio_enabled(edev->pdev);
+ return driver->err_handler->mmio_enabled(pdev);
}
/**
@@ -382,20 +368,21 @@ static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
* driver can work again while the device is recovered.
*/
static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->slot_reset || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
- return driver->err_handler->slot_reset(edev->pdev);
+ return driver->err_handler->slot_reset(pdev);
}
-static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
+static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
- return NULL;
+ return;
/*
* The content in the config space isn't saved because
@@ -407,15 +394,14 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
if (list_is_last(&edev->entry, &edev->pe->edevs))
eeh_pe_restore_bars(edev->pe);
- return NULL;
+ return;
}
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
- return NULL;
+ return;
pci_restore_state(pdev);
- return NULL;
}
/**
@@ -428,13 +414,14 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
* to make the recovered device work again.
*/
static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->resume || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
- driver->err_handler->resume(edev->pdev);
+ driver->err_handler->resume(pdev);
pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
#ifdef CONFIG_PCI_IOV
@@ -453,6 +440,7 @@ static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
* dead, and that no further recovery attempts will be made on it.
*/
static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
@@ -462,10 +450,10 @@ static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
driver->name);
- rc = driver->err_handler->error_detected(edev->pdev,
+ rc = driver->err_handler->error_detected(pdev,
pci_channel_io_perm_failure);
- pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_DISCONNECT);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
return rc;
}
@@ -473,12 +461,9 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
{
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
if (!(edev->physfn)) {
- pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
- __func__, pdn->phb->global_number, pdn->busno,
- PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+ eeh_edev_warn(edev, "Not for VF\n");
return NULL;
}
@@ -492,12 +477,12 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
}
#ifdef CONFIG_PCI_IOV
- pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
+ pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index);
#endif
return NULL;
}
-static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
+static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
{
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
@@ -512,7 +497,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
*/
if (!eeh_edev_actionable(edev) ||
(dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
- return NULL;
+ return;
if (rmv_data) {
driver = eeh_pcid_get(dev);
@@ -521,7 +506,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
driver->err_handler->error_detected &&
driver->err_handler->slot_reset) {
eeh_pcid_put(dev);
- return NULL;
+ return;
}
eeh_pcid_put(dev);
}
@@ -540,12 +525,6 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
edev->pdev = NULL;
-
- /*
- * We have to set the VF PE number to invalid one, which is
- * required to plug the VF successfully.
- */
- pdn->pe_number = IODA_INVALID_PE;
#endif
if (rmv_data)
list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
@@ -554,8 +533,6 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
pci_stop_and_remove_bus_device(dev);
pci_unlock_rescan_remove();
}
-
- return NULL;
}
static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
@@ -744,6 +721,99 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
*/
#define MAX_WAIT_FOR_RECOVERY 300
+
+/* Walks the PE tree after processing an event to remove any stale PEs.
+ *
+ * NB: This needs to be recursive to ensure the leaf PEs get removed
+ * before their parents do. Although this is possible to do iteratively,
+ * we don't, since the recursive form is easier to read and we need to
+ * guarantee that the leaf nodes will be handled first.
+ */
+static void eeh_pe_cleanup(struct eeh_pe *pe)
+{
+ struct eeh_pe *child_pe, *tmp;
+
+ list_for_each_entry_safe(child_pe, tmp, &pe->child_list, child)
+ eeh_pe_cleanup(child_pe);
+
+ if (pe->state & EEH_PE_KEEP)
+ return;
+
+ if (!(pe->state & EEH_PE_INVALID))
+ return;
+
+ if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) {
+ list_del(&pe->child);
+ kfree(pe);
+ }
+}
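
For intuition, a minimal user-space sketch of the same post-order, leaf-first cleanup; the struct and the stale flag are illustrative stand-ins for eeh_pe and EEH_PE_INVALID, not kernel API:

	#include <stdbool.h>
	#include <stdlib.h>

	struct node {
		bool stale;		/* plays the role of EEH_PE_INVALID */
		struct node **children;
		int nr_children;
	};

	/* Children are visited before their parent is examined, so a parent
	 * only looks empty once its stale leaves are already gone -- the
	 * ordering property eeh_pe_cleanup() relies on.
	 */
	static void cleanup(struct node **slot)
	{
		struct node *n = *slot;
		int i, live = 0;

		for (i = 0; i < n->nr_children; i++)
			if (n->children[i])
				cleanup(&n->children[i]);

		for (i = 0; i < n->nr_children; i++)
			if (n->children[i])
				live++;

		if (n->stale && live == 0) {
			free(n->children);
			free(n);
			*slot = NULL;
		}
	}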
+
+/**
+ * eeh_slot_presence_check - Check if a device is still present in a slot
+ * @pdev: pci_dev to check
+ *
+ * This function may return a false positive if we can't determine the slot's
+ * presence state. This might happen for PCIe slots if the PE containing
+ * the upstream bridge is also frozen, or the bridge is part of the same PE
+ * as the device.
+ *
+ * This shouldn't happen often, but you might see it if you hotplug a PCIe
+ * switch.
+ */
+static bool eeh_slot_presence_check(struct pci_dev *pdev)
+{
+ const struct hotplug_slot_ops *ops;
+ struct pci_slot *slot;
+ u8 state;
+ int rc;
+
+ if (!pdev)
+ return false;
+
+ if (pdev->error_state == pci_channel_io_perm_failure)
+ return false;
+
+ slot = pdev->slot;
+ if (!slot || !slot->hotplug)
+ return true;
+
+ ops = slot->hotplug->ops;
+ if (!ops || !ops->get_adapter_status)
+ return true;
+
+ /* set the attention indicator while we've got the slot ops */
+ if (ops->set_attention_status)
+ ops->set_attention_status(slot->hotplug, 1);
+
+ rc = ops->get_adapter_status(slot->hotplug, &state);
+ if (rc)
+ return true;
+
+ return !!state;
+}
+
+static void eeh_clear_slot_attention(struct pci_dev *pdev)
+{
+ const struct hotplug_slot_ops *ops;
+ struct pci_slot *slot;
+
+ if (!pdev)
+ return;
+
+ if (pdev->error_state == pci_channel_io_perm_failure)
+ return;
+
+ slot = pdev->slot;
+ if (!slot || !slot->hotplug)
+ return;
+
+ ops = slot->hotplug->ops;
+ if (!ops || !ops->set_attention_status)
+ return;
+
+ ops->set_attention_status(slot->hotplug, 0);
+}
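
Taken together the two helpers bracket a recovery attempt: the presence check lights the attention indicator on the way in, and the clear helper turns it off on the way out. A sketch of the intended flow (simplified; the real sequence is in eeh_handle_normal_event() below):

	static void recover_one_slot(struct pci_dev *pdev)
	{
		/* the presence check doubles as "attention LED on" */
		if (!eeh_slot_presence_check(pdev))
			return;		/* slot is empty, nothing to recover */

		/* ... PE reset and driver callbacks happen here ... */

		/* LED off once the device has been recovered (or failed) */
		eeh_clear_slot_attention(pdev);
	}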
+
/**
* eeh_handle_normal_event - Handle EEH events on a specific PE
* @pe: EEH PE - which should not be used after we return, as it may
@@ -774,6 +844,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
struct eeh_rmv_data rmv_data =
{LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
+ int devices = 0;
bus = eeh_pe_bus_get(pe);
if (!bus) {
@@ -782,7 +853,59 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
- eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ /*
+ * When devices are hot-removed we might get an EEH due to
+ * a driver attempting to touch the MMIO space of a removed
+ * device. In this case we don't have a device to recover
+ * so suppress the event if we can't find any present devices.
+ *
+ * The hotplug driver should take care of tearing down the
+ * device itself.
+ */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ if (eeh_slot_presence_check(edev->pdev))
+ devices++;
+
+ if (!devices) {
+ pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
+ pe->phb->global_number, pe->addr);
+ goto out; /* nothing to recover */
+ }
+
+ /* Log the event */
+ if (pe->type & EEH_PE_PHB) {
+ pr_err("EEH: Recovering PHB#%x, location: %s\n",
+ pe->phb->global_number, eeh_pe_loc_get(pe));
+ } else {
+ struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
+
+ pr_err("EEH: Recovering PHB#%x-PE#%x\n",
+ pe->phb->global_number, pe->addr);
+ pr_err("EEH: PE location: %s, PHB location: %s\n",
+ eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
+ }
+
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Print the saved stack trace now that we've verified there's
+ * something to recover.
+ */
+ if (pe->trace_entries) {
+ void **ptrs = (void **) pe->stack_trace;
+ int i;
+
+ pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+ pe->phb->global_number, pe->addr);
+
+ /* FIXME: Use the same format as dump_stack() */
+ pr_err("EEH: Call Trace:\n");
+ for (i = 0; i < pe->trace_entries; i++)
+ pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]);
+
+ pe->trace_entries = 0;
+ }
+#endif /* CONFIG_STACKTRACE */
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
@@ -793,6 +916,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
result = PCI_ERS_RESULT_DISCONNECT;
}
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
* to accomplish the reset. Each child gets a report of the
@@ -969,6 +1096,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
}
+
+out:
+ /*
+	 * Clean up any PEs without devices. While marked as EEH_PE_RECOVERING
+	 * we don't want to modify the PE tree structure, so we do it here.
+ */
+ eeh_pe_cleanup(pe);
+
+ /* clear the slot attention LED for all recovered devices */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ eeh_clear_slot_attention(edev->pdev);
+
eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
}
@@ -981,7 +1121,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
*/
void eeh_handle_special_event(void)
{
- struct eeh_pe *pe, *phb_pe;
+ struct eeh_pe *pe, *phb_pe, *tmp_pe;
+ struct eeh_dev *edev, *tmp_edev;
struct pci_bus *bus;
struct pci_controller *hose;
unsigned long flags;
@@ -1040,6 +1181,7 @@ void eeh_handle_special_event(void)
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
eeh_handle_normal_event(pe);
} else {
pci_lock_rescan_remove();
@@ -1050,6 +1192,10 @@ void eeh_handle_special_event(void)
(phb_pe->state & EEH_PE_RECOVERING))
continue;
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
/* Notify all devices to be down */
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_set_channel_state(pe, pci_channel_io_perm_failure);
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 64cfbe41174b..a7a8dc182efb 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -40,7 +40,6 @@ static int eeh_event_handler(void * dummy)
{
unsigned long flags;
struct eeh_event *event;
- struct eeh_pe *pe;
while (!kthread_should_stop()) {
if (wait_for_completion_interruptible(&eeh_eventlist_event))
@@ -59,19 +58,10 @@ static int eeh_event_handler(void * dummy)
continue;
/* We might have event without binding PE */
- pe = event->pe;
- if (pe) {
- if (pe->type & EEH_PE_PHB)
- pr_info("EEH: Detected error on PHB#%x\n",
- pe->phb->global_number);
- else
- pr_info("EEH: Detected PCI bus error on "
- "PHB#%x-PE#%x\n",
- pe->phb->global_number, pe->addr);
- eeh_handle_normal_event(pe);
- } else {
+ if (event->pe)
+ eeh_handle_normal_event(event->pe);
+ else
eeh_handle_special_event();
- }
kfree(event);
}
@@ -121,6 +111,24 @@ int __eeh_send_failure_event(struct eeh_pe *pe)
}
event->pe = pe;
+ /*
+ * Mark the PE as recovering before inserting it in the queue.
+ * This prevents the PE from being free()ed by a hotplug driver
+ * while the PE is sitting in the event queue.
+ */
+ if (pe) {
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Save the current stack trace so we can dump it from the
+ * event handler thread.
+ */
+ pe->trace_entries = stack_trace_save(pe->stack_trace,
+ ARRAY_SIZE(pe->stack_trace), 0);
+#endif /* CONFIG_STACKTRACE */
+
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ }
+
/* We may or may not be called in an interrupt context */
spin_lock_irqsave(&eeh_eventlist_lock, flags);
list_add(&event->list, &eeh_eventlist);
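
The trace saved here is consumed by the CONFIG_STACKTRACE block added to eeh_handle_normal_event() above. A minimal sketch of the same capture-now, print-later pattern; the struct and the names are illustrative:

	#include <linux/stacktrace.h>
	#include <linux/printk.h>

	struct deferred_trace {
		unsigned long entries[64];
		unsigned int nr;
	};

	static void trace_capture(struct deferred_trace *t)
	{
		/* record who raised the event, wherever that happens */
		t->nr = stack_trace_save(t->entries, ARRAY_SIZE(t->entries), 0);
	}

	static void trace_dump(struct deferred_trace *t)
	{
		unsigned int i;

		/* printed later, from the event handler thread */
		for (i = 0; i < t->nr; i++)
			pr_err("[%pS]\n", (void *)t->entries[i]);
		t->nr = 0;
	}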
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 854cef7b18f4..177852e39a25 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -231,29 +231,22 @@ void *eeh_pe_traverse(struct eeh_pe *root,
* The function is used to traverse the devices of the specified
* PE and its child PEs.
*/
-void *eeh_pe_dev_traverse(struct eeh_pe *root,
+void eeh_pe_dev_traverse(struct eeh_pe *root,
eeh_edev_traverse_func fn, void *flag)
{
struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;
- void *ret;
if (!root) {
pr_warn("%s: Invalid PE %p\n",
__func__, root);
- return NULL;
+ return;
}
/* Traverse root PE */
- eeh_for_each_pe(root, pe) {
- eeh_pe_for_each_dev(pe, edev, tmp) {
- ret = fn(edev, flag);
- if (ret)
- return ret;
- }
- }
-
- return NULL;
+ eeh_for_each_pe(root, pe)
+ eeh_pe_for_each_dev(pe, edev, tmp)
+ fn(edev, flag);
}
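
With the traversal returning void, callbacks drop their return-NULL boilerplate. A hypothetical callback in the new style, counting the devices under a PE:

	static void eeh_dev_count(struct eeh_dev *edev, void *userdata)
	{
		int *count = userdata;

		if (edev)
			(*count)++;
	}

	/* usage: int n = 0; eeh_pe_dev_traverse(pe, eeh_dev_count, &n); */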
/**
@@ -379,8 +372,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Check if the PE number is valid */
if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
- pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n",
- __func__, config_addr, pdn->phb->global_number);
+ eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n");
return -EINVAL;
}
@@ -391,42 +383,34 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* components.
*/
pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
- if (pe && !(pe->type & EEH_PE_INVALID)) {
- /* Mark the PE as type of PCI bus */
- pe->type = EEH_PE_BUS;
- edev->pe = pe;
-
- /* Put the edev to PE */
- list_add_tail(&edev->entry, &pe->edevs);
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr);
- return 0;
- } else if (pe && (pe->type & EEH_PE_INVALID)) {
- list_add_tail(&edev->entry, &pe->edevs);
- edev->pe = pe;
- /*
- * We're running to here because of PCI hotplug caused by
- * EEH recovery. We need clear EEH_PE_INVALID until the top.
- */
- parent = pe;
- while (parent) {
- if (!(parent->type & EEH_PE_INVALID))
- break;
- parent->type &= ~EEH_PE_INVALID;
- parent = parent->parent;
- }
+ if (pe) {
+ if (pe->type & EEH_PE_INVALID) {
+ list_add_tail(&edev->entry, &pe->edevs);
+ edev->pe = pe;
+ /*
+			 * We're here because of a PCI hotplug triggered during
+			 * EEH recovery. We need to clear EEH_PE_INVALID all the
+			 * way up to the top of the PE tree.
+ */
+ parent = pe;
+ while (parent) {
+ if (!(parent->type & EEH_PE_INVALID))
+ break;
+ parent->type &= ~EEH_PE_INVALID;
+ parent = parent->parent;
+ }
+
+ eeh_edev_dbg(edev,
+ "Added to device PE (parent: PE#%x)\n",
+ pe->parent->addr);
+ } else {
+ /* Mark the PE as type of PCI bus */
+ pe->type = EEH_PE_BUS;
+ edev->pe = pe;
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device "
- "PE#%x, Parent PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr, pe->parent->addr);
+ /* Put the edev to PE */
+ list_add_tail(&edev->entry, &pe->edevs);
+ eeh_edev_dbg(edev, "Added to bus PE\n");
+ }
return 0;
}
@@ -468,13 +452,8 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
list_add_tail(&pe->child, &parent->child_list);
list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
- "Device PE#%x, Parent PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr, pe->parent->addr);
+ eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n",
+ pe->parent->addr);
return 0;
}
@@ -491,16 +470,12 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent, *child;
+ bool keep, recover;
int cnt;
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
pe = eeh_dev_to_pe(edev);
if (!pe) {
- pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
- __func__, pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn));
+ eeh_edev_dbg(edev, "No PE found for device.\n");
return -EEXIST;
}
@@ -516,10 +491,21 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
*/
while (1) {
parent = pe->parent;
+
+ /* PHB PEs should never be removed */
if (pe->type & EEH_PE_PHB)
break;
- if (!(pe->state & EEH_PE_KEEP)) {
+ /*
+ * XXX: KEEP is set while resetting a PE. I don't think it's
+ * ever set without RECOVERING also being set. I could
+ * be wrong though so catch that with a WARN.
+		 * be wrong though, so catch that with a WARN.
+ keep = !!(pe->state & EEH_PE_KEEP);
+ recover = !!(pe->state & EEH_PE_RECOVERING);
+ WARN_ON(keep && !recover);
+
+ if (!keep && !recover) {
if (list_empty(&pe->edevs) &&
list_empty(&pe->child_list)) {
list_del(&pe->child);
@@ -528,6 +514,15 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
break;
}
} else {
+ /*
+ * Mark the PE as invalid. At the end of the recovery
+ * process any invalid PEs will be garbage collected.
+ *
+			 * We need to delay freeing them since edevs can be
+			 * removed while traversing the PE tree, which might
+			 * trigger the removal of a PE, and we can't deal
+			 * with that (yet).
+ */
if (list_empty(&pe->edevs)) {
cnt = 0;
list_for_each_entry(child, &pe->child_list, child) {
@@ -623,13 +618,11 @@ void eeh_pe_mark_isolated(struct eeh_pe *root)
}
EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
-static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
+static void __eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
{
int mode = *((int *)flag);
edev->mode |= mode;
-
- return NULL;
}
/**
@@ -717,17 +710,13 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
return;
- pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
- __func__, pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn));
+ eeh_edev_dbg(edev, "Checking PCIe link...\n");
/* Check slot status */
cap = edev->pcie_cap;
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val);
if (!(val & PCI_EXP_SLTSTA_PDS)) {
- pr_debug(" No card in the slot (0x%04x) !\n", val);
+ eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val);
return;
}
@@ -736,7 +725,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
if (val & PCI_EXP_SLTCAP_PCP) {
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val);
if (val & PCI_EXP_SLTCTL_PCC) {
- pr_debug(" In power-off state, power it on ...\n");
+ eeh_edev_dbg(edev, "In power-off state, power it on ...\n");
val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val);
@@ -752,7 +741,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
/* Check link */
eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val);
if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
- pr_debug(" No link reporting capability (0x%08x) \n", val);
+		eeh_edev_dbg(edev, "No link reporting capability (0x%08x)\n", val);
msleep(1000);
return;
}
@@ -769,10 +758,10 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
}
if (val & PCI_EXP_LNKSTA_DLLLA)
- pr_debug(" Link up (%s)\n",
+ eeh_edev_dbg(edev, "Link up (%s)\n",
(val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
else
- pr_debug(" Link not ready (0x%04x)\n", val);
+ eeh_edev_dbg(edev, "Link not ready (0x%04x)\n", val);
}
#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
@@ -852,7 +841,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev)
* the expansion ROM base address, the latency timer, and etc.
* from the saved values in the device node.
*/
-static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
+static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
@@ -864,8 +853,6 @@ static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
if (eeh_ops->restore_config && pdn)
eeh_ops->restore_config(pdn);
-
- return NULL;
}
/**
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 3fa04dda1737..4fb0f1e1017a 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -1,25 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
* Copyright IBM Corporation 2007
* Copyright Linas Vepstas <linas@austin.ibm.com> 2007
*
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
* Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
*/
#include <linux/pci.h>
@@ -30,7 +14,7 @@
/**
* EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic
* @_name: name of file in sysfs directory
- * @_memb: name of member in struct pci_dn to access
+ * @_memb: name of member in struct eeh_dev to access
* @_format: printf format for display
*
* All of the attributes look very similar, so just
@@ -91,7 +75,7 @@ static ssize_t eeh_pe_state_store(struct device *dev,
static DEVICE_ATTR_RW(eeh_pe_state);
-#ifdef CONFIG_PCI_IOV
+#if defined(CONFIG_PCI_IOV) && defined(CONFIG_PPC_PSERIES)
static ssize_t eeh_notify_resume_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -102,7 +86,6 @@ static ssize_t eeh_notify_resume_show(struct device *dev,
if (!edev || !edev->pe)
return -ENODEV;
- pdn = pci_get_pdn(pdev);
return sprintf(buf, "%d\n", pdn->last_allow_rc);
}
@@ -148,7 +131,7 @@ static void eeh_notify_resume_remove(struct pci_dev *pdev)
#else
static inline int eeh_notify_resume_add(struct pci_dev *pdev) { return 0; }
static inline void eeh_notify_resume_remove(struct pci_dev *pdev) { }
-#endif /* CONFIG_PCI_IOV */
+#endif /* CONFIG_PCI_IOV && CONFIG_PPC_PSERIES */
void eeh_sysfs_add_device(struct pci_dev *pdev)
{
@@ -176,22 +159,23 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
{
struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+ if (!edev) {
+ WARN_ON(eeh_enabled());
+ return;
+ }
+
+ edev->mode &= ~EEH_DEV_SYSFS;
+
/*
* The parent directory might have been removed. We needn't
* continue for that case.
*/
- if (!pdev->dev.kobj.sd) {
- if (edev)
- edev->mode &= ~EEH_DEV_SYSFS;
+ if (!pdev->dev.kobj.sd)
return;
- }
device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
eeh_notify_resume_remove(pdev);
-
- if (edev)
- edev->mode &= ~EEH_DEV_SYSFS;
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 54fab22c9a43..0713daa651d9 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -140,6 +140,7 @@ transfer_to_handler:
stw r12,_CTR(r11)
stw r2,_XER(r11)
mfspr r12,SPRN_SPRG_THREAD
+ tovirt_vmstack r12, r12
beq 2f /* if from user, fix up THREAD.regs */
addi r2, r12, -THREAD
addi r11,r1,STACK_FRAME_OVERHEAD
@@ -179,11 +180,13 @@ transfer_to_handler:
2: /* if from kernel, check interrupted DOZE/NAP mode and
* check for stack overflow
*/
- kuap_save_and_lock r11, r12, r9, r2, r0
+ kuap_save_and_lock r11, r12, r9, r2, r6
addi r2, r12, -THREAD
+#ifndef CONFIG_VMAP_STACK
lwz r9,KSP_LIMIT(r12)
cmplw r1,r9 /* if r1 <= ksp_limit */
ble- stack_ovf /* then the kernel stack overflowed */
+#endif
5:
#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
lwz r12,TI_LOCAL_FLAGS(r2)
@@ -195,7 +198,8 @@ transfer_to_handler:
transfer_to_handler_cont:
3:
mflr r9
- tovirt(r2, r2) /* set r2 to current */
+ tovirt_novmstack r2, r2 /* set r2 to current */
+ tovirt_vmstack r9, r9
lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
@@ -210,7 +214,7 @@ transfer_to_handler_cont:
* To speed up the syscall path where interrupts stay on, let's check
* first if we are changing the MSR value at all.
*/
- tophys(r12, r1)
+ tophys_novmstack r12, r1
lwz r12,_MSR(r12)
andi. r12,r12,MSR_EE
bne 1f
@@ -230,7 +234,7 @@ transfer_to_handler_cont:
*/
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
- LOAD_MSR_KERNEL(r0, MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r0
SYNC
@@ -284,9 +288,11 @@ reenable_mmu:
rlwinm r9,r9,0,~MSR_EE
lwz r12,_LINK(r11) /* and return to address in LR */
kuap_restore r11, r2, r3, r4, r5
+ lwz r2, GPR2(r11)
b fast_exception_return
#endif
+#ifndef CONFIG_VMAP_STACK
/*
* On kernel stack overflow, load up an initial stack pointer
* and call StackOverflow(regs), which should not return.
@@ -304,7 +310,7 @@ stack_ovf:
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
@@ -312,6 +318,7 @@ stack_ovf:
mtspr SPRN_SRR1,r10
SYNC
RFI
+#endif
#ifdef CONFIG_TRACE_IRQFLAGS
trace_syscall_entry_irq_off:
@@ -324,7 +331,7 @@ trace_syscall_entry_irq_off:
bl trace_hardirqs_on
/* Now enable for real */
- LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
mtmsr r10
REST_GPR(0, r1)
@@ -394,10 +401,10 @@ ret_from_syscall:
#endif
mr r6,r3
/* disable interrupts so current_thread_info()->flags can't change */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
SYNC
- MTMSRD(r10)
+ mtmsr r10
lwz r9,TI_FLAGS(r2)
li r8,-MAX_ERRNO
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
@@ -554,7 +561,7 @@ syscall_exit_work:
*/
ori r10,r10,MSR_EE
SYNC
- MTMSRD(r10)
+ mtmsr r10
/* Save NVGPRS if they're not saved already */
lwz r4,_TRAP(r1)
@@ -621,7 +628,6 @@ ppc_swapcontext:
*/
.globl handle_page_fault
handle_page_fault:
- stw r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_BOOK3S_32
andis. r0,r5,DSISR_DABRMATCH@h
@@ -697,7 +703,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
and. r0,r0,r11 /* FP or altivec or SPE enabled? */
beq+ 1f
andc r11,r11,r0
- MTMSRD(r11)
+ mtmsr r11
isync
1: stw r11,_MSR(r1)
mfcr r10
@@ -777,11 +783,19 @@ fast_exception_return:
1: lis r3,exc_exit_restart_end@ha
addi r3,r3,exc_exit_restart_end@l
cmplw r12,r3
+#ifdef CONFIG_PPC_BOOK3S_601
+ bge 2b
+#else
bge 3f
+#endif
lis r4,exc_exit_restart@ha
addi r4,r4,exc_exit_restart@l
cmplw r12,r4
+#ifdef CONFIG_PPC_BOOK3S_601
+ blt 2b
+#else
blt 3f
+#endif
lis r3,fee_restarts@ha
tophys(r3,r3)
lwz r5,fee_restarts@l(r3)
@@ -800,9 +814,6 @@ fee_restarts:
/* aargh, we don't know which trap this is */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
3:
-BEGIN_FTR_SECTION
- b 2b
-END_FTR_SECTION_IFSET(CPU_FTR_601)
li r10,-1
stw r10,_TRAP(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -824,9 +835,9 @@ ret_from_except:
* can't change between when we test it and when we return
* from the interrupt. */
/* Note: We don't bother telling lockdep about it */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC /* Some chip revs have problems here... */
- MTMSRD(r10) /* disable interrupts */
+ mtmsr r10 /* disable interrupts */
lwz r3,_MSR(r1) /* Returning to user mode? */
andi. r0,r3,MSR_PR
@@ -892,7 +903,7 @@ resume_kernel:
bne- 0b
1:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
/* check current_thread_info->preempt_count */
lwz r0,TI_PREEMPT(r2)
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
@@ -916,7 +927,7 @@ resume_kernel:
*/
bl trace_hardirqs_on
#endif
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
restore_kuap:
kuap_restore r1, r2, r9, r10, r0
@@ -991,9 +1002,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
* can restart the exception exit path at the label
* exc_exit_restart below. -- paulus
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
SYNC
- MTMSRD(r10) /* clear the RI bit */
+ mtmsr r10 /* clear the RI bit */
.globl exc_exit_restart
exc_exit_restart:
lwz r12,_NIP(r1)
@@ -1066,7 +1077,7 @@ exc_exit_restart_end:
REST_NVGPRS(r1); \
lwz r3,_MSR(r1); \
andi. r3,r3,MSR_PR; \
- LOAD_MSR_KERNEL(r10,MSR_KERNEL); \
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \
bne user_exc_return; \
lwz r0,GPR0(r1); \
lwz r2,GPR2(r1); \
@@ -1229,16 +1240,16 @@ do_resched: /* r10 contains MSR_KERNEL here */
#endif
ori r10,r10,MSR_EE
SYNC
- MTMSRD(r10) /* hard-enable interrupts */
+ mtmsr r10 /* hard-enable interrupts */
bl schedule
recheck:
/* Note: And we don't tell it we are disabling them again
* neither. Those disable/enable cycles used to peek at
* TI_FLAGS aren't advertised.
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC
- MTMSRD(r10) /* disable interrupts */
+ mtmsr r10 /* disable interrupts */
lwz r9,TI_FLAGS(r2)
andi. r0,r9,_TIF_NEED_RESCHED
bne- do_resched
@@ -1247,7 +1258,7 @@ recheck:
do_user_signal: /* r10 contains MSR_KERNEL here */
ori r10,r10,MSR_EE
SYNC
- MTMSRD(r10) /* hard-enable interrupts */
+ mtmsr r10 /* hard-enable interrupts */
/* save r13-r31 in the exception frame, if not already done */
lwz r3,_TRAP(r1)
andi. r0,r3,1
@@ -1270,11 +1281,19 @@ nonrecoverable:
lis r10,exc_exit_restart_end@ha
addi r10,r10,exc_exit_restart_end@l
cmplw r12,r10
+#ifdef CONFIG_PPC_BOOK3S_601
+ bgelr
+#else
bge 3f
+#endif
lis r11,exc_exit_restart@ha
addi r11,r11,exc_exit_restart@l
cmplw r12,r11
+#ifdef CONFIG_PPC_BOOK3S_601
+ bltlr
+#else
blt 3f
+#endif
lis r10,ee_restarts@ha
lwz r12,ee_restarts@l(r10)
addi r12,r12,1
@@ -1283,9 +1302,6 @@ nonrecoverable:
blr
3: /* OK, we can't recover, kill this process */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_601)
lwz r3,_TRAP(r1)
andi. r0,r3,1
beq 5f
@@ -1324,14 +1340,14 @@ _GLOBAL(enter_rtas)
lis r6,1f@ha /* physical return address for rtas */
addi r6,r6,1f@l
tophys(r6,r6)
- tophys(r7,r1)
+ tophys_novmstack r7, r1
lwz r8,RTASENTRY(r4)
lwz r4,RTASBASE(r4)
mfmsr r9
stw r9,8(r1)
- LOAD_MSR_KERNEL(r0,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
SYNC /* disable interrupts so SRR0/1 */
- MTMSRD(r0) /* don't get trashed */
+ mtmsr r0 /* don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
mtlr r6
stw r7, THREAD + RTAS_SP(r2)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 0a0b5310f54a..6ba675b0cf7d 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -69,24 +69,20 @@ BEGIN_FTR_SECTION
bne .Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
- andi. r10,r12,MSR_PR
mr r10,r1
- addi r1,r1,-INT_FRAME_SIZE
- beq- 1f
ld r1,PACAKSAVE(r13)
-1: std r10,0(r1)
+ std r10,0(r1)
std r11,_NIP(r1)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
- beq 2f /* if from kernel mode */
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
-2: std r2,GPR2(r1)
+ std r2,GPR2(r1)
std r3,GPR3(r1)
mfcr r2
std r4,GPR4(r1)
@@ -122,14 +118,13 @@ END_BTB_FLUSH_SECTION
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
- beq 33f
- /* if from user, see if there are any DTL entries to process */
+ /* see if there are any DTL entries to process */
ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
ld r11,PACA_DTL_RIDX(r13) /* get log read index */
addi r10,r10,LPPACA_DTLIDX
LDX_BE r10,0,r10 /* get log write index */
- cmpd cr1,r11,r10
- beq+ cr1,33f
+ cmpd r11,r10
+ beq+ 33f
bl accumulate_stolen_time
REST_GPR(0,r1)
REST_4GPRS(3,r1)
@@ -203,6 +198,7 @@ system_call: /* label this so stack traces look sane */
mtctr r12
bctrl /* Call handler */
+ /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
.Lsyscall_exit:
std r3,RESULT(r1)
@@ -216,11 +212,6 @@ system_call: /* label this so stack traces look sane */
ld r12, PACA_THREAD_INFO(r13)
ld r8,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3S
- /* No MSR:RI on BookE */
- andi. r10,r8,MSR_RI
- beq- .Lunrecov_restore
-#endif
/*
* This is a few instructions into the actual syscall exit path (which actually
@@ -546,6 +537,7 @@ flush_count_cache:
/* Save LR into r9 */
mflr r9
+ // Flush the link stack
.rept 64
bl .+4
.endr
@@ -555,6 +547,11 @@ flush_count_cache:
.balign 32
/* Restore LR */
1: mtlr r9
+
+ // If we're just flushing the link stack, return here
+3: nop
+ patch_site 3b patch__flush_link_stack_return
+
li r9,0x7fff
mtctr r9
@@ -600,8 +597,7 @@ _GLOBAL(_switch)
std r0,16(r1)
stdu r1,-SWITCH_FRAME_SIZE(r1)
/* r3-r13 are caller saved -- Cort */
- SAVE_8GPRS(14, r1)
- SAVE_10GPRS(22, r1)
+ SAVE_NVGPRS(r1)
std r0,_NIP(r1) /* Return to switch caller */
mfcr r23
std r23,_CCR(r1)
@@ -725,8 +721,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtcrf 0xFF,r6
/* r3-r13 are destroyed -- Cort */
- REST_8GPRS(14, r1)
- REST_10GPRS(22, r1)
+ REST_NVGPRS(r1)
/* convert old thread to its task_struct for return value */
addi r3,r3,-THREAD
@@ -849,7 +844,7 @@ resume_kernel:
bne- 0b
1:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
/* Check if we need to preempt */
andi. r0,r4,_TIF_NEED_RESCHED
beq+ restore
@@ -880,7 +875,7 @@ resume_kernel:
li r10,MSR_RI
mtmsrd r10,1 /* Update machine state */
#endif /* CONFIG_PPC_BOOK3E */
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
.globl fast_exc_return_irq
fast_exc_return_irq:
@@ -1158,8 +1153,7 @@ _GLOBAL(enter_rtas)
*/
SAVE_GPR(2, r1) /* Save the TOC */
SAVE_GPR(13, r1) /* Save paca */
- SAVE_8GPRS(14, r1) /* Save the non-volatiles */
- SAVE_10GPRS(22, r1) /* ditto */
+ SAVE_NVGPRS(r1) /* Save the non-volatiles */
mfcr r4
std r4,_CCR(r1)
@@ -1266,8 +1260,7 @@ rtas_restore_regs:
/* relocation is on at this point */
REST_GPR(2, r1) /* Restore the TOC */
REST_GPR(13, r1) /* Restore paca */
- REST_8GPRS(14, r1) /* Restore the non-volatiles */
- REST_10GPRS(22, r1) /* ditto */
+ REST_NVGPRS(r1) /* Restore the non-volatiles */
GET_PACA(r13)
@@ -1301,8 +1294,7 @@ _GLOBAL(enter_prom)
*/
SAVE_GPR(2, r1)
SAVE_GPR(13, r1)
- SAVE_8GPRS(14, r1)
- SAVE_10GPRS(22, r1)
+ SAVE_NVGPRS(r1)
mfcr r10
mfmsr r11
std r10,_CCR(r1)
@@ -1346,8 +1338,7 @@ _GLOBAL(enter_prom)
/* Restore other registers */
REST_GPR(2, r1)
REST_GPR(13, r1)
- REST_8GPRS(14, r1)
- REST_10GPRS(22, r1)
+ REST_NVGPRS(r1)
ld r4,_CCR(r1)
mtcr r4
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 1cfb3da4a84a..e4076e3c072d 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -750,12 +750,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
-#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
-#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+ cmpld cr0, r10, r14
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, __end_interrupts)
+ cmpld cr1, r10, r14
+#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -820,12 +822,14 @@ kernel_dbg_exc:
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
-#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
-#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+ cmpld cr0, r10, r14
+	LOAD_REG_IMMEDIATE_SYM(r14, r15, __end_interrupts)
+ cmpld cr1, r10, r14
+#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -1342,16 +1346,6 @@ skpinv: addi r6,r6,1 /* Increment */
sync
isync
-/*
- * The mapping only needs to be cache-coherent on SMP, except on
- * Freescale e500mc derivatives where it's also needed for coherent DMA.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDED MAS2_M
-#else
-#define M_IF_NEEDED 0
-#endif
-
/* 6. Setup KERNELBASE mapping in TLB[0]
*
* r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
@@ -1364,7 +1358,7 @@ skpinv: addi r6,r6,1 /* Increment */
ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
mtspr SPRN_MAS1,r6
- LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | M_IF_NEEDED)
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | MAS2_M_IF_NEEDED)
mtspr SPRN_MAS2,r6
rlwinm r5,r5,0,0,25
@@ -1449,7 +1443,7 @@ a2_tlbinit_code_start:
a2_tlbinit_after_linear_map:
/* Now we branch the new virtual address mapped by this entry */
- LOAD_REG_IMMEDIATE(r3,1f)
+ LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
mtctr r3
bctr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6ba3cc2ef8ab..ffc15f4f079d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -44,6 +44,58 @@
#endif
/*
+ * Following are fixed section helper macros.
+ *
+ * EXC_REAL_BEGIN/END - real, unrelocated exception vectors
+ * EXC_VIRT_BEGIN/END - virt (AIL), unrelocated exception vectors
+ * TRAMP_REAL_BEGIN - real, unrelocated helpers (virt may call these)
+ * TRAMP_VIRT_BEGIN - virt, unreloc helpers (in practice, real can use)
+ * TRAMP_KVM_BEGIN - KVM handlers, these are put into real, unrelocated memory
+ * EXC_COMMON - After switching to virtual, relocated mode.
+ */
+
+#define EXC_REAL_BEGIN(name, start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_REAL_END(name, start, size) \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_VIRT_BEGIN(name, start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_VIRT_END(name, start, size) \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_COMMON_BEGIN(name) \
+ USE_TEXT_SECTION(); \
+ .balign IFETCH_ALIGN_BYTES; \
+ .global name; \
+ _ASM_NOKPROBE_SYMBOL(name); \
+ DEFINE_FIXED_SYMBOL(name); \
+name:
+
+#define TRAMP_REAL_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(real_trampolines, name)
+
+#define TRAMP_VIRT_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#define TRAMP_KVM_BEGIN(name) \
+ TRAMP_VIRT_BEGIN(name)
+#else
+#define TRAMP_KVM_BEGIN(name)
+#endif
+
+#define EXC_REAL_NONE(start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
+
+#define EXC_VIRT_NONE(start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size)
+
+/*
* We're short on space and time in the exception prolog, so we can't
* use the normal LOAD_REG_IMMEDIATE macro to load the address of label.
* Instead we get the base of the kernel from paca->kernelbase and or in the low
@@ -68,6 +120,7 @@
addis reg,reg,(ABS_ADDR(label))@h
/* Exception register prefixes */
+#define EXC_HV_OR_STD 2 /* depends on HVMODE */
#define EXC_HV 1
#define EXC_STD 0
@@ -127,126 +180,6 @@ BEGIN_FTR_SECTION_NESTED(943) \
std ra,offset(r13); \
END_FTR_SECTION_NESTED(ftr,ftr,943)
-.macro EXCEPTION_PROLOG_0 area
- SET_SCRATCH0(r13) /* save r13 */
- GET_PACA(r13)
- std r9,\area\()+EX_R9(r13) /* save r9 */
- OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
- HMT_MEDIUM
- std r10,\area\()+EX_R10(r13) /* save r10 - r12 */
- OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
-.endm
-
-.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, dar, dsisr, bitmask
- OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
- OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
- INTERRUPT_TO_KERNEL
- SAVE_CTR(r10, \area\())
- mfcr r9
- .if \kvm
- KVMTEST \hsrr \vec
- .endif
- .if \bitmask
- lbz r10,PACAIRQSOFTMASK(r13)
- andi. r10,r10,\bitmask
- /* Associate vector numbers with bits in paca->irq_happened */
- .if \vec == 0x500 || \vec == 0xea0
- li r10,PACA_IRQ_EE
- .elseif \vec == 0x900
- li r10,PACA_IRQ_DEC
- .elseif \vec == 0xa00 || \vec == 0xe80
- li r10,PACA_IRQ_DBELL
- .elseif \vec == 0xe60
- li r10,PACA_IRQ_HMI
- .elseif \vec == 0xf00
- li r10,PACA_IRQ_PMI
- .else
- .abort "Bad maskable vector"
- .endif
-
- .if \hsrr
- bne masked_Hinterrupt
- .else
- bne masked_interrupt
- .endif
- .endif
-
- std r11,\area\()+EX_R11(r13)
- std r12,\area\()+EX_R12(r13)
-
- /*
- * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
- * because a d-side MCE will clobber those registers so is
- * not recoverable if they are live.
- */
- GET_SCRATCH0(r10)
- std r10,\area\()+EX_R13(r13)
- .if \dar
- mfspr r10,SPRN_DAR
- std r10,\area\()+EX_DAR(r13)
- .endif
- .if \dsisr
- mfspr r10,SPRN_DSISR
- stw r10,\area\()+EX_DSISR(r13)
- .endif
-.endm
-
-.macro EXCEPTION_PROLOG_2_REAL label, hsrr, set_ri
- ld r10,PACAKMSR(r13) /* get MSR value for kernel */
- .if ! \set_ri
- xori r10,r10,MSR_RI /* Clear MSR_RI */
- .endif
- .if \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- mtspr SPRN_HSRR1,r10
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- mtspr SPRN_SRR1,r10
- .endif
- LOAD_HANDLER(r10, \label\())
- .if \hsrr
- mtspr SPRN_HSRR0,r10
- HRFI_TO_KERNEL
- .else
- mtspr SPRN_SRR0,r10
- RFI_TO_KERNEL
- .endif
- b . /* prevent speculative execution */
-.endm
-
-.macro EXCEPTION_PROLOG_2_VIRT label, hsrr
-#ifdef CONFIG_RELOCATABLE
- .if \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- .endif
- LOAD_HANDLER(r12, \label\())
- mtctr r12
- .if \hsrr
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- .else
- mfspr r12,SPRN_SRR1 /* and HSRR1 */
- .endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- bctr
-#else
- .if \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- .endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- b \label
-#endif
-.endm
-
/*
* Branch to label using its 0xC000 address. This results in instruction
* address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
@@ -260,6 +193,11 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtctr reg; \
bctr
+.macro INT_KVM_HANDLER name, vec, hsrr, area, skip
+ TRAMP_KVM_BEGIN(\name\()_kvm)
+ KVM_HANDLER \vec, \hsrr, \area, \skip
+.endm
+
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
@@ -272,17 +210,13 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define kvmppc_interrupt kvmppc_interrupt_pr
#endif
-.macro KVMTEST hsrr, n
+.macro KVMTEST name, hsrr, n
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi r10,0
- .if \hsrr
- bne do_kvm_H\n
- .else
- bne do_kvm_\n
- .endif
+ bne \name\()_kvm
.endm
-.macro KVM_HANDLER area, hsrr, n, skip
+.macro KVM_HANDLER vec, hsrr, area, skip
.if \skip
cmpwi r10,KVM_GUEST_MODE_SKIP
beq 89f
@@ -301,10 +235,16 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
std r12,HSTATE_SCRATCH0(r13)
sldi r12,r9,32
/* HSRR variants have the 0x2 bit added to their trap number */
- .if \hsrr
- ori r12,r12,(\n + 0x2)
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ ori r12,r12,(\vec + 0x2)
+ FTR_SECTION_ELSE
+ ori r12,r12,(\vec)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ ori r12,r12,(\vec + 0x2)
.else
- ori r12,r12,(\n)
+ ori r12,r12,(\vec)
.endif
#ifdef CONFIG_RELOCATABLE
@@ -329,7 +269,13 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
89: mtocrf 0x80,r9
ld r9,\area+EX_R9(r13)
ld r10,\area+EX_R10(r13)
- .if \hsrr
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ b kvmppc_skip_Hinterrupt
+ FTR_SECTION_ELSE
+ b kvmppc_skip_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
b kvmppc_skip_Hinterrupt
.else
b kvmppc_skip_interrupt
@@ -338,88 +284,328 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endm
#else
-.macro KVMTEST hsrr, n
+.macro KVMTEST name, hsrr, n
.endm
-.macro KVM_HANDLER area, hsrr, n, skip
+.macro KVM_HANDLER vec, hsrr, area, skip
.endm
#endif
-#define EXCEPTION_PROLOG_COMMON_1() \
- std r9,_CCR(r1); /* save CR in stackframe */ \
- std r11,_NIP(r1); /* save SRR0 in stackframe */ \
- std r12,_MSR(r1); /* save SRR1 in stackframe */ \
- std r10,0(r1); /* make stack chain pointer */ \
- std r0,GPR0(r1); /* save r0 in stackframe */ \
- std r10,GPR1(r1); /* save r1 in stackframe */ \
-
-/* Save original regs values from save area to stack frame. */
-#define EXCEPTION_PROLOG_COMMON_2(area) \
- ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \
- ld r10,area+EX_R10(r13); \
- std r9,GPR9(r1); \
- std r10,GPR10(r1); \
- ld r9,area+EX_R11(r13); /* move r11 - r13 to stackframe */ \
- ld r10,area+EX_R12(r13); \
- ld r11,area+EX_R13(r13); \
- std r9,GPR11(r1); \
- std r10,GPR12(r1); \
- std r11,GPR13(r1); \
-BEGIN_FTR_SECTION_NESTED(66); \
- ld r10,area+EX_CFAR(r13); \
- std r10,ORIG_GPR3(r1); \
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
- GET_CTR(r10, area); \
- std r10,_CTR(r1);
-
-#define EXCEPTION_PROLOG_COMMON_3(trap) \
- std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- mflr r9; /* Get LR, later save to stack */ \
- ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
- std r9,_LINK(r1); \
- lbz r10,PACAIRQSOFTMASK(r13); \
- mfspr r11,SPRN_XER; /* save XER in stackframe */ \
- std r10,SOFTE(r1); \
- std r11,_XER(r1); \
- li r9,(trap)+1; \
- std r9,_TRAP(r1); /* set trap number */ \
- li r10,0; \
- ld r11,exception_marker@toc(r2); \
- std r10,RESULT(r1); /* clear regs->result */ \
- std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */
+.macro INT_SAVE_SRR_AND_JUMP label, hsrr, set_ri
+ ld r10,PACAKMSR(r13) /* get MSR value for kernel */
+ .if ! \set_ri
+ xori r10,r10,MSR_RI /* Clear MSR_RI */
+ .endif
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ mtspr SPRN_HSRR1,r10
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ mtspr SPRN_SRR1,r10
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ mtspr SPRN_HSRR1,r10
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ mtspr SPRN_SRR1,r10
+ .endif
+ LOAD_HANDLER(r10, \label\())
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mtspr SPRN_HSRR0,r10
+ HRFI_TO_KERNEL
+ FTR_SECTION_ELSE
+ mtspr SPRN_SRR0,r10
+ RFI_TO_KERNEL
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mtspr SPRN_HSRR0,r10
+ HRFI_TO_KERNEL
+ .else
+ mtspr SPRN_SRR0,r10
+ RFI_TO_KERNEL
+ .endif
+ b . /* prevent speculative execution */
+.endm
+
+/* INT_SAVE_SRR_AND_JUMP works for real or virt, this is faster but virt only */
+.macro INT_VIRT_SAVE_SRR_AND_JUMP label, hsrr
+#ifdef CONFIG_RELOCATABLE
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ .endif
+ LOAD_HANDLER(r12, \label\())
+ mtctr r12
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+	mfspr	r12,SPRN_SRR1		/* and SRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ .else
+	mfspr	r12,SPRN_SRR1		/* and SRR1 */
+ .endif
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set RI (EE=0) */
+ bctr
+#else
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ .endif
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set RI (EE=0) */
+ b \label
+#endif
+.endm
+
+/*
+ * This is the BOOK3S interrupt entry code macro.
+ *
+ * This can result in one of several things happening:
+ * - Branch to the _common handler, relocated, in virtual mode.
+ * These are normal interrupts (synchronous and asynchronous) handled by
+ * the kernel.
+ * - Branch to KVM, relocated but real mode interrupts remain in real mode.
+ * These occur when HSTATE_IN_GUEST is set. The interrupt may be caused by
+ * / intended for host or guest kernel, but KVM must always be involved
+ * because the machine state is set for guest execution.
+ * - Branch to the masked handler, unrelocated.
+ * These occur when maskable asynchronous interrupts are taken with the
+ * irq_soft_mask set.
+ * - Branch to an "early" handler in real mode but relocated.
+ * This is done if early=1. MCE and HMI use these to handle errors in real
+ * mode.
+ * - Fall through and continue executing in real, unrelocated mode.
+ * This is done if early=2.
+ */
+.macro INT_HANDLER name, vec, ool=0, early=0, virt=0, hsrr=0, area=PACA_EXGEN, ri=1, dar=0, dsisr=0, bitmask=0, kvm=0
+ SET_SCRATCH0(r13) /* save r13 */
+ GET_PACA(r13)
+ std r9,\area\()+EX_R9(r13) /* save r9 */
+ OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
+ HMT_MEDIUM
+ std r10,\area\()+EX_R10(r13) /* save r10 - r12 */
+ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+ .if \ool
+ .if !\virt
+ b tramp_real_\name
+ .pushsection .text
+ TRAMP_REAL_BEGIN(tramp_real_\name)
+ .else
+ b tramp_virt_\name
+ .pushsection .text
+ TRAMP_VIRT_BEGIN(tramp_virt_\name)
+ .endif
+ .endif
+
+ OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
+ OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
+ INTERRUPT_TO_KERNEL
+ SAVE_CTR(r10, \area\())
+ mfcr r9
+ .if \kvm
+ KVMTEST \name \hsrr \vec
+ .endif
+ .if \bitmask
+ lbz r10,PACAIRQSOFTMASK(r13)
+ andi. r10,r10,\bitmask
+ /* Associate vector numbers with bits in paca->irq_happened */
+ .if \vec == 0x500 || \vec == 0xea0
+ li r10,PACA_IRQ_EE
+ .elseif \vec == 0x900
+ li r10,PACA_IRQ_DEC
+ .elseif \vec == 0xa00 || \vec == 0xe80
+ li r10,PACA_IRQ_DBELL
+ .elseif \vec == 0xe60
+ li r10,PACA_IRQ_HMI
+ .elseif \vec == 0xf00
+ li r10,PACA_IRQ_PMI
+ .else
+ .abort "Bad maskable vector"
+ .endif
+
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ bne masked_Hinterrupt
+ FTR_SECTION_ELSE
+ bne masked_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ bne masked_Hinterrupt
+ .else
+ bne masked_interrupt
+ .endif
+ .endif
+
+ std r11,\area\()+EX_R11(r13)
+ std r12,\area\()+EX_R12(r13)
+
+ /*
+ * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
+ * because a d-side MCE will clobber those registers so is
+ * not recoverable if they are live.
+ */
+ GET_SCRATCH0(r10)
+ std r10,\area\()+EX_R13(r13)
+ .if \dar
+ .if \hsrr
+ mfspr r10,SPRN_HDAR
+ .else
+ mfspr r10,SPRN_DAR
+ .endif
+ std r10,\area\()+EX_DAR(r13)
+ .endif
+ .if \dsisr
+ .if \hsrr
+ mfspr r10,SPRN_HDSISR
+ .else
+ mfspr r10,SPRN_DSISR
+ .endif
+ stw r10,\area\()+EX_DSISR(r13)
+ .endif
+
+ .if \early == 2
+ /* nothing more */
+ .elseif \early
+ mfctr r10 /* save ctr, even for !RELOCATABLE */
+ BRANCH_TO_C000(r11, \name\()_early_common)
+ .elseif !\virt
+ INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri
+ .else
+ INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr
+ .endif
+ .if \ool
+ .popsection
+ .endif
+.endm
/*
* On entry r13 points to the paca, r9-r13 are saved in the paca,
* r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
* SRR1, and relocation is on.
+ *
+ * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
+ * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
*/
-#define EXCEPTION_COMMON(area, trap) \
- andi. r10,r12,MSR_PR; /* See if coming from user */ \
- mr r10,r1; /* Save r1 */ \
- subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \
- beq- 1f; \
- ld r1,PACAKSAVE(r13); /* kernel stack to use */ \
-1: tdgei r1,-INT_FRAME_SIZE; /* trap if r1 is in userspace */ \
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; \
-3: EXCEPTION_PROLOG_COMMON_1(); \
- kuap_save_amr_and_lock r9, r10, cr1, cr0; \
- beq 4f; /* if from kernel mode */ \
- ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \
- SAVE_PPR(area, r9); \
-4: EXCEPTION_PROLOG_COMMON_2(area); \
- EXCEPTION_PROLOG_COMMON_3(trap); \
+.macro INT_COMMON vec, area, stack, kuap, reconcile, dar, dsisr
+ .if \stack
+ andi. r10,r12,MSR_PR /* See if coming from user */
+ mr r10,r1 /* Save r1 */
+ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */
+ beq- 100f
+ ld r1,PACAKSAVE(r13) /* kernel stack to use */
+100: tdgei r1,-INT_FRAME_SIZE /* trap if r1 is in userspace */
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
+ .endif
+
+ std r9,_CCR(r1) /* save CR in stackframe */
+ std r11,_NIP(r1) /* save SRR0 in stackframe */
+ std r12,_MSR(r1) /* save SRR1 in stackframe */
+ std r10,0(r1) /* make stack chain pointer */
+ std r0,GPR0(r1) /* save r0 in stackframe */
+ std r10,GPR1(r1) /* save r1 in stackframe */
+
+ .if \stack
+ .if \kuap
+ kuap_save_amr_and_lock r9, r10, cr1, cr0
+ .endif
+ beq 101f /* if from kernel mode */
+ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
+ SAVE_PPR(\area, r9)
+101:
+ .else
+ .if \kuap
+ kuap_save_amr_and_lock r9, r10, cr1
+ .endif
+ .endif
+
+ /* Save original regs values from save area to stack frame. */
+ ld r9,\area+EX_R9(r13) /* move r9, r10 to stackframe */
+ ld r10,\area+EX_R10(r13)
+ std r9,GPR9(r1)
+ std r10,GPR10(r1)
+ ld r9,\area+EX_R11(r13) /* move r11 - r13 to stackframe */
+ ld r10,\area+EX_R12(r13)
+ ld r11,\area+EX_R13(r13)
+ std r9,GPR11(r1)
+ std r10,GPR12(r1)
+ std r11,GPR13(r1)
+ .if \dar
+ .if \dar == 2
+ ld r10,_NIP(r1)
+ .else
+ ld r10,\area+EX_DAR(r13)
+ .endif
+ std r10,_DAR(r1)
+ .endif
+ .if \dsisr
+ .if \dsisr == 2
+ ld r10,_MSR(r1)
+ lis r11,DSISR_SRR1_MATCH_64S@h
+ and r10,r10,r11
+ .else
+ lwz r10,\area+EX_DSISR(r13)
+ .endif
+ std r10,_DSISR(r1)
+ .endif
+BEGIN_FTR_SECTION_NESTED(66)
+ ld r10,\area+EX_CFAR(r13)
+ std r10,ORIG_GPR3(r1)
+END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
+ GET_CTR(r10, \area)
+ std r10,_CTR(r1)
+ std r2,GPR2(r1) /* save r2 in stackframe */
+ SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */
+ SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */
+ mflr r9 /* Get LR, later save to stack */
+ ld r2,PACATOC(r13) /* get kernel TOC into r2 */
+ std r9,_LINK(r1)
+ lbz r10,PACAIRQSOFTMASK(r13)
+ mfspr r11,SPRN_XER /* save XER in stackframe */
+ std r10,SOFTE(r1)
+ std r11,_XER(r1)
+ li r9,(\vec)+1
+ std r9,_TRAP(r1) /* set trap number */
+ li r10,0
+ ld r11,exception_marker@toc(r2)
+ std r10,RESULT(r1) /* clear regs->result */
+ std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */
+
+ .if \stack
ACCOUNT_STOLEN_TIME
+ .endif
-/*
- * Exception where stack is already set in r1, r1 is saved in r10.
- * PPR save and CPU accounting is not done (for some reason).
- */
-#define EXCEPTION_COMMON_STACK(area, trap) \
- EXCEPTION_PROLOG_COMMON_1(); \
- kuap_save_amr_and_lock r9, r10, cr1; \
- EXCEPTION_PROLOG_COMMON_2(area); \
- EXCEPTION_PROLOG_COMMON_3(trap)
+ .if \reconcile
+ RECONCILE_IRQ_STATE(r10, r11)
+ .endif
+.endm
/*
* Restore all registers including H/SRR0/1 saved in a stack frame of a
@@ -428,6 +614,9 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
.macro EXCEPTION_RESTORE_REGS hsrr
/* Move original SRR0 and SRR1 into the respective regs */
ld r9,_MSR(r1)
+ .if \hsrr == EXC_HV_OR_STD
+ .error "EXC_HV_OR_STD Not implemented for EXCEPTION_RESTORE_REGS"
+ .endif
.if \hsrr
mtspr SPRN_HSRR1,r9
.else
@@ -481,219 +670,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
#define FINISH_NAP
#endif
-/*
- * Following are the BOOK3S exception handler helper macros.
- * Handlers come in a number of types, and each type has a number of varieties.
- *
- * EXC_REAL_* - real, unrelocated exception vectors
- * EXC_VIRT_* - virt (AIL), unrelocated exception vectors
- * TRAMP_REAL_* - real, unrelocated helpers (virt can call these)
- * TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use)
- * TRAMP_KVM - KVM handlers that get put into real, unrelocated
- * EXC_COMMON - virt, relocated common handlers
- *
- * The EXC handlers are given a name, and branch to name_common, or the
- * appropriate KVM or masking function. Vector handler verieties are as
- * follows:
- *
- * EXC_{REAL|VIRT}_BEGIN/END - used to open-code the exception
- *
- * EXC_{REAL|VIRT} - standard exception
- *
- * EXC_{REAL|VIRT}_suffix
- * where _suffix is:
- * - _MASKABLE - maskable exception
- * - _OOL - out of line with trampoline to common handler
- * - _HV - HV exception
- *
- * There can be combinations, e.g., EXC_VIRT_OOL_MASKABLE_HV
- *
- * KVM handlers come in the following verieties:
- * TRAMP_KVM
- * TRAMP_KVM_SKIP
- * TRAMP_KVM_HV
- * TRAMP_KVM_HV_SKIP
- *
- * COMMON handlers come in the following verieties:
- * EXC_COMMON_BEGIN/END - used to open-code the handler
- * EXC_COMMON
- * EXC_COMMON_ASYNC
- *
- * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM
- * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers.
- */
-
-#define __EXC_REAL(name, start, size, area) \
- EXC_REAL_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 area ; \
- EXCEPTION_PROLOG_1 EXC_STD, area, 1, start, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \
- EXC_REAL_END(name, start, size)
-
-#define EXC_REAL(name, start, size) \
- __EXC_REAL(name, start, size, PACA_EXGEN)
-
-#define __EXC_VIRT(name, start, size, realvec, area) \
- EXC_VIRT_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 area ; \
- EXCEPTION_PROLOG_1 EXC_STD, area, 0, realvec, 0, 0, 0; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ; \
- EXC_VIRT_END(name, start, size)
-
-#define EXC_VIRT(name, start, size, realvec) \
- __EXC_VIRT(name, start, size, realvec, PACA_EXGEN)
-
-#define EXC_REAL_MASKABLE(name, start, size, bitmask) \
- EXC_REAL_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN ; \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, start, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \
- EXC_REAL_END(name, start, size)
-
-#define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \
- EXC_VIRT_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN ; \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ; \
- EXC_VIRT_END(name, start, size)
-
-#define EXC_REAL_HV(name, start, size) \
- EXC_REAL_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN; \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, start, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1 ; \
- EXC_REAL_END(name, start, size)
-
-#define EXC_VIRT_HV(name, start, size, realvec) \
- EXC_VIRT_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN; \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV ; \
- EXC_VIRT_END(name, start, size)
-
-#define __EXC_REAL_OOL(name, start, size) \
- EXC_REAL_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN ; \
- b tramp_real_##name ; \
- EXC_REAL_END(name, start, size)
-
-#define __TRAMP_REAL_OOL(name, vec) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, vec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1
-
-#define EXC_REAL_OOL(name, start, size) \
- __EXC_REAL_OOL(name, start, size); \
- __TRAMP_REAL_OOL(name, start)
-
-#define __EXC_REAL_OOL_MASKABLE(name, start, size) \
- __EXC_REAL_OOL(name, start, size)
-
-#define __TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, vec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1
-
-#define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask) \
- __EXC_REAL_OOL_MASKABLE(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE(name, start, bitmask)
-
-#define __EXC_REAL_OOL_HV(name, start, size) \
- __EXC_REAL_OOL(name, start, size)
-
-#define __TRAMP_REAL_OOL_HV(name, vec) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1
-
-#define EXC_REAL_OOL_HV(name, start, size) \
- __EXC_REAL_OOL_HV(name, start, size); \
- __TRAMP_REAL_OOL_HV(name, start)
-
-#define __EXC_REAL_OOL_MASKABLE_HV(name, start, size) \
- __EXC_REAL_OOL(name, start, size)
-
-#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec, bitmask) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1
-
-#define EXC_REAL_OOL_MASKABLE_HV(name, start, size, bitmask) \
- __EXC_REAL_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE_HV(name, start, bitmask)
-
-#define __EXC_VIRT_OOL(name, start, size) \
- EXC_VIRT_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN ; \
- b tramp_virt_##name; \
- EXC_VIRT_END(name, start, size)
-
-#define __TRAMP_VIRT_OOL(name, realvec) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, vec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD
-
-#define EXC_VIRT_OOL(name, start, size, realvec) \
- __EXC_VIRT_OOL(name, start, size); \
- __TRAMP_VIRT_OOL(name, realvec)
-
-#define __EXC_VIRT_OOL_MASKABLE(name, start, size) \
- __EXC_VIRT_OOL(name, start, size)
-
-#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1
-
-#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask) \
- __EXC_VIRT_OOL_MASKABLE(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask)
-
-#define __EXC_VIRT_OOL_HV(name, start, size) \
- __EXC_VIRT_OOL(name, start, size)
-
-#define __TRAMP_VIRT_OOL_HV(name, realvec) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV
-
-#define EXC_VIRT_OOL_HV(name, start, size, realvec) \
- __EXC_VIRT_OOL_HV(name, start, size); \
- __TRAMP_VIRT_OOL_HV(name, realvec)
-
-#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size) \
- __EXC_VIRT_OOL(name, start, size)
-
-#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV
-
-#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec, bitmask) \
- __EXC_VIRT_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask)
-
-#define TRAMP_KVM(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_##n); \
- KVM_HANDLER area, EXC_STD, n, 0
-
-#define TRAMP_KVM_SKIP(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_##n); \
- KVM_HANDLER area, EXC_STD, n, 1
-
-#define TRAMP_KVM_HV(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_H##n); \
- KVM_HANDLER area, EXC_HV, n, 0
-
-#define TRAMP_KVM_HV_SKIP(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_H##n); \
- KVM_HANDLER area, EXC_HV, n, 1
-
#define EXC_COMMON(name, realvec, hdlr) \
EXC_COMMON_BEGIN(name); \
- EXCEPTION_COMMON(PACA_EXGEN, realvec); \
+ INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
bl save_nvgprs; \
- RECONCILE_IRQ_STATE(r10, r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
bl hdlr; \
b ret_from_except
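+/*
+ * The INT_COMMON macro definition is outside this hunk; from its call
+ * sites in this patch the positional arguments appear to be
+ * (vec, area, stack, kuap, reconcile, dar, dsisr). The invocation above
+ * thus requests a stack frame, KUAP save and IRQ-state reconcile without
+ * saving DAR/DSISR; a dar/dsisr value of 2 (see instruction_access_common)
+ * appears to mean "synthesize from SRR0/SRR1" for instruction-side faults.
+ */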
@@ -704,9 +684,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
*/
#define EXC_COMMON_ASYNC(name, realvec, hdlr) \
EXC_COMMON_BEGIN(name); \
- EXCEPTION_COMMON(PACA_EXGEN, realvec); \
+ INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
FINISH_NAP; \
- RECONCILE_IRQ_STATE(r10, r11); \
RUNLATCH_ON; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
bl hdlr; \
@@ -836,9 +815,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif
- EXCEPTION_PROLOG_0 PACA_EXNMI
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXNMI, 1, 0x100, 0, 0, 0
- EXCEPTION_PROLOG_2_REAL system_reset_common, EXC_STD, 0
+ INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0, kvm=1
/*
 * MSR_RI is not enabled, because PACA_EXNMI and the NMI stack are
 * being used, so a nested NMI exception would corrupt them.
@@ -850,9 +827,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
* be dangerous anyway.
*/
EXC_REAL_END(system_reset, 0x100, 0x100)
-
EXC_VIRT_NONE(0x4100, 0x100)
-TRAMP_KVM(PACA_EXNMI, 0x100)
+INT_KVM_HANDLER system_reset, 0x100, EXC_STD, PACA_EXNMI, 0
#ifdef CONFIG_PPC_P7_NAP
TRAMP_REAL_BEGIN(system_reset_idle_wake)
@@ -868,9 +844,7 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
*/
TRAMP_REAL_BEGIN(system_reset_fwnmi)
/* See comment at system_reset exception, don't turn on RI */
- EXCEPTION_PROLOG_0 PACA_EXNMI
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXNMI, 0, 0x100, 0, 0, 0
- EXCEPTION_PROLOG_2_REAL system_reset_common, EXC_STD, 0
+ INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0
#endif /* CONFIG_PPC_PSERIES */
@@ -890,7 +864,7 @@ EXC_COMMON_BEGIN(system_reset_common)
mr r10,r1
ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
- EXCEPTION_COMMON_STACK(PACA_EXNMI, 0x100)
+ INT_COMMON 0x100, PACA_EXNMI, 0, 1, 0, 0, 0
bl save_nvgprs
/*
* Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
@@ -933,26 +907,39 @@ EXC_COMMON_BEGIN(system_reset_common)
EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
- /* This is moved out of line as it can be patched by FW, but
- * some code path might still want to branch into the original
- * vector
+ INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+ /*
+ * MSR_RI is not enabled, because PACA_EXMC is being used, so a
+ * nested machine check would corrupt it. machine_check_common enables
+ * MSR_RI.
*/
- EXCEPTION_PROLOG_0 PACA_EXMC
-BEGIN_FTR_SECTION
- b machine_check_common_early
-FTR_SECTION_ELSE
- b machine_check_pSeries_0
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
EXC_REAL_END(machine_check, 0x200, 0x100)
EXC_VIRT_NONE(0x4200, 0x100)
-TRAMP_REAL_BEGIN(machine_check_common_early)
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 0, 0x200, 0, 0, 0
+
+#ifdef CONFIG_PPC_PSERIES
+TRAMP_REAL_BEGIN(machine_check_fwnmi)
+ /* See comment at machine_check exception, don't turn on RI */
+ INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+#endif
+
+INT_KVM_HANDLER machine_check, 0x200, EXC_STD, PACA_EXMC, 1
+
+#define MACHINE_CHECK_HANDLER_WINDUP \
+ /* Clear MSR_RI before setting SRR0 and SRR1. */\
+ li r9,0; \
+ mtmsrd r9,1; /* Clear MSR_RI */ \
+ /* Decrement paca->in_mce now RI is clear. */ \
+ lhz r12,PACA_IN_MCE(r13); \
+ subi r12,r12,1; \
+ sth r12,PACA_IN_MCE(r13); \
+ EXCEPTION_RESTORE_REGS EXC_STD
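+/*
+ * A machine check taken in this window (RI clear, SRR0/SRR1 live) is
+ * caught by the MSR_RI test in the early handling path below and routed
+ * to unrecoverable_mce rather than being delivered normally.
+ */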
+
+EXC_COMMON_BEGIN(machine_check_early_common)
+ mtctr r10 /* Restore ctr */
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
+
/*
- * Register contents:
- * R13 = PACA
- * R9 = CR
- * Original R9 to R13 is saved on PACA_EXMC
- *
* Switch to mc_emergency stack and handle re-entrancy (we limit
 * the nested MCE up to level 4 to avoid stack overflow).
* Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
@@ -973,103 +960,127 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
* the machine check is handled then the idle wakeup code is called
* to restore state.
*/
- mr r11,r1 /* Save r1 */
lhz r10,PACA_IN_MCE(r13)
cmpwi r10,0 /* Are we in nested machine check */
- bne 0f /* Yes, we are. */
- /* First machine check entry */
- ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
-0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ cmpwi cr1,r10,MAX_MCE_DEPTH /* Are we at maximum nesting */
addi r10,r10,1 /* increment paca->in_mce */
sth r10,PACA_IN_MCE(r13)
- /* Limit nested MCE to level 4 to avoid stack overflow */
- cmpwi r10,MAX_MCE_DEPTH
- bgt 2f /* Check if we hit limit of 4 */
- std r11,GPR1(r1) /* Save r1 on the stack. */
- std r11,0(r1) /* make stack chain pointer */
- mfspr r11,SPRN_SRR0 /* Save SRR0 */
- std r11,_NIP(r1)
- mfspr r11,SPRN_SRR1 /* Save SRR1 */
- std r11,_MSR(r1)
- mfspr r11,SPRN_DAR /* Save DAR */
- std r11,_DAR(r1)
- mfspr r11,SPRN_DSISR /* Save DSISR */
- std r11,_DSISR(r1)
- std r9,_CCR(r1) /* Save CR in stackframe */
+
+ mr r10,r1 /* Save r1 */
+ bne 1f
+ /* First machine check entry */
+ ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
+1: /* Limit nested MCE to level 4 to avoid stack overflow */
+ bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */
+ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+
/* We don't touch AMR here, we never go to virtual mode */
- /* Save r9 through r13 from EXMC save area to stack frame. */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
- mfmsr r11 /* get MSR value */
+ INT_COMMON 0x200, PACA_EXMC, 0, 0, 0, 1, 1
+
BEGIN_FTR_SECTION
- ori r11,r11,MSR_ME /* turn on ME bit */
+ bl enable_machine_check
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- ori r11,r11,MSR_RI /* turn on RI bit */
- LOAD_HANDLER(r12, machine_check_handle_early)
-1: mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r11
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-2:
- /* Stack overflow. Stay on emergency stack and panic.
- * Keep the ME bit off while panic-ing, so that if we hit
- * another machine check we checkstop.
- */
- addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */
- ld r11,PACAKMSR(r13)
- LOAD_HANDLER(r12, unrecover_mce)
- li r10,MSR_ME
- andc r11,r11,r10 /* Turn off MSR_ME */
- b 1b
- b . /* prevent speculative execution */
+ li r10,MSR_RI
+ mtmsrd r10,1
-TRAMP_REAL_BEGIN(machine_check_pSeries)
- .globl machine_check_fwnmi
-machine_check_fwnmi:
- EXCEPTION_PROLOG_0 PACA_EXMC
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_early
+ std r3,RESULT(r1) /* Save result */
+ ld r12,_MSR(r1)
+
+#ifdef CONFIG_PPC_P7_NAP
+ /*
+ * Check if thread was in power saving mode. We come here when any
+ * of the following is true:
+ * a. thread wasn't in power saving mode
+ * b. thread was in power saving mode with no state loss,
+ * supervisor state loss or hypervisor state loss.
+ *
+ * Go back to nap/sleep/winkle mode again if (b) is true.
+ */
BEGIN_FTR_SECTION
- b machine_check_common_early
-END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
-machine_check_pSeries_0:
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0
+ rlwinm. r11,r12,47-31,30,31
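+ /* nonzero SRR1[46:47] (wake state) means case (b): was power-saving */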
+ bne machine_check_idle_common
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
- * MSR_RI is not enabled, because PACA_EXMC is being used, so a
- * nested machine check corrupts it. machine_check_common enables
- * MSR_RI.
+ * Check if we are coming from a guest. If yes, then run the normal
+ * exception handler which will take the
+ * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
+ * to the guest.
+ */
+ lbz r11,HSTATE_IN_GUEST(r13)
+ cmpwi r11,0 /* Check if coming from guest */
+ bne mce_deliver /* continue if we are. */
+#endif
+
+ /*
+ * Check if we are coming from userspace. If yes, then run the normal
+ * exception handler which will deliver the MC event to this kernel.
+ */
+ andi. r11,r12,MSR_PR /* See if coming from user. */
+ bne mce_deliver /* continue in V mode if we are. */
+
+ /*
+ * At this point we are coming from kernel context.
+ * Queue up the MCE event and return from the interrupt.
+ * But before that, check if this is an unrecoverable exception.
+ * If yes, then stay on emergency stack and panic.
*/
- EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0
+ andi. r11,r12,MSR_RI
+ beq unrecoverable_mce
-TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
+ /*
+ * Check if we have successfully handled/recovered from error, if not
+ * then stay on emergency stack and panic.
+ */
+ ld r3,RESULT(r1) /* Load result */
+ cmpdi r3,0 /* see if we handled MCE successfully */
+ beq unrecoverable_mce /* if !handled then panic */
+
+ /*
+ * Return from MC interrupt.
+ * Queue up the MCE event so that we can log it later, while
+ * returning from kernel or opal call.
+ */
+ bl machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
+ RFI_TO_KERNEL
+
+mce_deliver:
+ /*
+ * This is a host user or guest MCE. Restore all registers, then
+ * run the "late" handler. For host user, this will run the
+ * machine_check_exception handler in virtual mode like a normal
+ * interrupt handler. For guest, this will trigger the KVM test
+ * and branch to the KVM interrupt similarly to other interrupts.
+ */
+BEGIN_FTR_SECTION
+ ld r10,ORIG_GPR3(r1)
+ mtspr SPRN_CFAR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ MACHINE_CHECK_HANDLER_WINDUP
+ /* See comment at machine_check exception, don't turn on RI */
+ INT_HANDLER machine_check, 0x200, area=PACA_EXMC, ri=0, dar=1, dsisr=1, kvm=1
EXC_COMMON_BEGIN(machine_check_common)
/*
* Machine check is different because we use a different
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
- EXCEPTION_COMMON(PACA_EXMC, 0x200)
+ INT_COMMON 0x200, PACA_EXMC, 1, 1, 1, 1, 1
FINISH_NAP
- RECONCILE_IRQ_STATE(r10, r11)
- ld r3,PACA_EXMC+EX_DAR(r13)
- lwz r4,PACA_EXMC+EX_DSISR(r13)
/* Enable MSR_RI when finished with PACA_EXMC */
li r10,MSR_RI
mtmsrd r10,1
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
b ret_from_except
-#define MACHINE_CHECK_HANDLER_WINDUP \
- /* Clear MSR_RI before setting SRR0 and SRR1. */\
- li r9,0; \
- mtmsrd r9,1; /* Clear MSR_RI */ \
- /* Decrement paca->in_mce now RI is clear. */ \
- lhz r12,PACA_IN_MCE(r13); \
- subi r12,r12,1; \
- sth r12,PACA_IN_MCE(r13); \
- EXCEPTION_RESTORE_REGS EXC_STD
-
#ifdef CONFIG_PPC_P7_NAP
/*
* This is an idle wakeup. Low level machine check has already been
@@ -1101,72 +1112,8 @@ EXC_COMMON_BEGIN(machine_check_idle_common)
bltlr cr1 /* no state loss, return to idle caller */
b idle_return_gpr_loss
#endif
- /*
- * Handle machine check early in real mode. We come here with
- * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
- */
-EXC_COMMON_BEGIN(machine_check_handle_early)
- std r0,GPR0(r1) /* Save r0 */
- EXCEPTION_PROLOG_COMMON_3(0x200)
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_early
- std r3,RESULT(r1) /* Save result */
- ld r12,_MSR(r1)
-BEGIN_FTR_SECTION
- b 4f
-END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
-
-#ifdef CONFIG_PPC_P7_NAP
- /*
- * Check if thread was in power saving mode. We come here when any
- * of the following is true:
- * a. thread wasn't in power saving mode
- * b. thread was in power saving mode with no state loss,
- * supervisor state loss or hypervisor state loss.
- *
- * Go back to nap/sleep/winkle mode again if (b) is true.
- */
-BEGIN_FTR_SECTION
- rlwinm. r11,r12,47-31,30,31
- bne machine_check_idle_common
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif
- /*
- * Check if we are coming from hypervisor userspace. If yes then we
- * continue in host kernel in V mode to deliver the MC event.
- */
- rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
- beq 5f
-4: andi. r11,r12,MSR_PR /* See if coming from user. */
- bne 9f /* continue in V mode if we are. */
-
-5:
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-BEGIN_FTR_SECTION
- /*
- * We are coming from kernel context. Check if we are coming from
- * guest. if yes, then we can continue. We will fall through
- * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest.
- */
- lbz r11,HSTATE_IN_GUEST(r13)
- cmpwi r11,0 /* Check if coming from guest */
- bne 9f /* continue if we are. */
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-#endif
- /*
- * At this point we are not sure about what context we come from.
- * Queue up the MCE event and return from the interrupt.
- * But before that, check if this is an un-recoverable exception.
- * If yes, then stay on emergency stack and panic.
- */
- andi. r11,r12,MSR_RI
- bne 2f
-1: mfspr r11,SPRN_SRR0
- LOAD_HANDLER(r10,unrecover_mce)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
+EXC_COMMON_BEGIN(unrecoverable_mce)
/*
* We are going down. But there are chances that we might get hit by
* another MCE during panic path and we may run into unstable state
@@ -1174,84 +1121,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
* when another MCE is hit during panic path, system will checkstop
* and hypervisor will get restarted cleanly by SP.
*/
- li r3,MSR_ME
- andc r10,r10,r3 /* Turn off MSR_ME */
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
-2:
- /*
- * Check if we have successfully handled/recovered from error, if not
- * then stay on emergency stack and panic.
- */
- ld r3,RESULT(r1) /* Load result */
- cmpdi r3,0 /* see if we handled MCE successfully */
-
- beq 1b /* if !handled then panic */
BEGIN_FTR_SECTION
- /*
- * Return from MC interrupt.
- * Queue up the MCE event so that we can log it later, while
- * returning from kernel or opal call.
- */
- bl machine_check_queue_event
- MACHINE_CHECK_HANDLER_WINDUP
- RFI_TO_USER_OR_KERNEL
-FTR_SECTION_ELSE
- /*
- * pSeries: Return from MC interrupt. Before that stay on emergency
- * stack and call machine_check_exception to log the MCE event.
- */
- LOAD_HANDLER(r10,mce_return)
- mtspr SPRN_SRR0,r10
+ li r10,0 /* clear MSR_RI */
+ mtmsrd r10,1
+ bl disable_machine_check
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-9:
- /* Deliver the machine check to host kernel in V mode. */
- MACHINE_CHECK_HANDLER_WINDUP
- EXCEPTION_PROLOG_0 PACA_EXMC
- b machine_check_pSeries_0
+ li r3,MSR_ME
+ andc r10,r10,r3
+ mtmsrd r10
-EXC_COMMON_BEGIN(unrecover_mce)
/* Invoke machine_check_exception to print MCE event and panic. */
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
+
/*
- * We will not reach here. Even if we did, there is no way out. Call
- * unrecoverable_exception and die.
+ * We will not reach here. Even if we did, there is no way out.
+ * Call unrecoverable_exception and die.
*/
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b 1b
-
-EXC_COMMON_BEGIN(mce_return)
- /* Invoke machine_check_exception to print MCE event and return. */
addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_exception
- MACHINE_CHECK_HANDLER_WINDUP
- RFI_TO_KERNEL
+ bl unrecoverable_exception
b .
+
EXC_REAL_BEGIN(data_access, 0x300, 0x80)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- b tramp_real_data_access
+ INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1
EXC_REAL_END(data_access, 0x300, 0x80)
-
-TRAMP_REAL_BEGIN(tramp_real_data_access)
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x300, 1, 1, 0
- EXCEPTION_PROLOG_2_REAL data_access_common, EXC_STD, 1
-
EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, 0x300, 1, 1, 0
-EXCEPTION_PROLOG_2_VIRT data_access_common, EXC_STD
+ INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1
EXC_VIRT_END(data_access, 0x4300, 0x80)
-
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x300)
-
+INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON_BEGIN(data_access_common)
/*
* Here r13 points to the paca, r9 contains the saved CR,
@@ -1259,15 +1158,12 @@ EXC_COMMON_BEGIN(data_access_common)
* r9 - r13 are saved in paca->exgen.
* EX_DAR and EX_DSISR have saved DAR/DSISR
*/
- EXCEPTION_COMMON(PACA_EXGEN, 0x300)
- RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- li r5,0x300
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x300, PACA_EXGEN, 1, 1, 1, 1, 1
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
BEGIN_MMU_FTR_SECTION
+ ld r6,_MSR(r1)
+ li r3,0x300
b do_hash_page /* Try to handle as hpte fault */
MMU_FTR_SECTION_ELSE
b handle_page_fault
@@ -1275,26 +1171,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
- EXCEPTION_PROLOG_0 PACA_EXSLB
- b tramp_real_data_access_slb
+ INT_HANDLER data_access_slb, 0x380, ool=1, area=PACA_EXSLB, dar=1, kvm=1
EXC_REAL_END(data_access_slb, 0x380, 0x80)
-
-TRAMP_REAL_BEGIN(tramp_real_data_access_slb)
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXSLB, 1, 0x380, 1, 0, 0
- EXCEPTION_PROLOG_2_REAL data_access_slb_common, EXC_STD, 1
-
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
- EXCEPTION_PROLOG_0 PACA_EXSLB
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXSLB, 0, 0x380, 1, 0, 0
- EXCEPTION_PROLOG_2_VIRT data_access_slb_common, EXC_STD
+ INT_HANDLER data_access_slb, 0x380, virt=1, area=PACA_EXSLB, dar=1
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
-
-TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
-
+INT_KVM_HANDLER data_access_slb, 0x380, EXC_STD, PACA_EXSLB, 1
EXC_COMMON_BEGIN(data_access_slb_common)
- EXCEPTION_COMMON(PACA_EXSLB, 0x380)
- ld r4,PACA_EXSLB+EX_DAR(r13)
- std r4,_DAR(r1)
+ INT_COMMON 0x380, PACA_EXSLB, 1, 1, 0, 1, 0
+ ld r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
@@ -1317,33 +1202,36 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
b ret_from_except
-EXC_REAL(instruction_access, 0x400, 0x80)
-EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
-TRAMP_KVM(PACA_EXGEN, 0x400)
-
+EXC_REAL_BEGIN(instruction_access, 0x400, 0x80)
+ INT_HANDLER instruction_access, 0x400, kvm=1
+EXC_REAL_END(instruction_access, 0x400, 0x80)
+EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
+ INT_HANDLER instruction_access, 0x400, virt=1
+EXC_VIRT_END(instruction_access, 0x4400, 0x80)
+INT_KVM_HANDLER instruction_access, 0x400, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(instruction_access_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0x400)
- RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,_NIP(r1)
- andis. r4,r12,DSISR_SRR1_MATCH_64S@h
- li r5,0x400
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x400, PACA_EXGEN, 1, 1, 1, 2, 2
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
BEGIN_MMU_FTR_SECTION
+ ld r6,_MSR(r1)
+ li r3,0x400
b do_hash_page /* Try to handle as hpte fault */
MMU_FTR_SECTION_ELSE
b handle_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
-__EXC_REAL(instruction_access_slb, 0x480, 0x80, PACA_EXSLB)
-__EXC_VIRT(instruction_access_slb, 0x4480, 0x80, 0x480, PACA_EXSLB)
-TRAMP_KVM(PACA_EXSLB, 0x480)
-
+EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
+ INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1
+EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
+EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
+ INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB
+EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
+INT_KVM_HANDLER instruction_access_slb, 0x480, EXC_STD, PACA_EXSLB, 0
EXC_COMMON_BEGIN(instruction_access_slb_common)
- EXCEPTION_COMMON(PACA_EXSLB, 0x480)
- ld r4,_NIP(r1)
+ INT_COMMON 0x480, PACA_EXSLB, 1, 1, 0, 2, 0
+ ld r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
@@ -1359,69 +1247,44 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
- ld r4,_NIP(r1)
+ ld r4,_DAR(r1)
ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
b ret_from_except
-
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
-BEGIN_FTR_SECTION
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_REAL hardware_interrupt_common, EXC_HV, 1
-FTR_SECTION_ELSE
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_REAL hardware_interrupt_common, EXC_STD, 1
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ INT_HANDLER hardware_interrupt, 0x500, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
-
EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
-BEGIN_FTR_SECTION
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_VIRT hardware_interrupt_common, EXC_HV
-FTR_SECTION_ELSE
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_VIRT hardware_interrupt_common, EXC_STD
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+ INT_HANDLER hardware_interrupt, 0x500, virt=1, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
-
-TRAMP_KVM(PACA_EXGEN, 0x500)
-TRAMP_KVM_HV(PACA_EXGEN, 0x500)
+INT_KVM_HANDLER hardware_interrupt, 0x500, EXC_HV_OR_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
EXC_REAL_BEGIN(alignment, 0x600, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x600, 1, 1, 0
- EXCEPTION_PROLOG_2_REAL alignment_common, EXC_STD, 1
+ INT_HANDLER alignment, 0x600, dar=1, dsisr=1, kvm=1
EXC_REAL_END(alignment, 0x600, 0x100)
-
EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, 0x600, 1, 1, 0
- EXCEPTION_PROLOG_2_VIRT alignment_common, EXC_STD
+ INT_HANDLER alignment, 0x600, virt=1, dar=1, dsisr=1
EXC_VIRT_END(alignment, 0x4600, 0x100)
-
-TRAMP_KVM(PACA_EXGEN, 0x600)
+INT_KVM_HANDLER alignment, 0x600, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(alignment_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0x600)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x600, PACA_EXGEN, 1, 1, 1, 1, 1
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl alignment_exception
b ret_from_except
-EXC_REAL(program_check, 0x700, 0x100)
-EXC_VIRT(program_check, 0x4700, 0x100, 0x700)
-TRAMP_KVM(PACA_EXGEN, 0x700)
+EXC_REAL_BEGIN(program_check, 0x700, 0x100)
+ INT_HANDLER program_check, 0x700, kvm=1
+EXC_REAL_END(program_check, 0x700, 0x100)
+EXC_VIRT_BEGIN(program_check, 0x4700, 0x100)
+ INT_HANDLER program_check, 0x700, virt=1
+EXC_VIRT_END(program_check, 0x4700, 0x100)
+INT_KVM_HANDLER program_check, 0x700, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(program_check_common)
/*
* It's possible to receive a TM Bad Thing type program check with
@@ -1447,27 +1310,33 @@ EXC_COMMON_BEGIN(program_check_common)
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- b 3f /* Jump into the macro !! */
+ INT_COMMON 0x700, PACA_EXGEN, 0, 1, 1, 0, 0
+ b 3f
2:
- EXCEPTION_COMMON(PACA_EXGEN, 0x700)
+ INT_COMMON 0x700, PACA_EXGEN, 1, 1, 1, 0, 0
+3:
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl program_check_exception
b ret_from_except
-EXC_REAL(fp_unavailable, 0x800, 0x100)
-EXC_VIRT(fp_unavailable, 0x4800, 0x100, 0x800)
-TRAMP_KVM(PACA_EXGEN, 0x800)
+EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
+ INT_HANDLER fp_unavailable, 0x800, kvm=1
+EXC_REAL_END(fp_unavailable, 0x800, 0x100)
+EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100)
+ INT_HANDLER fp_unavailable, 0x800, virt=1
+EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
+INT_KVM_HANDLER fp_unavailable, 0x800, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(fp_unavailable_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0x800)
+ INT_COMMON 0x800, PACA_EXGEN, 1, 1, 0, 0, 0
bne 1f /* if from user, just load it up */
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_fp_unavailable_exception
- BUG_OPCODE
+0: trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
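+ /* the trap and bug-table entry above open-code BUG() */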
1:
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
@@ -1490,21 +1359,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
-EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
-EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0x900)
+EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
+ INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(decrementer, 0x900, 0x80)
+EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
+ INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED
+EXC_VIRT_END(decrementer, 0x4900, 0x80)
+INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
-EXC_REAL_HV(hdecrementer, 0x980, 0x80)
-EXC_VIRT_HV(hdecrementer, 0x4980, 0x80, 0x980)
-TRAMP_KVM_HV(PACA_EXGEN, 0x980)
+EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)
+ INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(hdecrementer, 0x980, 0x80)
+EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
+ INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
+INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
-EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100, IRQS_DISABLED)
-EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00, IRQS_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0xa00)
+EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
+ INT_HANDLER doorbell_super, 0xa00, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(doorbell_super, 0xa00, 0x100)
+EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
+ INT_HANDLER doorbell_super, 0xa00, virt=1, bitmask=IRQS_DISABLED
+EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
+INT_KVM_HANDLER doorbell_super, 0xa00, EXC_STD, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
#else
@@ -1512,17 +1393,13 @@ EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception)
#endif
-EXC_REAL(trap_0b, 0xb00, 0x100)
-EXC_VIRT(trap_0b, 0x4b00, 0x100, 0xb00)
-TRAMP_KVM(PACA_EXGEN, 0xb00)
-EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
+EXC_REAL_NONE(0xb00, 0x100)
+EXC_VIRT_NONE(0x4b00, 0x100)
/*
* system call / hypercall (0xc00, 0x4c00)
*
* The system call exception is invoked with "sc 0" and does not alter HV bit.
- * There is support for kernel code to invoke system calls but there are no
- * in-tree users.
*
* The hypercall is invoked with "sc 1" and sets HV=1.
*
@@ -1531,22 +1408,9 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
*
* Call convention:
*
- * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
- *
- * For hypercalls, the register convention is as follows:
- * r0 volatile
- * r1-2 nonvolatile
- * r3 volatile parameter and return value for status
- * r4-r10 volatile input and output value
- * r11 volatile hypercall number and output value
- * r12 volatile input and output value
- * r13-r31 nonvolatile
- * LR nonvolatile
- * CTR volatile
- * XER volatile
- * CR0-1 CR5-7 volatile
- * CR2-4 nonvolatile
- * Other registers nonvolatile
+ * syscall and hypercalls register conventions are documented in
+ * Documentation/powerpc/syscall64-abi.rst and
+ * Documentation/powerpc/papr_hcalls.rst respectively.
*
* The intersection of volatile registers that don't contain possible
* inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry
@@ -1567,7 +1431,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
GET_PACA(r13)
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
- KVMTEST EXC_STD 0xc00 /* uses r10, branch to do_kvm_0xc00_system_call */
+ KVMTEST system_call, EXC_STD, 0xc00 /* uses r10, branch to system_call_kvm */
mfctr r9
#else
mr r9,r13
@@ -1621,7 +1485,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
EXC_REAL_BEGIN(system_call, 0xc00, 0x100)
SYSTEM_CALL 0
EXC_REAL_END(system_call, 0xc00, 0x100)
-
EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
SYSTEM_CALL 1
EXC_VIRT_END(system_call, 0x4c00, 0x100)
@@ -1634,7 +1497,7 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
* ctr = orig r13
* orig r10 saved in PACA
*/
-TRAMP_KVM_BEGIN(do_kvm_0xc00)
+TRAMP_KVM_BEGIN(system_call_kvm)
/*
* Save the PPR (on systems that support it) before changing to
* HMT_MEDIUM. That allows the KVM code to save that value into the
@@ -1647,32 +1510,33 @@ TRAMP_KVM_BEGIN(do_kvm_0xc00)
SET_SCRATCH0(r10)
std r9,PACA_EXGEN+EX_R9(r13)
mfcr r9
- KVM_HANDLER PACA_EXGEN, EXC_STD, 0xc00, 0
+ KVM_HANDLER 0xc00, EXC_STD, PACA_EXGEN, 0
#endif
-EXC_REAL(single_step, 0xd00, 0x100)
-EXC_VIRT(single_step, 0x4d00, 0x100, 0xd00)
-TRAMP_KVM(PACA_EXGEN, 0xd00)
+EXC_REAL_BEGIN(single_step, 0xd00, 0x100)
+ INT_HANDLER single_step, 0xd00, kvm=1
+EXC_REAL_END(single_step, 0xd00, 0x100)
+EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
+ INT_HANDLER single_step, 0xd00, virt=1
+EXC_VIRT_END(single_step, 0x4d00, 0x100)
+INT_KVM_HANDLER single_step, 0xd00, EXC_STD, PACA_EXGEN, 0
EXC_COMMON(single_step_common, 0xd00, single_step_exception)
-EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0x20)
-EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x20, 0xe00)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00)
+
+EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20)
+ INT_HANDLER h_data_storage, 0xe00, ool=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+EXC_REAL_END(h_data_storage, 0xe00, 0x20)
+EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
+ INT_HANDLER h_data_storage, 0xe00, ool=1, virt=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
+INT_KVM_HANDLER h_data_storage, 0xe00, EXC_HV, PACA_EXGEN, 1
EXC_COMMON_BEGIN(h_data_storage_common)
- mfspr r10,SPRN_HDAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_HDSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_COMMON(PACA_EXGEN, 0xe00)
+ INT_COMMON 0xe00, PACA_EXGEN, 1, 1, 1, 1, 1
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
- ld r4,PACA_EXGEN+EX_DAR(r13)
- lwz r5,PACA_EXGEN+EX_DSISR(r13)
- std r4,_DAR(r1)
- std r5,_DSISR(r1)
+ ld r4,_DAR(r1)
li r5,SIGSEGV
bl bad_page_fault
MMU_FTR_SECTION_ELSE
@@ -1681,15 +1545,23 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
b ret_from_except
-EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0x20)
-EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x20, 0xe20)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe20)
+EXC_REAL_BEGIN(h_instr_storage, 0xe20, 0x20)
+ INT_HANDLER h_instr_storage, 0xe20, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(h_instr_storage, 0xe20, 0x20)
+EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
+ INT_HANDLER h_instr_storage, 0xe20, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
+INT_KVM_HANDLER h_instr_storage, 0xe20, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
-EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0x20)
-EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x20, 0xe40)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe40)
+EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20)
+ INT_HANDLER emulation_assist, 0xe40, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(emulation_assist, 0xe40, 0x20)
+EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
+ INT_HANDLER emulation_assist, 0xe40, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
+INT_KVM_HANDLER emulation_assist, 0xe40, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
@@ -1699,16 +1571,10 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
* mode.
*/
EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- b hmi_exception_early
+ INT_HANDLER hmi_exception, 0xe60, ool=1, early=1, hsrr=EXC_HV, ri=0, kvm=1
EXC_REAL_END(hmi_exception, 0xe60, 0x20)
EXC_VIRT_NONE(0x4e60, 0x20)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
-TRAMP_REAL_BEGIN(hmi_exception_early)
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0xe60, 0, 0, 0
- mfctr r10 /* save ctr, even for !RELOCATABLE */
- BRANCH_TO_C000(r11, hmi_exception_early_common)
-
+INT_KVM_HANDLER hmi_exception, 0xe60, EXC_HV, PACA_EXGEN, 0
EXC_COMMON_BEGIN(hmi_exception_early_common)
mtctr r10 /* Restore ctr */
mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
@@ -1716,10 +1582,10 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- EXCEPTION_PROLOG_COMMON_1()
+
/* We don't touch AMR here, we never go to virtual mode */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
- EXCEPTION_PROLOG_COMMON_3(0xe60)
+ INT_COMMON 0xe60, PACA_EXGEN, 0, 0, 0, 0, 0
+
addi r3,r1,STACK_FRAME_OVERHEAD
bl hmi_exception_realmode
cmpdi cr0,r3,0
@@ -1734,23 +1600,25 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
* firmware.
*/
EXCEPTION_RESTORE_REGS EXC_HV
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0xe60, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_REAL hmi_exception_common, EXC_HV, 1
+ INT_HANDLER hmi_exception, 0xe60, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
EXC_COMMON_BEGIN(hmi_exception_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0xe60)
+ INT_COMMON 0xe60, PACA_EXGEN, 1, 1, 1, 0, 0
FINISH_NAP
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
RUNLATCH_ON
+ bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
b ret_from_except
-EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED)
-EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe80)
+
+EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
+ INT_HANDLER h_doorbell, 0xe80, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(h_doorbell, 0xe80, 0x20)
+EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
+ INT_HANDLER h_doorbell, 0xe80, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
+INT_KVM_HANDLER h_doorbell, 0xe80, EXC_HV, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
#else
@@ -1758,9 +1626,13 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
#endif
-EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20, IRQS_DISABLED)
-EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0, IRQS_DISABLED)
-TRAMP_KVM_HV(PACA_EXGEN, 0xea0)
+EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
+ INT_HANDLER h_virt_irq, 0xea0, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(h_virt_irq, 0xea0, 0x20)
+EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
+ INT_HANDLER h_virt_irq, 0xea0, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
+INT_KVM_HANDLER h_virt_irq, 0xea0, EXC_HV, PACA_EXGEN, 0
EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
@@ -1770,17 +1642,25 @@ EXC_REAL_NONE(0xee0, 0x20)
EXC_VIRT_NONE(0x4ee0, 0x20)
-EXC_REAL_OOL_MASKABLE(performance_monitor, 0xf00, 0x20, IRQS_PMI_DISABLED)
-EXC_VIRT_OOL_MASKABLE(performance_monitor, 0x4f00, 0x20, 0xf00, IRQS_PMI_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0xf00)
+EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
+ INT_HANDLER performance_monitor, 0xf00, ool=1, bitmask=IRQS_PMI_DISABLED, kvm=1
+EXC_REAL_END(performance_monitor, 0xf00, 0x20)
+EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
+ INT_HANDLER performance_monitor, 0xf00, ool=1, virt=1, bitmask=IRQS_PMI_DISABLED
+EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
+INT_KVM_HANDLER performance_monitor, 0xf00, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
-EXC_REAL_OOL(altivec_unavailable, 0xf20, 0x20)
-EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x20, 0xf20)
-TRAMP_KVM(PACA_EXGEN, 0xf20)
+EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
+ INT_HANDLER altivec_unavailable, 0xf20, ool=1, kvm=1
+EXC_REAL_END(altivec_unavailable, 0xf20, 0x20)
+EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20)
+ INT_HANDLER altivec_unavailable, 0xf20, ool=1, virt=1
+EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20)
+INT_KVM_HANDLER altivec_unavailable, 0xf20, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(altivec_unavailable_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0xf20)
+ INT_COMMON 0xf20, PACA_EXGEN, 1, 1, 0, 0, 0
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
beq 1f
@@ -1813,11 +1693,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
b ret_from_except
-EXC_REAL_OOL(vsx_unavailable, 0xf40, 0x20)
-EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x20, 0xf40)
-TRAMP_KVM(PACA_EXGEN, 0xf40)
+EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
+ INT_HANDLER vsx_unavailable, 0xf40, ool=1, kvm=1
+EXC_REAL_END(vsx_unavailable, 0xf40, 0x20)
+EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20)
+ INT_HANDLER vsx_unavailable, 0xf40, ool=1, virt=1
+EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20)
+INT_KVM_HANDLER vsx_unavailable, 0xf40, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(vsx_unavailable_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0xf40)
+ INT_COMMON 0xf40, PACA_EXGEN, 1, 1, 0, 0, 0
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
beq 1f
@@ -1849,15 +1733,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
b ret_from_except
-EXC_REAL_OOL(facility_unavailable, 0xf60, 0x20)
-EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x20, 0xf60)
-TRAMP_KVM(PACA_EXGEN, 0xf60)
+EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20)
+ INT_HANDLER facility_unavailable, 0xf60, ool=1, kvm=1
+EXC_REAL_END(facility_unavailable, 0xf60, 0x20)
+EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
+ INT_HANDLER facility_unavailable, 0xf60, ool=1, virt=1
+EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
+INT_KVM_HANDLER facility_unavailable, 0xf60, EXC_STD, PACA_EXGEN, 0
EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception)
-EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0x20)
-EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x20, 0xf80)
-TRAMP_KVM_HV(PACA_EXGEN, 0xf80)
+EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20)
+ INT_HANDLER h_facility_unavailable, 0xf80, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20)
+EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
+ INT_HANDLER h_facility_unavailable, 0xf80, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
+INT_KVM_HANDLER h_facility_unavailable, 0xf80, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception)
@@ -1874,9 +1766,11 @@ EXC_REAL_NONE(0x1100, 0x100)
EXC_VIRT_NONE(0x5100, 0x100)
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_system_error, 0x1200, 0x100)
+EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
+ INT_HANDLER cbe_system_error, 0x1200, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_system_error, 0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1200)
+INT_KVM_HANDLER cbe_system_error, 0x1200, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1200, 0x100)
@@ -1884,37 +1778,43 @@ EXC_VIRT_NONE(0x5200, 0x100)
#endif
-EXC_REAL(instruction_breakpoint, 0x1300, 0x100)
-EXC_VIRT(instruction_breakpoint, 0x5300, 0x100, 0x1300)
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x1300)
+EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100)
+ INT_HANDLER instruction_breakpoint, 0x1300, kvm=1
+EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100)
+EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
+ INT_HANDLER instruction_breakpoint, 0x1300, virt=1
+EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
+INT_KVM_HANDLER instruction_breakpoint, 0x1300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception)
+
EXC_REAL_NONE(0x1400, 0x100)
EXC_VIRT_NONE(0x5400, 0x100)
EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 0, 0x1500, 0, 0, 0
-
+ INT_HANDLER denorm_exception_hv, 0x1500, early=2, hsrr=EXC_HV
#ifdef CONFIG_PPC_DENORMALISATION
mfspr r10,SPRN_HSRR1
andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
bne+ denorm_assist
#endif
-
- KVMTEST EXC_HV 0x1500
- EXCEPTION_PROLOG_2_REAL denorm_common, EXC_HV, 1
+ KVMTEST denorm_exception_hv, EXC_HV, 0x1500
+ INT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV, 1
EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
#ifdef CONFIG_PPC_DENORMALISATION
EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100)
- b exc_real_0x1500_denorm_exception_hv
+ INT_HANDLER denorm_exception, 0x1500, 0, 2, 1, EXC_HV, PACA_EXGEN, 1, 0, 0, 0, 0
+ mfspr r10,SPRN_HSRR1
+ andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ bne+ denorm_assist
+ INT_VIRT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV
EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
#else
EXC_VIRT_NONE(0x5500, 0x100)
#endif
-TRAMP_KVM_HV(PACA_EXGEN, 0x1500)
+INT_KVM_HANDLER denorm_exception_hv, 0x1500, EXC_HV, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DENORMALISATION
TRAMP_REAL_BEGIN(denorm_assist)
@@ -1989,9 +1889,11 @@ EXC_COMMON(denorm_common, 0x1500, unknown_exception)
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_maintenance, 0x1600, 0x100)
+EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
+ INT_HANDLER cbe_maintenance, 0x1600, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_maintenance, 0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1600)
+INT_KVM_HANDLER cbe_maintenance, 0x1600, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1600, 0x100)
@@ -1999,9 +1901,13 @@ EXC_VIRT_NONE(0x5600, 0x100)
#endif
-EXC_REAL(altivec_assist, 0x1700, 0x100)
-EXC_VIRT(altivec_assist, 0x5700, 0x100, 0x1700)
-TRAMP_KVM(PACA_EXGEN, 0x1700)
+EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100)
+ INT_HANDLER altivec_assist, 0x1700, kvm=1
+EXC_REAL_END(altivec_assist, 0x1700, 0x100)
+EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
+ INT_HANDLER altivec_assist, 0x1700, virt=1
+EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
+INT_KVM_HANDLER altivec_assist, 0x1700, EXC_STD, PACA_EXGEN, 0
#ifdef CONFIG_ALTIVEC
EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception)
#else
@@ -2010,15 +1916,18 @@ EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception)
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_thermal, 0x1800, 0x100)
+EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)
+ INT_HANDLER cbe_thermal, 0x1800, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_thermal, 0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1800)
+INT_KVM_HANDLER cbe_thermal, 0x1800, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
#endif
+
#ifdef CONFIG_PPC_WATCHDOG
#define MASKED_DEC_HANDLER_LABEL 3f
@@ -2028,7 +1937,7 @@ EXC_VIRT_NONE(0x5800, 0x100)
std r12,PACA_EXGEN+EX_R12(r13); \
GET_SCRATCH0(r10); \
std r10,PACA_EXGEN+EX_R13(r13); \
- EXCEPTION_PROLOG_2_REAL soft_nmi_common, _H, 1
+ INT_SAVE_SRR_AND_JUMP soft_nmi_common, _H, 1
/*
* Branch to soft_nmi_interrupt using the emergency stack. The emergency
@@ -2043,9 +1952,8 @@ EXC_COMMON_BEGIN(soft_nmi_common)
mr r10,r1
ld r1,PACAEMERGSP(r13)
subi r1,r1,INT_FRAME_SIZE
- EXCEPTION_COMMON_STACK(PACA_EXGEN, 0x900)
+ INT_COMMON 0x900, PACA_EXGEN, 0, 1, 1, 0, 0
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl soft_nmi_interrupt
b ret_from_except
@@ -2287,11 +2195,20 @@ __end_interrupts:
DEFINE_FIXED_SYMBOL(__end_interrupts)
#ifdef CONFIG_PPC_970_NAP
+ /*
+ * Called by exception entry code if _TLF_NAPPING was set, this clears
+ * the NAPPING flag, and redirects the exception exit to
+ * power4_fixup_nap_return.
+ */
+ .globl power4_fixup_nap
EXC_COMMON_BEGIN(power4_fixup_nap)
andc r9,r9,r10
std r9,TI_LOCAL_FLAGS(r11)
- ld r10,_LINK(r1) /* make idle task do the */
- std r10,_NIP(r1) /* equivalent of a blr */
+ LOAD_REG_ADDR(r10, power4_idle_nap_return)
+ std r10,_NIP(r1)
+ blr
+
+power4_idle_nap_return:
blr
#endif
@@ -2302,6 +2219,35 @@ CLOSE_FIXED_SECTION(virt_trampolines);
USE_TEXT_SECTION()
+/* MSR[RI] should be clear because this uses SRR[01] */
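+/*
+ * MSR[ME] itself cannot be changed with mtmsrd; only an rfid-class
+ * instruction can alter it, hence the SRR0/SRR1 + RFI_TO_KERNEL sequence
+ * below (likely the reason these helpers exist).
+ */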
+enable_machine_check:
+ mflr r0
+ bcl 20,31,$+4
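+ /* bcl 20,31 is the special always-taken form that sets LR (here the
+ * address of 0:) without unbalancing the branch predictor's link stack */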
+0: mflr r3
+ addi r3,r3,(1f - 0b)
+ mtspr SPRN_SRR0,r3
+ mfmsr r3
+ ori r3,r3,MSR_ME
+ mtspr SPRN_SRR1,r3
+ RFI_TO_KERNEL
+1: mtlr r0
+ blr
+
+/* MSR[RI] should be clear because this uses SRR[01] */
+disable_machine_check:
+ mflr r0
+ bcl 20,31,$+4
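+ /* same PC-relative trick as in enable_machine_check above */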
+0: mflr r3
+ addi r3,r3,(1f - 0b)
+ mtspr SPRN_SRR0,r3
+ mfmsr r3
+ li r4,MSR_ME
+ andc r3,r3,r4
+ mtspr SPRN_SRR1,r3
+ RFI_TO_KERNEL
+1: mtlr r0
+ blr
+
/*
* Hash table stuff
*/
@@ -2310,7 +2256,7 @@ do_hash_page:
#ifdef CONFIG_PPC_BOOK3S_64
lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
ori r0,r0,DSISR_BAD_FAULT_64S@l
- and. r0,r4,r0 /* weird error? */
+ and. r0,r5,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
ld r11, PACA_THREAD_INFO(r13)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
@@ -2318,15 +2264,13 @@ do_hash_page:
bne 77f /* then don't call hash_page now */
/*
- * r3 contains the faulting address
- * r4 msr
- * r5 contains the trap number
- * r6 contains dsisr
+ * r3 contains the trap number
+ * r4 contains the faulting address
+ * r5 contains dsisr
+ * r6 msr
*
* at return r3 = 0 for success, 1 for page fault, negative for error
*/
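+ /*
+ * Per the 64-bit ELF ABI, r3..r6 carry the first four arguments, so the
+ * call below corresponds to (roughly) __hash_page(trap, dar, dsisr, msr).
+ */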
- mr r4,r12
- ld r6,_DSISR(r1)
bl __hash_page /* build HPTE if possible */
cmpdi r3,0 /* see if __hash_page succeeded */
@@ -2336,16 +2280,15 @@ do_hash_page:
/* Error */
blt- 13f
- /* Reload DSISR into r4 for the DABR check below */
- ld r4,_DSISR(r1)
+ /* Reload DAR/DSISR into r4/r5 for the DABR check below */
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
#endif /* CONFIG_PPC_BOOK3S_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
-11: andis. r0,r4,DSISR_DABRMATCH@h
+11: andis. r0,r5,DSISR_DABRMATCH@h
bne- handle_dabr_fault
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
cmpdi r3,0
@@ -2353,7 +2296,7 @@ handle_page_fault:
bl save_nvgprs
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
- lwz r4,_DAR(r1)
+ ld r4,_DAR(r1)
bl bad_page_fault
b ret_from_except
@@ -2392,7 +2335,6 @@ handle_dabr_fault:
* the access, or panic if there isn't a handler.
*/
77: bl save_nvgprs
- mr r4,r3
addi r3,r1,STACK_FRAME_OVERHEAD
li r5,SIGSEGV
bl bad_page_fault
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 4eab97292cc2..ff0114aeba9b 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -28,24 +28,22 @@
#include <asm/debugfs.h>
#include <asm/page.h>
#include <asm/prom.h>
-#include <asm/rtas.h>
#include <asm/fadump.h>
+#include <asm/fadump-internal.h>
#include <asm/setup.h>
static struct fw_dump fw_dump;
-static struct fadump_mem_struct fdm;
-static const struct fadump_mem_struct *fdm_active;
-#ifdef CONFIG_CMA
-static struct cma *fadump_cma;
-#endif
+static void __init fadump_reserve_crash_area(u64 base);
+
+#ifndef CONFIG_PRESERVE_FA_DUMP
static DEFINE_MUTEX(fadump_mutex);
-struct fad_crash_memory_ranges *crash_memory_ranges;
-int crash_memory_ranges_size;
-int crash_mem_ranges;
-int max_crash_mem_ranges;
+struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 };
+struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 };
#ifdef CONFIG_CMA
+static struct cma *fadump_cma;
+
/*
 * fadump_cma_init() - Initialize CMA area from the fadump reserved memory
*
@@ -107,84 +105,45 @@ static int __init fadump_cma_init(void) { return 1; }
#endif /* CONFIG_CMA */
/* Scan the Firmware Assisted dump configuration details. */
-int __init early_init_dt_scan_fw_dump(unsigned long node,
- const char *uname, int depth, void *data)
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data)
{
- const __be32 *sections;
- int i, num_sections;
- int size;
- const __be32 *token;
-
- if (depth != 1 || strcmp(uname, "rtas") != 0)
+ if (depth != 1)
return 0;
- /*
- * Check if Firmware Assisted dump is supported. if yes, check
- * if dump has been initiated on last reboot.
- */
- token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
- if (!token)
+ if (strcmp(uname, "rtas") == 0) {
+ rtas_fadump_dt_scan(&fw_dump, node);
return 1;
+ }
- fw_dump.fadump_supported = 1;
- fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token);
-
- /*
- * The 'ibm,kernel-dump' rtas node is present only if there is
- * dump data waiting for us.
- */
- fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
- if (fdm_active)
- fw_dump.dump_active = 1;
-
- /* Get the sizes required to store dump data for the firmware provided
- * dump sections.
- * For each dump section type supported, there is a 32-bit cell which
- * defines the ID of a supported section, followed by two 32-bit cells
- * which give the size of the section in bytes.
- */
- sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
- &size);
-
- if (!sections)
+ if (strcmp(uname, "ibm,opal") == 0) {
+ opal_fadump_dt_scan(&fw_dump, node);
return 1;
-
- num_sections = size / (3 * sizeof(u32));
-
- for (i = 0; i < num_sections; i++, sections += 3) {
- u32 type = (u32)of_read_number(sections, 1);
-
- switch (type) {
- case FADUMP_CPU_STATE_DATA:
- fw_dump.cpu_state_data_size =
- of_read_ulong(&sections[1], 2);
- break;
- case FADUMP_HPTE_REGION:
- fw_dump.hpte_region_size =
- of_read_ulong(&sections[1], 2);
- break;
- }
}
- return 1;
+ return 0;
}
/*
* If fadump is registered, check if the memory provided
* falls within boot memory area and reserved memory area.
*/
-int is_fadump_memory_area(u64 addr, ulong size)
+int is_fadump_memory_area(u64 addr, unsigned long size)
{
- u64 d_start = fw_dump.reserve_dump_area_start;
- u64 d_end = d_start + fw_dump.reserve_dump_area_size;
+ u64 d_start, d_end;
if (!fw_dump.dump_registered)
return 0;
+ if (!size)
+ return 0;
+
+ d_start = fw_dump.reserve_dump_area_start;
+ d_end = d_start + fw_dump.reserve_dump_area_size;
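+ /* does [addr, addr + size) intersect [d_start, d_end]? */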
if (((addr + size) > d_start) && (addr <= d_end))
return 1;
- return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
+ return (addr <= fw_dump.boot_mem_top);
}
int should_fadump_crash(void)
@@ -200,31 +159,29 @@ int is_fadump_active(void)
}
/*
- * Returns 1, if there are no holes in boot memory area,
- * 0 otherwise.
+ * Returns true if there are no holes in the memory area between
+ * d_start and d_end, false otherwise.
*/
-static int is_boot_memory_area_contiguous(void)
+static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{
struct memblock_region *reg;
- unsigned long tstart, tend;
- unsigned long start_pfn = PHYS_PFN(RMA_START);
- unsigned long end_pfn = PHYS_PFN(RMA_START + fw_dump.boot_memory_size);
- unsigned int ret = 0;
+ bool ret = false;
+ u64 start, end;
for_each_memblock(memory, reg) {
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
- if (tstart < tend) {
- /* Memory hole from start_pfn to tstart */
- if (tstart > start_pfn)
+ start = max_t(u64, d_start, reg->base);
+ end = min_t(u64, d_end, (reg->base + reg->size));
+ if (d_start < end) {
+ /* Memory hole from d_start to start */
+ if (start > d_start)
break;
- if (tend == end_pfn) {
- ret = 1;
+ if (end == d_end) {
+ ret = true;
break;
}
- start_pfn = tend + 1;
+ d_start = end + 1;
}
}
@@ -232,37 +189,45 @@ static int is_boot_memory_area_contiguous(void)
}
/*
- * Returns true, if there are no holes in reserved memory area,
+ * Returns true if there are no holes in the boot memory area,
* false otherwise.
*/
-static bool is_reserved_memory_area_contiguous(void)
+bool is_fadump_boot_mem_contiguous(void)
{
- struct memblock_region *reg;
- unsigned long start, end;
- unsigned long d_start = fw_dump.reserve_dump_area_start;
- unsigned long d_end = d_start + fw_dump.reserve_dump_area_size;
-
- for_each_memblock(memory, reg) {
- start = max(d_start, (unsigned long)reg->base);
- end = min(d_end, (unsigned long)(reg->base + reg->size));
- if (d_start < end) {
- /* Memory hole from d_start to start */
- if (start > d_start)
- break;
+ unsigned long d_start, d_end;
+ bool ret = false;
+ int i;
- if (end == d_end)
- return true;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ d_start = fw_dump.boot_mem_addr[i];
+ d_end = d_start + fw_dump.boot_mem_sz[i];
- d_start = end + 1;
- }
+ ret = is_fadump_mem_area_contiguous(d_start, d_end);
+ if (!ret)
+ break;
}
- return false;
+ return ret;
+}
+
+/*
+ * Returns true if there are no holes in the reserved memory area,
+ * false otherwise.
+ */
+bool is_fadump_reserved_mem_contiguous(void)
+{
+ u64 d_start, d_end;
+
+ d_start = fw_dump.reserve_dump_area_start;
+ d_end = d_start + fw_dump.reserve_dump_area_size;
+ return is_fadump_mem_area_contiguous(d_start, d_end);
}
/* Print firmware assisted dump configurations for debugging purpose. */
static void fadump_show_config(void)
{
+ int i;
+
pr_debug("Support for firmware-assisted dump (fadump): %s\n",
(fw_dump.fadump_supported ? "present" : "no support"));
@@ -276,62 +241,13 @@ static void fadump_show_config(void)
pr_debug("Dump section sizes:\n");
pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
- pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
-}
-
-static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
- unsigned long addr)
-{
- if (!fdm)
- return 0;
-
- memset(fdm, 0, sizeof(struct fadump_mem_struct));
- addr = addr & PAGE_MASK;
-
- fdm->header.dump_format_version = cpu_to_be32(0x00000001);
- fdm->header.dump_num_sections = cpu_to_be16(3);
- fdm->header.dump_status_flag = 0;
- fdm->header.offset_first_dump_section =
- cpu_to_be32((u32)offsetof(struct fadump_mem_struct, cpu_state_data));
-
- /*
- * Fields for disk dump option.
- * We are not using disk dump option, hence set these fields to 0.
- */
- fdm->header.dd_block_size = 0;
- fdm->header.dd_block_offset = 0;
- fdm->header.dd_num_blocks = 0;
- fdm->header.dd_offset_disk_path = 0;
-
- /* set 0 to disable an automatic dump-reboot. */
- fdm->header.max_time_auto = 0;
-
- /* Kernel dump sections */
- /* cpu state data section. */
- fdm->cpu_state_data.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->cpu_state_data.source_data_type = cpu_to_be16(FADUMP_CPU_STATE_DATA);
- fdm->cpu_state_data.source_address = 0;
- fdm->cpu_state_data.source_len = cpu_to_be64(fw_dump.cpu_state_data_size);
- fdm->cpu_state_data.destination_address = cpu_to_be64(addr);
- addr += fw_dump.cpu_state_data_size;
-
- /* hpte region section */
- fdm->hpte_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->hpte_region.source_data_type = cpu_to_be16(FADUMP_HPTE_REGION);
- fdm->hpte_region.source_address = 0;
- fdm->hpte_region.source_len = cpu_to_be64(fw_dump.hpte_region_size);
- fdm->hpte_region.destination_address = cpu_to_be64(addr);
- addr += fw_dump.hpte_region_size;
-
- /* RMA region section */
- fdm->rmr_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->rmr_region.source_data_type = cpu_to_be16(FADUMP_REAL_MODE_REGION);
- fdm->rmr_region.source_address = cpu_to_be64(RMA_START);
- fdm->rmr_region.source_len = cpu_to_be64(fw_dump.boot_memory_size);
- fdm->rmr_region.destination_address = cpu_to_be64(addr);
- addr += fw_dump.boot_memory_size;
-
- return addr;
+ pr_debug(" Boot memory size : %lx\n", fw_dump.boot_memory_size);
+ pr_debug(" Boot memory top : %llx\n", fw_dump.boot_mem_top);
+ pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ pr_debug("[%03d] base = %llx, size = %llx\n", i,
+ fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
+ }
}
/**
@@ -349,10 +265,10 @@ static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
* that is required for a kernel to boot successfully.
*
*/
-static inline unsigned long fadump_calculate_reserve_size(void)
+static inline u64 fadump_calculate_reserve_size(void)
{
+ u64 base, size, bootmem_min;
int ret;
- unsigned long long base, size;
if (fw_dump.reserve_bootvar)
pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
@@ -402,7 +318,8 @@ static inline unsigned long fadump_calculate_reserve_size(void)
if (memory_limit && size > memory_limit)
size = memory_limit;
- return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+ return (size > bootmem_min ? size : bootmem_min);
}
/*
@@ -423,57 +340,136 @@ static unsigned long get_fadump_area_size(void)
size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
size = PAGE_ALIGN(size);
+
+ /* This is to hold kernel metadata on platforms that support it */
+ size += (fw_dump.ops->fadump_get_metadata_size ?
+ fw_dump.ops->fadump_get_metadata_size() : 0);
return size;
}
-static void __init fadump_reserve_crash_area(unsigned long base,
- unsigned long size)
+static int __init add_boot_mem_region(unsigned long rstart,
+ unsigned long rsize)
+{
+ int i = fw_dump.boot_mem_regs_cnt++;
+
+ if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) {
+ fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS;
+ return 0;
+ }
+
+ pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
+ i, rstart, (rstart + rsize));
+ fw_dump.boot_mem_addr[i] = rstart;
+ fw_dump.boot_mem_sz[i] = rsize;
+ return 1;
+}
+
+/*
+ * Firmware usually has a hard limit on the data it can copy per region.
+ * Honour that by splitting a memory range into multiple regions.
+ */
+static int __init add_boot_mem_regions(unsigned long mstart,
+ unsigned long msize)
+{
+ unsigned long rstart, rsize, max_size;
+ int ret = 1;
+
+ rstart = mstart;
+ max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
+ while (msize) {
+ if (msize > max_size)
+ rsize = max_size;
+ else
+ rsize = msize;
+
+ ret = add_boot_mem_region(rstart, rsize);
+ if (!ret)
+ break;
+
+ msize -= rsize;
+ rstart += rsize;
+ }
+
+ return ret;
+}
+
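The splitting loop above carves one range into chunks no larger than the firmware's per-region copy limit. The same logic in isolation, a sketch assuming a non-zero max_size (emit() stands in for add_boot_mem_region() and is not a real helper):

	static void split_range(uint64_t mstart, uint64_t msize, uint64_t max_size,
				void (*emit)(uint64_t start, uint64_t size))
	{
		while (msize) {
			uint64_t rsize = (msize > max_size) ? max_size : msize;

			emit(mstart, rsize);	/* one firmware-sized region */
			mstart += rsize;
			msize -= rsize;
		}
	}

With a max_copy_size of 1 GB, a 2.5 GB range becomes three regions of 1 GB, 1 GB and 0.5 GB.
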
+static int __init fadump_get_boot_mem_regions(void)
{
+ unsigned long base, size, cur_size, hole_size, last_end;
+ unsigned long mem_size = fw_dump.boot_memory_size;
struct memblock_region *reg;
- unsigned long mstart, mend, msize;
+ int ret = 1;
+
+ fw_dump.boot_mem_regs_cnt = 0;
+ last_end = 0;
+ hole_size = 0;
+ cur_size = 0;
for_each_memblock(memory, reg) {
- mstart = max_t(unsigned long, base, reg->base);
- mend = reg->base + reg->size;
- mend = min(base + size, mend);
-
- if (mstart < mend) {
- msize = mend - mstart;
- memblock_reserve(mstart, msize);
- pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n",
- (msize >> 20), mstart);
+ base = reg->base;
+ size = reg->size;
+ hole_size += (base - last_end);
+
+ if ((cur_size + size) >= mem_size) {
+ size = (mem_size - cur_size);
+ ret = add_boot_mem_regions(base, size);
+ break;
}
+
+ mem_size -= size;
+ cur_size += size;
+ ret = add_boot_mem_regions(base, size);
+ if (!ret)
+ break;
+
+ last_end = base + size;
}
+ fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
+
+ return ret;
}
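A worked example of the hole accounting above, assuming a single RAM region starting at 0x10000000 (so a 256 MB hole sits below it) and boot_memory_size = 0x40000000 (1 GB):

	hole_size    = 0x10000000 - 0 = 0x10000000
	boot region  = [0x10000000, 0x50000000)
	boot_mem_top = PAGE_ALIGN(0x40000000 + 0x10000000) = 0x50000000

That is, boot_mem_top is the physical address just past the last byte of boot memory with holes included, which is the bound the crash-range setup later compares memblock regions against.
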
int __init fadump_reserve_mem(void)
{
- unsigned long base, size, memory_boundary;
+ u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE;
+ bool is_memblock_bottom_up = memblock_bottom_up();
+ int ret = 1;
if (!fw_dump.fadump_enabled)
return 0;
if (!fw_dump.fadump_supported) {
- printk(KERN_INFO "Firmware-assisted dump is not supported on"
- " this hardware\n");
- fw_dump.fadump_enabled = 0;
- return 0;
+ pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
+ goto error_out;
}
+
/*
* Initialize boot memory size
* If dump is active then we have already calculated the size during
* first kernel.
*/
- if (fdm_active)
- fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len);
- else {
- fw_dump.boot_memory_size = fadump_calculate_reserve_size();
+ if (!fw_dump.dump_active) {
+ fw_dump.boot_memory_size =
+ PAGE_ALIGN(fadump_calculate_reserve_size());
#ifdef CONFIG_CMA
- if (!fw_dump.nocma)
+ if (!fw_dump.nocma) {
+ align = FADUMP_CMA_ALIGNMENT;
fw_dump.boot_memory_size =
- ALIGN(fw_dump.boot_memory_size,
- FADUMP_CMA_ALIGNMENT);
+ ALIGN(fw_dump.boot_memory_size, align);
+ }
#endif
+
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+ if (fw_dump.boot_memory_size < bootmem_min) {
+ pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
+ fw_dump.boot_memory_size, bootmem_min);
+ goto error_out;
+ }
+
+ if (!fadump_get_boot_mem_regions()) {
+ pr_err("Too many holes in boot memory area to enable fadump\n");
+ goto error_out;
+ }
}
/*
@@ -493,10 +489,13 @@ int __init fadump_reserve_mem(void)
" dump, now %#016llx\n", memory_limit);
}
if (memory_limit)
- memory_boundary = memory_limit;
+ mem_boundary = memory_limit;
else
- memory_boundary = memblock_end_of_DRAM();
+ mem_boundary = memblock_end_of_DRAM();
+ base = fw_dump.boot_mem_top;
+ size = get_fadump_area_size();
+ fw_dump.reserve_dump_area_size = size;
if (fw_dump.dump_active) {
pr_info("Firmware-assisted dump is active.\n");
@@ -510,58 +509,55 @@ int __init fadump_reserve_mem(void)
#endif
/*
* If last boot has crashed then reserve all the memory
- * above boot_memory_size so that we don't touch it until
+ * above boot memory size so that we don't touch it until
* dump is written to disk by userspace tool. This memory
- * will be released for general use once the dump is saved.
+ * can be released for general use by invalidating fadump.
*/
- base = fw_dump.boot_memory_size;
- size = memory_boundary - base;
- fadump_reserve_crash_area(base, size);
-
- fw_dump.fadumphdr_addr =
- be64_to_cpu(fdm_active->rmr_region.destination_address) +
- be64_to_cpu(fdm_active->rmr_region.source_len);
- pr_debug("fadumphdr_addr = %pa\n", &fw_dump.fadumphdr_addr);
- fw_dump.reserve_dump_area_start = base;
- fw_dump.reserve_dump_area_size = size;
- } else {
- size = get_fadump_area_size();
+ fadump_reserve_crash_area(base);
+ pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
+ pr_debug("Reserve dump area start address: 0x%lx\n",
+ fw_dump.reserve_dump_area_start);
+ } else {
/*
* Reserve memory at an offset closer to bottom of the RAM to
- * minimize the impact of memory hot-remove operation. We can't
- * use memblock_find_in_range() here since it doesn't allocate
- * from bottom to top.
+ * minimize the impact of memory hot-remove operation.
*/
- for (base = fw_dump.boot_memory_size;
- base <= (memory_boundary - size);
- base += size) {
- if (memblock_is_region_memory(base, size) &&
- !memblock_is_region_reserved(base, size))
- break;
+ memblock_set_bottom_up(true);
+ base = memblock_find_in_range(base, mem_boundary, size, align);
+
+ /* Restore the previous allocation mode */
+ memblock_set_bottom_up(is_memblock_bottom_up);
+
+ if (!base) {
+ pr_err("Failed to find memory chunk for reservation!\n");
+ goto error_out;
}
- if ((base > (memory_boundary - size)) ||
- memblock_reserve(base, size)) {
- pr_err("Failed to reserve memory\n");
- return 0;
+ fw_dump.reserve_dump_area_start = base;
+
+ /*
+ * Calculate the kernel metadata address and register it with
+		 * f/w if the platform supports it.
+ */
+ if (fw_dump.ops->fadump_setup_metadata &&
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
+ goto error_out;
+
+ if (memblock_reserve(base, size)) {
+ pr_err("Failed to reserve memory!\n");
+ goto error_out;
}
- pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
- "assisted dump (System RAM: %ldMB)\n",
- (unsigned long)(size >> 20),
- (unsigned long)(base >> 20),
- (unsigned long)(memblock_phys_mem_size() >> 20));
+ pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
+ (size >> 20), base, (memblock_phys_mem_size() >> 20));
- fw_dump.reserve_dump_area_start = base;
- fw_dump.reserve_dump_area_size = size;
- return fadump_cma_init();
+ ret = fadump_cma_init();
}
- return 1;
-}
-unsigned long __init arch_reserved_kernel_pages(void)
-{
- return memblock_reserved_size() / PAGE_SIZE;
+ return ret;
+error_out:
+ fw_dump.fadump_enabled = 0;
+ return 0;
}
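The else branch above uses a small save/flip/restore pattern around memblock so the reservation is attempted bottom-up (keeping the fadump area low and less exposed to memory hot-remove) without disturbing the caller's allocation mode. The pattern in isolation, a sketch assuming the memblock API of this kernel series (find_low_chunk() is an illustrative name):

	static phys_addr_t __init find_low_chunk(phys_addr_t start, phys_addr_t end,
						 phys_addr_t size, phys_addr_t align)
	{
		bool was_bottom_up = memblock_bottom_up();
		phys_addr_t base;

		memblock_set_bottom_up(true);
		base = memblock_find_in_range(start, end, size, align);

		/* Restore the previous allocation direction before returning. */
		memblock_set_bottom_up(was_bottom_up);

		return base;	/* 0 on failure, as memblock_find_in_range() returns */
	}
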
/* Look for fadump= cmdline option. */
@@ -596,61 +592,6 @@ static int __init early_fadump_reserve_mem(char *p)
}
early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-static int register_fw_dump(struct fadump_mem_struct *fdm)
-{
- int rc, err;
- unsigned int wait_time;
-
- pr_debug("Registering for firmware-assisted kernel dump...\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_REGISTER, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
-
- } while (wait_time);
-
- err = -EIO;
- switch (rc) {
- default:
- pr_err("Failed to register. Unknown Error(%d).\n", rc);
- break;
- case -1:
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Hardware Error(%d).\n", rc);
- break;
- case -3:
- if (!is_boot_memory_area_contiguous())
- pr_err("Can't have holes in boot memory area while registering fadump\n");
- else if (!is_reserved_memory_area_contiguous())
- pr_err("Can't have holes in reserved memory area while"
- " registering fadump\n");
-
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Parameter Error(%d).\n", rc);
- err = -EINVAL;
- break;
- case -9:
- printk(KERN_ERR "firmware-assisted kernel dump is already "
- " registered.");
- fw_dump.dump_registered = 1;
- err = -EEXIST;
- break;
- case 0:
- printk(KERN_INFO "firmware-assisted kernel dump registration"
- " is successful\n");
- fw_dump.dump_registered = 1;
- err = 0;
- break;
- }
- return err;
-}
-
void crash_fadump(struct pt_regs *regs, const char *str)
{
struct fadump_crash_info_header *fdh = NULL;
@@ -693,71 +634,10 @@ void crash_fadump(struct pt_regs *regs, const char *str)
fdh->online_mask = *cpu_online_mask;
- /* Call ibm,os-term rtas call to trigger firmware assisted dump */
- rtas_os_term((char *)str);
-}
-
-#define GPR_MASK 0xffffff0000000000
-static inline int fadump_gpr_index(u64 id)
-{
- int i = -1;
- char str[3];
-
- if ((id & GPR_MASK) == REG_ID("GPR")) {
- /* get the digits at the end */
- id &= ~GPR_MASK;
- id >>= 24;
- str[2] = '\0';
- str[1] = id & 0xff;
- str[0] = (id >> 8) & 0xff;
- sscanf(str, "%d", &i);
- if (i > 31)
- i = -1;
- }
- return i;
-}
-
-static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
- u64 reg_val)
-{
- int i;
-
- i = fadump_gpr_index(reg_id);
- if (i >= 0)
- regs->gpr[i] = (unsigned long)reg_val;
- else if (reg_id == REG_ID("NIA"))
- regs->nip = (unsigned long)reg_val;
- else if (reg_id == REG_ID("MSR"))
- regs->msr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CTR"))
- regs->ctr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("LR"))
- regs->link = (unsigned long)reg_val;
- else if (reg_id == REG_ID("XER"))
- regs->xer = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CR"))
- regs->ccr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DAR"))
- regs->dar = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DSISR"))
- regs->dsisr = (unsigned long)reg_val;
-}
-
-static struct fadump_reg_entry*
-fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
-{
- memset(regs, 0, sizeof(struct pt_regs));
-
- while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) {
- fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
- be64_to_cpu(reg_entry->reg_value));
- reg_entry++;
- }
- reg_entry++;
- return reg_entry;
+ fw_dump.ops->fadump_trigger(fdh, str);
}
-static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
struct elf_prstatus prstatus;
@@ -772,7 +652,7 @@ static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
return buf;
}
-static void fadump_update_elfcore_header(char *bufp)
+void fadump_update_elfcore_header(char *bufp)
{
struct elfhdr *elf;
struct elf_phdr *phdr;
@@ -784,7 +664,7 @@ static void fadump_update_elfcore_header(char *bufp)
phdr = (struct elf_phdr *)bufp;
if (phdr->p_type == PT_NOTE) {
- phdr->p_paddr = fw_dump.cpu_notes_buf;
+ phdr->p_paddr = __pa(fw_dump.cpu_notes_buf_vaddr);
phdr->p_offset = phdr->p_paddr;
phdr->p_filesz = fw_dump.cpu_notes_buf_size;
phdr->p_memsz = fw_dump.cpu_notes_buf_size;
@@ -792,228 +672,100 @@ static void fadump_update_elfcore_header(char *bufp)
return;
}
-static void *fadump_cpu_notes_buf_alloc(unsigned long size)
+static void *fadump_alloc_buffer(unsigned long size)
{
- void *vaddr;
+ unsigned long count, i;
struct page *page;
- unsigned long order, count, i;
+ void *vaddr;
- order = get_order(size);
- vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+ vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
if (!vaddr)
return NULL;
- count = 1 << order;
+ count = PAGE_ALIGN(size) / PAGE_SIZE;
page = virt_to_page(vaddr);
for (i = 0; i < count; i++)
- SetPageReserved(page + i);
+ mark_page_reserved(page + i);
return vaddr;
}
-static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
+static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
{
- struct page *page;
- unsigned long order, count, i;
-
- order = get_order(size);
- count = 1 << order;
- page = virt_to_page(vaddr);
- for (i = 0; i < count; i++)
- ClearPageReserved(page + i);
- __free_pages(page, order);
+ free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}
-/*
- * Read CPU state dump data and convert it into ELF notes.
- * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
- * used to access the data to allow for additional fields to be added without
- * affecting compatibility. Each list of registers for a CPU starts with
- * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
- * 8 Byte ASCII identifier and 8 Byte register value. The register entry
- * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
- * of register value. For more details refer to PAPR document.
- *
- * Only for the crashing cpu we ignore the CPU dump data and get exact
- * state from fadump crash info structure populated by first kernel at the
- * time of crash.
- */
-static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
+s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
{
- struct fadump_reg_save_area_header *reg_header;
- struct fadump_reg_entry *reg_entry;
- struct fadump_crash_info_header *fdh = NULL;
- void *vaddr;
- unsigned long addr;
- u32 num_cpus, *note_buf;
- struct pt_regs regs;
- int i, rc = 0, cpu = 0;
-
- if (!fdm->cpu_state_data.bytes_dumped)
- return -EINVAL;
-
- addr = be64_to_cpu(fdm->cpu_state_data.destination_address);
- vaddr = __va(addr);
-
- reg_header = vaddr;
- if (be64_to_cpu(reg_header->magic_number) != REGSAVE_AREA_MAGIC) {
- printk(KERN_ERR "Unable to read register save area.\n");
- return -ENOENT;
- }
- pr_debug("--------CPU State Data------------\n");
- pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
- pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
-
- vaddr += be32_to_cpu(reg_header->num_cpu_offset);
- num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
- pr_debug("NumCpus : %u\n", num_cpus);
- vaddr += sizeof(u32);
- reg_entry = (struct fadump_reg_entry *)vaddr;
-
/* Allocate buffer to hold cpu crash notes. */
fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
- note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
- if (!note_buf) {
- printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
- "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf_vaddr =
+ (unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
+ if (!fw_dump.cpu_notes_buf_vaddr) {
+ pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
+ fw_dump.cpu_notes_buf_size);
return -ENOMEM;
}
- fw_dump.cpu_notes_buf = __pa(note_buf);
-
- pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
- (num_cpus * sizeof(note_buf_t)), note_buf);
-
- if (fw_dump.fadumphdr_addr)
- fdh = __va(fw_dump.fadumphdr_addr);
-
- for (i = 0; i < num_cpus; i++) {
- if (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUSTRT")) {
- printk(KERN_ERR "Unable to read CPU state data\n");
- rc = -ENOENT;
- goto error_out;
- }
- /* Lower 4 bytes of reg_value contains logical cpu id */
- cpu = be64_to_cpu(reg_entry->reg_value) & FADUMP_CPU_ID_MASK;
- if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
- SKIP_TO_NEXT_CPU(reg_entry);
- continue;
- }
- pr_debug("Reading register data for cpu %d...\n", cpu);
- if (fdh && fdh->crashing_cpu == cpu) {
- regs = fdh->regs;
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
- SKIP_TO_NEXT_CPU(reg_entry);
- } else {
- reg_entry++;
- reg_entry = fadump_read_registers(reg_entry, &regs);
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
- }
- }
- final_note(note_buf);
- if (fdh) {
- pr_debug("Updating elfcore header (%llx) with cpu notes\n",
- fdh->elfcorehdr_addr);
- fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
- }
+ pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
+ fw_dump.cpu_notes_buf_size,
+ fw_dump.cpu_notes_buf_vaddr);
return 0;
-
-error_out:
- fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
- fw_dump.cpu_notes_buf_size);
- fw_dump.cpu_notes_buf = 0;
- fw_dump.cpu_notes_buf_size = 0;
- return rc;
-
}
-/*
- * Validate and process the dump data stored by firmware before exporting
- * it through '/proc/vmcore'.
- */
-static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
+void fadump_free_cpu_notes_buf(void)
{
- struct fadump_crash_info_header *fdh;
- int rc = 0;
-
- if (!fdm_active || !fw_dump.fadumphdr_addr)
- return -EINVAL;
-
- /* Check if the dump data is valid. */
- if ((be16_to_cpu(fdm_active->header.dump_status_flag) == FADUMP_ERROR_FLAG) ||
- (fdm_active->cpu_state_data.error_flags != 0) ||
- (fdm_active->rmr_region.error_flags != 0)) {
- printk(KERN_ERR "Dump taken by platform is not valid\n");
- return -EINVAL;
- }
- if ((fdm_active->rmr_region.bytes_dumped !=
- fdm_active->rmr_region.source_len) ||
- !fdm_active->cpu_state_data.bytes_dumped) {
- printk(KERN_ERR "Dump taken by platform is incomplete\n");
- return -EINVAL;
- }
-
- /* Validate the fadump crash info header */
- fdh = __va(fw_dump.fadumphdr_addr);
- if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
- printk(KERN_ERR "Crash info header is not valid.\n");
- return -EINVAL;
- }
-
- rc = fadump_build_cpu_notes(fdm_active);
- if (rc)
- return rc;
-
- /*
- * We are done validating dump info and elfcore header is now ready
- * to be exported. set elfcorehdr_addr so that vmcore module will
- * export the elfcore header through '/proc/vmcore'.
- */
- elfcorehdr_addr = fdh->elfcorehdr_addr;
+ if (!fw_dump.cpu_notes_buf_vaddr)
+ return;
- return 0;
+ fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
+ fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf_vaddr = 0;
+ fw_dump.cpu_notes_buf_size = 0;
}
-static void free_crash_memory_ranges(void)
+static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- kfree(crash_memory_ranges);
- crash_memory_ranges = NULL;
- crash_memory_ranges_size = 0;
- max_crash_mem_ranges = 0;
+ kfree(mrange_info->mem_ranges);
+ mrange_info->mem_ranges = NULL;
+ mrange_info->mem_ranges_sz = 0;
+ mrange_info->max_mem_ranges = 0;
}
/*
- * Allocate or reallocate crash memory ranges array in incremental units
+ * Allocate or reallocate mem_ranges array in incremental units
* of PAGE_SIZE.
*/
-static int allocate_crash_memory_ranges(void)
+static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- struct fad_crash_memory_ranges *new_array;
+ struct fadump_memory_range *new_array;
u64 new_size;
- new_size = crash_memory_ranges_size + PAGE_SIZE;
- pr_debug("Allocating %llu bytes of memory for crash memory ranges\n",
- new_size);
+ new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
+ pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
+ new_size, mrange_info->name);
- new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL);
+ new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
if (new_array == NULL) {
- pr_err("Insufficient memory for setting up crash memory ranges\n");
- free_crash_memory_ranges();
+ pr_err("Insufficient memory for setting up %s memory ranges\n",
+ mrange_info->name);
+ fadump_free_mem_ranges(mrange_info);
return -ENOMEM;
}
- crash_memory_ranges = new_array;
- crash_memory_ranges_size = new_size;
- max_crash_mem_ranges = (new_size /
- sizeof(struct fad_crash_memory_ranges));
+ mrange_info->mem_ranges = new_array;
+ mrange_info->mem_ranges_sz = new_size;
+ mrange_info->max_mem_ranges = (new_size /
+ sizeof(struct fadump_memory_range));
return 0;
}
-static inline int fadump_add_crash_memory(unsigned long long base,
- unsigned long long end)
+static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
+ u64 base, u64 end)
{
- u64 start, size;
+ struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
bool is_adjacent = false;
+ u64 start, size;
if (base == end)
return 0;
@@ -1022,38 +774,41 @@ static inline int fadump_add_crash_memory(unsigned long long base,
* Fold adjacent memory ranges to bring down the memory ranges/
* PT_LOAD segments count.
*/
- if (crash_mem_ranges) {
- start = crash_memory_ranges[crash_mem_ranges - 1].base;
- size = crash_memory_ranges[crash_mem_ranges - 1].size;
+ if (mrange_info->mem_range_cnt) {
+ start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
+ size = mem_ranges[mrange_info->mem_range_cnt - 1].size;
if ((start + size) == base)
is_adjacent = true;
}
if (!is_adjacent) {
/* resize the array on reaching the limit */
- if (crash_mem_ranges == max_crash_mem_ranges) {
+ if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
int ret;
- ret = allocate_crash_memory_ranges();
+ ret = fadump_alloc_mem_ranges(mrange_info);
if (ret)
return ret;
+
+ /* Update to the new resized array */
+ mem_ranges = mrange_info->mem_ranges;
}
start = base;
- crash_memory_ranges[crash_mem_ranges].base = start;
- crash_mem_ranges++;
+ mem_ranges[mrange_info->mem_range_cnt].base = start;
+ mrange_info->mem_range_cnt++;
}
- crash_memory_ranges[crash_mem_ranges - 1].size = (end - start);
- pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
- (crash_mem_ranges - 1), start, end - 1, (end - start));
+ mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
+ pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
+ mrange_info->name, (mrange_info->mem_range_cnt - 1),
+ start, end - 1, (end - start));
return 0;
}
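The folding above keeps the ranges array, and ultimately the PT_LOAD header count, small: a range that starts exactly where the previous one ends simply extends it. A sketch over a plain array, reusing the illustrative struct region from the earlier sketch (array growth on overflow is omitted here):

	static void add_range(struct region *ranges, int *cnt,
			      uint64_t base, uint64_t end)
	{
		if (base == end)
			return;
		if (*cnt && (ranges[*cnt - 1].base + ranges[*cnt - 1].size) == base) {
			ranges[*cnt - 1].size += (end - base);	/* fold */
			return;
		}
		ranges[*cnt].base = base;
		ranges[*cnt].size = end - base;
		(*cnt)++;
	}

Adding [0, 1M) and then [1M, 2M) therefore yields a single [0, 2M) entry.
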
-static int fadump_exclude_reserved_area(unsigned long long start,
- unsigned long long end)
+static int fadump_exclude_reserved_area(u64 start, u64 end)
{
- unsigned long long ra_start, ra_end;
+ u64 ra_start, ra_end;
int ret = 0;
ra_start = fw_dump.reserve_dump_area_start;
@@ -1061,18 +816,22 @@ static int fadump_exclude_reserved_area(unsigned long long start,
if ((ra_start < end) && (ra_end > start)) {
if ((start < ra_start) && (end > ra_end)) {
- ret = fadump_add_crash_memory(start, ra_start);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ start, ra_start);
if (ret)
return ret;
- ret = fadump_add_crash_memory(ra_end, end);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ ra_end, end);
} else if (start < ra_start) {
- ret = fadump_add_crash_memory(start, ra_start);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ start, ra_start);
} else if (ra_end < end) {
- ret = fadump_add_crash_memory(ra_end, end);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ ra_end, end);
}
} else
- ret = fadump_add_crash_memory(start, end);
+ ret = fadump_add_mem_range(&crash_mrange_info, start, end);
return ret;
}
@@ -1117,36 +876,36 @@ static int fadump_init_elfcore_header(char *bufp)
static int fadump_setup_crash_memory_ranges(void)
{
struct memblock_region *reg;
- unsigned long long start, end;
- int ret;
+ u64 start, end;
+ int i, ret;
pr_debug("Setup crash memory ranges.\n");
- crash_mem_ranges = 0;
+ crash_mrange_info.mem_range_cnt = 0;
/*
- * add the first memory chunk (RMA_START through boot_memory_size) as
- * a separate memory chunk. The reason is, at the time crash firmware
- * will move the content of this memory chunk to different location
- * specified during fadump registration. We need to create a separate
- * program header for this chunk with the correct offset.
+	 * Boot memory region(s) registered with firmware are moved to a
+	 * different location at the time of crash. Create separate program
+	 * header(s) for these memory chunks with the correct offset.
*/
- ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
- if (ret)
- return ret;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ start = fw_dump.boot_mem_addr[i];
+ end = start + fw_dump.boot_mem_sz[i];
+ ret = fadump_add_mem_range(&crash_mrange_info, start, end);
+ if (ret)
+ return ret;
+ }
for_each_memblock(memory, reg) {
- start = (unsigned long long)reg->base;
- end = start + (unsigned long long)reg->size;
+ start = (u64)reg->base;
+ end = start + (u64)reg->size;
/*
- * skip the first memory chunk that is already added (RMA_START
- * through boot_memory_size). This logic needs a relook if and
- * when RMA_START changes to a non-zero value.
+		 * skip the memory chunk that is already added
+		 * (0 through boot_mem_top).
*/
- BUILD_BUG_ON(RMA_START != 0);
- if (start < fw_dump.boot_memory_size) {
- if (end > fw_dump.boot_memory_size)
- start = fw_dump.boot_memory_size;
+ if (start < fw_dump.boot_mem_top) {
+ if (end > fw_dump.boot_mem_top)
+ start = fw_dump.boot_mem_top;
else
continue;
}
@@ -1167,17 +926,35 @@ static int fadump_setup_crash_memory_ranges(void)
*/
static inline unsigned long fadump_relocate(unsigned long paddr)
{
- if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
- return be64_to_cpu(fdm.rmr_region.destination_address) + paddr;
- else
- return paddr;
+ unsigned long raddr, rstart, rend, rlast, hole_size;
+ int i;
+
+ hole_size = 0;
+ rlast = 0;
+ raddr = paddr;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ rstart = fw_dump.boot_mem_addr[i];
+ rend = rstart + fw_dump.boot_mem_sz[i];
+ hole_size += (rstart - rlast);
+
+ if (paddr >= rstart && paddr < rend) {
+ raddr += fw_dump.boot_mem_dest_addr - hole_size;
+ break;
+ }
+
+ rlast = rend;
+ }
+
+ pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
+ return raddr;
}
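The relocation above packs the possibly discontiguous boot memory regions contiguously at boot_mem_dest_addr, so subtracting the accumulated hole size converts a source address into its packed destination offset. A worked example with assumed regions [0, 0x40000000) and [0x80000000, 0xA0000000) and boot_mem_dest_addr = 0x100000000, for paddr = 0x90000000:

	hole_size at region 1 = 0x80000000 - 0x40000000 = 0x40000000
	raddr = 0x90000000 + 0x100000000 - 0x40000000 = 0x150000000

i.e. the destination base, plus the 0x40000000 copied from region 0, plus the 0x10000000 offset of paddr within region 1.
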
static int fadump_create_elfcore_headers(char *bufp)
{
- struct elfhdr *elf;
+ unsigned long long raddr, offset;
struct elf_phdr *phdr;
- int i;
+ struct elfhdr *elf;
+ int i, j;
fadump_init_elfcore_header(bufp);
elf = (struct elfhdr *)bufp;
@@ -1220,12 +997,14 @@ static int fadump_create_elfcore_headers(char *bufp)
(elf->e_phnum)++;
/* setup PT_LOAD sections. */
-
- for (i = 0; i < crash_mem_ranges; i++) {
- unsigned long long mbase, msize;
- mbase = crash_memory_ranges[i].base;
- msize = crash_memory_ranges[i].size;
-
+ j = 0;
+ offset = 0;
+ raddr = fw_dump.boot_mem_addr[0];
+ for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) {
+ u64 mbase, msize;
+
+ mbase = crash_mrange_info.mem_ranges[i].base;
+ msize = crash_mrange_info.mem_ranges[i].size;
if (!msize)
continue;
@@ -1235,13 +1014,17 @@ static int fadump_create_elfcore_headers(char *bufp)
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_offset = mbase;
- if (mbase == RMA_START) {
+ if (mbase == raddr) {
/*
- * The entire RMA region will be moved by firmware
- * to the specified destination_address. Hence set
- * the correct offset.
+ * The entire real memory region will be moved by
+ * firmware to the specified destination_address.
+ * Hence set the correct offset.
*/
- phdr->p_offset = be64_to_cpu(fdm.rmr_region.destination_address);
+ phdr->p_offset = fw_dump.boot_mem_dest_addr + offset;
+ if (j < (fw_dump.boot_mem_regs_cnt - 1)) {
+ offset += fw_dump.boot_mem_sz[j];
+ raddr = fw_dump.boot_mem_addr[++j];
+ }
}
phdr->p_paddr = mbase;
@@ -1263,7 +1046,6 @@ static unsigned long init_fadump_header(unsigned long addr)
if (!addr)
return 0;
- fw_dump.fadumphdr_addr = addr;
fdh = __va(addr);
addr += sizeof(struct fadump_crash_info_header);
@@ -1271,7 +1053,7 @@ static unsigned long init_fadump_header(unsigned long addr)
fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
fdh->elfcorehdr_addr = addr;
/* We will set the crashing cpu id in crash_fadump() during crash. */
- fdh->crashing_cpu = CPU_UNKNOWN;
+ fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
return addr;
}
@@ -1293,7 +1075,8 @@ static int register_fadump(void)
if (ret)
return ret;
- addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len);
+ addr = fw_dump.fadumphdr_addr;
+
/* Initialize fadump crash info header. */
addr = init_fadump_header(addr);
vaddr = __va(addr);
@@ -1302,74 +1085,27 @@ static int register_fadump(void)
fadump_create_elfcore_headers(vaddr);
/* register the future kernel dump with firmware. */
- return register_fw_dump(&fdm);
-}
-
-static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
-{
- int rc = 0;
- unsigned int wait_time;
-
- pr_debug("Un-register firmware-assisted dump\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_UNREGISTER, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
-
- if (rc) {
- printk(KERN_ERR "Failed to un-register firmware-assisted dump."
- " unexpected error(%d).\n", rc);
- return rc;
- }
- fw_dump.dump_registered = 0;
- return 0;
-}
-
-static int fadump_invalidate_dump(const struct fadump_mem_struct *fdm)
-{
- int rc = 0;
- unsigned int wait_time;
-
- pr_debug("Invalidating firmware-assisted dump registration\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_INVALIDATE, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
-
- if (rc) {
- pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc);
- return rc;
- }
- fw_dump.dump_active = 0;
- fdm_active = NULL;
- return 0;
+ pr_debug("Registering for firmware-assisted kernel dump...\n");
+ return fw_dump.ops->fadump_register(&fw_dump);
}
void fadump_cleanup(void)
{
+ if (!fw_dump.fadump_supported)
+ return;
+
/* Invalidate the registration only if dump is active. */
if (fw_dump.dump_active) {
- /* pass the same memory dump structure provided by platform */
- fadump_invalidate_dump(fdm_active);
+ pr_debug("Invalidating firmware-assisted dump registration\n");
+ fw_dump.ops->fadump_invalidate(&fw_dump);
} else if (fw_dump.dump_registered) {
/* Un-register Firmware-assisted dump if it was registered. */
- fadump_unregister_dump(&fdm);
- free_crash_memory_ranges();
+ fw_dump.ops->fadump_unregister(&fw_dump);
+ fadump_free_mem_ranges(&crash_mrange_info);
}
+
+ if (fw_dump.ops->fadump_cleanup)
+ fw_dump.ops->fadump_cleanup(&fw_dump);
}
static void fadump_free_reserved_memory(unsigned long start_pfn,
@@ -1394,90 +1130,197 @@ static void fadump_free_reserved_memory(unsigned long start_pfn,
/*
* Skip memory holes and free memory that was actually reserved.
*/
-static void fadump_release_reserved_area(unsigned long start, unsigned long end)
+static void fadump_release_reserved_area(u64 start, u64 end)
{
+ u64 tstart, tend, spfn, epfn;
struct memblock_region *reg;
- unsigned long tstart, tend;
- unsigned long start_pfn = PHYS_PFN(start);
- unsigned long end_pfn = PHYS_PFN(end);
+ spfn = PHYS_PFN(start);
+ epfn = PHYS_PFN(end);
for_each_memblock(memory, reg) {
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
+ tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
+ tend = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
if (tstart < tend) {
fadump_free_reserved_memory(tstart, tend);
- if (tend == end_pfn)
+ if (tend == epfn)
break;
- start_pfn = tend + 1;
+ spfn = tend;
+ }
+ }
+}
+
+/*
+ * Sort the mem ranges in-place and merge adjacent ranges
+ * to minimize the memory ranges count.
+ */
+static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
+{
+ struct fadump_memory_range *mem_ranges;
+ struct fadump_memory_range tmp_range;
+ u64 base, size;
+ int i, j, idx;
+
+	if (!mrange_info->mem_range_cnt)
+ return;
+
+ /* Sort the memory ranges */
+ mem_ranges = mrange_info->mem_ranges;
+ for (i = 0; i < mrange_info->mem_range_cnt; i++) {
+ idx = i;
+ for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
+ if (mem_ranges[idx].base > mem_ranges[j].base)
+ idx = j;
+ }
+ if (idx != i) {
+ tmp_range = mem_ranges[idx];
+ mem_ranges[idx] = mem_ranges[i];
+ mem_ranges[i] = tmp_range;
+ }
+ }
+
+ /* Merge adjacent reserved ranges */
+ idx = 0;
+ for (i = 1; i < mrange_info->mem_range_cnt; i++) {
+ base = mem_ranges[i-1].base;
+ size = mem_ranges[i-1].size;
+ if (mem_ranges[i].base == (base + size))
+ mem_ranges[idx].size += mem_ranges[i].size;
+ else {
+ idx++;
+ if (i == idx)
+ continue;
+
+ mem_ranges[idx] = mem_ranges[i];
}
}
+ mrange_info->mem_range_cnt = idx + 1;
}
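The selection sort above is a deliberate choice: the arrays involved are tiny and early arch code avoids extra dependencies. For comparison, a userspace sketch of the same sort-then-merge pass built on qsort(3):

	#include <stdint.h>
	#include <stdlib.h>

	struct range { uint64_t base, size; };

	static int cmp_base(const void *a, const void *b)
	{
		const struct range *ra = a, *rb = b;

		return (ra->base > rb->base) - (ra->base < rb->base);
	}

	/* Returns the new count after merging adjacent ranges. */
	static int sort_and_merge(struct range *r, int cnt)
	{
		int i, idx = 0;

		if (!cnt)
			return 0;
		qsort(r, cnt, sizeof(*r), cmp_base);
		for (i = 1; i < cnt; i++) {
			if (r[i].base == r[idx].base + r[idx].size)
				r[idx].size += r[i].size;	/* adjacent: merge */
			else
				r[++idx] = r[i];
		}
		return idx + 1;
	}
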
/*
- * Release the memory that was reserved in early boot to preserve the memory
- * contents. The released memory will be available for general use.
+ * Scan reserved-ranges to consider them while reserving/releasing
+ * memory for FADump.
*/
-static void fadump_release_memory(unsigned long begin, unsigned long end)
+static inline int fadump_scan_reserved_mem_ranges(void)
{
- unsigned long ra_start, ra_end;
+ struct device_node *root;
+ const __be32 *prop;
+ int len, ret = -1;
+ unsigned long i;
+
+ root = of_find_node_by_path("/");
+ if (!root)
+ return ret;
+
+ prop = of_get_property(root, "reserved-ranges", &len);
+ if (!prop)
+ return ret;
+
+ /*
+ * Each reserved range is an (address,size) pair, 2 cells each,
+ * totalling 4 cells per range.
+ */
+ for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+ u64 base, size;
+
+ base = of_read_number(prop + (i * 4) + 0, 2);
+ size = of_read_number(prop + (i * 4) + 2, 2);
+
+ if (size) {
+ ret = fadump_add_mem_range(&reserved_mrange_info,
+ base, base + size);
+ if (ret < 0) {
+ pr_warn("some reserved ranges are ignored!\n");
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
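Each device-tree cell above is a 32-bit big-endian quantity, so of_read_number(prop, 2) assembles one 64-bit value from two consecutive cells. A kernel-flavoured sketch of that assembly (read_range_cell() is an illustrative name; the real helper is of_read_number()):

	static u64 read_range_cell(const __be32 *prop)
	{
		return ((u64)be32_to_cpu(prop[0]) << 32) | be32_to_cpu(prop[1]);
	}

An entry <0x1 0x00000000 0x0 0x10000000> thus decodes to base 0x100000000 and size 0x10000000.
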
+
+/*
+ * Release the memory that was reserved during early boot to preserve the
+ * crashed kernel's memory contents except the reserved dump area (permanent
+ * reservation) and reserved ranges used by F/W. The released memory will
+ * be available for general use.
+ */
+static void fadump_release_memory(u64 begin, u64 end)
+{
+ u64 ra_start, ra_end, tstart;
+ int i, ret;
+
+ fadump_scan_reserved_mem_ranges();
ra_start = fw_dump.reserve_dump_area_start;
ra_end = ra_start + fw_dump.reserve_dump_area_size;
/*
- * exclude the dump reserve area. Will reuse it for next
- * fadump registration.
+ * Add reserved dump area to reserved ranges list
+ * and exclude all these ranges while releasing memory.
*/
- if (begin < ra_end && end > ra_start) {
- if (begin < ra_start)
- fadump_release_reserved_area(begin, ra_start);
- if (end > ra_end)
- fadump_release_reserved_area(ra_end, end);
- } else
- fadump_release_reserved_area(begin, end);
+ ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
+ if (ret != 0) {
+ /*
+		 * Not enough memory to set up reserved ranges and the system
+		 * is already short of memory. So, release all the memory
+		 * except the reserved dump area (reused for the next fadump
+		 * registration).
+ */
+ if (begin < ra_end && end > ra_start) {
+ if (begin < ra_start)
+ fadump_release_reserved_area(begin, ra_start);
+ if (end > ra_end)
+ fadump_release_reserved_area(ra_end, end);
+ } else
+ fadump_release_reserved_area(begin, end);
+
+ return;
+ }
+
+ /* Get the reserved ranges list in order first. */
+ sort_and_merge_mem_ranges(&reserved_mrange_info);
+
+ /* Exclude reserved ranges and release remaining memory */
+ tstart = begin;
+ for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
+ ra_start = reserved_mrange_info.mem_ranges[i].base;
+ ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;
+
+ if (tstart >= ra_end)
+ continue;
+
+ if (tstart < ra_start)
+ fadump_release_reserved_area(tstart, ra_start);
+ tstart = ra_end;
+ }
+
+ if (tstart < end)
+ fadump_release_reserved_area(tstart, end);
}
static void fadump_invalidate_release_mem(void)
{
- unsigned long reserved_area_start, reserved_area_end;
- unsigned long destination_address;
-
mutex_lock(&fadump_mutex);
if (!fw_dump.dump_active) {
mutex_unlock(&fadump_mutex);
return;
}
- destination_address = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
fadump_cleanup();
mutex_unlock(&fadump_mutex);
+ fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
+ fadump_free_cpu_notes_buf();
+
/*
- * Save the current reserved memory bounds we will require them
- * later for releasing the memory for general use.
- */
- reserved_area_start = fw_dump.reserve_dump_area_start;
- reserved_area_end = reserved_area_start +
- fw_dump.reserve_dump_area_size;
- /*
- * Setup reserve_dump_area_start and its size so that we can
- * reuse this reserved memory for Re-registration.
+ * Setup kernel metadata and initialize the kernel dump
+ * memory structure for FADump re-registration.
*/
- fw_dump.reserve_dump_area_start = destination_address;
- fw_dump.reserve_dump_area_size = get_fadump_area_size();
-
- fadump_release_memory(reserved_area_start, reserved_area_end);
- if (fw_dump.cpu_notes_buf) {
- fadump_cpu_notes_buf_free(
- (unsigned long)__va(fw_dump.cpu_notes_buf),
- fw_dump.cpu_notes_buf_size);
- fw_dump.cpu_notes_buf = 0;
- fw_dump.cpu_notes_buf_size = 0;
- }
- /* Initialize the kernel dump memory structure for FAD registration. */
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+ if (fw_dump.ops->fadump_setup_metadata &&
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
+ pr_warn("Failed to setup kernel metadata!\n");
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}
static ssize_t fadump_release_memory_store(struct kobject *kobj,
@@ -1528,7 +1371,7 @@ static ssize_t fadump_register_store(struct kobject *kobj,
int ret = 0;
int input = -1;
- if (!fw_dump.fadump_enabled || fdm_active)
+ if (!fw_dump.fadump_enabled || fw_dump.dump_active)
return -EPERM;
if (kstrtoint(buf, 0, &input))
@@ -1541,13 +1384,15 @@ static ssize_t fadump_register_store(struct kobject *kobj,
if (fw_dump.dump_registered == 0) {
goto unlock_out;
}
+
/* Un-register Firmware-assisted dump */
- fadump_unregister_dump(&fdm);
+ pr_debug("Un-register firmware-assisted dump\n");
+ fw_dump.ops->fadump_unregister(&fw_dump);
break;
case 1:
if (fw_dump.dump_registered == 1) {
/* Un-register Firmware-assisted dump */
- fadump_unregister_dump(&fdm);
+ fw_dump.ops->fadump_unregister(&fw_dump);
}
/* Register Firmware-assisted dump */
ret = register_fadump();
@@ -1564,62 +1409,12 @@ unlock_out:
static int fadump_region_show(struct seq_file *m, void *private)
{
- const struct fadump_mem_struct *fdm_ptr;
-
if (!fw_dump.fadump_enabled)
return 0;
mutex_lock(&fadump_mutex);
- if (fdm_active)
- fdm_ptr = fdm_active;
- else {
- mutex_unlock(&fadump_mutex);
- fdm_ptr = &fdm;
- }
-
- seq_printf(m,
- "CPU : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address),
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) +
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1,
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len),
- be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped));
- seq_printf(m,
- "HPTE: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->hpte_region.destination_address),
- be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
- be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
- be64_to_cpu(fdm_ptr->hpte_region.source_len),
- be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
- seq_printf(m,
- "DUMP: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->rmr_region.destination_address),
- be64_to_cpu(fdm_ptr->rmr_region.destination_address) +
- be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1,
- be64_to_cpu(fdm_ptr->rmr_region.source_len),
- be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
-
- if (!fdm_active ||
- (fw_dump.reserve_dump_area_start ==
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address)))
- goto out;
-
- /* Dump is active. Show reserved memory region. */
- seq_printf(m,
- " : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- (unsigned long long)fw_dump.reserve_dump_area_start,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
- fw_dump.reserve_dump_area_start,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
- fw_dump.reserve_dump_area_start);
-out:
- if (fdm_active)
- mutex_unlock(&fadump_mutex);
+ fw_dump.ops->fadump_region_show(&fw_dump, m);
+ mutex_unlock(&fadump_mutex);
return 0;
}
@@ -1671,16 +1466,15 @@ static void fadump_init_files(void)
*/
int __init setup_fadump(void)
{
- if (!fw_dump.fadump_enabled)
- return 0;
-
- if (!fw_dump.fadump_supported) {
- printk(KERN_ERR "Firmware-assisted dump is not supported on"
- " this hardware\n");
+ if (!fw_dump.fadump_supported)
return 0;
- }
+ fadump_init_files();
fadump_show_config();
+
+ if (!fw_dump.fadump_enabled)
+ return 1;
+
/*
* If dump data is available then see if it is valid and prepare for
* saving it to the disk.
@@ -1690,14 +1484,75 @@ int __init setup_fadump(void)
* if dump process fails then invalidate the registration
* and release memory before proceeding for re-registration.
*/
- if (process_fadump(fdm_active) < 0)
+ if (fw_dump.ops->fadump_process(&fw_dump) < 0)
fadump_invalidate_release_mem();
}
/* Initialize the kernel dump memory structure for FAD registration. */
else if (fw_dump.reserve_dump_area_size)
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
- fadump_init_files();
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
return 1;
}
subsys_initcall(setup_fadump);
+#else /* !CONFIG_PRESERVE_FA_DUMP */
+
+/* Scan the Firmware-Assisted dump configuration details. */
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
+ return 0;
+
+ opal_fadump_dt_scan(&fw_dump, node);
+ return 1;
+}
+
+/*
+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
+ * preserve crash data. The subsequent memory preserving kernel boot
+ * is likely to process this crash data.
+ */
+int __init fadump_reserve_mem(void)
+{
+ if (fw_dump.dump_active) {
+ /*
+ * If last boot has crashed then reserve all the memory
+ * above boot memory to preserve crash data.
+ */
+ pr_info("Preserving crash data for processing in next boot.\n");
+ fadump_reserve_crash_area(fw_dump.boot_mem_top);
+ } else
+		pr_debug("FADump-aware kernel...\n");
+
+ return 1;
+}
+#endif /* CONFIG_PRESERVE_FA_DUMP */
+
+/* Preserve everything above the base address */
+static void __init fadump_reserve_crash_area(u64 base)
+{
+ struct memblock_region *reg;
+ u64 mstart, msize;
+
+ for_each_memblock(memory, reg) {
+ mstart = reg->base;
+ msize = reg->size;
+
+ if ((mstart + msize) < base)
+ continue;
+
+ if (mstart < base) {
+ msize -= (base - mstart);
+ mstart = base;
+ }
+
+		pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data\n",
+ (msize >> 20), mstart);
+ memblock_reserve(mstart, msize);
+ }
+}
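A worked example of the trimming above, assuming base = 0x80000000 and RAM regions [0, 0x40000000) and [0x60000000, 0x100000000): the first region ends below base and is skipped; the second is clipped to [0x80000000, 0x100000000), so exactly the memory from base upward is reserved and survives into the next boot untouched.
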
+
+unsigned long __init arch_reserved_kernel_pages(void)
+{
+ return memblock_reserved_size() / PAGE_SIZE;
+}
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 0bb991ddd264..3235a8da6af7 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -94,6 +94,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
/* enable use of FP after return */
#ifdef CONFIG_PPC32
mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
+#ifdef CONFIG_VMAP_STACK
+ tovirt(r5, r5)
+#endif
lwz r4,THREAD_FPEXC_MODE(r5)
ori r9,r9,MSR_FP /* enable FP for current */
or r9,r9,r4
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index ea065282b303..8bccce6544b5 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -153,35 +153,24 @@ skpinv: addi r6,r6,1 /* Increment */
tlbivax 0,r9
TLBSYNC
-/*
- * The mapping only needs to be cache-coherent on SMP, except on
- * Freescale e500mc derivatives where it's also needed for coherent DMA.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDED MAS2_M
-#else
-#define M_IF_NEEDED 0
-#endif
-
#if defined(ENTRY_MAPPING_BOOT_SETUP)
-/* 6. Setup KERNELBASE mapping in TLB1[0] */
+/* 6. Setup kernstart_virt_addr mapping in TLB1[0] */
lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */
mtspr SPRN_MAS0,r6
lis r6,(MAS1_VALID|MAS1_IPROT)@h
ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
mtspr SPRN_MAS1,r6
- lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@h
- ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@l
+ lis r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+ ori r6,r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+ and r6,r6,r20
+ ori r6,r6,MAS2_M_IF_NEEDED@l
mtspr SPRN_MAS2,r6
mtspr SPRN_MAS3,r8
tlbwe
-/* 7. Jump to KERNELBASE mapping */
- lis r6,(KERNELBASE & ~0xfff)@h
- ori r6,r6,(KERNELBASE & ~0xfff)@l
- rlwinm r7,r25,0,0x03ffffff
- add r6,r7,r6
+/* 7. Jump to kernstart_virt_addr mapping */
+ mr r6,r20
#elif defined(ENTRY_MAPPING_KEXEC_SETUP)
/*
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index f255e22184b4..0493fcac6409 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -34,7 +34,16 @@
#include "head_32.h"
-/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
+/* The 601 only has IBATs */
+#ifdef CONFIG_PPC_BOOK3S_601
+#define LOAD_BAT(n, reg, RA, RB) \
+ li RA,0; \
+ mtspr SPRN_IBAT##n##U,RA; \
+ lwz RA,(n*16)+0(reg); \
+ lwz RB,(n*16)+4(reg); \
+ mtspr SPRN_IBAT##n##U,RA; \
+ mtspr SPRN_IBAT##n##L,RB
+#else
#define LOAD_BAT(n, reg, RA, RB) \
/* see the comment for clear_bats() -- Cort */ \
li RA,0; \
@@ -44,12 +53,11 @@
lwz RB,(n*16)+4(reg); \
mtspr SPRN_IBAT##n##U,RA; \
mtspr SPRN_IBAT##n##L,RB; \
- beq 1f; \
lwz RA,(n*16)+8(reg); \
lwz RB,(n*16)+12(reg); \
mtspr SPRN_DBAT##n##U,RA; \
- mtspr SPRN_DBAT##n##L,RB; \
-1:
+ mtspr SPRN_DBAT##n##L,RB
+#endif
__HEAD
.stabs "arch/powerpc/kernel/",N_SO,0,0,0f
@@ -264,16 +272,21 @@ __secondary_hold_acknowledge:
*/
. = 0x200
DO_KVM 0x200
- mtspr SPRN_SPRG_SCRATCH0,r10
- mtspr SPRN_SPRG_SCRATCH1,r11
- mfcr r10
+MachineCheck:
+ EXCEPTION_PROLOG_0
+#ifdef CONFIG_VMAP_STACK
+ li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+ mtmsr r11
+ isync
+#endif
#ifdef CONFIG_PPC_CHRP
mfspr r11, SPRN_SPRG_THREAD
+ tovirt_vmstack r11, r11
lwz r11, RTAS_SP(r11)
cmpwi cr1, r11, 0
bne cr1, 7f
#endif /* CONFIG_PPC_CHRP */
- EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_1 for_rtas=1
7: EXCEPTION_PROLOG_2
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_CHRP
@@ -288,24 +301,21 @@ __secondary_hold_acknowledge:
. = 0x300
DO_KVM 0x300
DataAccess:
- EXCEPTION_PROLOG
- mfspr r10,SPRN_DSISR
- stw r10,_DSISR(r11)
+ EXCEPTION_PROLOG handle_dar_dsisr=1
+ get_and_save_dar_dsisr_on_stack r4, r5, r11
+BEGIN_MMU_FTR_SECTION
#ifdef CONFIG_PPC_KUAP
- andis. r0,r10,(DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h
+ andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h
#else
- andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h
+ andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
#endif
- bne 1f /* if not, try to put a PTE */
- mfspr r4,SPRN_DAR /* into the hash table */
- rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
-BEGIN_MMU_FTR_SECTION
+ bne handle_page_fault_tramp_2 /* if not, try to put a PTE */
+ rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */
bl hash_page
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-1: lwz r5,_DSISR(r11) /* get DSISR value */
- mfspr r4,SPRN_DAR
- EXC_XFER_LITE(0x300, handle_page_fault)
-
+ b handle_page_fault_tramp_1
+FTR_SECTION_ELSE
+ b handle_page_fault_tramp_2
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
/* Instruction access exception. */
. = 0x400
@@ -321,6 +331,7 @@ BEGIN_MMU_FTR_SECTION
END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
1: mr r4,r12
andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
+ stw r4, _DAR(r11)
EXC_XFER_LITE(0x400, handle_page_fault)
/* External interrupt */
@@ -330,11 +341,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
. = 0x600
DO_KVM 0x600
Alignment:
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DAR
- stw r4,_DAR(r11)
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
+ EXCEPTION_PROLOG handle_dar_dsisr=1
+ save_dar_dsisr_on_stack r4, r5, r11
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x600, alignment_exception)
@@ -557,9 +565,9 @@ DataStoreTLBMiss:
cmplw 0,r1,r3
mfspr r2, SPRN_SPRG_PGDIR
#ifdef CONFIG_SWAP
- li r1, _PAGE_RW | _PAGE_PRESENT | _PAGE_ACCESSED
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
#else
- li r1, _PAGE_RW | _PAGE_PRESENT
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
#endif
bge- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
@@ -637,6 +645,16 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
. = 0x3000
+handle_page_fault_tramp_1:
+ lwz r4, _DAR(r11)
+ lwz r5, _DSISR(r11)
+ /* fall through */
+handle_page_fault_tramp_2:
+ EXC_XFER_LITE(0x300, handle_page_fault)
+
+stack_overflow:
+ vmap_stack_overflow_exception
+
AltiVecUnavailable:
EXCEPTION_PROLOG
#ifdef CONFIG_ALTIVEC
@@ -820,9 +838,6 @@ load_up_mmu:
/* Load the BAT registers with the values set up by MMU_init.
MMU_init takes care of whether we're on a 601 or not. */
- mfpvr r3
- srwi r3,r3,16
- cmpwi r3,1
lis r3,BATS@ha
addi r3,r3,BATS@l
tophys(r3,r3)
@@ -897,9 +912,11 @@ start_here:
bl machine_init
bl __save_cpu_setup
bl MMU_init
+#ifdef CONFIG_KASAN
BEGIN_MMU_FTR_SECTION
bl MMU_init_hw_patch
END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
/*
* Go back to running unmapped so we can load up new values
@@ -910,6 +927,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
ori r4,r4,2f@l
tophys(r4,r4)
li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+
+ .align 4
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
SYNC
@@ -996,11 +1015,8 @@ EXPORT_SYMBOL(switch_mmu_context)
*/
clear_bats:
li r10,0
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi r9, 1
- beq 1f
+#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT0U,r10
mtspr SPRN_DBAT0L,r10
mtspr SPRN_DBAT1U,r10
@@ -1009,7 +1025,7 @@ clear_bats:
mtspr SPRN_DBAT2L,r10
mtspr SPRN_DBAT3U,r10
mtspr SPRN_DBAT3L,r10
-1:
+#endif
mtspr SPRN_IBAT0U,r10
mtspr SPRN_IBAT0L,r10
mtspr SPRN_IBAT1U,r10
@@ -1054,6 +1070,8 @@ _ENTRY(update_bats)
rlwinm r0, r6, 0, ~MSR_RI
rlwinm r0, r0, 0, ~MSR_EE
mtmsr r0
+
+ .align 4
mtspr SPRN_SRR0, r4
mtspr SPRN_SRR1, r3
SYNC
@@ -1093,6 +1111,8 @@ mmu_off:
andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */
beqlr
andc r3,r3,r0
+
+ .align 4
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
sync
@@ -1104,10 +1124,7 @@ mmu_off:
*/
initial_bats:
lis r11,PAGE_OFFSET@h
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi 0,r9,1
- bne 4f
+#ifdef CONFIG_PPC_BOOK3S_601
ori r11,r11,4 /* set up BAT registers for 601 */
li r8,0x7f /* valid, block length = 8MB */
mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
@@ -1120,10 +1137,8 @@ initial_bats:
addis r8,r8,0x800000@h
mtspr SPRN_IBAT2U,r11
mtspr SPRN_IBAT2L,r8
- isync
- blr
-
-4: tophys(r8,r11)
+#else
+ tophys(r8,r11)
#ifdef CONFIG_SMP
ori r8,r8,0x12 /* R/W access, M=1 */
#else
@@ -1135,10 +1150,10 @@ initial_bats:
mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
mtspr SPRN_IBAT0L,r8
mtspr SPRN_IBAT0U,r11
+#endif
isync
blr
-
#ifdef CONFIG_BOOTX_TEXT
setup_disp_bat:
/*
@@ -1153,15 +1168,13 @@ setup_disp_bat:
beqlr
lwz r11,0(r8)
lwz r8,4(r8)
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi 0,r9,1
- beq 1f
+#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT3L,r8
mtspr SPRN_DBAT3U,r11
- blr
-1: mtspr SPRN_IBAT3L,r8
+#else
+ mtspr SPRN_IBAT3L,r8
mtspr SPRN_IBAT3U,r11
+#endif
blr
#endif /* CONFIG_BOOTX_TEXT */
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 4a692553651f..a6a5fbbf8504 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -5,46 +5,65 @@
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
/*
- * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
- */
-.macro __LOAD_MSR_KERNEL r, x
-.if \x >= 0x8000
- lis \r, (\x)@h
- ori \r, \r, (\x)@l
-.else
- li \r, (\x)
-.endif
-.endm
-#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x
-
-/*
* Exception entry code. This code runs with address translation
* turned off, i.e. using physical addresses.
* We assume sprg3 has the physical address of the current
* task's thread_struct.
*/
+.macro EXCEPTION_PROLOG handle_dar_dsisr=0
+ EXCEPTION_PROLOG_0 handle_dar_dsisr=\handle_dar_dsisr
+ EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2 handle_dar_dsisr=\handle_dar_dsisr
+.endm
-.macro EXCEPTION_PROLOG
+.macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0
mtspr SPRN_SPRG_SCRATCH0,r10
mtspr SPRN_SPRG_SCRATCH1,r11
+#ifdef CONFIG_VMAP_STACK
+ mfspr r10, SPRN_SPRG_THREAD
+ .if \handle_dar_dsisr
+ mfspr r11, SPRN_DAR
+ stw r11, DAR(r10)
+ mfspr r11, SPRN_DSISR
+ stw r11, DSISR(r10)
+ .endif
+ mfspr r11, SPRN_SRR0
+ stw r11, SRR0(r10)
+#endif
+ mfspr r11, SPRN_SRR1 /* check whether user or kernel */
+#ifdef CONFIG_VMAP_STACK
+ stw r11, SRR1(r10)
+#endif
mfcr r10
- EXCEPTION_PROLOG_1
- EXCEPTION_PROLOG_2
+ andi. r11, r11, MSR_PR
.endm
-.macro EXCEPTION_PROLOG_1
- mfspr r11,SPRN_SRR1 /* check whether user or kernel */
- andi. r11,r11,MSR_PR
+.macro EXCEPTION_PROLOG_1 for_rtas=0
+#ifdef CONFIG_VMAP_STACK
+ .ifeq \for_rtas
+ li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+ mtmsr r11
+ isync
+ .endif
+ subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */
+#else
tophys(r11,r1) /* use tophys(r1) if kernel */
+ subi r11, r11, INT_FRAME_SIZE /* alloc exc. frame */
+#endif
beq 1f
mfspr r11,SPRN_SPRG_THREAD
+ tovirt_vmstack r11, r11
lwz r11,TASK_STACK-THREAD(r11)
- addi r11,r11,THREAD_SIZE
- tophys(r11,r11)
-1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
+ addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
+ tophys_novmstack r11, r11
+1:
+#ifdef CONFIG_VMAP_STACK
+ mtcrf 0x7f, r11
+ bt 32 - THREAD_ALIGN_SHIFT, stack_overflow
+#endif
.endm
-.macro EXCEPTION_PROLOG_2
+.macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0
stw r10,_CCR(r11) /* save registers */
stw r12,GPR12(r11)
stw r9,GPR9(r11)
@@ -54,16 +73,33 @@
stw r12,GPR11(r11)
mflr r10
stw r10,_LINK(r11)
+#ifdef CONFIG_VMAP_STACK
+ mfspr r12, SPRN_SPRG_THREAD
+ tovirt(r12, r12)
+ .if \handle_dar_dsisr
+ lwz r10, DAR(r12)
+ stw r10, _DAR(r11)
+ lwz r10, DSISR(r12)
+ stw r10, _DSISR(r11)
+ .endif
+ lwz r9, SRR1(r12)
+ lwz r12, SRR0(r12)
+#else
mfspr r12,SPRN_SRR0
mfspr r9,SPRN_SRR1
+#endif
stw r1,GPR1(r11)
stw r1,0(r11)
- tovirt(r1,r11) /* set new kernel sp */
+ tovirt_novmstack r1, r11 /* set new kernel sp */
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#else
+#ifdef CONFIG_VMAP_STACK
+ li r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */
+#else
li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */
- MTMSRD(r10) /* (except for mach check in rtas) */
+#endif
+ mtmsr r10 /* (except for mach check in rtas) */
#endif
stw r0,GPR0(r11)
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
@@ -75,25 +111,46 @@
.macro SYSCALL_ENTRY trapno
mfspr r12,SPRN_SPRG_THREAD
+#ifdef CONFIG_VMAP_STACK
+ mfspr r9, SPRN_SRR0
+ mfspr r11, SPRN_SRR1
+ stw r9, SRR0(r12)
+ stw r11, SRR1(r12)
+#endif
mfcr r10
lwz r11,TASK_STACK-THREAD(r12)
- mflr r9
- addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE
rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
- tophys(r11,r11)
+ addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
+#ifdef CONFIG_VMAP_STACK
+ li r9, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+ mtmsr r9
+ isync
+#endif
+ tovirt_vmstack r12, r12
+ tophys_novmstack r11, r11
+ mflr r9
stw r10,_CCR(r11) /* save registers */
+ stw r9, _LINK(r11)
+#ifdef CONFIG_VMAP_STACK
+ lwz r10, SRR0(r12)
+ lwz r9, SRR1(r12)
+#else
mfspr r10,SPRN_SRR0
- stw r9,_LINK(r11)
mfspr r9,SPRN_SRR1
+#endif
stw r1,GPR1(r11)
stw r1,0(r11)
- tovirt(r1,r11) /* set new kernel sp */
+ tovirt_novmstack r1, r11 /* set new kernel sp */
stw r10,_NIP(r11)
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#else
- LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
- MTMSRD(r10) /* (except for mach check in rtas) */
+#ifdef CONFIG_VMAP_STACK
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~MSR_IR) /* can take exceptions */
+#else
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
+#endif
+ mtmsr r10 /* (except for mach check in rtas) */
#endif
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
stw r2,GPR2(r11)
@@ -131,7 +188,7 @@
#endif
3:
- tovirt(r2, r2) /* set r2 to current */
+ tovirt_novmstack r2, r2 /* set r2 to current */
lis r11, transfer_to_syscall@h
ori r11, r11, transfer_to_syscall@l
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -140,10 +197,10 @@
* otherwise we might risk taking an interrupt before we tell lockdep
* they are enabled.
*/
- LOAD_MSR_KERNEL(r10, MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL)
rlwimi r10, r9, 0, MSR_EE
#else
- LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
#endif
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
@@ -154,6 +211,54 @@
RFI /* jump to handler, enable MMU */
.endm
+.macro save_dar_dsisr_on_stack reg1, reg2, sp
+#ifndef CONFIG_VMAP_STACK
+ mfspr \reg1, SPRN_DAR
+ mfspr \reg2, SPRN_DSISR
+ stw \reg1, _DAR(\sp)
+ stw \reg2, _DSISR(\sp)
+#endif
+.endm
+
+.macro get_and_save_dar_dsisr_on_stack reg1, reg2, sp
+#ifdef CONFIG_VMAP_STACK
+ lwz \reg1, _DAR(\sp)
+ lwz \reg2, _DSISR(\sp)
+#else
+ save_dar_dsisr_on_stack \reg1, \reg2, \sp
+#endif
+.endm
+
+.macro tovirt_vmstack dst, src
+#ifdef CONFIG_VMAP_STACK
+ tovirt(\dst, \src)
+#else
+ .ifnc \dst, \src
+ mr \dst, \src
+ .endif
+#endif
+.endm
+
+.macro tovirt_novmstack dst, src
+#ifndef CONFIG_VMAP_STACK
+ tovirt(\dst, \src)
+#else
+ .ifnc \dst, \src
+ mr \dst, \src
+ .endif
+#endif
+.endm
+
+.macro tophys_novmstack dst, src
+#ifndef CONFIG_VMAP_STACK
+ tophys(\dst, \src)
+#else
+ .ifnc \dst, \src
+ mr \dst, \src
+ .endif
+#endif
+.endm
+
/*
* Note: code which follows this uses cr0.eq (set if from kernel),
* r11, r12 (SRR0), and r9 (SRR1).
@@ -187,7 +292,7 @@ label:
#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
li r10,trap; \
stw r10,_TRAP(r11); \
- LOAD_MSR_KERNEL(r10, msr); \
+ LOAD_REG_IMMEDIATE(r10, msr); \
bl tfer; \
.long hdlr; \
.long ret
@@ -200,4 +305,28 @@ label:
EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
ret_from_except)
+.macro vmap_stack_overflow_exception
+#ifdef CONFIG_VMAP_STACK
+#ifdef CONFIG_SMP
+ mfspr r11, SPRN_SPRG_THREAD
+ tovirt(r11, r11)
+ lwz r11, TASK_CPU - THREAD(r11)
+ slwi r11, r11, 3
+ addis r11, r11, emergency_ctx@ha
+#else
+ lis r11, emergency_ctx@ha
+#endif
+ lwz r11, emergency_ctx@l(r11)
+ cmpwi cr1, r11, 0
+ bne cr1, 1f
+ lis r11, init_thread_union@ha
+ addi r11, r11, init_thread_union@l
+1: addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
+ EXCEPTION_PROLOG_2
+ SAVE_NVGPRS(r11)
+ addi r3, r1, STACK_FRAME_OVERHEAD
+ EXC_XFER_STD(0, stack_overflow_exception)
+#endif
+.endm
+
#endif /* __HEAD_32_H__ */
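The tovirt_vmstack/tovirt_novmstack/tophys_novmstack helpers added above let a single copy of the prolog serve both stack layouts: with CONFIG_VMAP_STACK the kernel stack lives in vmalloc space and must be addressed virtually, while the classic layout converts stack pointers to physical addresses because the prolog runs with translation off. A minimal C sketch of the intent, assuming the usual ppc32 linear map (phys = virt - PAGE_OFFSET); the real helpers are asm macros that also elide the move when dst == src:

	/* sketch only: the kernel uses asm macros; 0xc0000000 is the usual
	 * ppc32 PAGE_OFFSET, assumed here for illustration */
	#define PAGE_OFFSET 0xc0000000UL

	static unsigned long tophys_novmstack(unsigned long va, int vmap_stack)
	{
		if (vmap_stack)
			return va;		/* vmap stack: keep the virtual address */
		return va - PAGE_OFFSET;	/* linear map: virtual -> physical */
	}

	static unsigned long tovirt_vmstack(unsigned long pa, int vmap_stack)
	{
		if (vmap_stack)
			return pa + PAGE_OFFSET;	/* physical -> virtual */
		return pa;			/* no-op, like the "mr" fallback */
	}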
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 585ea1976550..9bb663977e84 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -313,6 +313,7 @@ _ENTRY(saved_ksp_limit)
START_EXCEPTION(0x0400, InstructionAccess)
EXCEPTION_PROLOG
mr r4,r12 /* Pass SRR0 as arg2 */
+ stw r4, _DEAR(r11)
li r5,0 /* Pass zero as arg3 */
EXC_XFER_LITE(0x400, handle_page_fault)
@@ -676,6 +677,7 @@ DataAccess:
mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
stw r5,_ESR(r11)
mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
+ stw r4, _DEAR(r11)
EXC_XFER_LITE(0x300, handle_page_fault)
/* Other PowerPC processors, namely those derived from the 6xx-series
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 91d297e696dd..ad79fddb974d 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -182,7 +182,8 @@ __secondary_hold:
isync
bctr
#else
- BUG_OPCODE
+0: trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
#endif
CLOSE_FIXED_SECTION(first_256B)
@@ -635,7 +636,7 @@ __after_prom_start:
sub r5,r5,r11
#else
/* just copy interrupts */
- LOAD_REG_IMMEDIATE(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
+ LOAD_REG_IMMEDIATE_SYM(r5, r11, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
#endif
b 5f
3:
@@ -998,7 +999,8 @@ start_here_common:
bl start_kernel
/* Not reached */
- BUG_OPCODE
+0:	trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
/*
* We put a few things here that have to be page-aligned.
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 5ab9178c2347..9922306ae512 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -15,6 +15,7 @@
*/
#include <linux/init.h>
+#include <linux/magic.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -126,56 +127,36 @@ instruction_counter:
/* Machine check */
. = 0x200
MachineCheck:
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DAR
- stw r4,_DAR(r11)
- li r5,RPN_PATTERN
- mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
+ EXCEPTION_PROLOG handle_dar_dsisr=1
+ save_dar_dsisr_on_stack r4, r5, r11
+ li r6, RPN_PATTERN
+ mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x200, machine_check_exception)
-/* Data access exception.
- * This is "never generated" by the MPC8xx.
- */
- . = 0x300
-DataAccess:
-
-/* Instruction access exception.
- * This is "never generated" by the MPC8xx.
- */
- . = 0x400
-InstructionAccess:
-
/* External interrupt */
EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
/* Alignment exception */
. = 0x600
Alignment:
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DAR
- stw r4,_DAR(r11)
- li r5,RPN_PATTERN
- mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
+ EXCEPTION_PROLOG handle_dar_dsisr=1
+ save_dar_dsisr_on_stack r4, r5, r11
+ li r6, RPN_PATTERN
+ mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x600, alignment_exception)
+ b .Lalignment_exception_ool
/* Program check exception */
EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
-/* No FPU on MPC8xx. This exception is not supposed to happen.
-*/
- EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD)
-
/* Decrementer */
EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
- EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
+ /* With VMAP_STACK there's not enough room for this at 0x600 */
+ . = 0xa00
+.Lalignment_exception_ool:
+ EXC_XFER_STD(0x600, alignment_exception)
/* System call */
. = 0xc00
@@ -184,25 +165,12 @@ SystemCall:
/* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
- EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_STD)
/* On the MPC8xx, this is a software emulation interrupt. It occurs
* for all unimplemented and illegal instructions.
*/
EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD)
-/* Called from DataStoreTLBMiss when perf TLB misses events are activated */
-#ifdef CONFIG_PERF_EVENTS
- patch_site 0f, patch__dtlbmiss_perf
-0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- addi r10, r10, 1
- stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
- rfi
-#endif
-
. = 0x1100
/*
* For the MPC8xx, this is a software tablewalk to load the instruction
@@ -342,8 +310,8 @@ ITLBMissLinear:
. = 0x1200
DataStoreTLBMiss:
- mtspr SPRN_SPRG_SCRATCH0, r10
- mtspr SPRN_SPRG_SCRATCH1, r11
+ mtspr SPRN_DAR, r10
+ mtspr SPRN_M_TW, r11
mfcr r11
/* If we are faulting a kernel address, we have to use the
@@ -408,10 +376,10 @@ DataStoreTLBMiss:
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
/* Restore registers */
- mtspr SPRN_DAR, r11 /* Tag DAR */
-0: mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
+0: mfspr r10, SPRN_DAR
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_M_TW
rfi
patch_site 0b, patch__dtlbmiss_exit_1
@@ -427,10 +395,10 @@ DTLBMissIMMR:
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, RPN_PATTERN
- mtspr SPRN_DAR, r11 /* Tag DAR */
-0: mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
+0: mfspr r10, SPRN_DAR
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_M_TW
rfi
patch_site 0b, patch__dtlbmiss_exit_2
@@ -464,10 +432,10 @@ DTLBMissLinear:
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, RPN_PATTERN
- mtspr SPRN_DAR, r11 /* Tag DAR */
-0: mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
+0: mfspr r10, SPRN_DAR
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_M_TW
rfi
patch_site 0b, patch__dtlbmiss_exit_3
@@ -485,6 +453,7 @@ InstructionTLBError:
tlbie r4
/* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
.Litlbie:
+ stw r4, _DAR(r11)
EXC_XFER_LITE(0x400, handle_page_fault)
/* This is the data TLB error on the MPC8xx. This could be due to
@@ -493,58 +462,69 @@ InstructionTLBError:
*/
. = 0x1400
DataTLBError:
- mtspr SPRN_SPRG_SCRATCH0, r10
- mtspr SPRN_SPRG_SCRATCH1, r11
- mfcr r10
-
+ EXCEPTION_PROLOG_0 handle_dar_dsisr=1
mfspr r11, SPRN_DAR
- cmpwi cr0, r11, RPN_PATTERN
- beq- FixupDAR /* must be a buggy dcbX, icbi insn. */
+ cmpwi cr1, r11, RPN_PATTERN
+ beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */
DARFixed:/* Return from dcbx instruction bug workaround */
+#ifdef CONFIG_VMAP_STACK
+ li r11, RPN_PATTERN
+ mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */
+#endif
EXCEPTION_PROLOG_1
- EXCEPTION_PROLOG_2
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
- mfspr r4,SPRN_DAR
+ EXCEPTION_PROLOG_2 handle_dar_dsisr=1
+ get_and_save_dar_dsisr_on_stack r4, r5, r11
andis. r10,r5,DSISR_NOHPTE@h
beq+ .Ldtlbie
tlbie r4
.Ldtlbie:
+#ifndef CONFIG_VMAP_STACK
li r10,RPN_PATTERN
mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */
+#endif
/* 0x300 is DataAccess exception, needed by bad_page_fault() */
EXC_XFER_LITE(0x300, handle_page_fault)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
+/* Called from DataStoreTLBMiss when perf TLB misses events are activated */
+#ifdef CONFIG_PERF_EVENTS
+ patch_site 0f, patch__dtlbmiss_perf
+0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ addi r10, r10, 1
+ stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ mfspr r10, SPRN_DAR
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_M_TW
+ rfi
+#endif
+
+stack_overflow:
+ vmap_stack_overflow_exception
/* On the MPC8xx, these next four traps are used for development
* support of breakpoints and such. Someday I will get around to
* using them.
*/
- . = 0x1c00
-DataBreakpoint:
- mtspr SPRN_SPRG_SCRATCH0, r10
- mtspr SPRN_SPRG_SCRATCH1, r11
- mfcr r10
- mfspr r11, SPRN_SRR0
- cmplwi cr0, r11, (.Ldtlbie - PAGE_OFFSET)@l
- cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l
- beq- cr0, 11f
- beq- cr7, 11f
+do_databreakpoint:
EXCEPTION_PROLOG_1
- EXCEPTION_PROLOG_2
+ EXCEPTION_PROLOG_2 handle_dar_dsisr=1
addi r3,r1,STACK_FRAME_OVERHEAD
mfspr r4,SPRN_BAR
stw r4,_DAR(r11)
+#ifdef CONFIG_VMAP_STACK
+ lwz r5,_DSISR(r11)
+#else
mfspr r5,SPRN_DSISR
+#endif
EXC_XFER_STD(0x1c00, do_break)
-11:
+
+ . = 0x1c00
+DataBreakpoint:
+ EXCEPTION_PROLOG_0 handle_dar_dsisr=1
+ mfspr r11, SPRN_SRR0
+ cmplwi cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l
+ cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l
+ cror 4*cr1+eq, 4*cr1+eq, 4*cr7+eq
+ bne cr1, do_databreakpoint
mtcr r10
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
@@ -574,17 +554,15 @@ InstructionBreakpoint:
* by decoding the registers used by the dcbx instruction and adding them.
* DAR is set to the calculated address.
*/
- /* define if you don't want to use self modifying code */
-#define NO_SELF_MODIFYING_CODE
FixupDAR:/* Entry point for dcbx workaround. */
mtspr SPRN_M_TW, r10
/* fetch instruction from memory. */
mfspr r10, SPRN_SRR0
mtspr SPRN_MD_EPN, r10
rlwinm r11, r10, 16, 0xfff8
- cmpli cr0, r11, PAGE_OFFSET@h
+ cmpli cr1, r11, PAGE_OFFSET@h
mfspr r11, SPRN_M_TWB /* Get level 1 table */
- blt+ 3f
+ blt+ cr1, 3f
rlwinm r11, r10, 16, 0xfff8
0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
@@ -599,7 +577,7 @@ FixupDAR:/* Entry point for dcbx workaround. */
3:
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */
mtspr SPRN_MD_TWC, r11
- mtcr r11
+ mtcrf 0x01, r11
mfspr r11, SPRN_MD_TWC
lwz r11, 0(r11) /* Get the pte */
bt 28,200f /* bit 28 = Large page (8M) */
@@ -612,16 +590,16 @@ FixupDAR:/* Entry point for dcbx workaround. */
* no need to include them here */
xoris r10, r11, 0x7c00 /* check if major OP code is 31 */
rlwinm r10, r10, 0, 21, 5
- cmpwi cr0, r10, 2028 /* Is dcbz? */
- beq+ 142f
- cmpwi cr0, r10, 940 /* Is dcbi? */
- beq+ 142f
- cmpwi cr0, r10, 108 /* Is dcbst? */
- beq+ 144f /* Fix up store bit! */
- cmpwi cr0, r10, 172 /* Is dcbf? */
- beq+ 142f
- cmpwi cr0, r10, 1964 /* Is icbi? */
- beq+ 142f
+ cmpwi cr1, r10, 2028 /* Is dcbz? */
+ beq+ cr1, 142f
+ cmpwi cr1, r10, 940 /* Is dcbi? */
+ beq+ cr1, 142f
+ cmpwi cr1, r10, 108 /* Is dcbst? */
+ beq+ cr1, 144f /* Fix up store bit! */
+ cmpwi cr1, r10, 172 /* Is dcbf? */
+ beq+ cr1, 142f
+ cmpwi cr1, r10, 1964 /* Is icbi? */
+ beq+ cr1, 142f
141: mfspr r10,SPRN_M_TW
b DARFixed /* Nope, go back to normal TLB processing */
@@ -639,27 +617,6 @@ FixupDAR:/* Entry point for dcbx workaround. */
rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
mtspr SPRN_DSISR, r10
142: /* continue, it was a dcbx, dcbi instruction. */
-#ifndef NO_SELF_MODIFYING_CODE
- andis. r10,r11,0x1f /* test if reg RA is r0 */
- li r10,modified_instr@l
- dcbtst r0,r10 /* touch for store */
- rlwinm r11,r11,0,0,20 /* Zero lower 10 bits */
- oris r11,r11,640 /* Transform instr. to a "add r10,RA,RB" */
- ori r11,r11,532
- stw r11,0(r10) /* store add/and instruction */
- dcbf 0,r10 /* flush new instr. to memory. */
- icbi 0,r10 /* invalidate instr. cache line */
- mfspr r11, SPRN_SPRG_SCRATCH1 /* restore r11 */
- mfspr r10, SPRN_SPRG_SCRATCH0 /* restore r10 */
- isync /* Wait until new instr is loaded from memory */
-modified_instr:
- .space 4 /* this is where the add instr. is stored */
- bne+ 143f
- subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */
-143: mtdar r10 /* store faulting EA in DAR */
- mfspr r10,SPRN_M_TW
- b DARFixed /* Go back to normal TLB handling */
-#else
mfctr r10
mtdar r10 /* save ctr reg in DAR */
rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB */
@@ -701,8 +658,9 @@ modified_instr:
add r10, r10, r30 ;b 151f
add r10, r10, r31
151:
- rlwinm. r11,r11,19,24,28 /* offset into jump table for reg RA */
- beq 152f /* if reg RA is zero, don't add it */
+ rlwinm r11,r11,19,24,28 /* offset into jump table for reg RA */
+ cmpwi cr1, r11, 0
+ beq cr1, 152f /* if reg RA is zero, don't add it */
addi r11, r11, 150b@l /* add start of table */
mtctr r11 /* load ctr with jump address */
rlwinm r11,r11,0,16,10 /* make sure we don't execute this more than once */
@@ -710,7 +668,14 @@ modified_instr:
152:
mfdar r11
mtctr r11 /* restore ctr reg from DAR */
+#ifdef CONFIG_VMAP_STACK
+ mfspr r11, SPRN_SPRG_THREAD
+ stw r10, DAR(r11)
+ mfspr r10, SPRN_DSISR
+ stw r10, DSISR(r11)
+#else
mtdar r10 /* save fault EA to DAR */
+#endif
mfspr r10,SPRN_M_TW
b DARFixed /* Go back to normal TLB handling */
@@ -723,7 +688,6 @@ modified_instr:
add r10, r10, r11 /* add it */
mfctr r11 /* restore r11 */
b 151b
-#endif
/*
* This is where the main kernel code starts.
@@ -741,6 +705,9 @@ start_here:
/* stack */
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
+ lis r0, STACK_END_MAGIC@h
+ ori r0, r0, STACK_END_MAGIC@l
+ stw r0, 0(r1)
li r0,0
stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
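The three instructions added just above plant a canary at the lowest word of the initial stack; that is what the new <linux/magic.h> include provides the constant for. Conceptually, later overflow detection boils down to this sketch (STACK_END_MAGIC is 0x57AC6E9D in include/uapi/linux/magic.h):

	#define STACK_END_MAGIC 0x57AC6E9DUL	/* as in <linux/magic.h> */

	/* sketch of the check that consumers of the canary perform */
	static int stack_is_corrupted(const unsigned long *stack_base)
	{
		return *stack_base != STACK_END_MAGIC;	/* canary overwritten? */
	}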
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 2ae635df9026..37fc84ed90e3 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -467,6 +467,7 @@ label:
mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
stw r5,_ESR(r11); \
mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \
+ stw r4, _DEAR(r11); \
EXC_XFER_LITE(0x0300, handle_page_fault)
#define INSTRUCTION_STORAGE_EXCEPTION \
@@ -475,6 +476,7 @@ label:
mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
stw r5,_ESR(r11); \
mr r4,r12; /* Pass SRR0 as arg2 */ \
+ stw r4, _DEAR(r11); \
li r5,0; /* Pass zero as arg3 */ \
EXC_XFER_LITE(0x0400, handle_page_fault)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index adf0505dbe02..840af004041e 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -155,6 +155,8 @@ _ENTRY(_start);
*/
_ENTRY(__early_start)
+ LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr)
+ lwz r20,0(r20)
#define ENTRY_MAPPING_BOOT_SETUP
#include "fsl_booke_entry_mapping.S"
@@ -238,6 +240,9 @@ set_ivor:
bl early_init
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
#ifdef CONFIG_RELOCATABLE
mr r3,r30
mr r4,r31
@@ -264,9 +269,6 @@ set_ivor:
/*
* Decide what sort of machine this is and initialize the MMU.
*/
-#ifdef CONFIG_KASAN
- bl kasan_early_init
-#endif
mr r3,r30
mr r4,r31
bl machine_init
@@ -277,8 +279,8 @@ set_ivor:
ori r6, r6, swapper_pg_dir@l
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
- lis r4, KERNELBASE@h
- ori r4, r4, KERNELBASE@l
+ lis r3, kernstart_virt_addr@ha
+ lwz r4, kernstart_virt_addr@l(r3)
stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */
stw r6, 0(r5)
@@ -376,6 +378,7 @@ interrupt_base:
mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
andis. r10,r5,(ESR_ILK|ESR_DLK)@h
bne 1f
+ stw r4, _DEAR(r11)
EXC_XFER_LITE(0x0300, handle_page_fault)
1:
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -1067,7 +1070,12 @@ __secondary_start:
mr r5,r25 /* phys kernel start */
rlwinm r5,r5,0,~0x3ffffff /* aligned 64M */
subf r4,r5,r4 /* memstart_addr - phys kernel start */
- li r5,0 /* no device tree */
+ lis r7,KERNELBASE@h
+ ori r7,r7,KERNELBASE@l
+ cmpw r20,r7 /* if kernstart_virt_addr != KERNELBASE, randomized */
+ beq 2f
+ li r4,0
+2: li r5,0 /* no device tree */
li r6,0 /* not boot cpu */
bl restore_to_as0
@@ -1115,6 +1123,54 @@ __secondary_hold_acknowledge:
#endif
/*
+ * Create a 64M TLB entry for the given address at the given entry index
+ * r3 - entry index
+ * r4 - virtual address
+ * r5/r6 - physical address (high/low)
+ */
+_GLOBAL(create_kaslr_tlb_entry)
+ lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
+	rlwimi  r7,r3,16,4,15           /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r7 /* Write MAS0 */
+
+ lis r3,(MAS1_VALID|MAS1_IPROT)@h
+ ori r3,r3,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+ mtspr SPRN_MAS1,r3 /* Write MAS1 */
+
+ lis r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+ ori r3,r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+ and r3,r3,r4
+ ori r3,r3,MAS2_M_IF_NEEDED@l
+ mtspr SPRN_MAS2,r3 /* Write MAS2(EPN) */
+
+#ifdef CONFIG_PHYS_64BIT
+ ori r8,r6,(MAS3_SW|MAS3_SR|MAS3_SX)
+ mtspr SPRN_MAS3,r8 /* Write MAS3(RPN) */
+ mtspr SPRN_MAS7,r5
+#else
+ ori r8,r5,(MAS3_SW|MAS3_SR|MAS3_SX)
+ mtspr SPRN_MAS3,r8 /* Write MAS3(RPN) */
+#endif
+
+ tlbwe /* Write TLB */
+ isync
+ sync
+ blr
+
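To make the MAS2 setup in create_kaslr_tlb_entry concrete: the effective page number is the virtual address masked down to 64M alignment, plus the coherence bit when needed. A hedged C restatement (MAS2_M = 0x4 per the Book-E MAS2 layout; MAS2_M_IF_NEEDED resolves to MAS2_M only when coherence is required, e.g. on SMP):

	#define MAS2_M		0x4u		/* memory coherence required */
	#define EPN_MASK_64M	(~0u << 26)	/* 64M-aligned effective page number */

	static unsigned int mas2_for_64m(unsigned int vaddr, int need_coherence)
	{
		unsigned int mas2 = vaddr & EPN_MASK_64M;

		if (need_coherence)
			mas2 |= MAS2_M;		/* mirrors MAS2_M_IF_NEEDED */
		return mas2;
	}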
+/*
+ * Return to the start of the relocated kernel and run again
+ * r3 - virtual address of fdt
+ * r4 - entry of the kernel
+ */
+_GLOBAL(reloc_kernel_entry)
+ mfmsr r7
+ rlwinm r7, r7, 0, ~(MSR_IS | MSR_DS)
+
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r7
+ rfi
+
+/*
* Create a tlb entry with the same effective and physical address as
* the tlb entry used by the current running code. But set the TS to 1.
* Then switch to the address space 1. It will return with the r3 set to
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index c8d1fa2e9d53..2462cd7c565c 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -127,15 +127,61 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
}
/*
+ * Watchpoint match range is always doubleword (8 bytes) aligned on
+ * powerpc. If the given range crosses a doubleword boundary, we
+ * need to increase the length such that the next doubleword also gets
+ * covered. E.g.,
+ *
+ * address len = 6 bytes
+ * |=========.
+ * |------------v--|------v--------|
+ * | | | | | | | | | | | | | | | | |
+ * |---------------|---------------|
+ * <---8 bytes--->
+ *
+ * In this case, we should configure hw as:
+ * start_addr = address & ~HW_BREAKPOINT_ALIGN
+ * len = 16 bytes
+ *
+ * @start_addr and @end_addr are inclusive.
+ */
+static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
+{
+ u16 max_len = DABR_MAX_LEN;
+ u16 hw_len;
+ unsigned long start_addr, end_addr;
+
+ start_addr = hw->address & ~HW_BREAKPOINT_ALIGN;
+ end_addr = (hw->address + hw->len - 1) | HW_BREAKPOINT_ALIGN;
+ hw_len = end_addr - start_addr + 1;
+
+ if (dawr_enabled()) {
+ max_len = DAWR_MAX_LEN;
+		/* DAWR region can't cross a 512-byte boundary */
+ if ((start_addr >> 9) != (end_addr >> 9))
+ return -EINVAL;
+ } else if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ /* 8xx can setup a range without limitation */
+ max_len = U16_MAX;
+ }
+
+ if (hw_len > max_len)
+ return -EINVAL;
+
+ hw->hw_len = hw_len;
+ return 0;
+}
+
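As a standalone worked example of the rounding above, matching the 6-byte case in the diagram (HW_BREAKPOINT_ALIGN is 0x7 on powerpc):

	#include <assert.h>

	#define ALIGN_MASK 0x7UL	/* doubleword alignment, as HW_BREAKPOINT_ALIGN */

	int main(void)
	{
		unsigned long address = 0x1006, len = 6;	/* crosses a doubleword */
		unsigned long start = address & ~ALIGN_MASK;		/* 0x1000 */
		unsigned long end = (address + len - 1) | ALIGN_MASK;	/* 0x100f */

		assert(end - start + 1 == 16);	/* hw_len grows from 6 to 16 bytes */
		return 0;
	}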
+/*
* Validate the arch-specific HW Breakpoint register settings
*/
int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
struct arch_hw_breakpoint *hw)
{
- int ret = -EINVAL, length_max;
+ int ret = -EINVAL;
- if (!bp)
+ if (!bp || !attr->bp_len)
return ret;
hw->type = HW_BRK_TYPE_TRANSLATE;
@@ -155,26 +201,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
hw->address = attr->bp_addr;
hw->len = attr->bp_len;
- /*
- * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8)
- * and breakpoint addresses are aligned to nearest double-word
- * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
- * 'symbolsize' should satisfy the check below.
- */
if (!ppc_breakpoint_available())
return -ENODEV;
- length_max = 8; /* DABR */
- if (dawr_enabled()) {
- length_max = 512 ; /* 64 doublewords */
- /* DAWR region can't cross 512 boundary */
- if ((attr->bp_addr >> 9) !=
- ((attr->bp_addr + attr->bp_len - 1) >> 9))
- return -EINVAL;
- }
- if (hw->len >
- (length_max - (hw->address & HW_BREAKPOINT_ALIGN)))
- return -EINVAL;
- return 0;
+
+ return hw_breakpoint_validate_len(hw);
}
/*
@@ -195,20 +225,80 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
tsk->thread.last_hit_ubp = NULL;
}
+static bool dar_within_range(unsigned long dar, struct arch_hw_breakpoint *info)
+{
+ return ((info->address <= dar) && (dar - info->address < info->len));
+}
+
+static bool
+dar_range_overlaps(unsigned long dar, int size, struct arch_hw_breakpoint *info)
+{
+ return ((dar <= info->address + info->len - 1) &&
+ (dar + size - 1 >= info->address));
+}
+
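The two predicates above distinguish "the faulting address lies inside the watched range" from "the accessed byte range merely overlaps it", which matters once the access size is known from analyse_instr(). A self-contained check of the overlap logic with assumed values:

	#include <assert.h>
	#include <stdbool.h>

	/* same closed-interval overlap test as dar_range_overlaps() */
	static bool overlaps(unsigned long dar, int size,
			     unsigned long addr, unsigned long len)
	{
		return dar <= addr + len - 1 && dar + size - 1 >= addr;
	}

	int main(void)
	{
		/* watchpoint covers [0x100, 0x107] */
		assert(overlaps(0x106, 4, 0x100, 8));	/* access [0x106, 0x109] hits */
		assert(!overlaps(0x108, 4, 0x100, 8));	/* access [0x108, 0x10b] misses */
		return 0;
	}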
/*
* Handle debug exception notifications.
*/
+static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
+ struct arch_hw_breakpoint *info)
+{
+ unsigned int instr = 0;
+ int ret, type, size;
+ struct instruction_op op;
+ unsigned long addr = info->address;
+
+ if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
+ goto fail;
+
+ ret = analyse_instr(&op, regs, instr);
+ type = GETTYPE(op.type);
+ size = GETSIZE(op.type);
+
+ if (!ret && (type == LARX || type == STCX)) {
+ printk_ratelimited("Breakpoint hit on instruction that can't be emulated."
+ " Breakpoint at 0x%lx will be disabled.\n", addr);
+ goto disable;
+ }
+
+ /*
+ * If it's extraneous event, we still need to emulate/single-
+	 * If it's an extraneous event, we still need to emulate/single-
+ */
+ if (size && !dar_range_overlaps(regs->dar, size, info))
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+
+ /* Do not emulate user-space instructions, instead single-step them */
+ if (user_mode(regs)) {
+ current->thread.last_hit_ubp = bp;
+ regs->msr |= MSR_SE;
+ return false;
+ }
+
+ if (!emulate_step(regs, instr))
+ goto fail;
+
+ return true;
+
+fail:
+ /*
+ * We've failed in reliably handling the hw-breakpoint. Unregister
+ * it and throw a warning message to let the user know about it.
+ */
+ WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
+ "0x%lx will be disabled.", addr);
+
+disable:
+ perf_event_disable_inatomic(bp);
+ return false;
+}
+
int hw_breakpoint_handler(struct die_args *args)
{
int rc = NOTIFY_STOP;
struct perf_event *bp;
struct pt_regs *regs = args->regs;
-#ifndef CONFIG_PPC_8xx
- int stepped = 1;
- unsigned int instr;
-#endif
struct arch_hw_breakpoint *info;
- unsigned long dar = regs->dar;
/* Disable breakpoints during exception handling */
hw_breakpoint_disable();
@@ -240,43 +330,14 @@ int hw_breakpoint_handler(struct die_args *args)
goto out;
}
- /*
- * Verify if dar lies within the address range occupied by the symbol
- * being watched to filter extraneous exceptions. If it doesn't,
- * we still need to single-step the instruction, but we don't
- * generate an event.
- */
info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
- if (!((bp->attr.bp_addr <= dar) &&
- (dar - bp->attr.bp_addr < bp->attr.bp_len)))
+ if (!dar_within_range(regs->dar, info))
info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
-#ifndef CONFIG_PPC_8xx
- /* Do not emulate user-space instructions, instead single-step them */
- if (user_mode(regs)) {
- current->thread.last_hit_ubp = bp;
- regs->msr |= MSR_SE;
+ if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info))
goto out;
- }
-
- stepped = 0;
- instr = 0;
- if (!__get_user_inatomic(instr, (unsigned int *) regs->nip))
- stepped = emulate_step(regs, instr);
/*
- * emulate_step() could not execute it. We've failed in reliably
- * handling the hw-breakpoint. Unregister it and throw a warning
- * message to let the user know about it.
- */
- if (!stepped) {
- WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
- "0x%lx will be disabled.", info->address);
- perf_event_disable_inatomic(bp);
- goto out;
- }
-#endif
- /*
* As a policy, the callback is invoked in a 'trigger-after-execute'
* fashion
*/
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index a36fd053c3db..422e31d2f5a2 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -77,6 +77,31 @@ void arch_cpu_idle(void)
int powersave_nap;
+#ifdef CONFIG_PPC_970_NAP
+void power4_idle(void)
+{
+ if (!cpu_has_feature(CPU_FTR_CAN_NAP))
+ return;
+
+ if (!powersave_nap)
+ return;
+
+ if (!prep_irq_for_idle())
+ return;
+
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ asm volatile("DSSALL ; sync" ::: "memory");
+
+ power4_idle_nap();
+
+ /*
+	 * power4_idle_nap returns to our caller with interrupts enabled
+	 * (soft and hard). Our caller can cope with either interrupts
+	 * disabled or enabled upon return.
+ */
+}
+#endif
+
#ifdef CONFIG_SYSCTL
/*
* Register the sysctl to set/clear powersave_nap.
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index d32751994a62..22f249b6f58d 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -15,7 +15,9 @@
#include <asm/asm-offsets.h>
#include <asm/ppc-opcode.h>
#include <asm/cpuidle.h>
+#include <asm/thread_info.h> /* TLF_NAPPING */
+#ifdef CONFIG_PPC_P7_NAP
/*
* Desired PSSCR in r3
*
@@ -181,4 +183,22 @@ _GLOBAL(isa206_idle_insn_mayloss)
bne 2f
IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
2: IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
+#endif
+#ifdef CONFIG_PPC_970_NAP
+_GLOBAL(power4_idle_nap)
+ LOAD_REG_IMMEDIATE(r7, MSR_KERNEL|MSR_EE|MSR_POW)
+ ld r9,PACA_THREAD_INFO(r13)
+ ld r8,TI_LOCAL_FLAGS(r9)
+ ori r8,r8,_TLF_NAPPING
+ std r8,TI_LOCAL_FLAGS(r9)
+ /*
+	 * NAPPING bit is set; from this point onward power4_fixup_nap
+ * will cause exceptions to return to power4_idle_nap_return.
+ */
+1: sync
+ isync
+ mtmsrd r7
+ isync
+ b 1b
+#endif
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
deleted file mode 100644
index 33c625329078..000000000000
--- a/arch/powerpc/kernel/idle_power4.S
+++ /dev/null
@@ -1,83 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * This file contains the power_save function for 970-family CPUs.
- */
-
-#include <linux/threads.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/thread_info.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/irqflags.h>
-#include <asm/hw_irq.h>
-#include <asm/feature-fixups.h>
-
-#undef DEBUG
-
- .text
-
-_GLOBAL(power4_idle)
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
- /* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDRBASE(r3,powersave_nap)
- lwz r4,ADDROFF(powersave_nap)(r3)
- cmpwi 0,r4,0
- beqlr
-
- /* This sequence is similar to prep_irq_for_idle() */
-
- /* Hard disable interrupts */
- mfmsr r7
- rldicl r0,r7,48,1
- rotldi r0,r0,16
- mtmsrd r0,1
-
- /* Check if something happened while soft-disabled */
- lbz r0,PACAIRQHAPPENED(r13)
- cmpwi cr0,r0,0
- bne- 2f
-
- /*
- * Soft-enable interrupts. This will make power4_fixup_nap return
- * to our caller with interrupts enabled (soft and hard). The caller
- * can cope with either interrupts disabled or enabled upon return.
- */
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Tell the tracer interrupts are on, because idle responds to them. */
- mflr r0
- std r0,16(r1)
- stdu r1,-128(r1)
- bl trace_hardirqs_on
- addi r1,r1,128
- ld r0,16(r1)
- mtlr r0
- mfmsr r7
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
- li r0,IRQS_ENABLED
- stb r0,PACAIRQSOFTMASK(r13) /* we'll hard-enable shortly */
-BEGIN_FTR_SECTION
- DSSALL
- sync
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- ld r9, PACA_THREAD_INFO(r13)
- ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
- ori r8,r8,_TLF_NAPPING /* so when we take an exception */
- std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
- ori r7,r7,MSR_EE
- oris r7,r7,MSR_POW@h
-1: sync
- isync
- mtmsrd r7
- isync
- b 1b
-
-2: /* Return if an interrupt had happened while soft disabled */
- /* Set the HARD_DIS flag because interrupts are now hard disabled */
- ori r0,r0,PACA_IRQ_HARD_DIS
- stb r0,PACAIRQHAPPENED(r13)
- blr
diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c
new file mode 100644
index 000000000000..e34116255ced
--- /dev/null
+++ b/arch/powerpc/kernel/ima_arch.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+
+#include <linux/ima.h>
+#include <asm/secure_boot.h>
+
+bool arch_ima_get_secureboot(void)
+{
+ return is_ppc_secureboot_enabled();
+}
+
+/*
+ * The "secure_rules" are enabled only on "secureboot" enabled systems.
+ * These rules verify the file signatures against known good values.
+ * The "appraise_type=imasig|modsig" option allows the known good signature
+ * to be stored as an xattr or as an appended signature.
+ *
+ * To avoid duplicate signature verification as much as possible, the IMA
+ * policy rule for module appraisal is added only if CONFIG_MODULE_SIG_FORCE
+ * is not enabled.
+ */
+static const char *const secure_rules[] = {
+ "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG_FORCE
+ "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#endif
+ NULL
+};
+
+/*
+ * The "trusted_rules" are enabled only on "trustedboot" enabled systems.
+ * These rules add the kexec kernel image and kernel modules file hashes to
+ * the IMA measurement list.
+ */
+static const char *const trusted_rules[] = {
+ "measure func=KEXEC_KERNEL_CHECK",
+ "measure func=MODULE_CHECK",
+ NULL
+};
+
+/*
+ * The "secure_and_trusted_rules" contains rules for both the secure boot and
+ * trusted boot. The "template=ima-modsig" option includes the appended
+ * signature, when available, in the IMA measurement list.
+ */
+static const char *const secure_and_trusted_rules[] = {
+ "measure func=KEXEC_KERNEL_CHECK template=ima-modsig",
+ "measure func=MODULE_CHECK template=ima-modsig",
+ "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG_FORCE
+ "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#endif
+ NULL
+};
+
+/*
+ * Returns the relevant IMA arch-specific policies based on the system secure
+ * boot state.
+ */
+const char *const *arch_get_ima_policy(void)
+{
+ if (is_ppc_secureboot_enabled()) {
+ if (IS_ENABLED(CONFIG_MODULE_SIG))
+ set_module_sig_enforced();
+
+ if (is_ppc_trustedboot_enabled())
+ return secure_and_trusted_rules;
+ else
+ return secure_rules;
+ } else if (is_ppc_trustedboot_enabled()) {
+ return trusted_rules;
+ }
+
+ return NULL;
+}
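Each table above is a NULL-terminated array of policy strings that the IMA core consumes one by one; a minimal illustrative sketch of that shape (not the actual IMA parser):

	#include <stdio.h>

	static void load_rules(const char *const *rules)
	{
		/* walk until the NULL sentinel that terminates every table */
		for (; *rules; rules++)
			printf("ima policy rule: %s\n", *rules);
	}

	int main(void)
	{
		static const char *const rules[] = {
			"measure func=KEXEC_KERNEL_CHECK",
			"measure func=MODULE_CHECK",
			NULL
		};

		load_rules(rules);
		return 0;
	}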
diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kernel/ima_kexec.c
deleted file mode 100644
index 720e50e490b6..000000000000
--- a/arch/powerpc/kernel/ima_kexec.c
+++ /dev/null
@@ -1,219 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2016 IBM Corporation
- *
- * Authors:
- * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
- */
-
-#include <linux/slab.h>
-#include <linux/kexec.h>
-#include <linux/of.h>
-#include <linux/memblock.h>
-#include <linux/libfdt.h>
-
-static int get_addr_size_cells(int *addr_cells, int *size_cells)
-{
- struct device_node *root;
-
- root = of_find_node_by_path("/");
- if (!root)
- return -EINVAL;
-
- *addr_cells = of_n_addr_cells(root);
- *size_cells = of_n_size_cells(root);
-
- of_node_put(root);
-
- return 0;
-}
-
-static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr,
- size_t *size)
-{
- int ret, addr_cells, size_cells;
-
- ret = get_addr_size_cells(&addr_cells, &size_cells);
- if (ret)
- return ret;
-
- if (len < 4 * (addr_cells + size_cells))
- return -ENOENT;
-
- *addr = of_read_number(prop, addr_cells);
- *size = of_read_number(prop + 4 * addr_cells, size_cells);
-
- return 0;
-}
-
-/**
- * ima_get_kexec_buffer - get IMA buffer from the previous kernel
- * @addr: On successful return, set to point to the buffer contents.
- * @size: On successful return, set to the buffer size.
- *
- * Return: 0 on success, negative errno on error.
- */
-int ima_get_kexec_buffer(void **addr, size_t *size)
-{
- int ret, len;
- unsigned long tmp_addr;
- size_t tmp_size;
- const void *prop;
-
- prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len);
- if (!prop)
- return -ENOENT;
-
- ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size);
- if (ret)
- return ret;
-
- *addr = __va(tmp_addr);
- *size = tmp_size;
-
- return 0;
-}
-
-/**
- * ima_free_kexec_buffer - free memory used by the IMA buffer
- */
-int ima_free_kexec_buffer(void)
-{
- int ret;
- unsigned long addr;
- size_t size;
- struct property *prop;
-
- prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL);
- if (!prop)
- return -ENOENT;
-
- ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size);
- if (ret)
- return ret;
-
- ret = of_remove_property(of_chosen, prop);
- if (ret)
- return ret;
-
- return memblock_free(addr, size);
-
-}
-
-/**
- * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt
- *
- * The IMA measurement buffer is of no use to a subsequent kernel, so we always
- * remove it from the device tree.
- */
-void remove_ima_buffer(void *fdt, int chosen_node)
-{
- int ret, len;
- unsigned long addr;
- size_t size;
- const void *prop;
-
- prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len);
- if (!prop)
- return;
-
- ret = do_get_kexec_buffer(prop, len, &addr, &size);
- fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer");
- if (ret)
- return;
-
- ret = delete_fdt_mem_rsv(fdt, addr, size);
- if (!ret)
- pr_debug("Removed old IMA buffer reservation.\n");
-}
-
-#ifdef CONFIG_IMA_KEXEC
-/**
- * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer
- *
- * Architectures should use this function to pass on the IMA buffer
- * information to the next kernel.
- *
- * Return: 0 on success, negative errno on error.
- */
-int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
- size_t size)
-{
- image->arch.ima_buffer_addr = load_addr;
- image->arch.ima_buffer_size = size;
-
- return 0;
-}
-
-static int write_number(void *p, u64 value, int cells)
-{
- if (cells == 1) {
- u32 tmp;
-
- if (value > U32_MAX)
- return -EINVAL;
-
- tmp = cpu_to_be32(value);
- memcpy(p, &tmp, sizeof(tmp));
- } else if (cells == 2) {
- u64 tmp;
-
- tmp = cpu_to_be64(value);
- memcpy(p, &tmp, sizeof(tmp));
- } else
- return -EINVAL;
-
- return 0;
-}
-
-/**
- * setup_ima_buffer - add IMA buffer information to the fdt
- * @image: kexec image being loaded.
- * @fdt: Flattened device tree for the next kernel.
- * @chosen_node: Offset to the chosen node.
- *
- * Return: 0 on success, or negative errno on error.
- */
-int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node)
-{
- int ret, addr_cells, size_cells, entry_size;
- u8 value[16];
-
- remove_ima_buffer(fdt, chosen_node);
- if (!image->arch.ima_buffer_size)
- return 0;
-
- ret = get_addr_size_cells(&addr_cells, &size_cells);
- if (ret)
- return ret;
-
- entry_size = 4 * (addr_cells + size_cells);
-
- if (entry_size > sizeof(value))
- return -EINVAL;
-
- ret = write_number(value, image->arch.ima_buffer_addr, addr_cells);
- if (ret)
- return ret;
-
- ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size,
- size_cells);
- if (ret)
- return ret;
-
- ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value,
- entry_size);
- if (ret < 0)
- return -EINVAL;
-
- ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr,
- image->arch.ima_buffer_size);
- if (ret)
- return -EINVAL;
-
- pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n",
- image->arch.ima_buffer_addr, image->arch.ima_buffer_size);
-
- return 0;
-}
-#endif /* CONFIG_IMA_KEXEC */
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index fbd2d0007c52..0276bc8c8969 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -149,8 +149,8 @@ static const struct ppc_pci_io iowa_pci_io = {
};
#ifdef CONFIG_PPC_INDIRECT_MMIO
-static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
- pgprot_t prot, void *caller)
+void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
+ pgprot_t prot, void *caller)
{
struct iowa_bus *bus;
void __iomem *res = __ioremap_caller(addr, size, prot, caller);
@@ -163,20 +163,17 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
}
return res;
}
-#else /* CONFIG_PPC_INDIRECT_MMIO */
-#define iowa_ioremap NULL
#endif /* !CONFIG_PPC_INDIRECT_MMIO */
+bool io_workaround_inited;
+
/* Enable IO workaround */
static void io_workaround_init(void)
{
- static int io_workaround_inited;
-
if (io_workaround_inited)
return;
ppc_pci_io = iowa_pci_io;
- ppc_md.ioremap = iowa_ioremap;
- io_workaround_inited = 1;
+ io_workaround_inited = true;
}
/* Register new bus to support workaround */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 0a67ce9f827e..9704f3f76e63 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -633,11 +633,54 @@ static void iommu_table_clear(struct iommu_table *tbl)
#endif
}
+static void iommu_table_reserve_pages(struct iommu_table *tbl,
+ unsigned long res_start, unsigned long res_end)
+{
+ int i;
+
+ WARN_ON_ONCE(res_end < res_start);
+ /*
+ * Reserve page 0 so it will not be used for any mappings.
+	 * This keeps buggy drivers that consider page 0 to be invalid
+	 * from crashing the machine or even losing data.
+ */
+ if (tbl->it_offset == 0)
+ set_bit(0, tbl->it_map);
+
+ tbl->it_reserved_start = res_start;
+ tbl->it_reserved_end = res_end;
+
+	/* If a non-empty res_start..res_end lies entirely outside the table, skip it */
+ if (res_start && res_end &&
+ (tbl->it_offset + tbl->it_size < res_start ||
+ res_end < tbl->it_offset))
+ return;
+
+ for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+ set_bit(i - tbl->it_offset, tbl->it_map);
+}
+
+static void iommu_table_release_pages(struct iommu_table *tbl)
+{
+ int i;
+
+ /*
+	 * If we reserved the first bit, clear it too, so the callers'
+	 * "table is not empty" checks do not warn about it.
+ */
+ if (tbl->it_offset == 0)
+ clear_bit(0, tbl->it_map);
+
+ for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+ clear_bit(i - tbl->it_offset, tbl->it_map);
+}
+
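The reserve/release pair above sets and clears a contiguous run of bits, shifted by it_offset, plus the always-reserved entry 0. A toy standalone model of that bookkeeping (one byte per TCE entry instead of a real bitmap, assumed table parameters):

	#include <assert.h>

	#define TBL_SIZE 64
	static unsigned char map[TBL_SIZE];	/* 0 = free, 1 = reserved */

	static void reserve_pages(unsigned long offset,
				  unsigned long res_start, unsigned long res_end)
	{
		unsigned long i;

		if (offset == 0)
			map[0] = 1;		/* entry 0 is never handed out for DMA */
		for (i = res_start; i < res_end; i++)
			map[i - offset] = 1;	/* carve out the requested window */
	}

	int main(void)
	{
		reserve_pages(0, 32, 40);	/* e.g. reserving an MMIO window */
		assert(map[0] && map[35] && !map[40]);
		return 0;
	}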
/*
* Build a iommu_table structure. This contains a bit map which
* is used to manage allocation of the tce space.
*/
-struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
+ unsigned long res_start, unsigned long res_end)
{
unsigned long sz;
static int welcomed = 0;
@@ -656,13 +699,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
tbl->it_map = page_address(page);
memset(tbl->it_map, 0, sz);
- /*
- * Reserve page 0 so it will not be used for any mappings.
- * This avoids buggy drivers that consider page 0 to be invalid
- * to crash the machine or even lose data.
- */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ iommu_table_reserve_pages(tbl, res_start, res_end);
/* We only split the IOMMU table if we have 1GB or more of space */
if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
@@ -714,12 +751,7 @@ static void iommu_table_free(struct kref *kref)
return;
}
- /*
- * In case we have reserved the first bit, we should not emit
- * the warning below.
- */
- if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+ iommu_table_release_pages(tbl);
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
@@ -981,29 +1013,32 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
-long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+ struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
{
long ret;
unsigned long size = 0;
- ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
-
+ ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL)) &&
!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
&size))
SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
- /* if (unlikely(ret))
- pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
- __func__, hwaddr, entry << tbl->it_page_shift,
- hwaddr, ret); */
-
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_tce_xchg);
+EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill);
+
+void iommu_tce_kill(struct iommu_table *tbl,
+ unsigned long entry, unsigned long pages)
+{
+ if (tbl->it_ops->tce_kill)
+ tbl->it_ops->tce_kill(tbl, entry, pages, false);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_kill);
int iommu_take_ownership(struct iommu_table *tbl)
{
@@ -1017,22 +1052,21 @@ int iommu_take_ownership(struct iommu_table *tbl)
* requires exchange() callback defined so if it is not
* implemented, we disallow taking ownership over the table.
*/
- if (!tbl->it_ops->exchange)
+ if (!tbl->it_ops->xchg_no_kill)
return -EINVAL;
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
spin_lock(&tbl->pools[i].lock);
- if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+ iommu_table_release_pages(tbl);
if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
pr_err("iommu_tce: it_map is not empty");
ret = -EBUSY;
- /* Restore bit#0 set by iommu_init_table() */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ /* Undo iommu_table_release_pages, i.e. restore bit#0, etc */
+ iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
+ tbl->it_reserved_end);
} else {
memset(tbl->it_map, 0xff, sz);
}
@@ -1055,9 +1089,8 @@ void iommu_release_ownership(struct iommu_table *tbl)
memset(tbl->it_map, 0, sz);
- /* Restore bit#0 set by iommu_init_table() */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
+ tbl->it_reserved_end);
for (i = 0; i < tbl->nr_pools; i++)
spin_unlock(&tbl->pools[i].lock);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5645bc9cbc09..5c9b11878555 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -50,6 +50,7 @@
#include <linux/debugfs.h>
#include <linux/of.h>
#include <linux/of_irq.h>
+#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <asm/io.h>
@@ -619,8 +620,6 @@ void __do_irq(struct pt_regs *regs)
trace_irq_entry(regs);
- check_stack_overflow();
-
/*
* Query the platform PIC for the interrupt & ack it.
*
@@ -652,6 +651,8 @@ void do_IRQ(struct pt_regs *regs)
irqsp = hardirq_ctx[raw_smp_processor_id()];
sirqsp = softirq_ctx[raw_smp_processor_id()];
+ check_stack_overflow();
+
/* Already there ? */
if (unlikely(cursp == irqsp || cursp == sirqsp)) {
__do_irq(regs);
@@ -664,8 +665,29 @@ void do_IRQ(struct pt_regs *regs)
set_irq_regs(old_regs);
}
+static void *__init alloc_vm_stack(void)
+{
+ return __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START,
+ VMALLOC_END, THREADINFO_GFP, PAGE_KERNEL,
+ 0, NUMA_NO_NODE, (void*)_RET_IP_);
+}
+
+static void __init vmap_irqstack_init(void)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ softirq_ctx[i] = alloc_vm_stack();
+ hardirq_ctx[i] = alloc_vm_stack();
+ }
+}
+
void __init init_IRQ(void)
{
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ vmap_irqstack_init();
+
if (ppc_md.init_IRQ)
ppc_md.init_IRQ();
}
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
deleted file mode 100644
index 83cf7b852876..000000000000
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ /dev/null
@@ -1,660 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Load ELF vmlinux file for the kexec_file_load syscall.
- *
- * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
- * Copyright (C) 2004 IBM Corp.
- * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
- * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
- * Copyright (C) 2016 IBM Corporation
- *
- * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
- * Heavily modified for the kernel by
- * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
- */
-
-#define pr_fmt(fmt) "kexec_elf: " fmt
-
-#include <linux/elf.h>
-#include <linux/kexec.h>
-#include <linux/libfdt.h>
-#include <linux/module.h>
-#include <linux/of_fdt.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-
-#define PURGATORY_STACK_SIZE (16 * 1024)
-
-#define elf_addr_to_cpu elf64_to_cpu
-
-#ifndef Elf_Rel
-#define Elf_Rel Elf64_Rel
-#endif /* Elf_Rel */
-
-struct elf_info {
- /*
- * Where the ELF binary contents are kept.
- * Memory managed by the user of the struct.
- */
- const char *buffer;
-
- const struct elfhdr *ehdr;
- const struct elf_phdr *proghdrs;
- struct elf_shdr *sechdrs;
-};
-
-static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
-{
- return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
-}
-
-static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le64_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be64_to_cpu(value);
-
- return value;
-}
-
-static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le16_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be16_to_cpu(value);
-
- return value;
-}
-
-static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le32_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be32_to_cpu(value);
-
- return value;
-}
-
-/**
- * elf_is_ehdr_sane - check that it is safe to use the ELF header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len)
-{
- if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) {
- pr_debug("Bad program header size.\n");
- return false;
- } else if (ehdr->e_shnum > 0 &&
- ehdr->e_shentsize != sizeof(struct elf_shdr)) {
- pr_debug("Bad section header size.\n");
- return false;
- } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
- ehdr->e_version != EV_CURRENT) {
- pr_debug("Unknown ELF version.\n");
- return false;
- }
-
- if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
- size_t phdr_size;
-
- /*
- * e_phnum is at most 65535 so calculating the size of the
- * program header cannot overflow.
- */
- phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
- /* Sanity check the program header table location. */
- if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) {
- pr_debug("Program headers at invalid location.\n");
- return false;
- } else if (ehdr->e_phoff + phdr_size > buf_len) {
- pr_debug("Program headers truncated.\n");
- return false;
- }
- }
-
- if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
- size_t shdr_size;
-
- /*
- * e_shnum is at most 65536 so calculating
- * the size of the section header cannot overflow.
- */
- shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum;
-
- /* Sanity check the section header table location. */
- if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) {
- pr_debug("Section headers at invalid location.\n");
- return false;
- } else if (ehdr->e_shoff + shdr_size > buf_len) {
- pr_debug("Section headers truncated.\n");
- return false;
- }
- }
-
- return true;
-}
-
-static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr)
-{
- struct elfhdr *buf_ehdr;
-
- if (len < sizeof(*buf_ehdr)) {
- pr_debug("Buffer is too small to hold ELF header.\n");
- return -ENOEXEC;
- }
-
- memset(ehdr, 0, sizeof(*ehdr));
- memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident));
- if (!elf_is_elf_file(ehdr)) {
- pr_debug("No ELF header magic.\n");
- return -ENOEXEC;
- }
-
- if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) {
- pr_debug("Not a supported ELF class.\n");
- return -ENOEXEC;
- } else if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
- ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
- pr_debug("Not a supported ELF data format.\n");
- return -ENOEXEC;
- }
-
- buf_ehdr = (struct elfhdr *) buf;
- if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) {
- pr_debug("Bad ELF header size.\n");
- return -ENOEXEC;
- }
-
- ehdr->e_type = elf16_to_cpu(ehdr, buf_ehdr->e_type);
- ehdr->e_machine = elf16_to_cpu(ehdr, buf_ehdr->e_machine);
- ehdr->e_version = elf32_to_cpu(ehdr, buf_ehdr->e_version);
- ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry);
- ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff);
- ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff);
- ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags);
- ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize);
- ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum);
- ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize);
- ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum);
- ehdr->e_shstrndx = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx);
-
- return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_is_phdr_sane - check that it is safe to use the program header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len)
-{
-
- if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) {
- pr_debug("ELF segment location wraps around.\n");
- return false;
- } else if (phdr->p_offset + phdr->p_filesz > buf_len) {
- pr_debug("ELF segment not in file.\n");
- return false;
- } else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) {
- pr_debug("ELF segment address wraps around.\n");
- return false;
- }
-
- return true;
-}
-
-static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info,
- int idx)
-{
- /* Override the const in proghdrs, we are the ones doing the loading. */
- struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx];
- const char *pbuf;
- struct elf_phdr *buf_phdr;
-
- pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr));
- buf_phdr = (struct elf_phdr *) pbuf;
-
- phdr->p_type = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type);
- phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset);
- phdr->p_paddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr);
- phdr->p_vaddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr);
- phdr->p_flags = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags);
-
- /*
- * The following fields have a type equivalent to Elf_Addr
- * both in 32 bit and 64 bit ELF.
- */
- phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz);
- phdr->p_memsz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz);
- phdr->p_align = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align);
-
- return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_phdrs - read the program headers from the buffer
- *
- * This function assumes that the program header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_phdrs(const char *buf, size_t len,
- struct elf_info *elf_info)
-{
- size_t phdr_size, i;
- const struct elfhdr *ehdr = elf_info->ehdr;
-
- /*
- * e_phnum is at most 65535 so calculating the size of the
- * program header cannot overflow.
- */
- phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
- elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL);
- if (!elf_info->proghdrs)
- return -ENOMEM;
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- int ret;
-
- ret = elf_read_phdr(buf, len, elf_info, i);
- if (ret) {
- kfree(elf_info->proghdrs);
- elf_info->proghdrs = NULL;
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_is_shdr_sane - check that it is safe to use the section header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len)
-{
- bool size_ok;
-
- /* SHT_NULL headers have undefined values, so we can't check them. */
- if (shdr->sh_type == SHT_NULL)
- return true;
-
- /* Now verify sh_entsize */
- switch (shdr->sh_type) {
- case SHT_SYMTAB:
- size_ok = shdr->sh_entsize == sizeof(Elf_Sym);
- break;
- case SHT_RELA:
- size_ok = shdr->sh_entsize == sizeof(Elf_Rela);
- break;
- case SHT_DYNAMIC:
- size_ok = shdr->sh_entsize == sizeof(Elf_Dyn);
- break;
- case SHT_REL:
- size_ok = shdr->sh_entsize == sizeof(Elf_Rel);
- break;
- case SHT_NOTE:
- case SHT_PROGBITS:
- case SHT_HASH:
- case SHT_NOBITS:
- default:
- /*
- * This is a section whose entsize requirements
- * I don't care about. If I don't know about
- * the section I can't care about its entsize
- * requirements.
- */
- size_ok = true;
- break;
- }
-
- if (!size_ok) {
- pr_debug("ELF section with wrong entry size.\n");
- return false;
- } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) {
- pr_debug("ELF section address wraps around.\n");
- return false;
- }
-
- if (shdr->sh_type != SHT_NOBITS) {
- if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) {
- pr_debug("ELF section location wraps around.\n");
- return false;
- } else if (shdr->sh_offset + shdr->sh_size > buf_len) {
- pr_debug("ELF section not in file.\n");
- return false;
- }
- }
-
- return true;
-}
-
-static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info,
- int idx)
-{
- struct elf_shdr *shdr = &elf_info->sechdrs[idx];
- const struct elfhdr *ehdr = elf_info->ehdr;
- const char *sbuf;
- struct elf_shdr *buf_shdr;
-
- sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr);
- buf_shdr = (struct elf_shdr *) sbuf;
-
- shdr->sh_name = elf32_to_cpu(ehdr, buf_shdr->sh_name);
- shdr->sh_type = elf32_to_cpu(ehdr, buf_shdr->sh_type);
- shdr->sh_addr = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr);
- shdr->sh_offset = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset);
- shdr->sh_link = elf32_to_cpu(ehdr, buf_shdr->sh_link);
- shdr->sh_info = elf32_to_cpu(ehdr, buf_shdr->sh_info);
-
- /*
- * The following fields have a type equivalent to Elf_Addr
- * both in 32 bit and 64 bit ELF.
- */
- shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags);
- shdr->sh_size = elf_addr_to_cpu(ehdr, buf_shdr->sh_size);
- shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign);
- shdr->sh_entsize = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize);
-
- return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_shdrs - read the section headers from the buffer
- *
- * This function assumes that the section header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_shdrs(const char *buf, size_t len,
- struct elf_info *elf_info)
-{
- size_t shdr_size, i;
-
- /*
- * e_shnum is at most 65535 so calculating
- * the size of the section header cannot overflow.
- */
- shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum;
-
- elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL);
- if (!elf_info->sechdrs)
- return -ENOMEM;
-
- for (i = 0; i < elf_info->ehdr->e_shnum; i++) {
- int ret;
-
- ret = elf_read_shdr(buf, len, elf_info, i);
- if (ret) {
- kfree(elf_info->sechdrs);
- elf_info->sechdrs = NULL;
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_read_from_buffer - read ELF file and set up ELF header and ELF info
- * @buf: Buffer to read ELF file from.
- * @len: Size of @buf.
- * @ehdr: Pointer to existing struct which will be populated.
- * @elf_info: Pointer to existing struct which will be populated.
- *
- * This function allows reading ELF files with different byte order than
- * the kernel, byte-swapping the fields as needed.
- *
- * Return:
- * On success returns 0, and the caller should call elf_free_info(elf_info) to
- * free the memory allocated for the section and program headers.
- */
-int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr,
- struct elf_info *elf_info)
-{
- int ret;
-
- ret = elf_read_ehdr(buf, len, ehdr);
- if (ret)
- return ret;
-
- elf_info->buffer = buf;
- elf_info->ehdr = ehdr;
- if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
- ret = elf_read_phdrs(buf, len, elf_info);
- if (ret)
- return ret;
- }
- if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
- ret = elf_read_shdrs(buf, len, elf_info);
- if (ret) {
- kfree(elf_info->proghdrs);
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_free_info - free memory allocated by elf_read_from_buffer
- */
-void elf_free_info(struct elf_info *elf_info)
-{
- kfree(elf_info->proghdrs);
- kfree(elf_info->sechdrs);
- memset(elf_info, 0, sizeof(*elf_info));
-}
-/**
- * build_elf_exec_info - read ELF executable and check that we can use it
- */
-static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr,
- struct elf_info *elf_info)
-{
- int i;
- int ret;
-
- ret = elf_read_from_buffer(buf, len, ehdr, elf_info);
- if (ret)
- return ret;
-
- /* Big endian vmlinux has type ET_DYN. */
- if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) {
- pr_err("Not an ELF executable.\n");
- goto error;
- } else if (!elf_info->proghdrs) {
- pr_err("No ELF program header.\n");
- goto error;
- }
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- /*
- * Kexec does not support loading interpreters.
- * In addition, this check keeps us from attempting
- * to kexec ordinary executables.
- */
- if (elf_info->proghdrs[i].p_type == PT_INTERP) {
- pr_err("Requires an ELF interpreter.\n");
- goto error;
- }
- }
-
- return 0;
-error:
- elf_free_info(elf_info);
- return -ENOEXEC;
-}
-
-static int elf64_probe(const char *buf, unsigned long len)
-{
- struct elfhdr ehdr;
- struct elf_info elf_info;
- int ret;
-
- ret = build_elf_exec_info(buf, len, &ehdr, &elf_info);
- if (ret)
- return ret;
-
- elf_free_info(&elf_info);
-
- return elf_check_arch(&ehdr) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_exec_load - load ELF executable image
- * @lowest_load_addr: On return, will be the address where the first PT_LOAD
- * section will be loaded in memory.
- *
- * Return:
- * 0 on success, negative value on failure.
- */
-static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr,
- struct elf_info *elf_info,
- unsigned long *lowest_load_addr)
-{
- unsigned long base = 0, lowest_addr = UINT_MAX;
- int ret;
- size_t i;
- struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size,
- .top_down = false };
-
- /* Read in the PT_LOAD segments. */
- for (i = 0; i < ehdr->e_phnum; i++) {
- unsigned long load_addr;
- size_t size;
- const struct elf_phdr *phdr;
-
- phdr = &elf_info->proghdrs[i];
- if (phdr->p_type != PT_LOAD)
- continue;
-
- size = phdr->p_filesz;
- if (size > phdr->p_memsz)
- size = phdr->p_memsz;
-
- kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
- kbuf.bufsz = size;
- kbuf.memsz = phdr->p_memsz;
- kbuf.buf_align = phdr->p_align;
- kbuf.buf_min = phdr->p_paddr + base;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- goto out;
- load_addr = kbuf.mem;
-
- if (load_addr < lowest_addr)
- lowest_addr = load_addr;
- }
-
- /* Update entry point to reflect new load address. */
- ehdr->e_entry += base;
-
- *lowest_load_addr = lowest_addr;
- ret = 0;
- out:
- return ret;
-}
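
kexec_add_buffer() searches for a free memory hole satisfying the
constraints in struct kexec_buf and writes the chosen physical address back
into kbuf.mem. A minimal usage sketch (data and len are hypothetical):

	struct kexec_buf kbuf = {
		.image     = image,
		.buffer    = data,			/* source in kernel memory */
		.bufsz     = len,			/* bytes to copy */
		.memsz     = len,			/* bytes to reserve */
		.buf_align = PAGE_SIZE,
		.buf_min   = 0,
		.buf_max   = ppc64_rma_size,		/* stay inside the RMA */
		.top_down  = false,
		.mem       = KEXEC_BUF_MEM_UNKNOWN,	/* let the core pick */
	};
	int ret = kexec_add_buffer(&kbuf);
	/* on success, kbuf.mem holds the physical load address */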
-
-static void *elf64_load(struct kimage *image, char *kernel_buf,
- unsigned long kernel_len, char *initrd,
- unsigned long initrd_len, char *cmdline,
- unsigned long cmdline_len)
-{
- int ret;
- unsigned int fdt_size;
- unsigned long kernel_load_addr;
- unsigned long initrd_load_addr = 0, fdt_load_addr;
- void *fdt;
- const void *slave_code;
- struct elfhdr ehdr;
- struct elf_info elf_info;
- struct kexec_buf kbuf = { .image = image, .buf_min = 0,
- .buf_max = ppc64_rma_size };
- struct kexec_buf pbuf = { .image = image, .buf_min = 0,
- .buf_max = ppc64_rma_size, .top_down = true,
- .mem = KEXEC_BUF_MEM_UNKNOWN };
-
- ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info);
- if (ret)
- goto out;
-
- ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr);
- if (ret)
- goto out;
-
- pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
-
- ret = kexec_load_purgatory(image, &pbuf);
- if (ret) {
- pr_err("Loading purgatory failed.\n");
- goto out;
- }
-
- pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
-
- if (initrd != NULL) {
- kbuf.buffer = initrd;
- kbuf.bufsz = kbuf.memsz = initrd_len;
- kbuf.buf_align = PAGE_SIZE;
- kbuf.top_down = false;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- goto out;
- initrd_load_addr = kbuf.mem;
-
- pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr);
- }
-
- fdt_size = fdt_totalsize(initial_boot_params) * 2;
- fdt = kmalloc(fdt_size, GFP_KERNEL);
- if (!fdt) {
- pr_err("Not enough memory for the device tree.\n");
- ret = -ENOMEM;
- goto out;
- }
- ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
- if (ret < 0) {
- pr_err("Error setting up the new device tree.\n");
- ret = -EINVAL;
- goto out;
- }
-
- ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
- if (ret)
- goto out;
-
- fdt_pack(fdt);
-
- kbuf.buffer = fdt;
- kbuf.bufsz = kbuf.memsz = fdt_size;
- kbuf.buf_align = PAGE_SIZE;
- kbuf.top_down = true;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- goto out;
- fdt_load_addr = kbuf.mem;
-
- pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
-
- slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
- ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
- fdt_load_addr);
- if (ret)
- pr_err("Error setting up the purgatory.\n");
-
-out:
- elf_free_info(&elf_info);
-
- /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
- return ret ? ERR_PTR(ret) : fdt;
-}
-
-const struct kexec_file_ops kexec_elf64_ops = {
- .probe = elf64_probe,
- .load = elf64_load,
-};
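
elf64_load() above follows the usual libfdt grow/edit/pack sequence:
allocate a buffer with headroom, fdt_open_into() to copy and expand the
tree, edit it, then fdt_pack() before loading. A hypothetical helper for
the first step:

	static void *copy_fdt_with_headroom(const void *old_fdt, size_t *sizep)
	{
		size_t sz = fdt_totalsize(old_fdt) * 2;	/* room for new properties */
		void *fdt = kmalloc(sz, GFP_KERNEL);

		if (!fdt)
			return NULL;
		if (fdt_open_into(old_fdt, fdt, sz) < 0) {	/* copy and expand */
			kfree(fdt);
			return NULL;
		}
		*sizep = sz;
		return fdt;	/* caller edits, then fdt_pack()s before use */
	}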
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index b7b3a5e4e224..617eba82531c 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -64,16 +64,17 @@
#define KVM_INST_MTSRIN 0x7c0001e4
static bool kvm_patching_worked = true;
-char kvm_tmp[1024 * 1024];
+extern char kvm_tmp[];
+extern char kvm_tmp_end[];
static int kvm_tmp_index;
-static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
+static void __init kvm_patch_ins(u32 *inst, u32 new_inst)
{
*inst = new_inst;
flush_icache_range((ulong)inst, (ulong)inst + 4);
}
-static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -82,7 +83,7 @@ static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -91,12 +92,12 @@ static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff));
}
-static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc));
@@ -105,17 +106,17 @@ static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc));
}
-static void kvm_patch_ins_nop(u32 *inst)
+static void __init kvm_patch_ins_nop(u32 *inst)
{
kvm_patch_ins(inst, KVM_INST_NOP);
}
-static void kvm_patch_ins_b(u32 *inst, int addr)
+static void __init kvm_patch_ins_b(u32 *inst, int addr)
{
#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S)
 /* On relocatable kernels interrupt handlers and our code
@@ -128,11 +129,11 @@ static void kvm_patch_ins_b(u32 *inst, int addr)
kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK));
}
-static u32 *kvm_alloc(int len)
+static u32 * __init kvm_alloc(int len)
{
u32 *p;
- if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) {
+ if ((kvm_tmp_index + len) > (kvm_tmp_end - kvm_tmp)) {
printk(KERN_ERR "KVM: No more space (%d + %d)\n",
kvm_tmp_index, len);
kvm_patching_worked = false;
@@ -151,7 +152,7 @@ extern u32 kvm_emulate_mtmsrd_orig_ins_offs;
extern u32 kvm_emulate_mtmsrd_len;
extern u32 kvm_emulate_mtmsrd[];
-static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
+static void __init kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -204,7 +205,7 @@ extern u32 kvm_emulate_mtmsr_orig_ins_offs;
extern u32 kvm_emulate_mtmsr_len;
extern u32 kvm_emulate_mtmsr[];
-static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
+static void __init kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -265,7 +266,7 @@ extern u32 kvm_emulate_wrtee_orig_ins_offs;
extern u32 kvm_emulate_wrtee_len;
extern u32 kvm_emulate_wrtee[];
-static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
+static void __init kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
{
u32 *p;
int distance_start;
@@ -322,7 +323,7 @@ extern u32 kvm_emulate_wrteei_0_branch_offs;
extern u32 kvm_emulate_wrteei_0_len;
extern u32 kvm_emulate_wrteei_0[];
-static void kvm_patch_ins_wrteei_0(u32 *inst)
+static void __init kvm_patch_ins_wrteei_0(u32 *inst)
{
u32 *p;
int distance_start;
@@ -363,7 +364,7 @@ extern u32 kvm_emulate_mtsrin_orig_ins_offs;
extern u32 kvm_emulate_mtsrin_len;
extern u32 kvm_emulate_mtsrin[];
-static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
+static void __init kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
{
u32 *p;
int distance_start;
@@ -399,7 +400,7 @@ static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
#endif
-static void kvm_map_magic_page(void *data)
+static void __init kvm_map_magic_page(void *data)
{
u32 *features = data;
@@ -414,7 +415,7 @@ static void kvm_map_magic_page(void *data)
*features = out[0];
}
-static void kvm_check_ins(u32 *inst, u32 features)
+static void __init kvm_check_ins(u32 *inst, u32 features)
{
u32 _inst = *inst;
u32 inst_no_rt = _inst & ~KVM_MASK_RT;
@@ -658,7 +659,7 @@ static void kvm_check_ins(u32 *inst, u32 features)
extern u32 kvm_template_start[];
extern u32 kvm_template_end[];
-static void kvm_use_magic_page(void)
+static void __init kvm_use_magic_page(void)
{
u32 *p;
u32 *start, *end;
@@ -699,25 +700,13 @@ static void kvm_use_magic_page(void)
kvm_patching_worked ? "worked" : "failed");
}
-static __init void kvm_free_tmp(void)
-{
- /*
- * Inform kmemleak about the hole in the .bss section since the
- * corresponding pages will be unmapped with DEBUG_PAGEALLOC=y.
- */
- kmemleak_free_part(&kvm_tmp[kvm_tmp_index],
- ARRAY_SIZE(kvm_tmp) - kvm_tmp_index);
- free_reserved_area(&kvm_tmp[kvm_tmp_index],
- &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
-}
-
static int __init kvm_guest_init(void)
{
if (!kvm_para_available())
- goto free_tmp;
+ return 0;
if (!epapr_paravirt_enabled)
- goto free_tmp;
+ return 0;
if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
kvm_use_magic_page();
@@ -727,9 +716,6 @@ static int __init kvm_guest_init(void)
powersave_nap = 1;
#endif
-free_tmp:
- kvm_free_tmp();
-
return 0;
}
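
With kvm_tmp now emitted from kvm_emul.S between the kvm_tmp and
kvm_tmp_end symbols (see the hunk below), kvm_alloc() becomes a bump
allocator over a linker-delimited window. A standalone sketch of that
pattern (symbol names hypothetical):

	extern char window_start[], window_end[];
	static size_t window_used;

	static void *alloc_stub(size_t len)
	{
		if (window_used + len > (size_t)(window_end - window_start))
			return NULL;	/* window exhausted */
		window_used += len;
		return window_start + (window_used - len);
	}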
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
index eb2568f583ae..7af6f8b50c5d 100644
--- a/arch/powerpc/kernel/kvm_emul.S
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -192,6 +192,8 @@ kvm_emulate_mtmsr_orig_ins_offs:
kvm_emulate_mtmsr_len:
.long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
+#ifdef CONFIG_BOOKE
+
/* also used for wrteei 1 */
.global kvm_emulate_wrtee
kvm_emulate_wrtee:
@@ -285,6 +287,10 @@ kvm_emulate_wrteei_0_branch_offs:
kvm_emulate_wrteei_0_len:
.long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4
+#endif /* CONFIG_BOOKE */
+
+#ifdef CONFIG_PPC_BOOK3S_32
+
.global kvm_emulate_mtsrin
kvm_emulate_mtsrin:
@@ -334,5 +340,15 @@ kvm_emulate_mtsrin_orig_ins_offs:
kvm_emulate_mtsrin_len:
.long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
+#endif /* CONFIG_PPC_BOOK3S_32 */
+
+ .balign 4
+ .global kvm_tmp
+kvm_tmp:
+ .space (64 * 1024)
+
+.global kvm_tmp_end
+kvm_tmp_end:
+
.global kvm_template_end
kvm_template_end:
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 7cea5978f21f..f061e06e9f51 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -479,8 +479,10 @@ static void __init fixup_port_irq(int index,
port->irq = virq;
#ifdef CONFIG_SERIAL_8250_FSL
- if (of_device_is_compatible(np, "fsl,ns16550"))
+ if (of_device_is_compatible(np, "fsl,ns16550")) {
port->handle_irq = fsl8250_handle_irq;
+ port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE);
+ }
#endif
}
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
deleted file mode 100644
index c4ed328a7b96..000000000000
--- a/arch/powerpc/kernel/machine_kexec.c
+++ /dev/null
@@ -1,279 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Code to handle transition of Linux booting another kernel.
- *
- * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * Copyright (C) 2005 IBM Corporation.
- */
-
-#include <linux/kexec.h>
-#include <linux/reboot.h>
-#include <linux/threads.h>
-#include <linux/memblock.h>
-#include <linux/of.h>
-#include <linux/irq.h>
-#include <linux/ftrace.h>
-
-#include <asm/kdump.h>
-#include <asm/machdep.h>
-#include <asm/pgalloc.h>
-#include <asm/prom.h>
-#include <asm/sections.h>
-
-void machine_kexec_mask_interrupts(void) {
- unsigned int i;
- struct irq_desc *desc;
-
- for_each_irq_desc(i, desc) {
- struct irq_chip *chip;
-
- chip = irq_desc_get_chip(desc);
- if (!chip)
- continue;
-
- if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
- chip->irq_eoi(&desc->irq_data);
-
- if (chip->irq_mask)
- chip->irq_mask(&desc->irq_data);
-
- if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
- chip->irq_disable(&desc->irq_data);
- }
-}
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
- default_machine_crash_shutdown(regs);
-}
-
-/*
- * Do whatever setup is needed on the image and the
- * reboot code buffer to allow us to avoid allocations
- * later.
- */
-int machine_kexec_prepare(struct kimage *image)
-{
- if (ppc_md.machine_kexec_prepare)
- return ppc_md.machine_kexec_prepare(image);
- else
- return default_machine_kexec_prepare(image);
-}
-
-void machine_kexec_cleanup(struct kimage *image)
-{
-}
-
-void arch_crash_save_vmcoreinfo(void)
-{
-
-#ifdef CONFIG_NEED_MULTIPLE_NODES
- VMCOREINFO_SYMBOL(node_data);
- VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#endif
-#ifndef CONFIG_NEED_MULTIPLE_NODES
- VMCOREINFO_SYMBOL(contig_page_data);
-#endif
-#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
- VMCOREINFO_SYMBOL(vmemmap_list);
- VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
- VMCOREINFO_SYMBOL(mmu_psize_defs);
- VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
- VMCOREINFO_OFFSET(vmemmap_backing, list);
- VMCOREINFO_OFFSET(vmemmap_backing, phys);
- VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
- VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
- VMCOREINFO_OFFSET(mmu_psize_def, shift);
-#endif
-}
-
-/*
- * Do not allocate memory (or fail in any way) in machine_kexec().
- * We are past the point of no return, committed to rebooting now.
- */
-void machine_kexec(struct kimage *image)
-{
- int save_ftrace_enabled;
-
- save_ftrace_enabled = __ftrace_enabled_save();
- this_cpu_disable_ftrace();
-
- if (ppc_md.machine_kexec)
- ppc_md.machine_kexec(image);
- else
- default_machine_kexec(image);
-
- this_cpu_enable_ftrace();
- __ftrace_enabled_restore(save_ftrace_enabled);
-
- /* Fall back to normal restart if we're still alive. */
- machine_restart(NULL);
- for(;;);
-}
-
-void __init reserve_crashkernel(void)
-{
- unsigned long long crash_size, crash_base;
- int ret;
-
- /* use common parsing */
- ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
- &crash_size, &crash_base);
- if (ret == 0 && crash_size > 0) {
- crashk_res.start = crash_base;
- crashk_res.end = crash_base + crash_size - 1;
- }
-
- if (crashk_res.end == crashk_res.start) {
- crashk_res.start = crashk_res.end = 0;
- return;
- }
-
- /* We might have got these values via the command line or the
- * device tree; either way, sanitise them now. */
-
- crash_size = resource_size(&crashk_res);
-
-#ifndef CONFIG_NONSTATIC_KERNEL
- if (crashk_res.start != KDUMP_KERNELBASE)
- printk("Crash kernel location must be 0x%x\n",
- KDUMP_KERNELBASE);
-
- crashk_res.start = KDUMP_KERNELBASE;
-#else
- if (!crashk_res.start) {
-#ifdef CONFIG_PPC64
- /*
- * On 64bit we split the RMO in half but cap it at half of
- * a small SLB (128MB) since the crash kernel needs to place
- * itself and some stacks to be in the first segment.
- */
- crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
-#else
- crashk_res.start = KDUMP_KERNELBASE;
-#endif
- }
-
- crash_base = PAGE_ALIGN(crashk_res.start);
- if (crash_base != crashk_res.start) {
- printk("Crash kernel base must be aligned to 0x%lx\n",
- PAGE_SIZE);
- crashk_res.start = crash_base;
- }
-
-#endif
- crash_size = PAGE_ALIGN(crash_size);
- crashk_res.end = crashk_res.start + crash_size - 1;
-
- /* The crash region must not overlap the current kernel */
- if (overlaps_crashkernel(__pa(_stext), _end - _stext)) {
- printk(KERN_WARNING
- "Crash kernel can not overlap current kernel\n");
- crashk_res.start = crashk_res.end = 0;
- return;
- }
-
- /* Crash kernel trumps memory limit */
- if (memory_limit && memory_limit <= crashk_res.end) {
- memory_limit = crashk_res.end + 1;
- printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
- memory_limit);
- }
-
- printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
- "for crashkernel (System RAM: %ldMB)\n",
- (unsigned long)(crash_size >> 20),
- (unsigned long)(crashk_res.start >> 20),
- (unsigned long)(memblock_phys_mem_size() >> 20));
-
- if (!memblock_is_region_memory(crashk_res.start, crash_size) ||
- memblock_reserve(crashk_res.start, crash_size)) {
- pr_err("Failed to reserve memory for crashkernel!\n");
- crashk_res.start = crashk_res.end = 0;
- return;
- }
-}
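
parse_crashkernel() accepts the standard crashkernel= boot parameter
syntax; for example (values illustrative):

	crashkernel=256M		reserve 256MB, base chosen by the kernel
	crashkernel=256M@32M		reserve 256MB starting at 32MB
	crashkernel=2G-4G:256M,4G-:512M	scale the reservation with system RAM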
-
-int overlaps_crashkernel(unsigned long start, unsigned long size)
-{
- return (start + size) > crashk_res.start && start <= crashk_res.end;
-}
-
-/* Values we need to export to the second kernel via the device tree. */
-static phys_addr_t kernel_end;
-static phys_addr_t crashk_base;
-static phys_addr_t crashk_size;
-static unsigned long long mem_limit;
-
-static struct property kernel_end_prop = {
- .name = "linux,kernel-end",
- .length = sizeof(phys_addr_t),
- .value = &kernel_end,
-};
-
-static struct property crashk_base_prop = {
- .name = "linux,crashkernel-base",
- .length = sizeof(phys_addr_t),
- .value = &crashk_base
-};
-
-static struct property crashk_size_prop = {
- .name = "linux,crashkernel-size",
- .length = sizeof(phys_addr_t),
- .value = &crashk_size,
-};
-
-static struct property memory_limit_prop = {
- .name = "linux,memory-limit",
- .length = sizeof(unsigned long long),
- .value = &mem_limit,
-};
-
-#define cpu_to_be_ulong __PASTE(cpu_to_be, BITS_PER_LONG)
-
-static void __init export_crashk_values(struct device_node *node)
-{
- /* There might be existing crash kernel properties, but we can't
- * be sure what's in them, so remove them. */
- of_remove_property(node, of_find_property(node,
- "linux,crashkernel-base", NULL));
- of_remove_property(node, of_find_property(node,
- "linux,crashkernel-size", NULL));
-
- if (crashk_res.start != 0) {
- crashk_base = cpu_to_be_ulong(crashk_res.start);
- of_add_property(node, &crashk_base_prop);
- crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
- of_add_property(node, &crashk_size_prop);
- }
-
- /*
- * memory_limit is required by the kexec-tools to limit the
- * crash regions to the actual memory used.
- */
- mem_limit = cpu_to_be_ulong(memory_limit);
- of_update_property(node, &memory_limit_prop);
-}
-
-static int __init kexec_setup(void)
-{
- struct device_node *node;
-
- node = of_find_node_by_path("/chosen");
- if (!node)
- return -ENOENT;
-
- /* remove any stale properties so ours can be found */
- of_remove_property(node, of_find_property(node, kernel_end_prop.name, NULL));
-
- /* information needed by userspace when using default_machine_kexec */
- kernel_end = cpu_to_be_ulong(__pa(_end));
- of_add_property(node, &kernel_end_prop);
-
- export_crashk_values(node);
-
- of_node_put(node);
- return 0;
-}
-late_initcall(kexec_setup);
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
deleted file mode 100644
index bf9f1f906d64..000000000000
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ /dev/null
@@ -1,69 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * PPC32 code to handle Linux booting another kernel.
- *
- * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * Copyright (C) 2005 IBM Corporation.
- */
-
-#include <linux/kexec.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <asm/cacheflush.h>
-#include <asm/hw_irq.h>
-#include <asm/io.h>
-
-typedef void (*relocate_new_kernel_t)(
- unsigned long indirection_page,
- unsigned long reboot_code_buffer,
- unsigned long start_address) __noreturn;
-
-/*
- * This is a generic machine_kexec function suitable at least for
- * non-OpenFirmware embedded platforms.
- * It merely copies the image relocation code to the control page and
- * jumps to it.
- * A platform-specific function may just call this one.
- */
-void default_machine_kexec(struct kimage *image)
-{
- extern const unsigned int relocate_new_kernel_size;
- unsigned long page_list;
- unsigned long reboot_code_buffer, reboot_code_buffer_phys;
- relocate_new_kernel_t rnk;
-
- /* Interrupts aren't acceptable while we reboot */
- local_irq_disable();
-
- /* mask each interrupt so we are in a more sane state for the
- * kexec kernel */
- machine_kexec_mask_interrupts();
-
- page_list = image->head;
-
- /* we need both effective and real address here */
- reboot_code_buffer =
- (unsigned long)page_address(image->control_code_page);
- reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer);
-
- /* copy our kernel relocation code to the control code page */
- memcpy((void *)reboot_code_buffer, relocate_new_kernel,
- relocate_new_kernel_size);
-
- flush_icache_range(reboot_code_buffer,
- reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
- printk(KERN_INFO "Bye!\n");
-
- if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x))
- relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start);
-
- /* now call it */
- rnk = (relocate_new_kernel_t) reboot_code_buffer;
- (*rnk)(page_list, reboot_code_buffer_phys, image->start);
-}
-
-int default_machine_kexec_prepare(struct kimage *image)
-{
- return 0;
-}
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
deleted file mode 100644
index 18481b0e2788..000000000000
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ /dev/null
@@ -1,408 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * PPC64 code to handle Linux booting another kernel.
- *
- * Copyright (C) 2004-2005, IBM Corp.
- *
- * Created by: Milton D Miller II
- */
-
-
-#include <linux/kexec.h>
-#include <linux/smp.h>
-#include <linux/thread_info.h>
-#include <linux/init_task.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/cpu.h>
-#include <linux/hardirq.h>
-
-#include <asm/page.h>
-#include <asm/current.h>
-#include <asm/machdep.h>
-#include <asm/cacheflush.h>
-#include <asm/firmware.h>
-#include <asm/paca.h>
-#include <asm/mmu.h>
-#include <asm/sections.h> /* _end */
-#include <asm/prom.h>
-#include <asm/smp.h>
-#include <asm/hw_breakpoint.h>
-#include <asm/asm-prototypes.h>
-
-int default_machine_kexec_prepare(struct kimage *image)
-{
- int i;
- unsigned long begin, end; /* limits of segment */
- unsigned long low, high; /* limits of blocked memory range */
- struct device_node *node;
- const unsigned long *basep;
- const unsigned int *sizep;
-
- /*
- * Since we use the kernel fault handlers and paging code to
- * handle the virtual mode, we must make sure no destination
- * overlaps kernel static data or bss.
- */
- for (i = 0; i < image->nr_segments; i++)
- if (image->segment[i].mem < __pa(_end))
- return -ETXTBSY;
-
- /* We also should not overwrite the tce tables */
- for_each_node_by_type(node, "pci") {
- basep = of_get_property(node, "linux,tce-base", NULL);
- sizep = of_get_property(node, "linux,tce-size", NULL);
- if (basep == NULL || sizep == NULL)
- continue;
-
- low = *basep;
- high = low + (*sizep);
-
- for (i = 0; i < image->nr_segments; i++) {
- begin = image->segment[i].mem;
- end = begin + image->segment[i].memsz;
-
- if ((begin < high) && (end > low))
- return -ETXTBSY;
- }
- }
-
- return 0;
-}
-
-static void copy_segments(unsigned long ind)
-{
- unsigned long entry;
- unsigned long *ptr;
- void *dest;
- void *addr;
-
- /*
- * We rely on kexec_load to create a list that properly
- * initializes these pointers before they are used.
- * We will still crash if the list is wrong, but at least
- * the compiler will be quiet.
- */
- ptr = NULL;
- dest = NULL;
-
- for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
- addr = __va(entry & PAGE_MASK);
-
- switch (entry & IND_FLAGS) {
- case IND_DESTINATION:
- dest = addr;
- break;
- case IND_INDIRECTION:
- ptr = addr;
- break;
- case IND_SOURCE:
- copy_page(dest, addr);
- dest += PAGE_SIZE;
- }
- }
-}
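
The head entry is normally an IND_INDIRECTION pointing at a page of further
entries, each a physical address tagged with an IND_* flag. For a
hypothetical three-page image (addresses invented):

	/* image->head = __pa(list_page) | IND_INDIRECTION; */
	unsigned long list_page[] = {
		0x2000000 | IND_DESTINATION,	/* copy destination: 32MB */
		0x1a31000 | IND_SOURCE,		/* page -> 0x2000000 */
		0x1b02000 | IND_SOURCE,		/* page -> 0x2001000 */
		0x1c55000 | IND_SOURCE,		/* page -> 0x2002000 */
		IND_DONE,			/* end of list */
	};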
-
-void kexec_copy_flush(struct kimage *image)
-{
- long i, nr_segments = image->nr_segments;
- struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
-
- /* save the ranges on the stack to efficiently flush the icache */
- memcpy(ranges, image->segment, sizeof(ranges));
-
- /*
- * After this call we may not use anything allocated in dynamic
- * memory, including *image.
- *
- * Only globals and the stack are allowed.
- */
- copy_segments(image->head);
-
- /*
- * we need to clear the icache for all dest pages sometime,
- * including ones that were in place on the original copy
- */
- for (i = 0; i < nr_segments; i++)
- flush_icache_range((unsigned long)__va(ranges[i].mem),
- (unsigned long)__va(ranges[i].mem + ranges[i].memsz));
-}
-
-#ifdef CONFIG_SMP
-
-static int kexec_all_irq_disabled = 0;
-
-static void kexec_smp_down(void *arg)
-{
- local_irq_disable();
- hard_irq_disable();
-
- mb(); /* make sure our irqs are disabled before we say they are */
- get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
- while(kexec_all_irq_disabled == 0)
- cpu_relax();
- mb(); /* make sure all irqs are disabled before this */
- hw_breakpoint_disable();
- /*
- * Now every CPU has IRQs off, we can clear out any pending
- * IPIs and be sure that no more will come in after this.
- */
- if (ppc_md.kexec_cpu_down)
- ppc_md.kexec_cpu_down(0, 1);
-
- kexec_smp_wait();
- /* NOTREACHED */
-}
-
-static void kexec_prepare_cpus_wait(int wait_state)
-{
- int my_cpu, i, notified=-1;
-
- hw_breakpoint_disable();
- my_cpu = get_cpu();
- /* Make sure each CPU has at least made it to the state we need.
- *
- * FIXME: There is a (slim) chance of a problem if not all of the CPUs
- * are correctly onlined. If somehow we start a CPU on boot with RTAS
- * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
- * time, the boot CPU will time out. If it does eventually execute
- * stuff, the secondary will start up (paca_ptrs[]->cpu_start was
- * written) and get into a peculiar state.
- * If the platform supports smp_ops->take_timebase(), the secondary CPU
- * will probably be spinning in there. If not (i.e. pseries), the
- * secondary will continue on and try to online itself/idle/etc. If it
- * survives that, we need to find these
- * possible-but-not-online-but-should-be CPUs and chaperone them into
- * kexec_smp_wait().
- */
- for_each_online_cpu(i) {
- if (i == my_cpu)
- continue;
-
- while (paca_ptrs[i]->kexec_state < wait_state) {
- barrier();
- if (i != notified) {
- printk(KERN_INFO "kexec: waiting for cpu %d "
- "(physical %d) to enter %i state\n",
- i, paca_ptrs[i]->hw_cpu_id, wait_state);
- notified = i;
- }
- }
- }
- mb();
-}
-
-/*
- * We need to make sure each present CPU is online. The next kernel will scan
- * the device tree and assume primary threads are online and query secondary
- * threads via RTAS to online them if required. If we don't online primary
- * threads, they will be stuck. However, we also online secondary threads as we
- * may be using 'cede offline'. In this case RTAS doesn't see the secondary
- * threads as offline -- and again, these CPUs will be stuck.
- *
- * So, we online all CPUs that should be running, including secondary threads.
- */
-static void wake_offline_cpus(void)
-{
- int cpu = 0;
-
- for_each_present_cpu(cpu) {
- if (!cpu_online(cpu)) {
- printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
- cpu);
- WARN_ON(cpu_up(cpu));
- }
- }
-}
-
-static void kexec_prepare_cpus(void)
-{
- wake_offline_cpus();
- smp_call_function(kexec_smp_down, NULL, /* wait */0);
- local_irq_disable();
- hard_irq_disable();
-
- mb(); /* make sure IRQs are disabled before we say they are */
- get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
-
- kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
- /* we are sure every CPU has IRQs off at this point */
- kexec_all_irq_disabled = 1;
-
- /*
- * Before removing MMU mappings make sure all CPUs have entered real
- * mode:
- */
- kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
-
- /* after we tell the others to go down */
- if (ppc_md.kexec_cpu_down)
- ppc_md.kexec_cpu_down(0, 0);
-
- put_cpu();
-}
-
-#else /* ! SMP */
-
-static void kexec_prepare_cpus(void)
-{
- /*
- * move the secondaries to us so that we can copy
- * the new kernel 0-0x100 safely
- *
- * do this if kexec in setup.c ?
- *
- * We need to release the cpus if we are ever going from an
- * UP to an SMP kernel.
- */
- smp_release_cpus();
- if (ppc_md.kexec_cpu_down)
- ppc_md.kexec_cpu_down(0, 0);
- local_irq_disable();
- hard_irq_disable();
-}
-
-#endif /* SMP */
-
-/*
- * kexec thread structure and stack.
- *
- * We need to make sure that this is 16384-byte aligned due to the
- * way process stacks are handled. It also must be statically allocated
- * or allocated as part of the kimage, because everything else may be
- * overwritten when we copy the kexec image. We piggyback on the
- * "init_task" linker section here to statically allocate a stack.
- *
- * We could use a smaller stack if we don't care about anything using
- * current, but that audit has not been performed.
- */
-static union thread_union kexec_stack __init_task_data =
- { };
-
-/*
- * For similar reasons to the stack above, the kexecing CPU needs to be on a
- * static PACA; we switch to kexec_paca.
- */
-struct paca_struct kexec_paca;
-
-/* Our assembly helper, in misc_64.S */
-extern void kexec_sequence(void *newstack, unsigned long start,
- void *image, void *control,
- void (*clear_all)(void),
- bool copy_with_mmu_off) __noreturn;
-
-/* too late to fail here */
-void default_machine_kexec(struct kimage *image)
-{
- bool copy_with_mmu_off;
-
- /* prepare control code if any */
-
- /*
- * If the kexec boot is the normal one, we need to shut down the
- * other CPUs into our wait loop and quiesce interrupts.
- * Otherwise, in the case of crashed mode (crashing_cpu >= 0),
- * stopping the other CPUs and collecting their pt_regs was already
- * done earlier, using the debugger IPI.
- */
-
- if (!kdump_in_progress())
- kexec_prepare_cpus();
-
- printk("kexec: Starting switchover sequence.\n");
-
- /* switch to a statically allocated stack. Based on irq stack code.
- * We setup preempt_count to avoid using VMX in memcpy.
- * XXX: the task struct will likely be invalid once we do the copy!
- */
- current_thread_info()->flags = 0;
- current_thread_info()->preempt_count = HARDIRQ_OFFSET;
-
- /* We need a static PACA, too; copy this CPU's PACA over and switch to
- * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
- * non-static data.
- */
- memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
- kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
-#ifdef CONFIG_PPC_PSERIES
- kexec_paca.lppaca_ptr = NULL;
-#endif
- paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
-
- setup_paca(&kexec_paca);
-
- /*
- * The lppaca should be unregistered at this point so the HV won't
- * touch it. In the case of a crash, none of the lppacas are
- * unregistered so there is not much we can do about it here.
- */
-
- /*
- * On Book3S, the copy must happen with the MMU off if we are either
- * using Radix page tables or we are not in an LPAR since we can
- * overwrite the page tables while copying.
- *
- * In an LPAR, we keep the MMU on otherwise we can't access beyond
- * the RMA. On BookE there is no real MMU off mode, so we have to
- * keep it enabled as well (but then we have bolted TLB entries).
- */
-#ifdef CONFIG_PPC_BOOK3E
- copy_with_mmu_off = false;
-#else
- copy_with_mmu_off = radix_enabled() ||
- !(firmware_has_feature(FW_FEATURE_LPAR) ||
- firmware_has_feature(FW_FEATURE_PS3_LV1));
-#endif
-
- /* Some things are best done in assembly. Finding globals with
- * a toc is easier in C, so pass in what we can.
- */
- kexec_sequence(&kexec_stack, image->start, image,
- page_address(image->control_code_page),
- mmu_cleanup_all, copy_with_mmu_off);
- /* NOTREACHED */
-}
-
-#ifdef CONFIG_PPC_BOOK3S_64
-/* Values we need to export to the second kernel via the device tree. */
-static unsigned long htab_base;
-static unsigned long htab_size;
-
-static struct property htab_base_prop = {
- .name = "linux,htab-base",
- .length = sizeof(unsigned long),
- .value = &htab_base,
-};
-
-static struct property htab_size_prop = {
- .name = "linux,htab-size",
- .length = sizeof(unsigned long),
- .value = &htab_size,
-};
-
-static int __init export_htab_values(void)
-{
- struct device_node *node;
-
- /* On machines with no htab, htab_address is NULL */
- if (!htab_address)
- return -ENODEV;
-
- node = of_find_node_by_path("/chosen");
- if (!node)
- return -ENODEV;
-
- /* remove any stale properties so ours can be found */
- of_remove_property(node, of_find_property(node, htab_base_prop.name, NULL));
- of_remove_property(node, of_find_property(node, htab_size_prop.name, NULL));
-
- htab_base = cpu_to_be64(__pa(htab_address));
- of_add_property(node, &htab_base_prop);
- htab_size = cpu_to_be64(htab_size_bytes);
- of_add_property(node, &htab_size_prop);
-
- of_node_put(node);
- return 0;
-}
-late_initcall(export_htab_values);
-#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
deleted file mode 100644
index 143c91724617..000000000000
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ /dev/null
@@ -1,254 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ppc64 code to implement the kexec_file_load syscall
- *
- * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
- * Copyright (C) 2004 IBM Corp.
- * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
- * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
- * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
- * Copyright (C) 2016 IBM Corporation
- *
- * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c.
- * Heavily modified for the kernel by
- * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
- */
-
-#include <linux/slab.h>
-#include <linux/kexec.h>
-#include <linux/of_fdt.h>
-#include <linux/libfdt.h>
-#include <asm/ima.h>
-
-#define SLAVE_CODE_SIZE 256
-
-const struct kexec_file_ops * const kexec_file_loaders[] = {
- &kexec_elf64_ops,
- NULL
-};
-
-int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
- unsigned long buf_len)
-{
- /* We don't support crash kernels yet. */
- if (image->type == KEXEC_TYPE_CRASH)
- return -EOPNOTSUPP;
-
- return kexec_image_probe_default(image, buf, buf_len);
-}
-
-/**
- * setup_purgatory - initialize the purgatory's global variables
- * @image: kexec image.
- * @slave_code: Slave code for the purgatory.
- * @fdt: Flattened device tree for the next kernel.
- * @kernel_load_addr: Address where the kernel is loaded.
- * @fdt_load_addr: Address where the flattened device tree is loaded.
- *
- * Return: 0 on success, or negative errno on error.
- */
-int setup_purgatory(struct kimage *image, const void *slave_code,
- const void *fdt, unsigned long kernel_load_addr,
- unsigned long fdt_load_addr)
-{
- unsigned int *slave_code_buf, master_entry;
- int ret;
-
- slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL);
- if (!slave_code_buf)
- return -ENOMEM;
-
- /* Get the slave code from the new kernel and put it in purgatory. */
- ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
- slave_code_buf, SLAVE_CODE_SIZE,
- true);
- if (ret) {
- kfree(slave_code_buf);
- return ret;
- }
-
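- /*
- * Keep the first instruction of purgatory_start (the branch the
- * master CPU takes) and replace the rest with the slave spin code
- * taken from the start of the new kernel image.
- */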
- master_entry = slave_code_buf[0];
- memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE);
- slave_code_buf[0] = master_entry;
- ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
- slave_code_buf, SLAVE_CODE_SIZE,
- false);
- kfree(slave_code_buf);
-
- ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr,
- sizeof(kernel_load_addr), false);
- if (ret)
- return ret;
- ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr,
- sizeof(fdt_load_addr), false);
- if (ret)
- return ret;
-
- return 0;
-}
-
-/**
- * delete_fdt_mem_rsv - delete memory reservation with given address and size
- *
- * Return: 0 on success, or negative errno on error.
- */
-int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
-{
- int i, ret, num_rsvs = fdt_num_mem_rsv(fdt);
-
- for (i = 0; i < num_rsvs; i++) {
- uint64_t rsv_start, rsv_size;
-
- ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size);
- if (ret) {
- pr_err("Malformed device tree.\n");
- return -EINVAL;
- }
-
- if (rsv_start == start && rsv_size == size) {
- ret = fdt_del_mem_rsv(fdt, i);
- if (ret) {
- pr_err("Error deleting device tree reservation.\n");
- return -EINVAL;
- }
-
- return 0;
- }
- }
-
- return -ENOENT;
-}
-
-/*
- * setup_new_fdt - modify /chosen and memory reservation for the next kernel
- * @image: kexec image being loaded.
- * @fdt: Flattened device tree for the next kernel.
- * @initrd_load_addr: Address where the next initrd will be loaded.
- * @initrd_len: Size of the next initrd, or 0 if there will be none.
- * @cmdline: Command line for the next kernel, or NULL if there will
- * be none.
- *
- * Return: 0 on success, or negative errno on error.
- */
-int setup_new_fdt(const struct kimage *image, void *fdt,
- unsigned long initrd_load_addr, unsigned long initrd_len,
- const char *cmdline)
-{
- int ret, chosen_node;
- const void *prop;
-
- /* Remove memory reservation for the current device tree. */
- ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params),
- fdt_totalsize(initial_boot_params));
- if (ret == 0)
- pr_debug("Removed old device tree reservation.\n");
- else if (ret != -ENOENT)
- return ret;
-
- chosen_node = fdt_path_offset(fdt, "/chosen");
- if (chosen_node == -FDT_ERR_NOTFOUND) {
- chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
- "chosen");
- if (chosen_node < 0) {
- pr_err("Error creating /chosen.\n");
- return -EINVAL;
- }
- } else if (chosen_node < 0) {
- pr_err("Malformed device tree: error reading /chosen.\n");
- return -EINVAL;
- }
-
- /* Did we boot using an initrd? */
- prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL);
- if (prop) {
- uint64_t tmp_start, tmp_end, tmp_size;
-
- tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop));
-
- prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL);
- if (!prop) {
- pr_err("Malformed device tree.\n");
- return -EINVAL;
- }
- tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop));
-
- /*
- * kexec reserves the exact initrd size, while firmware may
- * reserve a multiple of PAGE_SIZE, so check for both.
- */
- tmp_size = tmp_end - tmp_start;
- ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size);
- if (ret == -ENOENT)
- ret = delete_fdt_mem_rsv(fdt, tmp_start,
- round_up(tmp_size, PAGE_SIZE));
- if (ret == 0)
- pr_debug("Removed old initrd reservation.\n");
- else if (ret != -ENOENT)
- return ret;
-
- /* If there's no new initrd, delete the old initrd's info. */
- if (initrd_len == 0) {
- ret = fdt_delprop(fdt, chosen_node,
- "linux,initrd-start");
- if (ret) {
- pr_err("Error deleting linux,initrd-start.\n");
- return -EINVAL;
- }
-
- ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end");
- if (ret) {
- pr_err("Error deleting linux,initrd-end.\n");
- return -EINVAL;
- }
- }
- }
-
- if (initrd_len) {
- ret = fdt_setprop_u64(fdt, chosen_node,
- "linux,initrd-start",
- initrd_load_addr);
- if (ret < 0)
- goto err;
-
- /* initrd-end is the first address after the initrd image. */
- ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end",
- initrd_load_addr + initrd_len);
- if (ret < 0)
- goto err;
-
- ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len);
- if (ret) {
- pr_err("Error reserving initrd memory: %s\n",
- fdt_strerror(ret));
- return -EINVAL;
- }
- }
-
- if (cmdline != NULL) {
- ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline);
- if (ret < 0)
- goto err;
- } else {
- ret = fdt_delprop(fdt, chosen_node, "bootargs");
- if (ret && ret != -FDT_ERR_NOTFOUND) {
- pr_err("Error deleting bootargs.\n");
- return -EINVAL;
- }
- }
-
- ret = setup_ima_buffer(image, fdt, chosen_node);
- if (ret) {
- pr_err("Error setting up the new device tree.\n");
- return ret;
- }
-
- ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0);
- if (ret)
- goto err;
-
- return 0;
-
-err:
- pr_err("Error setting up the new device tree.\n");
- return -EINVAL;
-}
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index b18df633eae9..34c1001e9e8b 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -33,13 +33,18 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
mce_ue_event_queue);
static void machine_check_process_queued_event(struct irq_work *work);
-void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_check_ue_irq_work(struct irq_work *work);
+static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);
static struct irq_work mce_event_process_work = {
.func = machine_check_process_queued_event,
};
+static struct irq_work mce_ue_event_irq_work = {
+ .func = machine_check_ue_irq_work,
+};
+
DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
static void mce_set_error_info(struct machine_check_event *mce,
@@ -144,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
if (phys_addr != ULONG_MAX) {
mce->u.ue_error.physical_address_provided = true;
mce->u.ue_error.physical_address = phys_addr;
+ mce->u.ue_error.ignore_event = mce_err->ignore_event;
machine_check_ue_event(mce);
}
}
@@ -199,11 +205,15 @@ void release_mce_event(void)
get_mce_event(NULL, true);
}
+static void machine_check_ue_irq_work(struct irq_work *work)
+{
+ schedule_work(&mce_ue_event_work);
+}
/*
* Queue up the MCE event which then can be handled later.
*/
-void machine_check_ue_event(struct machine_check_event *evt)
+static void machine_check_ue_event(struct machine_check_event *evt)
{
int index;
@@ -216,7 +226,7 @@ void machine_check_ue_event(struct machine_check_event *evt)
memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
/* Queue work to process this event later. */
- schedule_work(&mce_ue_event_work);
+ irq_work_queue(&mce_ue_event_irq_work);
}
/*
@@ -257,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work)
/*
 * This should probably be queued elsewhere, but
* oh! well
+ *
+ * Don't report this machine check because the caller has
+ * asked us to ignore the event; it has a fixup handler which
+ * will do the appropriate error handling and reporting.
*/
if (evt->error_type == MCE_ERROR_TYPE_UE) {
+ if (evt->u.ue_error.ignore_event) {
+ __this_cpu_dec(mce_ue_count);
+ continue;
+ }
+
if (evt->u.ue_error.physical_address_provided) {
unsigned long pfn;
@@ -292,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work)
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
evt = this_cpu_ptr(&mce_event_queue[index]);
+
+ if (evt->error_type == MCE_ERROR_TYPE_UE &&
+ evt->u.ue_error.ignore_event) {
+ __this_cpu_dec(mce_queue_count);
+ continue;
+ }
machine_check_print_event_info(evt, false, false);
__this_cpu_dec(mce_queue_count);
}
@@ -300,7 +325,7 @@ static void machine_check_process_queued_event(struct irq_work *work)
void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest)
{
- const char *level, *sevstr, *subtype, *err_type;
+ const char *level, *sevstr, *subtype, *err_type, *initiator;
uint64_t ea = 0, pa = 0;
int n = 0;
char dar_str[50];
@@ -385,6 +410,28 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
}
+ switch (evt->initiator) {
+ case MCE_INITIATOR_CPU:
+ initiator = "CPU";
+ break;
+ case MCE_INITIATOR_PCI:
+ initiator = "PCI";
+ break;
+ case MCE_INITIATOR_ISA:
+ initiator = "ISA";
+ break;
+ case MCE_INITIATOR_MEMORY:
+ initiator = "Memory";
+ break;
+ case MCE_INITIATOR_POWERMGM:
+ initiator = "Power Management";
+ break;
+ case MCE_INITIATOR_UNKNOWN:
+ default:
+ initiator = "Unknown";
+ break;
+ }
+
switch (evt->error_type) {
case MCE_ERROR_TYPE_UE:
err_type = "UE";
@@ -451,6 +498,14 @@ void machine_check_print_event_info(struct machine_check_event *evt,
if (evt->u.link_error.effective_address_provided)
ea = evt->u.link_error.effective_address;
break;
+ case MCE_ERROR_TYPE_DCACHE:
+ err_type = "D-Cache";
+ subtype = "Unknown";
+ break;
+ case MCE_ERROR_TYPE_ICACHE:
+ err_type = "I-Cache";
+ subtype = "Unknown";
+ break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
err_type = "Unknown";
@@ -483,9 +538,17 @@ void machine_check_print_event_info(struct machine_check_event *evt,
level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
}
+ printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);
+
subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
mc_error_class[evt->error_class] : "Unknown";
printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Display faulty slb contents for SLB errors. */
+ if (evt->error_type == MCE_ERROR_TYPE_SLB)
+ slb_dump_contents(local_paca->mce_faulty_slbs);
+#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
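
The two-stage deferral added above is the standard pattern for work that
originates in NMI-like context: irq_work_queue() is safe to call from a
machine check, and the irq_work handler, which runs in ordinary interrupt
context, may then call schedule_work(). A self-contained sketch (names
hypothetical):

	#include <linux/irq_work.h>
	#include <linux/workqueue.h>

	static void heavy_handler(struct work_struct *work)
	{
		/* runs in process context; may sleep, take mutexes, etc. */
	}
	static DECLARE_WORK(heavy_work, heavy_handler);

	static void bounce(struct irq_work *work)
	{
		schedule_work(&heavy_work);	/* legal from hardirq context */
	}
	static struct irq_work bounce_work = { .func = bounce };

	/* from NMI-like context: irq_work_queue(&bounce_work); */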
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index a814d2dfb5b0..1cbf7f1a4e3d 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -11,6 +11,7 @@
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <linux/extable.h>
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>
@@ -18,6 +19,7 @@
#include <asm/pte-walk.h>
#include <asm/sstep.h>
#include <asm/exception-64s.h>
+#include <asm/extable.h>
/*
* Convert an address related to an mm to a PFN. NOTE: we are in real
@@ -26,7 +28,8 @@
unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
{
pte_t *ptep;
- unsigned long flags;
+ unsigned int shift;
+ unsigned long pfn, flags;
struct mm_struct *mm;
if (user_mode(regs))
@@ -35,14 +38,23 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
mm = &init_mm;
local_irq_save(flags);
- if (mm == current->mm)
- ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
- else
- ptep = find_init_mm_pte(addr, NULL);
+ ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift);
+
+ if (!ptep || pte_special(*ptep)) {
+ pfn = ULONG_MAX;
+ goto out;
+ }
+
+ if (shift <= PAGE_SHIFT)
+ pfn = pte_pfn(*ptep);
+ else {
+ unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+ pfn = pte_pfn(__pte(pte_val(*ptep) | (addr & rpnmask)));
+ }
+
+out:
local_irq_restore(flags);
- if (!ptep || pte_special(*ptep))
- return ULONG_MAX;
- return pte_pfn(*ptep);
+ return pfn;
}
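
For a huge mapping, the PFN of the base page containing the address is
reconstructed by folding the low address bits back into the PTE's frame
number. A worked example (assuming a 16MB huge page and 64KB base pages):

	shift      = 24			/* 16MB huge page */
	PAGE_SHIFT = 16			/* 64KB base pages */
	rpnmask    = (1 << 24) - (1 << 16) = 0x00ff0000
	pfn        = pte_pfn(pte | (addr & 0x00ff0000))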
/* flush SLBs and reload */
@@ -344,7 +356,7 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0, false, 0, 0, 0, 0, 0 } };
-static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
+static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
uint64_t *phys_addr)
{
/*
@@ -397,6 +409,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
handled = mce_flush(MCE_FLUSH_SLB);
break;
case MCE_ERROR_TYPE_ERAT:
@@ -482,6 +496,8 @@ static int mce_handle_derror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
if (mce_flush(MCE_FLUSH_SLB))
handled = 1;
break;
@@ -541,7 +557,8 @@ static int mce_handle_derror(struct pt_regs *regs,
* kernel/exception-64s.h
*/
if (get_paca()->in_mce < MAX_MCE_DEPTH)
- mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
+ mce_find_instr_ea_and_phys(regs, addr,
+ phys_addr);
}
found = 1;
}
@@ -558,9 +575,18 @@ static int mce_handle_derror(struct pt_regs *regs,
return 0;
}
-static long mce_handle_ue_error(struct pt_regs *regs)
+static long mce_handle_ue_error(struct pt_regs *regs,
+ struct mce_error_info *mce_err)
{
long handled = 0;
+ const struct exception_table_entry *entry;
+
+ entry = search_kernel_exception_table(regs->nip);
+ if (entry) {
+ mce_err->ignore_event = true;
+ regs->nip = extable_fixup(entry);
+ return 1;
+ }
/*
* On specific SCOM read via MMIO we may get a machine check
@@ -593,7 +619,7 @@ static long mce_handle_error(struct pt_regs *regs,
&phys_addr);
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
- handled = mce_handle_ue_error(regs);
+ handled = mce_handle_ue_error(regs, &mce_err);
save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index fe4bd321730e..d80212be8698 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -6,11 +6,6 @@
* Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
* and Paul Mackerras.
*
- * kexec bits:
- * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * PPC44x port. Copyright (C) 2011, IBM Corporation
- * Author: Suzuki Poulose <suzuki@in.ibm.com>
*/
#include <linux/sys.h>
@@ -25,7 +20,6 @@
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/processor.h>
-#include <asm/kexec.h>
#include <asm/bug.h>
#include <asm/ptrace.h>
#include <asm/export.h>
@@ -292,22 +286,20 @@ _GLOBAL(flush_instruction_cache)
iccci 0,r3
#endif
#elif defined(CONFIG_FSL_BOOKE)
-BEGIN_FTR_SECTION
+#ifdef CONFIG_E200
mfspr r3,SPRN_L1CSR0
ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
/* msync; isync recommended here */
mtspr SPRN_L1CSR0,r3
isync
blr
-END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
+#endif
mfspr r3,SPRN_L1CSR1
ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
mtspr SPRN_L1CSR1,r3
+#elif defined(CONFIG_PPC_BOOK3S_601)
+ blr /* for 601, do nothing */
#else
- mfspr r3,SPRN_PVR
- rlwinm r3,r3,16,16,31
- cmpwi 0,r3,1
- beqlr /* for 601, do nothing */
/* 603/604 processor - use invalidate-all bit in HID0 */
mfspr r3,SPRN_HID0
ori r3,r3,HID0_ICFI
@@ -319,123 +311,6 @@ EXPORT_SYMBOL(flush_instruction_cache)
#endif /* CONFIG_PPC_8xx */
/*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- * This is a no-op on the 601.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(flush_icache_range)
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
- rlwinm r3,r3,0,0,31 - L1_CACHE_SHIFT
- subf r4,r3,r4
- addi r4,r4,L1_CACHE_BYTES - 1
- srwi. r4,r4,L1_CACHE_SHIFT
- beqlr
- mtctr r4
- mr r6,r3
-1: dcbst 0,r3
- addi r3,r3,L1_CACHE_BYTES
- bdnz 1b
- sync /* wait for dcbst's to get to ram */
-#ifndef CONFIG_44x
- mtctr r4
-2: icbi 0,r6
- addi r6,r6,L1_CACHE_BYTES
- bdnz 2b
-#else
- /* Flash invalidate on 44x because we are passed kmapped addresses and
- this doesn't work for userspace pages due to the virtually tagged
- icache. Sigh. */
- iccci 0, r0
-#endif
- sync /* additional sync needed on g4 */
- isync
- blr
-_ASM_NOKPROBE_SYMBOL(flush_icache_range)
-EXPORT_SYMBOL(flush_icache_range)
-
-/*
- * Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- * This is a no-op on the 601 which has a unified cache.
- *
- * void __flush_dcache_icache(void *page)
- */
-_GLOBAL(__flush_dcache_icache)
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
- rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
- li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
- mtctr r4
- mr r6,r3
-0: dcbst 0,r3 /* Write line to ram */
- addi r3,r3,L1_CACHE_BYTES
- bdnz 0b
- sync
-#ifdef CONFIG_44x
- /* We don't flush the icache on 44x. Those have a virtual icache
- * and we don't have access to the virtual address here (it's
- * not the page vaddr but where it's mapped in user space). The
- * flushing of the icache on these is handled elsewhere, when
- * a change in the address space occurs, before returning to
- * user space
- */
-BEGIN_MMU_FTR_SECTION
- blr
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
-#endif /* CONFIG_44x */
- mtctr r4
-1: icbi 0,r6
- addi r6,r6,L1_CACHE_BYTES
- bdnz 1b
- sync
- isync
- blr
-
-#ifndef CONFIG_BOOKE
-/*
- * Flush a particular page from the data cache to RAM, identified
- * by its physical address. We turn off the MMU so we can just use
- * the physical address (this may be a highmem page without a kernel
- * mapping).
- *
- * void __flush_dcache_icache_phys(unsigned long physaddr)
- */
-_GLOBAL(__flush_dcache_icache_phys)
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
- mfmsr r10
- rlwinm r0,r10,0,28,26 /* clear DR */
- mtmsr r0
- isync
- rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
- li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
- mtctr r4
- mr r6,r3
-0: dcbst 0,r3 /* Write line to ram */
- addi r3,r3,L1_CACHE_BYTES
- bdnz 0b
- sync
- mtctr r4
-1: icbi 0,r6
- addi r6,r6,L1_CACHE_BYTES
- bdnz 1b
- sync
- mtmsr r10 /* restore DR */
- isync
- blr
-#endif /* CONFIG_BOOKE */
-
-/*
* Copy a whole page. We use the dcbz instruction on the destination
* to reduce memory traffic (it eliminates the unnecessary reads of
* the destination into cache). This requires that the destination
@@ -452,7 +327,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
stwu r9,16(r3)
_GLOBAL(copy_page)
+ rlwinm r5, r3, 0, L1_CACHE_BYTES - 1
addi r3,r3,-4
+
+0: twnei r5, 0 /* WARN if r3 is not cache aligned */
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+
addi r4,r4,-4
li r5,4
@@ -608,488 +488,3 @@ _GLOBAL(start_secondary_resume)
*/
_GLOBAL(__main)
blr
-
-#ifdef CONFIG_KEXEC_CORE
- /*
- * Must be relocatable PIC code callable as a C function.
- */
- .globl relocate_new_kernel
-relocate_new_kernel:
- /* r3 = page_list */
- /* r4 = reboot_code_buffer */
- /* r5 = start_address */
-
-#ifdef CONFIG_FSL_BOOKE
-
- mr r29, r3
- mr r30, r4
- mr r31, r5
-
-#define ENTRY_MAPPING_KEXEC_SETUP
-#include "fsl_booke_entry_mapping.S"
-#undef ENTRY_MAPPING_KEXEC_SETUP
-
- mr r3, r29
- mr r4, r30
- mr r5, r31
-
- li r0, 0
-#elif defined(CONFIG_44x)
-
- /* Save our parameters */
- mr r29, r3
- mr r30, r4
- mr r31, r5
-
-#ifdef CONFIG_PPC_47x
- /* Check for 47x cores */
- mfspr r3,SPRN_PVR
- srwi r3,r3,16
- cmplwi cr0,r3,PVR_476FPE@h
- beq setup_map_47x
- cmplwi cr0,r3,PVR_476@h
- beq setup_map_47x
- cmplwi cr0,r3,PVR_476_ISS@h
- beq setup_map_47x
-#endif /* CONFIG_PPC_47x */
-
-/*
- * Code for setting up 1:1 mapping for PPC440x for KEXEC
- *
- * We cannot switch off the MMU on PPC44x.
- * So we:
- * 1) Invalidate all the mappings except the one we are running from.
- * 2) Create a tmp mapping for our code in the other address space(TS) and
- * jump to it. Invalidate the entry we started in.
- * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
- * 4) Jump to the 1:1 mapping in original TS.
- * 5) Invalidate the tmp mapping.
- *
- * - Based on the kexec support code for FSL BookE
- *
- */
-
- /*
- * Load the PID with kernel PID (0).
- * Also load our MSR_IS and TID to MMUCR for TLB search.
- */
- li r3, 0
- mtspr SPRN_PID, r3
- mfmsr r4
- andi. r4,r4,MSR_IS@l
- beq wmmucr
- oris r3,r3,PPC44x_MMUCR_STS@h
-wmmucr:
- mtspr SPRN_MMUCR,r3
- sync
-
- /*
- * Invalidate all the TLB entries except the current entry
- * where we are running from
- */
- bl 0f /* Find our address */
-0: mflr r5 /* Make it accessible */
- tlbsx r23,0,r5 /* Find entry we are in */
- li r4,0 /* Start at TLB entry 0 */
- li r3,0 /* Set PAGEID inval value */
-1: cmpw r23,r4 /* Is this our entry? */
- beq skip /* If so, skip the inval */
- tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */
-skip:
- addi r4,r4,1 /* Increment */
- cmpwi r4,64 /* Are we done? */
- bne 1b /* If not, repeat */
- isync
-
- /* Create a temp mapping and jump to it */
- andi. r6, r23, 1 /* Find the index to use */
- addi r24, r6, 1 /* r24 will contain 1 or 2 */
-
- mfmsr r9 /* get the MSR */
- rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */
- xori r7, r5, 1 /* Use the other address space */
-
- /* Read the current mapping entries */
- tlbre r3, r23, PPC44x_TLB_PAGEID
- tlbre r4, r23, PPC44x_TLB_XLAT
- tlbre r5, r23, PPC44x_TLB_ATTRIB
-
- /* Save our current XLAT entry */
- mr r25, r4
-
- /* Extract the TLB PageSize */
- li r10, 1 /* r10 will hold PageSize */
- rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */
-
- /* XXX: As of now we use 256M, 4K pages */
- cmpwi r11, PPC44x_TLB_256M
- bne tlb_4k
- rotlwi r10, r10, 28 /* r10 = 256M */
- b write_out
-tlb_4k:
- cmpwi r11, PPC44x_TLB_4K
- bne default
- rotlwi r10, r10, 12 /* r10 = 4K */
- b write_out
-default:
- rotlwi r10, r10, 10 /* r10 = 1K */
-
-write_out:
- /*
- * Write out the tmp 1:1 mapping for this code in other address space
- * Fixup EPN = RPN , TS=other address space
- */
- insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */
-
- /* Write out the tmp mapping entries */
- tlbwe r3, r24, PPC44x_TLB_PAGEID
- tlbwe r4, r24, PPC44x_TLB_XLAT
- tlbwe r5, r24, PPC44x_TLB_ATTRIB
-
- subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */
- not r10, r11 /* Mask for PageNum */
-
- /* Switch to other address space in MSR */
- insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
-
- bl 1f
-1: mflr r8
- addi r8, r8, (2f-1b) /* Find the target offset */
-
- /* Jump to the tmp mapping */
- mtspr SPRN_SRR0, r8
- mtspr SPRN_SRR1, r9
- rfi
-
-2:
- /* Invalidate the entry we were executing from */
- li r3, 0
- tlbwe r3, r23, PPC44x_TLB_PAGEID
-
- /* attribute fields. rwx for SUPERVISOR mode */
- li r5, 0
- ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
-
- /* Create 1:1 mapping in 256M pages */
- xori r7, r7, 1 /* Revert back to Original TS */
-
- li r8, 0 /* PageNumber */
- li r6, 3 /* TLB Index, start at 3 */
-
-next_tlb:
- rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */
- mr r4, r3 /* RPN = EPN */
- ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
- insrwi r3, r7, 1, 23 /* Set TS from r7 */
-
- tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */
- tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */
- tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */
-
- addi r8, r8, 1 /* Increment PN */
- addi r6, r6, 1 /* Increment TLB Index */
- cmpwi r8, 8 /* Are we done ? */
- bne next_tlb
- isync
-
- /* Jump to the new mapping 1:1 */
- li r9,0
- insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
-
- bl 1f
-1: mflr r8
- and r8, r8, r11 /* Get our offset within page */
- addi r8, r8, (2f-1b)
-
- and r5, r25, r10 /* Get our target PageNum */
- or r8, r8, r5 /* Target jump address */
-
- mtspr SPRN_SRR0, r8
- mtspr SPRN_SRR1, r9
- rfi
-2:
- /* Invalidate the tmp entry we used */
- li r3, 0
- tlbwe r3, r24, PPC44x_TLB_PAGEID
- sync
- b ppc44x_map_done
-
-#ifdef CONFIG_PPC_47x
-
- /* 1:1 mapping for 47x */
-
-setup_map_47x:
-
- /*
- * Load the kernel pid (0) to PID and also to MMUCR[TID].
- * Also set the MSR IS->MMUCR STS
- */
- li r3, 0
- mtspr SPRN_PID, r3 /* Set PID */
- mfmsr r4 /* Get MSR */
- andi. r4, r4, MSR_IS@l /* TS=1? */
- beq 1f /* If not, leave STS=0 */
- oris r3, r3, PPC47x_MMUCR_STS@h /* Set STS=1 */
-1: mtspr SPRN_MMUCR, r3 /* Put MMUCR */
- sync
-
- /* Find the entry we are running from */
- bl 2f
-2: mflr r23
- tlbsx r23, 0, r23
- tlbre r24, r23, 0 /* TLB Word 0 */
- tlbre r25, r23, 1 /* TLB Word 1 */
- tlbre r26, r23, 2 /* TLB Word 2 */
-
-
- /*
- * Invalidates all the tlb entries by writing to 256 RPNs(r4)
- * of 4k page size in all 4 ways (0-3 in r3).
- * This would invalidate the entire UTLB including the one we are
- * running from. However the shadow TLB entries would help us
- * to continue the execution, until we flush them (rfi/isync).
- */
- addis r3, 0, 0x8000 /* specify the way */
- addi r4, 0, 0 /* TLB Word0 = (EPN=0, VALID = 0) */
- addi r5, 0, 0
- b clear_utlb_entry
-
- /* Align the loop to speed things up. from head_44x.S */
- .align 6
-
-clear_utlb_entry:
-
- tlbwe r4, r3, 0
- tlbwe r5, r3, 1
- tlbwe r5, r3, 2
- addis r3, r3, 0x2000 /* Increment the way */
- cmpwi r3, 0
- bne clear_utlb_entry
- addis r3, 0, 0x8000
- addis r4, r4, 0x100 /* Increment the EPN */
- cmpwi r4, 0
- bne clear_utlb_entry
-
- /* Create the entries in the other address space */
- mfmsr r5
- rlwinm r7, r5, 27, 31, 31 /* Get the TS (Bit 26) from MSR */
- xori r7, r7, 1 /* r7 = !TS */
-
- insrwi r24, r7, 1, 21 /* Change the TS in the saved TLB word 0 */
-
- /*
- * write out the TLB entries for the tmp mapping
- * Use way '0' so that we could easily invalidate it later.
- */
- lis r3, 0x8000 /* Way '0' */
-
- tlbwe r24, r3, 0
- tlbwe r25, r3, 1
- tlbwe r26, r3, 2
-
- /* Update the msr to the new TS */
- insrwi r5, r7, 1, 26
-
- bl 1f
-1: mflr r6
- addi r6, r6, (2f-1b)
-
- mtspr SPRN_SRR0, r6
- mtspr SPRN_SRR1, r5
- rfi
-
- /*
- * Now we are in the tmp address space.
- * Create a 1:1 mapping for 0-2GiB in the original TS.
- */
-2:
- li r3, 0
- li r4, 0 /* TLB Word 0 */
- li r5, 0 /* TLB Word 1 */
- li r6, 0
- ori r6, r6, PPC47x_TLB2_S_RWX /* TLB word 2 */
-
- li r8, 0 /* PageIndex */
-
- xori r7, r7, 1 /* revert back to original TS */
-
-write_utlb:
- rotlwi r5, r8, 28 /* RPN = PageIndex * 256M */
- /* ERPN = 0 as we don't use memory above 2G */
-
- mr r4, r5 /* EPN = RPN */
- ori r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
- insrwi r4, r7, 1, 21 /* Insert the TS to Word 0 */
-
- tlbwe r4, r3, 0 /* Write out the entries */
- tlbwe r5, r3, 1
- tlbwe r6, r3, 2
- addi r8, r8, 1
- cmpwi r8, 8 /* Have we completed ? */
- bne write_utlb
-
- /* make sure we complete the TLB write up */
- isync
-
- /*
- * Prepare to jump to the 1:1 mapping.
- * 1) Extract page size of the tmp mapping
- * DSIZ = TLB_Word0[22:27]
- * 2) Calculate the physical address of the address
- * to jump to.
- */
- rlwinm r10, r24, 0, 22, 27
-
- cmpwi r10, PPC47x_TLB0_4K
- bne 0f
- li r10, 0x1000 /* r10 = 4k */
- bl 1f
-
-0:
- /* Defaults to 256M */
- lis r10, 0x1000
-
- bl 1f
-1: mflr r4
- addi r4, r4, (2f-1b) /* virtual address of 2f */
-
- subi r11, r10, 1 /* offsetmask = Pagesize - 1 */
- not r10, r11 /* Pagemask = ~(offsetmask) */
-
- and r5, r25, r10 /* Physical page */
- and r6, r4, r11 /* offset within the current page */
-
- or r5, r5, r6 /* Physical address for 2f */
-
- /* Switch the TS in MSR to the original one */
- mfmsr r8
- insrwi r8, r7, 1, 26
-
- mtspr SPRN_SRR1, r8
- mtspr SPRN_SRR0, r5
- rfi
-
-2:
- /* Invalidate the tmp mapping */
- lis r3, 0x8000 /* Way '0' */
-
- clrrwi r24, r24, 12 /* Clear the valid bit */
- tlbwe r24, r3, 0
- tlbwe r25, r3, 1
- tlbwe r26, r3, 2
-
- /* Make sure we complete the TLB write and flush the shadow TLB */
- isync
-
-#endif
-
-ppc44x_map_done:
-
-
- /* Restore the parameters */
- mr r3, r29
- mr r4, r30
- mr r5, r31
-
- li r0, 0
-#else
- li r0, 0
-
- /*
- * Set Machine Status Register to a known status,
- * switch the MMU off and jump to 1: in a single step.
- */
-
- mr r8, r0
- ori r8, r8, MSR_RI|MSR_ME
- mtspr SPRN_SRR1, r8
- addi r8, r4, 1f - relocate_new_kernel
- mtspr SPRN_SRR0, r8
- sync
- rfi
-
-1:
-#endif
- /* from this point address translation is turned off */
- /* and interrupts are disabled */
-
- /* set a new stack at the bottom of our page... */
- /* (not really needed now) */
- addi r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
- stw r0, 0(r1)
-
- /* Do the copies */
- li r6, 0 /* checksum */
- mr r0, r3
- b 1f
-
-0: /* top, read another word for the indirection page */
- lwzu r0, 4(r3)
-
-1:
- /* is it a destination page? (r8) */
- rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
- beq 2f
-
- rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */
- b 0b
-
-2: /* is it an indirection page? (r3) */
- rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
- beq 2f
-
- rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */
- subi r3, r3, 4
- b 0b
-
-2: /* are we done? */
- rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
- beq 2f
- b 3f
-
-2: /* is it a source page? (r9) */
- rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
- beq 0b
-
- rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */
-
- li r7, PAGE_SIZE / 4
- mtctr r7
- subi r9, r9, 4
- subi r8, r8, 4
-9:
- lwzu r0, 4(r9) /* do the copy */
- xor r6, r6, r0
- stwu r0, 4(r8)
- dcbst 0, r8
- sync
- icbi 0, r8
- bdnz 9b
-
- addi r9, r9, 4
- addi r8, r8, 4
- b 0b
-
-3:
-
- /* To be certain of avoiding problems with self-modifying code
- * execute a serializing instruction here.
- */
- isync
- sync
-
- mfspr r3, SPRN_PIR /* current core we are running on */
- mr r4, r5 /* load physical address of chunk called */
-
- /* jump to the entry point, usually the setup routine */
- mtlr r5
- blrl
-
-1: b 1b
-
-relocate_new_kernel_end:
-
- .globl relocate_new_kernel_size
-relocate_new_kernel_size:
- .long relocate_new_kernel_end - relocate_new_kernel
-#endif
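
The copy loop at the end of the deleted relocate_new_kernel code above walks the generic kexec indirection-page format; the IND_* flag values are taken straight from the comments in the deleted assembly. For readers who find the rlwinm-based bit tests opaque, here is a hedged C rendering of the same state machine (it omits the running XOR checksum kept in r6 and the per-word dcbst/icbi cache maintenance):

#include <string.h>

#define IND_DESTINATION 0x1	/* entry selects the next destination page */
#define IND_INDIRECTION 0x2	/* entry points at a further entry list */
#define IND_DONE        0x4	/* end of the whole list */
#define IND_SOURCE      0x8	/* entry names a source page to copy */

#define KPAGE_SIZE 4096UL	/* illustrative; the asm uses PAGE_SIZE */

/* Walk a kexec indirection list, copying source pages into place. */
static void kexec_copy_pages(unsigned long *entry)
{
	unsigned char *dest = NULL;

	for (;;) {
		unsigned long e = *entry++;
		unsigned long addr = e & ~(KPAGE_SIZE - 1);	/* strip flag bits */

		if (e & IND_DONE)
			break;
		if (e & IND_DESTINATION)
			dest = (unsigned char *)addr;
		else if (e & IND_INDIRECTION)
			entry = (unsigned long *)addr;	/* chase the next list */
		else if (e & IND_SOURCE) {
			memcpy(dest, (const void *)addr, KPAGE_SIZE);
			dest += KPAGE_SIZE;	/* consecutive sources fill consecutive pages */
		}
	}
}

After the walk, the assembly issues isync; sync and branches to the new kernel's entry point, for which the C sketch has no analogue.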
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b55a7b4cb543..1864605eca29 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -49,108 +49,6 @@ _GLOBAL(call_do_irq)
mtlr r0
blr
- .section ".toc","aw"
-PPC64_CACHES:
- .tc ppc64_caches[TC],ppc64_caches
- .section ".text"
-
-/*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- *
- * flush all bytes from start through stop-1 inclusive
- */
-
-_GLOBAL_TOC(flush_icache_range)
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-/*
- * Flush the data cache to memory
- *
- * Different systems have different cache line sizes
- * and in some cases i-cache and d-cache line sizes differ from
- * each other.
- */
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1BLOCKSIZE(r10)/* Get cache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mtctr r8
-1: dcbst 0,r6
- add r6,r6,r7
- bdnz 1b
- sync
-
-/* Now invalidate the instruction cache */
-
- lwz r7,ICACHEL1BLOCKSIZE(r10) /* Get Icache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5
- lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mtctr r8
-2: icbi 0,r6
- add r6,r6,r7
- bdnz 2b
- isync
- blr
-_ASM_NOKPROBE_SYMBOL(flush_icache_range)
-EXPORT_SYMBOL(flush_icache_range)
-
-/*
- * Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- *
- * void __flush_dcache_icache(void *page)
- */
-_GLOBAL(__flush_dcache_icache)
-/*
- * Flush the data cache to memory
- *
- * Different systems have different cache line sizes
- */
-
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-
-/* Flush the dcache */
- ld r7,PPC64_CACHES@toc(r2)
- clrrdi r3,r3,PAGE_SHIFT /* Page align */
- lwz r4,DCACHEL1BLOCKSPERPAGE(r7) /* Get # dcache blocks per page */
- lwz r5,DCACHEL1BLOCKSIZE(r7) /* Get dcache block size */
- mr r6,r3
- mtctr r4
-0: dcbst 0,r6
- add r6,r6,r5
- bdnz 0b
- sync
-
-/* Now invalidate the icache */
-
- lwz r4,ICACHEL1BLOCKSPERPAGE(r7) /* Get # icache blocks per page */
- lwz r5,ICACHEL1BLOCKSIZE(r7) /* Get icache block size */
- mtctr r4
-1: icbi 0,r3
- add r3,r3,r5
- bdnz 1b
- isync
- blr
-
_GLOBAL(__bswapdi2)
EXPORT_SYMBOL(__bswapdi2)
srdi r8,r3,32
@@ -432,18 +330,13 @@ kexec_create_tlb:
rlwimi r9,r10,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r9) */
/* Set up a temp identity mapping v:0 to p:0 and return to it. */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDED MAS2_M
-#else
-#define M_IF_NEEDED 0
-#endif
mtspr SPRN_MAS0,r9
lis r9,(MAS1_VALID|MAS1_IPROT)@h
ori r9,r9,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
mtspr SPRN_MAS1,r9
- LOAD_REG_IMMEDIATE(r9, 0x0 | M_IF_NEEDED)
+ LOAD_REG_IMMEDIATE(r9, 0x0 | MAS2_M_IF_NEEDED)
mtspr SPRN_MAS2,r9
LOAD_REG_IMMEDIATE(r9, 0x0 | MAS3_SR | MAS3_SW | MAS3_SX)
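
The flush_icache_range()/__flush_dcache_icache() assembly deleted above follows the classic powerpc sequence: write dirty data-cache blocks back (dcbst), wait for the writebacks (sync), invalidate the matching instruction-cache blocks (icbi), then discard prefetched instructions (isync). A hedged C-with-inline-asm sketch of that sequence, assuming a fixed 128-byte block size where the real code reads ppc64_caches at runtime (powerpc-only, of course):

#include <stdint.h>

#define L1_BLOCK 128UL	/* assumed block size; the asm looks it up per CPU */

static inline void flush_icache_range_sketch(uintptr_t start, uintptr_t stop)
{
	uintptr_t p;

	/* Push modified data-cache blocks covering [start, stop) to memory. */
	for (p = start & ~(L1_BLOCK - 1); p < stop; p += L1_BLOCK)
		__asm__ volatile("dcbst 0,%0" : : "r"(p) : "memory");
	__asm__ volatile("sync");	/* wait for the writebacks to complete */

	/* Invalidate the matching instruction-cache blocks. */
	for (p = start & ~(L1_BLOCK - 1); p < stop; p += L1_BLOCK)
		__asm__ volatile("icbi 0,%0" : : "r"(p) : "memory");
	__asm__ volatile("isync");	/* drop any stale prefetched instructions */
}

On CPUs with CPU_FTR_COHERENT_ICACHE the deleted routines short-circuit to a blr after PURGE_PREFETCHED_INS, which is why the whole sequence can be skipped there.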
diff --git a/arch/powerpc/kernel/note.S b/arch/powerpc/kernel/note.S
new file mode 100644
index 000000000000..bcdad15395dd
--- /dev/null
+++ b/arch/powerpc/kernel/note.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PowerPC ELF notes.
+ *
+ * Copyright 2019, IBM Corporation
+ */
+
+#include <linux/elfnote.h>
+#include <asm/elfnote.h>
+
+/*
+ * Ultravisor-capable bit (PowerNV only).
+ *
+ * Bit 0 indicates that the powerpc kernel binary knows how to run in an
+ * ultravisor-enabled system.
+ *
+ * In an ultravisor-enabled system, some machine resources are now controlled
+ * by the ultravisor. If the kernel is not ultravisor-capable, but it ends up
+ * being run on a machine with ultravisor, the kernel will probably crash
+ * trying to access ultravisor resources. For instance, it may crash in early
+ * boot trying to set the partition table entry 0.
+ *
+ * In an ultravisor-enabled system, a bootloader could warn the user or prevent
+ * the kernel from being run if the PowerPC ultravisor capability doesn't exist
+ * or the Ultravisor-capable bit is not set.
+ */
+#ifdef CONFIG_PPC_POWERNV
+#define PPCCAP_ULTRAVISOR_BIT (1 << 0)
+#else
+#define PPCCAP_ULTRAVISOR_BIT 0
+#endif
+
+/*
+ * Add the PowerPC Capabilities in the binary ELF note. It is a bitmap that
+ * can be used to advertise kernel capabilities to userland.
+ */
+#define PPC_CAPABILITIES_BITMAP (PPCCAP_ULTRAVISOR_BIT)
+
+ELFNOTE(PowerPC, PPC_ELFNOTE_CAPABILITIES,
+ .long PPC_CAPABILITIES_BITMAP)
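
A bootloader or tool can recover this bitmap by scanning the vmlinux note segment for an entry named "PowerPC" with the PPC_ELFNOTE_CAPABILITIES type. Below is a hedged sketch of the descriptor-side check, assuming the caller has already located a candidate note buffer and knows the type value from asm/elfnote.h; the .long descriptor is in the kernel's target endianness, so a cross-endian reader must byte-swap.

#include <stdint.h>
#include <string.h>

#define PPCCAP_ULTRAVISOR_BIT (1u << 0)

/* Standard ELF note header; name and descriptor follow, 4-byte padded. */
struct elf_note_hdr {
	uint32_t n_namesz;
	uint32_t n_descsz;
	uint32_t n_type;
};

/* Return the capabilities bitmap if buf holds a "PowerPC" note of the
 * wanted type, or 0 if it does not. */
static uint32_t ppc_note_capabilities(const unsigned char *buf,
				      uint32_t want_type)
{
	const struct elf_note_hdr *note = (const void *)buf;
	const char *name = (const char *)(note + 1);
	uint32_t name_pad = (note->n_namesz + 3) & ~3u;
	uint32_t bitmap;

	if (note->n_type != want_type)
		return 0;
	if (note->n_namesz != sizeof("PowerPC") || strcmp(name, "PowerPC"))
		return 0;
	if (note->n_descsz < sizeof(bitmap))
		return 0;

	/* Descriptor is the .long PPC_CAPABILITIES_BITMAP emitted above. */
	memcpy(&bitmap, name + name_pad, sizeof(bitmap));
	return bitmap;
}

An ultravisor-aware bootloader could then refuse to start a kernel for which (bitmap & PPCCAP_ULTRAVISOR_BIT) == 0, which is exactly the policy the comment block above suggests.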
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index e3ad8aa4730d..949eceb254d8 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -14,6 +14,8 @@
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/kexec.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
#include "setup.h"
@@ -52,6 +54,43 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align,
#ifdef CONFIG_PPC_PSERIES
+#define LPPACA_SIZE 0x400
+
+static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align,
+ unsigned long limit, int cpu)
+{
+ size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE);
+ static unsigned long shared_lppaca_size;
+ static void *shared_lppaca;
+ void *ptr;
+
+ if (!shared_lppaca) {
+ memblock_set_bottom_up(true);
+
+ shared_lppaca =
+ memblock_alloc_try_nid(shared_lppaca_total_size,
+ PAGE_SIZE, MEMBLOCK_LOW_LIMIT,
+ limit, NUMA_NO_NODE);
+ if (!shared_lppaca)
+ panic("cannot allocate shared data");
+
+ memblock_set_bottom_up(false);
+ uv_share_page(PHYS_PFN(__pa(shared_lppaca)),
+ shared_lppaca_total_size >> PAGE_SHIFT);
+ }
+
+ ptr = shared_lppaca + shared_lppaca_size;
+ shared_lppaca_size += size;
+
+ /*
+ * This is very early in boot, so no harm done if the kernel crashes at
+ * this point.
+ */
+ BUG_ON(shared_lppaca_size >= shared_lppaca_total_size);
+
+ return ptr;
+}
+
/*
* See asm/lppaca.h for more detail.
*
@@ -65,7 +104,7 @@ static inline void init_lppaca(struct lppaca *lppaca)
*lppaca = (struct lppaca) {
.desc = cpu_to_be32(0xd397d781), /* "LpPa" */
- .size = cpu_to_be16(0x400),
+ .size = cpu_to_be16(LPPACA_SIZE),
.fpregs_in_use = 1,
.slb_count = cpu_to_be16(64),
.vmxregs_in_use = 0,
@@ -75,19 +114,22 @@ static inline void init_lppaca(struct lppaca *lppaca)
static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
{
struct lppaca *lp;
- size_t size = 0x400;
- BUILD_BUG_ON(size < sizeof(struct lppaca));
+ BUILD_BUG_ON(sizeof(struct lppaca) > LPPACA_SIZE);
if (early_cpu_has_feature(CPU_FTR_HVMODE))
return NULL;
- lp = alloc_paca_data(size, 0x400, limit, cpu);
+ if (is_secure_guest())
+ lp = alloc_shared_lppaca(LPPACA_SIZE, 0x400, limit, cpu);
+ else
+ lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu);
+
init_lppaca(lp);
return lp;
}
-#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_PPC_PSERIES */
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index f627e15bb43c..c6c03416a151 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -261,12 +261,6 @@ int pcibios_sriov_disable(struct pci_dev *pdev)
#endif /* CONFIG_PCI_IOV */
-void pcibios_bus_add_device(struct pci_dev *pdev)
-{
- if (ppc_md.pcibios_bus_add_device)
- ppc_md.pcibios_bus_add_device(pdev);
-}
-
static resource_size_t pcibios_io_size(const struct pci_controller *hose)
{
#ifdef CONFIG_PPC64
@@ -964,7 +958,7 @@ void pcibios_setup_bus_self(struct pci_bus *bus)
phb->controller_ops.dma_bus_setup(bus);
}
-static void pcibios_setup_device(struct pci_dev *dev)
+void pcibios_bus_add_device(struct pci_dev *dev)
{
struct pci_controller *phb;
/* Fixup NUMA node as it may not be setup yet by the generic
@@ -985,17 +979,13 @@ static void pcibios_setup_device(struct pci_dev *dev)
pci_read_irq_line(dev);
if (ppc_md.pci_irq_fixup)
ppc_md.pci_irq_fixup(dev);
+
+ if (ppc_md.pcibios_bus_add_device)
+ ppc_md.pcibios_bus_add_device(dev);
}
int pcibios_add_device(struct pci_dev *dev)
{
- /*
- * We can only call pcibios_setup_device() after bus setup is complete,
- * since some of the platform specific DMA setup code depends on it.
- */
- if (dev->bus->is_added)
- pcibios_setup_device(dev);
-
#ifdef CONFIG_PCI_IOV
if (ppc_md.pcibios_fixup_sriov)
ppc_md.pcibios_fixup_sriov(dev);
@@ -1004,24 +994,6 @@ int pcibios_add_device(struct pci_dev *dev)
return 0;
}
-void pcibios_setup_bus_devices(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- pr_debug("PCI: Fixup bus devices %d (%s)\n",
- bus->number, bus->self ? pci_name(bus->self) : "PHB");
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- /* Cardbus can call us to add new devices to a bus, so ignore
- * those who are already fully discovered
- */
- if (pci_dev_is_added(dev))
- continue;
-
- pcibios_setup_device(dev);
- }
-}
-
void pcibios_set_master(struct pci_dev *dev)
{
/* No special bus mastering setup handling */
@@ -1037,19 +1009,9 @@ void pcibios_fixup_bus(struct pci_bus *bus)
	/* Now fixup the bus itself */
pcibios_setup_bus_self(bus);
-
- /* Now fixup devices on that bus */
- pcibios_setup_bus_devices(bus);
}
EXPORT_SYMBOL(pcibios_fixup_bus);
-void pci_fixup_cardbus(struct pci_bus *bus)
-{
- /* Now fixup devices on that bus */
- pcibios_setup_bus_devices(bus);
-}
-
-
static int skip_isa_ioresource_align(struct pci_dev *dev)
{
if (pci_has_flag(PCI_CAN_SKIP_ISA_ALIGN) &&
@@ -1379,10 +1341,6 @@ void __init pcibios_resource_survey(void)
pr_debug("PCI: Assigning unassigned resources...\n");
pci_assign_unassigned_resources();
}
-
- /* Call machine dependent fixup */
- if (ppc_md.pcibios_fixup)
- ppc_md.pcibios_fixup();
}
/* This is used by the PCI hotplug driver to allocate resource
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 0b0cf8168b47..d6a67f814983 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -55,11 +55,18 @@ EXPORT_SYMBOL_GPL(pci_find_bus_by_node);
void pcibios_release_device(struct pci_dev *dev)
{
struct pci_controller *phb = pci_bus_to_host(dev->bus);
+ struct pci_dn *pdn = pci_get_pdn(dev);
eeh_remove_device(dev);
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
+
+ /* free()ing the pci_dn has been deferred to us, do it now */
+ if (pdn && (pdn->flags & PCI_DN_FLAG_DEAD)) {
+ pci_dbg(dev, "freeing dead pdn\n");
+ kfree(pdn);
+ }
}
/**
@@ -127,7 +134,6 @@ void pci_hp_add_devices(struct pci_bus *bus)
*/
slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
- pcibios_setup_bus_devices(bus);
max = bus->busn_res.start;
/*
* Scan bridges that are already configured. We don't touch
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 50942a1d1a5f..b49e1060a3bf 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -263,6 +263,10 @@ static int __init pcibios_init(void)
/* Call common code to handle resource allocation */
pcibios_resource_survey();
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
/* Call machine dependent post-init code */
if (ppc_md.pcibios_after_init)
ppc_md.pcibios_after_init();
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index b7030b1189d0..f83d1f69b1dd 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -54,14 +54,20 @@ static int __init pcibios_init(void)
pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
/* Scan all of the recorded PCI controllers. */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
pcibios_scan_phb(hose);
- pci_bus_add_devices(hose->bus);
- }
/* Call common code to handle resource allocation */
pcibios_resource_survey();
+ /* Add devices. */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ pci_bus_add_devices(hose->bus);
+
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
return 0;
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index c4c8c237a106..4e654df55969 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -125,7 +125,7 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
}
#ifdef CONFIG_PCI_IOV
-static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
+static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent,
int vf_index,
int busno, int devfn)
{
@@ -151,17 +151,15 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
return pdn;
}
-#endif
-struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
+struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev)
{
-#ifdef CONFIG_PCI_IOV
struct pci_dn *parent, *pdn;
int i;
/* Only support IOV for now */
- if (!pdev->is_physfn)
- return pci_get_pdn(pdev);
+ if (WARN_ON(!pdev->is_physfn))
+ return NULL;
/* Check if VFs have been populated */
pdn = pci_get_pdn(pdev);
@@ -176,7 +174,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
struct eeh_dev *edev __maybe_unused;
- pdn = add_one_dev_pci_data(parent, i,
+ pdn = add_one_sriov_vf_pdn(parent, i,
pci_iov_virtfn_bus(pdev, i),
pci_iov_virtfn_devfn(pdev, i));
if (!pdn) {
@@ -192,31 +190,17 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
edev->physfn = pdev;
#endif /* CONFIG_EEH */
}
-#endif /* CONFIG_PCI_IOV */
-
return pci_get_pdn(pdev);
}
-void remove_dev_pci_data(struct pci_dev *pdev)
+void remove_sriov_vf_pdns(struct pci_dev *pdev)
{
-#ifdef CONFIG_PCI_IOV
struct pci_dn *parent;
struct pci_dn *pdn, *tmp;
int i;
- /*
- * VF and VF PE are created/released dynamically, so we need to
- * bind/unbind them. Otherwise the VF and VF PE would be mismatched
- * when re-enabling SR-IOV.
- */
- if (pdev->is_virtfn) {
- pdn = pci_get_pdn(pdev);
- pdn->pe_number = IODA_INVALID_PE;
- return;
- }
-
/* Only support IOV PF for now */
- if (!pdev->is_physfn)
+ if (WARN_ON(!pdev->is_physfn))
return;
/* Check if VFs have been populated */
@@ -244,9 +228,22 @@ void remove_dev_pci_data(struct pci_dev *pdev)
continue;
#ifdef CONFIG_EEH
- /* Release EEH device for the VF */
+ /*
+ * Release EEH state for this VF. The PCI core
+ * has already torn down the pci_dev for this VF, but
+			 * we're responsible for removing the eeh_dev since it
+ * has the same lifetime as the pci_dn that spawned it.
+ */
edev = pdn_to_eeh_dev(pdn);
if (edev) {
+ /*
+ * We allocate pci_dn's for the totalvfs count,
+			 * but only the VFs that were activated
+ * have a configured PE.
+ */
+ if (edev->pe)
+ eeh_rmv_from_parent_pe(edev);
+
pdn->edev = NULL;
kfree(edev);
}
@@ -258,8 +255,8 @@ void remove_dev_pci_data(struct pci_dev *pdev)
kfree(pdn);
}
}
-#endif /* CONFIG_PCI_IOV */
}
+#endif /* CONFIG_PCI_IOV */
struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
struct device_node *dn)
@@ -323,6 +320,7 @@ void pci_remove_device_node_info(struct device_node *dn)
{
struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
struct device_node *parent;
+ struct pci_dev *pdev;
#ifdef CONFIG_EEH
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -336,12 +334,28 @@ void pci_remove_device_node_info(struct device_node *dn)
WARN_ON(!list_empty(&pdn->child_list));
list_del(&pdn->list);
+ /* Drop the parent pci_dn's ref to our backing dt node */
parent = of_get_parent(dn);
if (parent)
of_node_put(parent);
- dn->data = NULL;
- kfree(pdn);
+ /*
+ * At this point we *might* still have a pci_dev that was
+ * instantiated from this pci_dn. So defer free()ing it until
+ * the pci_dev's release function is called.
+ */
+ pdev = pci_get_domain_bus_and_slot(pdn->phb->global_number,
+ pdn->busno, pdn->devfn);
+ if (pdev) {
+ /* NB: pdev has a ref to dn */
+ pci_dbg(pdev, "marked pdn (from %pOF) as dead\n", dn);
+ pdn->flags |= PCI_DN_FLAG_DEAD;
+ } else {
+ dn->data = NULL;
+ kfree(pdn);
+ }
+
+ pci_dev_put(pdev);
}
EXPORT_SYMBOL_GPL(pci_remove_device_node_info);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 409c6c1beabf..c3024f104765 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -34,31 +34,75 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
* pci_parse_of_flags - Parse the flags cell of a device tree PCI address
* @addr0: value of 1st cell of a device tree PCI address.
* @bridge: Set this flag if the address is from a bridge 'ranges' property
+ *
+ * PCI Bus Binding to IEEE Std 1275-1994
+ *
+ * Bit# 33222222 22221111 11111100 00000000
+ * 10987654 32109876 54321098 76543210
+ * phys.hi cell: npt000ss bbbbbbbb dddddfff rrrrrrrr
+ * phys.mid cell: hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh
+ * phys.lo cell: llllllll llllllll llllllll llllllll
+ *
+ * where:
+ * n is 0 if the address is relocatable, 1 otherwise
+ * p is 1 if the addressable region is "prefetchable", 0 otherwise
+ * t is 1 if the address is aliased (for non-relocatable I/O),
+ *	below 1 MB (for Memory), or below 64 KB (for relocatable I/O).
+ * ss is the space code, denoting the address space:
+ * 00 denotes Configuration Space
+ * 01 denotes I/O Space
+ * 10 denotes 32-bit-address Memory Space
+ * 11 denotes 64-bit-address Memory Space
+ * bbbbbbbb is the 8-bit Bus Number
+ * ddddd is the 5-bit Device Number
+ * fff is the 3-bit Function Number
+ * rrrrrrrr is the 8-bit Register Number
*/
+#define OF_PCI_ADDR0_SPACE(ss) (((ss)&3)<<24)
+#define OF_PCI_ADDR0_SPACE_CFG OF_PCI_ADDR0_SPACE(0)
+#define OF_PCI_ADDR0_SPACE_IO OF_PCI_ADDR0_SPACE(1)
+#define OF_PCI_ADDR0_SPACE_MMIO32 OF_PCI_ADDR0_SPACE(2)
+#define OF_PCI_ADDR0_SPACE_MMIO64 OF_PCI_ADDR0_SPACE(3)
+#define OF_PCI_ADDR0_SPACE_MASK OF_PCI_ADDR0_SPACE(3)
+#define OF_PCI_ADDR0_RELOC (1UL<<31)
+#define OF_PCI_ADDR0_PREFETCH (1UL<<30)
+#define OF_PCI_ADDR0_ALIAS (1UL<<29)
+#define OF_PCI_ADDR0_BUS 0x00FF0000UL
+#define OF_PCI_ADDR0_DEV 0x0000F800UL
+#define OF_PCI_ADDR0_FN 0x00000700UL
+#define OF_PCI_ADDR0_BARREG 0x000000FFUL
+
unsigned int pci_parse_of_flags(u32 addr0, int bridge)
{
- unsigned int flags = 0;
+ unsigned int flags = 0, as = addr0 & OF_PCI_ADDR0_SPACE_MASK;
- if (addr0 & 0x02000000) {
+ if (as == OF_PCI_ADDR0_SPACE_MMIO32 || as == OF_PCI_ADDR0_SPACE_MMIO64) {
flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
- flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
- if (flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
- flags |= IORESOURCE_MEM_64;
- flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
- if (addr0 & 0x40000000)
- flags |= IORESOURCE_PREFETCH
- | PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+ if (as == OF_PCI_ADDR0_SPACE_MMIO64)
+ flags |= PCI_BASE_ADDRESS_MEM_TYPE_64 | IORESOURCE_MEM_64;
+
+ if (addr0 & OF_PCI_ADDR0_ALIAS)
+ flags |= PCI_BASE_ADDRESS_MEM_TYPE_1M;
+
+ if (addr0 & OF_PCI_ADDR0_PREFETCH)
+ flags |= IORESOURCE_PREFETCH |
+ PCI_BASE_ADDRESS_MEM_PREFETCH;
+
/* Note: We don't know whether the ROM has been left enabled
* by the firmware or not. We mark it as disabled (ie, we do
* not set the IORESOURCE_ROM_ENABLE flag) for now rather than
* do a config space read, it will be force-enabled if needed
*/
- if (!bridge && (addr0 & 0xff) == 0x30)
+ if (!bridge && (addr0 & OF_PCI_ADDR0_BARREG) == PCI_ROM_ADDRESS)
flags |= IORESOURCE_READONLY;
- } else if (addr0 & 0x01000000)
+
+ } else if (as == OF_PCI_ADDR0_SPACE_IO)
flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+
if (flags)
flags |= IORESOURCE_SIZEALIGN;
+
return flags;
}
@@ -370,7 +414,6 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
*/
if (!rescan_existing)
pcibios_setup_bus_self(bus);
- pcibios_setup_bus_devices(bus);
/* Now scan child busses */
for_each_pci_bridge(dev, bus)
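
As a worked example of the IEEE 1275 layout documented in pci_parse_of_flags() above: the phys.hi cell 0x02012830 has n=0, p=0, t=0, ss=10 (32-bit memory space), bus 1, device 5, function 0, register 0x30 (the ROM BAR), so the rewritten function returns IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY | IORESOURCE_READONLY | IORESOURCE_SIZEALIGN. A standalone decoder using the same masks (the sample cell value is made up):

#include <stdint.h>
#include <stdio.h>

#define OF_PCI_ADDR0_SPACE_MASK 0x03000000u
#define OF_PCI_ADDR0_BUS        0x00FF0000u
#define OF_PCI_ADDR0_DEV        0x0000F800u
#define OF_PCI_ADDR0_FN         0x00000700u
#define OF_PCI_ADDR0_BARREG     0x000000FFu

int main(void)
{
	uint32_t addr0 = 0x02012830;	/* sample phys.hi cell, see above */

	printf("space=%u bus=%u dev=%u fn=%u reg=0x%02x\n",
	       (addr0 & OF_PCI_ADDR0_SPACE_MASK) >> 24,
	       (addr0 & OF_PCI_ADDR0_BUS) >> 16,
	       (addr0 & OF_PCI_ADDR0_DEV) >> 11,
	       (addr0 & OF_PCI_ADDR0_FN) >> 8,
	       addr0 & OF_PCI_ADDR0_BARREG);
	/* prints: space=2 bus=1 dev=5 fn=0 reg=0x30 */
	return 0;
}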
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index be3758d54e59..877817471e3c 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -39,10 +39,10 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
return 0;
}
-static const struct file_operations page_map_fops = {
- .llseek = page_map_seek,
- .read = page_map_read,
- .mmap = page_map_mmap
+static const struct proc_ops page_map_proc_ops = {
+ .proc_lseek = page_map_seek,
+ .proc_read = page_map_read,
+ .proc_mmap = page_map_mmap,
};
@@ -51,7 +51,7 @@ static int __init proc_ppc64_init(void)
struct proc_dir_entry *pde;
pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL,
- &page_map_fops, vdso_data);
+ &page_map_proc_ops, vdso_data);
if (!pde)
return 1;
proc_set_size(pde, PAGE_SIZE);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 8fc4de0d22b4..fad50db9dcf2 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -101,21 +101,8 @@ static void check_if_tm_restore_required(struct task_struct *tsk)
}
}
-static bool tm_active_with_fp(struct task_struct *tsk)
-{
- return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
- (tsk->thread.ckpt_regs.msr & MSR_FP);
-}
-
-static bool tm_active_with_altivec(struct task_struct *tsk)
-{
- return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
- (tsk->thread.ckpt_regs.msr & MSR_VEC);
-}
#else
static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
-static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; }
-static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
bool strict_msr_control;
@@ -252,7 +239,7 @@ EXPORT_SYMBOL(enable_kernel_fp);
static int restore_fp(struct task_struct *tsk)
{
- if (tsk->thread.load_fp || tm_active_with_fp(tsk)) {
+ if (tsk->thread.load_fp) {
load_fp_state(&current->thread.fp_state);
current->thread.load_fp++;
return 1;
@@ -334,8 +321,7 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
static int restore_altivec(struct task_struct *tsk)
{
- if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
- (tsk->thread.load_vec || tm_active_with_altivec(tsk))) {
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) {
load_vr_state(&tsk->thread.vr_state);
tsk->thread.used_vr = 1;
tsk->thread.load_vec++;
@@ -497,13 +483,14 @@ void giveup_all(struct task_struct *tsk)
if (!tsk->thread.regs)
return;
+ check_if_tm_restore_required(tsk);
+
usermsr = tsk->thread.regs->msr;
if ((usermsr & msr_all_available) == 0)
return;
msr_check_and_set(msr_all_available);
- check_if_tm_restore_required(tsk);
WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
@@ -728,6 +715,8 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
{
thread->hw_brk.address = 0;
thread->hw_brk.type = 0;
+ thread->hw_brk.len = 0;
+ thread->hw_brk.hw_len = 0;
if (ppc_breakpoint_available())
set_breakpoint(&thread->hw_brk);
}
@@ -751,28 +740,6 @@ static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
mtspr(SPRN_DABRX, dabrx);
return 0;
}
-#elif defined(CONFIG_PPC_8xx)
-static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
-{
- unsigned long addr = dabr & ~HW_BRK_TYPE_DABR;
- unsigned long lctrl1 = 0x90000000; /* compare type: equal on E & F */
- unsigned long lctrl2 = 0x8e000002; /* watchpoint 1 on cmp E | F */
-
- if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ)
- lctrl1 |= 0xa0000;
- else if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE)
- lctrl1 |= 0xf0000;
- else if ((dabr & HW_BRK_TYPE_RDWR) == 0)
- lctrl2 = 0;
-
- mtspr(SPRN_LCTRL2, 0);
- mtspr(SPRN_CMPE, addr);
- mtspr(SPRN_CMPF, addr + 4);
- mtspr(SPRN_LCTRL1, lctrl1);
- mtspr(SPRN_LCTRL2, lctrl2);
-
- return 0;
-}
#else
static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
{
@@ -793,6 +760,39 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk)
return __set_dabr(dabr, dabrx);
}
+static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
+{
+ unsigned long lctrl1 = LCTRL1_CTE_GT | LCTRL1_CTF_LT | LCTRL1_CRWE_RW |
+ LCTRL1_CRWF_RW;
+ unsigned long lctrl2 = LCTRL2_LW0EN | LCTRL2_LW0LADC | LCTRL2_SLW0EN;
+ unsigned long start_addr = brk->address & ~HW_BREAKPOINT_ALIGN;
+ unsigned long end_addr = (brk->address + brk->len - 1) | HW_BREAKPOINT_ALIGN;
+
+ if (start_addr == 0)
+ lctrl2 |= LCTRL2_LW0LA_F;
+ else if (end_addr == ~0U)
+ lctrl2 |= LCTRL2_LW0LA_E;
+ else
+ lctrl2 |= LCTRL2_LW0LA_EandF;
+
+ mtspr(SPRN_LCTRL2, 0);
+
+ if ((brk->type & HW_BRK_TYPE_RDWR) == 0)
+ return 0;
+
+ if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ)
+ lctrl1 |= LCTRL1_CRWE_RO | LCTRL1_CRWF_RO;
+ if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE)
+ lctrl1 |= LCTRL1_CRWE_WO | LCTRL1_CRWF_WO;
+
+ mtspr(SPRN_CMPE, start_addr - 1);
+ mtspr(SPRN_CMPF, end_addr + 1);
+ mtspr(SPRN_LCTRL1, lctrl1);
+ mtspr(SPRN_LCTRL2, lctrl2);
+
+ return 0;
+}
+
void __set_breakpoint(struct arch_hw_breakpoint *brk)
{
memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
@@ -800,6 +800,8 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
if (dawr_enabled())
// Power8 or later
set_dawr(brk);
+ else if (IS_ENABLED(CONFIG_PPC_8xx))
+ set_breakpoint_8xx(brk);
else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
// Power7 or earlier
set_dabr(brk);
@@ -829,6 +831,7 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
return false;
if (a->len != b->len)
return false;
+	/* No need to check hw_len; it's calculated from address and len */
return true;
}
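
To make the arithmetic in set_breakpoint_8xx() above concrete: the watched range is rounded out to the hardware's alignment, and CMPE/CMPF are programmed one byte outside it, consistent with the strictly-greater/strictly-less compare types selected by LCTRL1_CTE_GT | LCTRL1_CTF_LT. A small worked example, assuming HW_BREAKPOINT_ALIGN is the usual 0x7 byte mask (an assumption; the constant lives in asm/hw_breakpoint.h):

#include <stdio.h>

#define HW_BREAKPOINT_ALIGN 0x7UL	/* assumed value, see above */

int main(void)
{
	unsigned long address = 0x1003, len = 2;	/* watch 2 bytes at 0x1003 */
	unsigned long start_addr = address & ~HW_BREAKPOINT_ALIGN;
	unsigned long end_addr = (address + len - 1) | HW_BREAKPOINT_ALIGN;

	/* The 8xx comparators bracket the rounded range exclusively. */
	printf("CMPE=0x%lx CMPF=0x%lx\n", start_addr - 1, end_addr + 1);
	/* prints: CMPE=0xfff CMPF=0x1008 for the 0x1000..0x1007 range */
	return 0;
}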
@@ -1274,16 +1277,6 @@ void show_user_instructions(struct pt_regs *regs)
pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
- /*
- * Make sure the NIP points at userspace, not kernel text/data or
- * elsewhere.
- */
- if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) {
- pr_info("%s[%d]: Bad NIP, not dumping instructions.\n",
- current->comm, current->pid);
- return;
- }
-
seq_buf_init(&s, buf, sizeof(buf));
while (n) {
@@ -1294,7 +1287,7 @@ void show_user_instructions(struct pt_regs *regs)
for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
int instr;
- if (probe_kernel_address((const void *)pc, instr)) {
+ if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) {
seq_buf_printf(&s, "XXXXXXXX ");
continue;
}
@@ -1600,8 +1593,9 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
/*
* Copy architecture-specific thread state
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
+ unsigned long kthread_arg, struct task_struct *p,
+ unsigned long tls)
{
struct pt_regs *childregs, *kregs;
extern void ret_from_fork(void);
@@ -1642,10 +1636,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_PPC64
if (!is_32bit_task())
- childregs->gpr[13] = childregs->gpr[6];
+ childregs->gpr[13] = tls;
else
#endif
- childregs->gpr[2] = childregs->gpr[6];
+ childregs->gpr[2] = tls;
}
f = ret_from_fork;
@@ -2046,10 +2040,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
int count = 0;
int firstframe = 1;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- struct ftrace_ret_stack *ret_stack;
- extern void return_to_handler(void);
- unsigned long rth = (unsigned long)return_to_handler;
- int curr_frame = 0;
+ unsigned long ret_addr;
+ int ftrace_idx = 0;
#endif
if (tsk == NULL)
@@ -2078,15 +2070,10 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
if (!firstframe || ip != lr) {
printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if ((ip == rth) && curr_frame >= 0) {
- ret_stack = ftrace_graph_get_ret_stack(current,
- curr_frame++);
- if (ret_stack)
- pr_cont(" (%pS)",
- (void *)ret_stack->ret);
- else
- curr_frame = -1;
- }
+ ret_addr = ftrace_graph_ret_addr(current,
+ &ftrace_idx, ip, stack);
+ if (ret_addr != ip)
+ pr_cont(" (%pS)", (void *)ret_addr);
#endif
if (firstframe)
pr_cont(" (unreliable)");
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 7159e791a70d..6620f37abe73 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -55,6 +55,7 @@
#include <asm/firmware.h>
#include <asm/dt_cpu_ftrs.h>
#include <asm/drmem.h>
+#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
@@ -702,9 +703,12 @@ void __init early_init_devtree(void *params)
#ifdef CONFIG_PPC_POWERNV
/* Some machines might need OPAL info for debugging, grab it now. */
of_scan_flat_dt(early_init_dt_scan_opal, NULL);
+
+ /* Scan tree for ultravisor feature */
+ of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL);
#endif
-#ifdef CONFIG_FA_DUMP
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/* scan tree to see if dump is active during last boot */
of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
#endif
@@ -731,7 +735,7 @@ void __init early_init_devtree(void *params)
if (PHYSICAL_START > MEMORY_START)
memblock_reserve(MEMORY_START, 0x8000);
reserve_kdump_trampoline();
-#ifdef CONFIG_FA_DUMP
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/*
* If we fail to reserve memory for firmware-assisted dump then
* fallback to kexec based kdump.
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 514707ef6779..577345382b23 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -40,6 +40,7 @@
#include <asm/sections.h>
#include <asm/machdep.h>
#include <asm/asm-prototypes.h>
+#include <asm/ultravisor-api.h>
#include <linux/linux_logo.h>
@@ -94,7 +95,7 @@ static int of_workarounds __prombss;
#define PROM_BUG() do { \
prom_printf("kernel BUG at %s line 0x%x!\n", \
__FILE__, __LINE__); \
- __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \
+ __builtin_trap(); \
} while (0)
#ifdef DEBUG_PROM
@@ -171,6 +172,10 @@ static bool __prombss prom_radix_disable;
static bool __prombss prom_xive_disable;
#endif
+#ifdef CONFIG_PPC_SVM
+static bool __prombss prom_svm_enable;
+#endif
+
struct platform_support {
bool hash_mmu;
bool radix_mmu;
@@ -298,16 +303,24 @@ static char __init *prom_strstr(const char *s1, const char *s2)
return NULL;
}
-static size_t __init prom_strlcpy(char *dest, const char *src, size_t size)
+static size_t __init prom_strlcat(char *dest, const char *src, size_t count)
{
- size_t ret = prom_strlen(src);
+ size_t dsize = prom_strlen(dest);
+ size_t len = prom_strlen(src);
+ size_t res = dsize + len;
+
+ /* This would be a bug */
+ if (dsize >= count)
+ return count;
+
+ dest += dsize;
+ count -= dsize;
+ if (len >= count)
+ len = count-1;
+ memcpy(dest, src, len);
+ dest[len] = 0;
+ return res;
- if (size) {
- size_t len = (ret >= size) ? size - 1 : ret;
- memcpy(dest, src, len);
- dest[len] = '\0';
- }
- return ret;
}
#ifdef CONFIG_PPC_PSERIES
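
prom_strlcat() above follows the usual strlcat() contract: append at most count - strlen(dest) - 1 bytes, always NUL-terminate what is written, and return the total length it tried to create so the caller can detect truncation. A standalone copy of the same logic with a truncating example:

#include <stdio.h>
#include <string.h>

/* User-space copy of the prom_strlcat() logic above, for illustration. */
static size_t my_strlcat(char *dest, const char *src, size_t count)
{
	size_t dsize = strlen(dest);
	size_t len = strlen(src);
	size_t res = dsize + len;

	if (dsize >= count)	/* dest already overflows the buffer: a bug */
		return count;

	dest += dsize;
	count -= dsize;
	if (len >= count)
		len = count - 1;	/* leave room for the terminating NUL */
	memcpy(dest, src, len);
	dest[len] = 0;
	return res;
}

int main(void)
{
	char buf[8] = "boot";

	/* Wants 4 + 7 = 11 bytes; only 3 payload bytes fit, so src is cut. */
	size_t want = my_strlcat(buf, "=kernel", sizeof(buf));

	printf("%s (wanted %zu)\n", buf, want);	/* boot=ke (wanted 11) */
	return 0;
}

early_cmdline_parse() depends on exactly this behaviour: prom_cmd_line may already hold the firmware bootargs, and " " CONFIG_CMDLINE is appended with truncation rather than overflow.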
@@ -759,10 +772,14 @@ static void __init early_cmdline_parse(void)
prom_cmd_line[0] = 0;
p = prom_cmd_line;
- if ((long)prom.chosen > 0)
+
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && (long)prom.chosen > 0)
l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
- if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && (l <= 0 || p[0] == '\0')) /* dbl check */
- prom_strlcpy(prom_cmd_line, CONFIG_CMDLINE, sizeof(prom_cmd_line));
+
+ if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || l <= 0 || p[0] == '\0')
+ prom_strlcat(prom_cmd_line, " " CONFIG_CMDLINE,
+ sizeof(prom_cmd_line));
+
prom_printf("command line: %s\n", prom_cmd_line);
#ifdef CONFIG_PPC64
@@ -812,6 +829,17 @@ static void __init early_cmdline_parse(void)
prom_debug("XIVE disabled from cmdline\n");
}
#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_PPC_SVM
+ opt = prom_strstr(prom_cmd_line, "svm=");
+ if (opt) {
+ bool val;
+
+ opt += sizeof("svm=") - 1;
+ if (!prom_strtobool(opt, &val))
+ prom_svm_enable = val;
+ }
+#endif /* CONFIG_PPC_SVM */
}
#ifdef CONFIG_PPC_PSERIES
@@ -1037,7 +1065,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
.reserved2 = 0,
.reserved3 = 0,
.subprocessors = 1,
- .byte22 = OV5_FEAT(OV5_DRMEM_V2),
+ .byte22 = OV5_FEAT(OV5_DRMEM_V2) | OV5_FEAT(OV5_DRC_INFO),
.intarch = 0,
.mmu = 0,
.hash_ext = 0,
@@ -1712,6 +1740,43 @@ static void __init prom_close_stdin(void)
}
}
+#ifdef CONFIG_PPC_SVM
+static int prom_rtas_hcall(uint64_t args)
+{
+ register uint64_t arg1 asm("r3") = H_RTAS;
+ register uint64_t arg2 asm("r4") = args;
+
+ asm volatile("sc 1\n" : "=r" (arg1) :
+ "r" (arg1),
+ "r" (arg2) :);
+ return arg1;
+}
+
+static struct rtas_args __prombss os_term_args;
+
+static void __init prom_rtas_os_term(char *str)
+{
+ phandle rtas_node;
+ __be32 val;
+ u32 token;
+
+ prom_debug("%s: start...\n", __func__);
+ rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas"));
+ prom_debug("rtas_node: %x\n", rtas_node);
+ if (!PHANDLE_VALID(rtas_node))
+ return;
+
+ val = 0;
+ prom_getprop(rtas_node, "ibm,os-term", &val, sizeof(val));
+ token = be32_to_cpu(val);
+ prom_debug("ibm,os-term: %x\n", token);
+ if (token == 0)
+ prom_panic("Could not get token for ibm,os-term\n");
+ os_term_args.token = cpu_to_be32(token);
+ prom_rtas_hcall((uint64_t)&os_term_args);
+}
+#endif /* CONFIG_PPC_SVM */
+
/*
* Allocate room for and instantiate RTAS
*/
@@ -3168,6 +3233,59 @@ static void unreloc_toc(void)
#endif
#endif
+#ifdef CONFIG_PPC_SVM
+/*
+ * Perform the Enter Secure Mode ultracall.
+ */
+static int enter_secure_mode(unsigned long kbase, unsigned long fdt)
+{
+ register unsigned long r3 asm("r3") = UV_ESM;
+ register unsigned long r4 asm("r4") = kbase;
+ register unsigned long r5 asm("r5") = fdt;
+
+ asm volatile("sc 2" : "+r"(r3) : "r"(r4), "r"(r5));
+
+ return r3;
+}
+
+/*
+ * Call the Ultravisor to transfer us to secure memory if we have an ESM blob.
+ */
+static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+{
+ int ret;
+
+ if (!prom_svm_enable)
+ return;
+
+ /* Switch to secure mode. */
+ prom_printf("Switching to secure mode.\n");
+
+ /*
+	 * The ultravisor will do an integrity check of the kernel image, but we
+	 * relocated it, so the check would fail. Restore the original image by
+ * relocating it back to the kernel virtual base address.
+ */
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ relocate(KERNELBASE);
+
+ ret = enter_secure_mode(kbase, fdt);
+
+ /* Relocate the kernel again. */
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ relocate(kbase);
+
+ if (ret != U_SUCCESS) {
+ prom_printf("Returned %d from switching to secure mode.\n", ret);
+ prom_rtas_os_term("Switch to secure mode failed.\n");
+ }
+}
+#else
+static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+{
+}
+#endif /* CONFIG_PPC_SVM */
+
/*
* We enter here early on, when the Open Firmware prom is still
* handling exceptions and the MMU hash table for us.
@@ -3366,6 +3484,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
unreloc_toc();
#endif
+ /* Move to secure memory if we're supposed to be secure guests. */
+ setup_secure_guest(kbase, hdr);
+
__start(hdr, kbase, 0, 0, 0, 0, 0);
return 0;
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 160bef0d553d..b183ab9c5107 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -26,14 +26,15 @@ _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
__secondary_hold_acknowledge __secondary_hold_spinloop __start
logo_linux_clut224 btext_prepare_BAT
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
-__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
+__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC.
+relocate"
NM="$1"
OBJ="$2"
ERROR=0
-function check_section()
+check_section()
{
file=$1
section=$2
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 8c92febf5f44..25c0424e8868 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -2425,7 +2425,8 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
return -EIO;
hw_brk.address = data & (~HW_BRK_TYPE_DABR);
hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
- hw_brk.len = 8;
+ hw_brk.len = DABR_MAX_LEN;
+ hw_brk.hw_len = DABR_MAX_LEN;
set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
bp = thread->ptrace_bps[0];
@@ -2439,6 +2440,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
if (bp) {
attr = bp->attr;
attr.bp_addr = hw_brk.address;
+ attr.bp_len = DABR_MAX_LEN;
arch_bp_generic_fields(hw_brk.type, &attr.bp_type);
/* Enable breakpoint */
@@ -2456,7 +2458,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
/* Create a new breakpoint request if one doesn't exist already */
hw_breakpoint_init(&attr);
attr.bp_addr = hw_brk.address;
- attr.bp_len = 8;
+ attr.bp_len = DABR_MAX_LEN;
arch_bp_generic_fields(hw_brk.type,
&attr.bp_type);
@@ -2880,18 +2882,14 @@ static long ppc_set_hwdebug(struct task_struct *child,
if ((unsigned long)bp_info->addr >= TASK_SIZE)
return -EIO;
- brk.address = bp_info->addr & ~7UL;
+ brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN;
brk.type = HW_BRK_TYPE_TRANSLATE;
- brk.len = 8;
+ brk.len = DABR_MAX_LEN;
if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
brk.type |= HW_BRK_TYPE_READ;
if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
brk.type |= HW_BRK_TYPE_WRITE;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- /*
- * Check if the request is for 'range' breakpoints. We can
- * support it if range < 8 bytes.
- */
if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
len = bp_info->addr2 - bp_info->addr;
else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
@@ -2904,7 +2902,7 @@ static long ppc_set_hwdebug(struct task_struct *child,
/* Create a new breakpoint request if one doesn't exist already */
hw_breakpoint_init(&attr);
- attr.bp_addr = (unsigned long)bp_info->addr & ~HW_BREAKPOINT_ALIGN;
+ attr.bp_addr = (unsigned long)bp_info->addr;
attr.bp_len = len;
arch_bp_generic_fields(brk.type, &attr.bp_type);
@@ -3361,6 +3359,12 @@ void do_syscall_trace_leave(struct pt_regs *regs)
user_enter();
}
+void __init pt_regs_check(void);
+
+/*
+ * Dummy function; its purpose is to break the build if struct pt_regs and
+ * struct user_pt_regs don't match.
+ */
void __init pt_regs_check(void)
{
BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
@@ -3398,4 +3402,67 @@ void __init pt_regs_check(void)
offsetof(struct user_pt_regs, result));
BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+
+ // Now check that the pt_regs offsets match the uapi #defines
+ #define CHECK_REG(_pt, _reg) \
+ BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \
+ sizeof(unsigned long)));
+
+ CHECK_REG(PT_R0, gpr[0]);
+ CHECK_REG(PT_R1, gpr[1]);
+ CHECK_REG(PT_R2, gpr[2]);
+ CHECK_REG(PT_R3, gpr[3]);
+ CHECK_REG(PT_R4, gpr[4]);
+ CHECK_REG(PT_R5, gpr[5]);
+ CHECK_REG(PT_R6, gpr[6]);
+ CHECK_REG(PT_R7, gpr[7]);
+ CHECK_REG(PT_R8, gpr[8]);
+ CHECK_REG(PT_R9, gpr[9]);
+ CHECK_REG(PT_R10, gpr[10]);
+ CHECK_REG(PT_R11, gpr[11]);
+ CHECK_REG(PT_R12, gpr[12]);
+ CHECK_REG(PT_R13, gpr[13]);
+ CHECK_REG(PT_R14, gpr[14]);
+ CHECK_REG(PT_R15, gpr[15]);
+ CHECK_REG(PT_R16, gpr[16]);
+ CHECK_REG(PT_R17, gpr[17]);
+ CHECK_REG(PT_R18, gpr[18]);
+ CHECK_REG(PT_R19, gpr[19]);
+ CHECK_REG(PT_R20, gpr[20]);
+ CHECK_REG(PT_R21, gpr[21]);
+ CHECK_REG(PT_R22, gpr[22]);
+ CHECK_REG(PT_R23, gpr[23]);
+ CHECK_REG(PT_R24, gpr[24]);
+ CHECK_REG(PT_R25, gpr[25]);
+ CHECK_REG(PT_R26, gpr[26]);
+ CHECK_REG(PT_R27, gpr[27]);
+ CHECK_REG(PT_R28, gpr[28]);
+ CHECK_REG(PT_R29, gpr[29]);
+ CHECK_REG(PT_R30, gpr[30]);
+ CHECK_REG(PT_R31, gpr[31]);
+ CHECK_REG(PT_NIP, nip);
+ CHECK_REG(PT_MSR, msr);
+ CHECK_REG(PT_ORIG_R3, orig_gpr3);
+ CHECK_REG(PT_CTR, ctr);
+ CHECK_REG(PT_LNK, link);
+ CHECK_REG(PT_XER, xer);
+ CHECK_REG(PT_CCR, ccr);
+#ifdef CONFIG_PPC64
+ CHECK_REG(PT_SOFTE, softe);
+#else
+ CHECK_REG(PT_MQ, mq);
+#endif
+ CHECK_REG(PT_TRAP, trap);
+ CHECK_REG(PT_DAR, dar);
+ CHECK_REG(PT_DSISR, dsisr);
+ CHECK_REG(PT_RESULT, result);
+ #undef CHECK_REG
+
+ BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+ /*
+ * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the
+ * real registers.
+ */
+ BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
}
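
For clarity, one expansion of the CHECK_REG() macro used above, written out by hand; like every BUILD_BUG_ON(), it compiles to nothing when the condition is false and breaks the build when it is true:

/* CHECK_REG(PT_R1, gpr[1]) expands (modulo line breaks) to: */
BUILD_BUG_ON(PT_R1 != (offsetof(struct user_pt_regs, gpr[1]) /
		       sizeof(unsigned long)));

So PT_R1 must equal the word index of gpr[1] in struct user_pt_regs, i.e. 1, keeping the uapi PT_* constants and the structure layout permanently in lock-step.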
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 487dcd8da4de..2d33f342a293 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -159,12 +159,12 @@ static int poweron_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_poweron_show, NULL);
}
-static const struct file_operations ppc_rtas_poweron_operations = {
- .open = poweron_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_poweron_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_poweron_proc_ops = {
+ .proc_open = poweron_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_poweron_write,
+ .proc_release = single_release,
};
static int progress_open(struct inode *inode, struct file *file)
@@ -172,12 +172,12 @@ static int progress_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_progress_show, NULL);
}
-static const struct file_operations ppc_rtas_progress_operations = {
- .open = progress_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_progress_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_progress_proc_ops = {
+ .proc_open = progress_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_progress_write,
+ .proc_release = single_release,
};
static int clock_open(struct inode *inode, struct file *file)
@@ -185,12 +185,12 @@ static int clock_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_clock_show, NULL);
}
-static const struct file_operations ppc_rtas_clock_operations = {
- .open = clock_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_clock_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_clock_proc_ops = {
+ .proc_open = clock_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_clock_write,
+ .proc_release = single_release,
};
static int tone_freq_open(struct inode *inode, struct file *file)
@@ -198,12 +198,12 @@ static int tone_freq_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_tone_freq_show, NULL);
}
-static const struct file_operations ppc_rtas_tone_freq_operations = {
- .open = tone_freq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_tone_freq_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_tone_freq_proc_ops = {
+ .proc_open = tone_freq_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_tone_freq_write,
+ .proc_release = single_release,
};
static int tone_volume_open(struct inode *inode, struct file *file)
@@ -211,12 +211,12 @@ static int tone_volume_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_tone_volume_show, NULL);
}
-static const struct file_operations ppc_rtas_tone_volume_operations = {
- .open = tone_volume_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_tone_volume_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_tone_volume_proc_ops = {
+ .proc_open = tone_volume_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_tone_volume_write,
+ .proc_release = single_release,
};
static int ppc_rtas_find_all_sensors(void);
@@ -238,17 +238,17 @@ static int __init proc_rtas_init(void)
return -ENODEV;
proc_create("powerpc/rtas/progress", 0644, NULL,
- &ppc_rtas_progress_operations);
+ &ppc_rtas_progress_proc_ops);
proc_create("powerpc/rtas/clock", 0644, NULL,
- &ppc_rtas_clock_operations);
+ &ppc_rtas_clock_proc_ops);
proc_create("powerpc/rtas/poweron", 0644, NULL,
- &ppc_rtas_poweron_operations);
+ &ppc_rtas_poweron_proc_ops);
proc_create_single("powerpc/rtas/sensors", 0444, NULL,
ppc_rtas_sensors_show);
proc_create("powerpc/rtas/frequency", 0644, NULL,
- &ppc_rtas_tone_freq_operations);
+ &ppc_rtas_tone_freq_proc_ops);
proc_create("powerpc/rtas/volume", 0644, NULL,
- &ppc_rtas_tone_volume_operations);
+ &ppc_rtas_tone_volume_proc_ops);
proc_create_single("powerpc/rtas/rmo_buffer", 0400, NULL,
ppc_rtas_rmo_buf_show);
return 0;
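
Each of the five conversions above is mechanical: the struct type changes from file_operations to proc_ops, every field gains a proc_ prefix (llseek becomes proc_lseek), and the seq_file helpers are reused unchanged. A minimal sketch of one converted /proc entry, with a hypothetical show callback:

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
	seq_puts(m, "hello\n");		/* hypothetical payload */
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	return single_open(file, demo_show, NULL);
}

static const struct proc_ops demo_proc_ops = {
	.proc_open	= demo_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,
};

proc_create() then takes the new ops type directly, exactly as proc_rtas_init() does above: proc_create("powerpc/rtas/demo", 0444, NULL, &demo_proc_ops).
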
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 5faf0a64c92b..c5fa251b8950 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -16,6 +16,7 @@
#include <linux/capability.h>
#include <linux/delay.h>
#include <linux/cpu.h>
+#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
@@ -871,15 +872,17 @@ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
return 0;
for_each_cpu(cpu, cpus) {
+ struct device *dev = get_cpu_device(cpu);
+
switch (state) {
case DOWN:
- cpuret = cpu_down(cpu);
+ cpuret = device_offline(dev);
break;
case UP:
- cpuret = cpu_up(cpu);
+ cpuret = device_online(dev);
break;
}
- if (cpuret) {
+ if (cpuret < 0) {
pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
__func__,
((state == UP) ? "up" : "down"),
@@ -896,6 +899,7 @@ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
cpumask_clear_cpu(cpu, cpus);
}
}
+ cond_resched();
}
return ret;
@@ -922,13 +926,11 @@ int rtas_online_cpus_mask(cpumask_var_t cpus)
return ret;
}
-EXPORT_SYMBOL(rtas_online_cpus_mask);
int rtas_offline_cpus_mask(cpumask_var_t cpus)
{
return rtas_cpu_state_change_mask(DOWN, cpus);
}
-EXPORT_SYMBOL(rtas_offline_cpus_mask);
int rtas_ibm_suspend_me(u64 handle)
{
@@ -968,6 +970,8 @@ int rtas_ibm_suspend_me(u64 handle)
data.token = rtas_token("ibm,suspend-me");
data.complete = &done;
+ lock_device_hotplug();
+
/* All present CPUs must be online */
cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
cpuret = rtas_online_cpus_mask(offline_mask);
@@ -1006,6 +1010,7 @@ out_hotplug_enable:
__func__);
out:
+ unlock_device_hotplug();
free_cpumask_var(offline_mask);
return atomic_read(&data.error);
}
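
Two related changes land together here: cpu_down()/cpu_up() become device_offline()/device_online() so the sysfs "online" state stays coherent with what RTAS did, and because those helpers must run under device_hotplug_lock, rtas_ibm_suspend_me() now brackets the whole operation with lock_device_hotplug()/unlock_device_hotplug(). Note the changed error test as well: device_offline() returns 1 (not 0) when the CPU was already offline, so only negative returns are failures. The locking shape, reduced to a sketch:

#include <linux/cpu.h>
#include <linux/device.h>

/* Offline one CPU the way rtas_cpu_state_change_mask() now does.
 * device_offline() returns < 0 on error, 0 on success, 1 if the
 * CPU was already offline. */
static int demo_offline_cpu(unsigned int cpu)
{
	struct device *dev = get_cpu_device(cpu);
	int ret;

	lock_device_hotplug();
	ret = device_offline(dev);
	unlock_device_hotplug();

	return ret < 0 ? ret : 0;
}
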
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 84f794782c62..a99179d83538 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -655,7 +655,7 @@ struct rtas_flash_file {
const char *filename;
const char *rtas_call_name;
int *status;
- const struct file_operations fops;
+ const struct proc_ops ops;
};
static const struct rtas_flash_file rtas_flash_files[] = {
@@ -663,36 +663,36 @@ static const struct rtas_flash_file rtas_flash_files[] = {
.filename = "powerpc/rtas/" FIRMWARE_FLASH_NAME,
.rtas_call_name = "ibm,update-flash-64-and-reboot",
.status = &rtas_update_flash_data.status,
- .fops.read = rtas_flash_read_msg,
- .fops.write = rtas_flash_write,
- .fops.release = rtas_flash_release,
- .fops.llseek = default_llseek,
+ .ops.proc_read = rtas_flash_read_msg,
+ .ops.proc_write = rtas_flash_write,
+ .ops.proc_release = rtas_flash_release,
+ .ops.proc_lseek = default_llseek,
},
{
.filename = "powerpc/rtas/" FIRMWARE_UPDATE_NAME,
.rtas_call_name = "ibm,update-flash-64-and-reboot",
.status = &rtas_update_flash_data.status,
- .fops.read = rtas_flash_read_num,
- .fops.write = rtas_flash_write,
- .fops.release = rtas_flash_release,
- .fops.llseek = default_llseek,
+ .ops.proc_read = rtas_flash_read_num,
+ .ops.proc_write = rtas_flash_write,
+ .ops.proc_release = rtas_flash_release,
+ .ops.proc_lseek = default_llseek,
},
{
.filename = "powerpc/rtas/" VALIDATE_FLASH_NAME,
.rtas_call_name = "ibm,validate-flash-image",
.status = &rtas_validate_flash_data.status,
- .fops.read = validate_flash_read,
- .fops.write = validate_flash_write,
- .fops.release = validate_flash_release,
- .fops.llseek = default_llseek,
+ .ops.proc_read = validate_flash_read,
+ .ops.proc_write = validate_flash_write,
+ .ops.proc_release = validate_flash_release,
+ .ops.proc_lseek = default_llseek,
},
{
.filename = "powerpc/rtas/" MANAGE_FLASH_NAME,
.rtas_call_name = "ibm,manage-flash-image",
.status = &rtas_manage_flash_data.status,
- .fops.read = manage_flash_read,
- .fops.write = manage_flash_write,
- .fops.llseek = default_llseek,
+ .ops.proc_read = manage_flash_read,
+ .ops.proc_write = manage_flash_write,
+ .ops.proc_lseek = default_llseek,
}
};
@@ -723,7 +723,7 @@ static int __init rtas_flash_init(void)
const struct rtas_flash_file *f = &rtas_flash_files[i];
int token;
- if (!proc_create(f->filename, 0600, NULL, &f->fops))
+ if (!proc_create(f->filename, 0600, NULL, &f->ops))
goto enomem;
/*
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 8d02e047f96a..89b798f8f656 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -385,12 +385,12 @@ static __poll_t rtas_log_poll(struct file *file, poll_table * wait)
return 0;
}
-static const struct file_operations proc_rtas_log_operations = {
- .read = rtas_log_read,
- .poll = rtas_log_poll,
- .open = rtas_log_open,
- .release = rtas_log_release,
- .llseek = noop_llseek,
+static const struct proc_ops rtas_log_proc_ops = {
+ .proc_read = rtas_log_read,
+ .proc_poll = rtas_log_poll,
+ .proc_open = rtas_log_open,
+ .proc_release = rtas_log_release,
+ .proc_lseek = noop_llseek,
};
static int enable_surveillance(int timeout)
@@ -572,7 +572,7 @@ static int __init rtas_init(void)
return -ENODEV;
entry = proc_create("powerpc/rtas/error_log", 0400, NULL,
- &proc_rtas_log_operations);
+ &rtas_log_proc_ops);
if (!entry)
printk(KERN_ERR "Failed to create error_log proc entry\n");
diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c
new file mode 100644
index 000000000000..4b982324d368
--- /dev/null
+++ b/arch/powerpc/kernel/secure_boot.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+#include <linux/types.h>
+#include <linux/of.h>
+#include <asm/secure_boot.h>
+
+static struct device_node *get_ppc_fw_sb_node(void)
+{
+ static const struct of_device_id ids[] = {
+ { .compatible = "ibm,secureboot", },
+ { .compatible = "ibm,secureboot-v1", },
+ { .compatible = "ibm,secureboot-v2", },
+ {},
+ };
+
+ return of_find_matching_node(NULL, ids);
+}
+
+bool is_ppc_secureboot_enabled(void)
+{
+ struct device_node *node;
+ bool enabled = false;
+
+ node = get_ppc_fw_sb_node();
+ enabled = of_property_read_bool(node, "os-secureboot-enforcing");
+
+ of_node_put(node);
+
+ pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled");
+
+ return enabled;
+}
+
+bool is_ppc_trustedboot_enabled(void)
+{
+ struct device_node *node;
+ bool enabled = false;
+
+ node = get_ppc_fw_sb_node();
+ enabled = of_property_read_bool(node, "trusted-enabled");
+
+ of_node_put(node);
+
+ pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled");
+
+ return enabled;
+}
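
Both predicates reduce to a flag-property probe on the firmware's secure-boot node, so the same state is visible from user space through the flattened device tree. A hedged illustration in C (the /proc path and node name are assumptions; the node only exists on firmware that implements one of the ibm,secureboot compatibles matched above):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* A flag property: its mere presence means "enforcing". */
	const char *prop =
		"/proc/device-tree/ibm,secureboot/os-secureboot-enforcing";

	printf("secure boot %s\n",
	       access(prop, F_OK) == 0 ? "enabled" : "disabled");
	return 0;
}
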
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index e1c9cf079503..bd70f5be1c27 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -16,7 +16,7 @@
#include <asm/setup.h>
-unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
+u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
enum count_cache_flush_type {
COUNT_CACHE_FLUSH_NONE = 0x1,
@@ -24,11 +24,12 @@ enum count_cache_flush_type {
COUNT_CACHE_FLUSH_HW = 0x4,
};
static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
+static bool link_stack_flush_enabled;
bool barrier_nospec_enabled;
static bool no_nospec;
static bool btb_flush_enabled;
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
static bool no_spectrev2;
#endif
@@ -94,13 +95,14 @@ static int barrier_nospec_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_barrier_nospec,
- barrier_nospec_get, barrier_nospec_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_barrier_nospec, barrier_nospec_get,
+ barrier_nospec_set, "%llu\n");
static __init int barrier_nospec_debugfs_init(void)
{
- debugfs_create_file("barrier_nospec", 0600, powerpc_debugfs_root, NULL,
- &fops_barrier_nospec);
+ debugfs_create_file_unsafe("barrier_nospec", 0600,
+ powerpc_debugfs_root, NULL,
+ &fops_barrier_nospec);
return 0;
}
device_initcall(barrier_nospec_debugfs_init);
@@ -108,13 +110,13 @@ device_initcall(barrier_nospec_debugfs_init);
static __init int security_feature_debugfs_init(void)
{
debugfs_create_x64("security_features", 0400, powerpc_debugfs_root,
- (u64 *)&powerpc_security_features);
+ &powerpc_security_features);
return 0;
}
device_initcall(security_feature_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
static int __init handle_nospectre_v2(char *p)
{
no_spectrev2 = true;
@@ -122,6 +124,9 @@ static int __init handle_nospectre_v2(char *p)
return 0;
}
early_param("nospectre_v2", handle_nospectre_v2);
+#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
void setup_spectre_v2(void)
{
if (no_spectrev2 || cpu_mitigations_off())
@@ -138,32 +143,33 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha
thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV);
- if (rfi_flush || thread_priv) {
+ if (rfi_flush) {
struct seq_buf s;
seq_buf_init(&s, buf, PAGE_SIZE - 1);
- seq_buf_printf(&s, "Mitigation: ");
-
- if (rfi_flush)
- seq_buf_printf(&s, "RFI Flush");
-
- if (rfi_flush && thread_priv)
- seq_buf_printf(&s, ", ");
-
+ seq_buf_printf(&s, "Mitigation: RFI Flush");
if (thread_priv)
- seq_buf_printf(&s, "L1D private per thread");
+ seq_buf_printf(&s, ", L1D private per thread");
seq_buf_printf(&s, "\n");
return s.len;
}
+ if (thread_priv)
+ return sprintf(buf, "Vulnerable: L1D private per thread\n");
+
if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
!security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
return sprintf(buf, "Not affected\n");
return sprintf(buf, "Vulnerable\n");
}
+
+ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_meltdown(dev, attr, buf);
+}
#endif
ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
@@ -209,11 +215,19 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
if (ccd)
seq_buf_printf(&s, "Indirect branch cache disabled");
+
+ if (link_stack_flush_enabled)
+ seq_buf_printf(&s, ", Software link stack flush");
+
} else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) {
seq_buf_printf(&s, "Mitigation: Software count cache flush");
if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW)
seq_buf_printf(&s, " (hardware accelerated)");
+
+ if (link_stack_flush_enabled)
+ seq_buf_printf(&s, ", Software link stack flush");
+
} else if (btb_flush_enabled) {
seq_buf_printf(&s, "Mitigation: Branch predictor state flush");
} else {
@@ -364,28 +378,61 @@ static int stf_barrier_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set,
+ "%llu\n");
static __init int stf_barrier_debugfs_init(void)
{
- debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier);
+ debugfs_create_file_unsafe("stf_barrier", 0600, powerpc_debugfs_root,
+ NULL, &fops_stf_barrier);
return 0;
}
device_initcall(stf_barrier_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
+static void no_count_cache_flush(void)
+{
+ count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
+ pr_info("count-cache-flush: software flush disabled.\n");
+}
+
static void toggle_count_cache_flush(bool enable)
{
- if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+ if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) &&
+ !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK))
+ enable = false;
+
+ if (!enable) {
patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP);
- count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
- pr_info("count-cache-flush: software flush disabled.\n");
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ patch_instruction_site(&patch__call_kvm_flush_link_stack, PPC_INST_NOP);
+#endif
+ pr_info("link-stack-flush: software flush disabled.\n");
+ link_stack_flush_enabled = false;
+ no_count_cache_flush();
return;
}
+ // This enables the branch from _switch to flush_count_cache
patch_branch_site(&patch__call_flush_count_cache,
(u64)&flush_count_cache, BRANCH_SET_LINK);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ // This enables the branch from guest_exit_cont to kvm_flush_link_stack
+ patch_branch_site(&patch__call_kvm_flush_link_stack,
+ (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+#endif
+
+ pr_info("link-stack-flush: software flush enabled.\n");
+ link_stack_flush_enabled = true;
+
+ // If we just need to flush the link stack, patch an early return
+ if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+ patch_instruction_site(&patch__flush_link_stack_return, PPC_INST_BLR);
+ no_count_cache_flush();
+ return;
+ }
+
if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
count_cache_flush_type = COUNT_CACHE_FLUSH_SW;
pr_info("count-cache-flush: full software flush sequence enabled.\n");
@@ -399,7 +446,26 @@ static void toggle_count_cache_flush(bool enable)
void setup_count_cache_flush(void)
{
- toggle_count_cache_flush(true);
+ bool enable = true;
+
+ if (no_spectrev2 || cpu_mitigations_off()) {
+ if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) ||
+ security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
+ pr_warn("Spectre v2 mitigations not fully under software control, can't disable\n");
+
+ enable = false;
+ }
+
+ /*
+ * There's no firmware feature flag/hypervisor bit to tell us we need to
+ * flush the link stack on context switch. So we set it here if we see
+ * either of the Spectre v2 mitigations that aim to protect userspace.
+ */
+ if (security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED) ||
+ security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE))
+ security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
+
+ toggle_count_cache_flush(enable);
}
#ifdef CONFIG_DEBUG_FS
@@ -429,13 +495,14 @@ static int count_cache_flush_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get,
- count_cache_flush_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get,
+ count_cache_flush_set, "%llu\n");
static __init int count_cache_flush_debugfs_init(void)
{
- debugfs_create_file("count_cache_flush", 0600, powerpc_debugfs_root,
- NULL, &fops_count_cache_flush);
+ debugfs_create_file_unsafe("count_cache_flush", 0600,
+ powerpc_debugfs_root, NULL,
+ &fops_count_cache_flush);
return 0;
}
device_initcall(count_cache_flush_debugfs_init);
diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c
new file mode 100644
index 000000000000..6a29777d6a2d
--- /dev/null
+++ b/arch/powerpc/kernel/secvar-ops.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ *
+ * This file initializes secvar operations for PowerPC Secureboot
+ */
+
+#include <linux/cache.h>
+#include <asm/secvar.h>
+
+const struct secvar_operations *secvar_ops __ro_after_init;
+
+void set_secvar_ops(const struct secvar_operations *ops)
+{
+ secvar_ops = ops;
+}
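
set_secvar_ops() is the single registration point: a platform backend fills in one secvar_operations table at boot, and every consumer (including the sysfs code below) goes through the secvar_ops pointer. A sketch of a backend registration, with stub callbacks standing in for a real firmware driver; the signatures are inferred from the calls in secvar-sysfs.c below and should be checked against asm/secvar.h:

#include <linux/errno.h>
#include <linux/init.h>
#include <asm/secvar.h>

static int demo_get(const char *key, u64 key_len, u8 *data, u64 *data_size)
{
	return -ENODEV;		/* real backend would query firmware */
}

static int demo_get_next(const char *key, u64 *key_len, u64 keybufsize)
{
	return -ENOENT;		/* "no more variables", per the loop below */
}

static int demo_set(const char *key, u64 key_len, u8 *data, u64 data_size)
{
	return -ENODEV;
}

static const struct secvar_operations demo_secvar_ops = {
	.get		= demo_get,
	.get_next	= demo_get_next,
	.set		= demo_set,
};

static int __init demo_secvar_register(void)
{
	set_secvar_ops(&demo_secvar_ops);
	return 0;
}
/* device_initcall runs before the late_initcall that builds the sysfs tree. */
device_initcall(demo_secvar_register);
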
diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c
new file mode 100644
index 000000000000..a0a78aba2083
--- /dev/null
+++ b/arch/powerpc/kernel/secvar-sysfs.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 IBM Corporation <nayna@linux.ibm.com>
+ *
+ * This code exposes secure variables to user via sysfs
+ */
+
+#define pr_fmt(fmt) "secvar-sysfs: "fmt
+
+#include <linux/slab.h>
+#include <linux/compat.h>
+#include <linux/string.h>
+#include <linux/of.h>
+#include <asm/secvar.h>
+
+#define NAME_MAX_SIZE 1024
+
+static struct kobject *secvar_kobj;
+static struct kset *secvar_kset;
+
+static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ ssize_t rc = 0;
+ struct device_node *node;
+ const char *format;
+
+ node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
+ if (!of_device_is_available(node))
+ return -ENODEV;
+
+ rc = of_property_read_string(node, "format", &format);
+ if (rc)
+ return rc;
+
+ rc = sprintf(buf, "%s\n", format);
+
+ of_node_put(node);
+
+ return rc;
+}
+
+
+static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ uint64_t dsize;
+ int rc;
+
+ rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize);
+ if (rc) {
+ pr_err("Error retrieving %s variable size %d\n", kobj->name,
+ rc);
+ return rc;
+ }
+
+ return sprintf(buf, "%llu\n", dsize);
+}
+
+static ssize_t data_read(struct file *filep, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf, loff_t off,
+ size_t count)
+{
+ uint64_t dsize;
+ char *data;
+ int rc;
+
+ rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize);
+ if (rc) {
+ pr_err("Error getting %s variable size %d\n", kobj->name, rc);
+ return rc;
+ }
+ pr_debug("dsize is %llu\n", dsize);
+
+ data = kzalloc(dsize, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, data, &dsize);
+ if (rc) {
+ pr_err("Error getting %s variable %d\n", kobj->name, rc);
+ goto data_fail;
+ }
+
+ rc = memory_read_from_buffer(buf, count, &off, data, dsize);
+
+data_fail:
+ kfree(data);
+ return rc;
+}
+
+static ssize_t update_write(struct file *filep, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf, loff_t off,
+ size_t count)
+{
+ int rc;
+
+ pr_debug("count is %ld\n", count);
+ rc = secvar_ops->set(kobj->name, strlen(kobj->name) + 1, buf, count);
+ if (rc) {
+ pr_err("Error setting the %s variable %d\n", kobj->name, rc);
+ return rc;
+ }
+
+ return count;
+}
+
+static struct kobj_attribute format_attr = __ATTR_RO(format);
+
+static struct kobj_attribute size_attr = __ATTR_RO(size);
+
+static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0);
+
+static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0);
+
+static struct bin_attribute *secvar_bin_attrs[] = {
+ &data_attr,
+ &update_attr,
+ NULL,
+};
+
+static struct attribute *secvar_attrs[] = {
+ &size_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group secvar_attr_group = {
+ .attrs = secvar_attrs,
+ .bin_attrs = secvar_bin_attrs,
+};
+__ATTRIBUTE_GROUPS(secvar_attr);
+
+static struct kobj_type secvar_ktype = {
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = secvar_attr_groups,
+};
+
+static int update_kobj_size(void)
+{
+
+ struct device_node *node;
+ u64 varsize;
+ int rc = 0;
+
+ node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
+ if (!of_device_is_available(node)) {
+ rc = -ENODEV;
+ goto out;
+ }
+
+ rc = of_property_read_u64(node, "max-var-size", &varsize);
+ if (rc)
+ goto out;
+
+ data_attr.size = varsize;
+ update_attr.size = varsize;
+
+out:
+ of_node_put(node);
+
+ return rc;
+}
+
+static int secvar_sysfs_load(void)
+{
+ char *name;
+ uint64_t namesize = 0;
+ struct kobject *kobj;
+ int rc;
+
+ name = kzalloc(NAME_MAX_SIZE, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ do {
+ rc = secvar_ops->get_next(name, &namesize, NAME_MAX_SIZE);
+ if (rc) {
+ if (rc != -ENOENT)
+ pr_err("error getting secvar from firmware %d\n",
+ rc);
+ break;
+ }
+
+ kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+ if (!kobj) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ kobject_init(kobj, &secvar_ktype);
+
+ rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name);
+ if (rc) {
+ pr_warn("kobject_add error %d for attribute: %s\n", rc,
+ name);
+ kobject_put(kobj);
+ kobj = NULL;
+ }
+
+ if (kobj)
+ kobject_uevent(kobj, KOBJ_ADD);
+
+ } while (!rc);
+
+ kfree(name);
+ return rc;
+}
+
+static int secvar_sysfs_init(void)
+{
+ int rc;
+
+ if (!secvar_ops) {
+ pr_warn("secvar: failed to retrieve secvar operations.\n");
+ return -ENODEV;
+ }
+
+ secvar_kobj = kobject_create_and_add("secvar", firmware_kobj);
+ if (!secvar_kobj) {
+ pr_err("secvar: Failed to create firmware kobj\n");
+ return -ENOMEM;
+ }
+
+ rc = sysfs_create_file(secvar_kobj, &format_attr.attr);
+ if (rc) {
+ kobject_put(secvar_kobj);
+ return -ENOMEM;
+ }
+
+ secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj);
+ if (!secvar_kset) {
+ pr_err("secvar: sysfs kobject registration failed.\n");
+ kobject_put(secvar_kobj);
+ return -ENOMEM;
+ }
+
+ rc = update_kobj_size();
+ if (rc) {
+ pr_err("Cannot read the size of the attribute\n");
+ return rc;
+ }
+
+ secvar_sysfs_load();
+
+ return 0;
+}
+
+late_initcall(secvar_sysfs_init);
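
The resulting layout is /sys/firmware/secvar/format plus one directory per variable under /sys/firmware/secvar/vars/, each containing size, data and update. A small reader, assuming a variable named "db" exists (list vars/ to see what the firmware actually exposes):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/firmware/secvar/vars/db/size", "r");
	unsigned long long size;

	if (!f)
		return 1;	/* no secvar backend, or no such variable */
	if (fscanf(f, "%llu", &size) != 1)
		size = 0;
	fclose(f);
	printf("db is %llu bytes; raw contents are in vars/db/data\n", size);
	return 0;
}
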
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 1f8db666468d..7f8c890360fe 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -715,8 +715,28 @@ static struct notifier_block ppc_panic_block = {
.priority = INT_MIN /* may not return; must be done last */
};
+/*
+ * Dump out kernel offset information on panic.
+ */
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+ void *p)
+{
+ pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
+ kaslr_offset(), KERNELBASE);
+
+ return 0;
+}
+
+static struct notifier_block kernel_offset_notifier = {
+ .notifier_call = dump_kernel_offset
+};
+
void __init setup_panic(void)
{
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0)
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &kernel_offset_notifier);
+
/* PPC64 always does a hard irq disable in its panic handler */
if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic)
return;
@@ -778,12 +798,6 @@ void ppc_printk_progress(char *s, unsigned short hex)
pr_info("%s\n", s);
}
-void arch_setup_pdev_archdata(struct platform_device *pdev)
-{
- pdev->archdata.dma_mask = DMA_BIT_MASK(32);
- pdev->dev.dma_mask = &pdev->archdata.dma_mask;
-}
-
static __init void print_system_info(void)
{
pr_info("-----------------------------------------------------\n");
@@ -806,9 +820,15 @@ static __init void print_system_info(void)
pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
#ifdef CONFIG_PPC64
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
+#ifdef CONFIG_PPC_BOOK3S
+ pr_info("vmalloc start = 0x%lx\n", KERN_VIRT_START);
+ pr_info("IO start = 0x%lx\n", KERN_IO_START);
+ pr_info("vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
+#endif
#endif
- print_system_hash_info();
+ if (!early_radix_enabled())
+ print_system_hash_info();
if (PHYSICAL_START > 0)
pr_info("physical_start = 0x%llx\n",
@@ -929,9 +949,6 @@ void __init setup_arch(char **cmdline_p)
early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
- if (IS_ENABLED(CONFIG_DUMMY_CONSOLE))
- conswitchp = &dummy_con;
-
if (ppc_md.setup_arch)
ppc_md.setup_arch();
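
The new notifier prints the KASLR slide at panic time, which is exactly what is needed to map addresses from a randomized crash back to an unrandomized vmlinux: subtract the reported offset from a runtime address before looking it up in System.map. A trivial helper with illustrative values:

#include <stdio.h>

int main(void)
{
	unsigned long kernel_offset = 0x4000000;	 /* from the panic line */
	unsigned long crash_addr = 0xc0000000062355f0UL; /* from the oops */

	printf("link-time address: 0x%lx\n", crash_addr - kernel_offset);
	return 0;
}
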
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index c82577c4b15d..2dd0d9cb5a20 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -35,7 +35,7 @@ void exc_lvl_early_init(void);
static inline void exc_lvl_early_init(void) { };
#endif
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) || defined(CONFIG_VMAP_STACK)
void emergency_stack_init(void);
#else
static inline void emergency_stack_init(void) { };
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 94517e4a2723..5b49b26eb154 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -44,6 +44,7 @@
#include <asm/asm-prototypes.h>
#include <asm/kdump.h>
#include <asm/feature-fixups.h>
+#include <asm/early_ioremap.h>
#include "setup.h"
@@ -80,6 +81,8 @@ notrace void __init machine_init(u64 dt_ptr)
/* Configure static keys first, now that we're relocated. */
setup_feature_keys();
+ early_ioremap_setup();
+
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
@@ -137,7 +140,7 @@ arch_initcall(ppc_init);
static void *__init alloc_stack(void)
{
- void *ptr = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+ void *ptr = memblock_alloc(THREAD_SIZE, THREAD_ALIGN);
if (!ptr)
panic("cannot allocate %d bytes for stack at %pS\n",
@@ -150,6 +153,9 @@ void __init irqstack_early_init(void)
{
unsigned int i;
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ return;
+
/* interrupt stacks must be in lowmem, we get that for free on ppc32
* as the memblock is limited to lowmem by default */
for_each_possible_cpu(i) {
@@ -158,6 +164,18 @@ void __init irqstack_early_init(void)
}
}
+#ifdef CONFIG_VMAP_STACK
+void *emergency_ctx[NR_CPUS] __ro_after_init;
+
+void __init emergency_stack_init(void)
+{
+ unsigned int i;
+
+ for_each_possible_cpu(i)
+ emergency_ctx[i] = alloc_stack();
+}
+#endif
+
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
void __init exc_lvl_early_init(void)
{
@@ -206,6 +224,6 @@ __init void initialize_cache_info(void)
dcache_bsize = cur_cpu_spec->dcache_bsize;
icache_bsize = cur_cpu_spec->icache_bsize;
ucache_bsize = 0;
- if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601) || IS_ENABLED(CONFIG_E200))
ucache_bsize = icache_bsize = dcache_bsize;
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 44b4c432a273..e05e6dd67ae6 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -65,15 +65,10 @@
#include <asm/hw_irq.h>
#include <asm/feature-fixups.h>
#include <asm/kup.h>
+#include <asm/early_ioremap.h>
#include "setup.h"
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
int spinning_secondaries;
u64 ppc64_pft_size;
@@ -305,7 +300,7 @@ void __init early_setup(unsigned long dt_ptr)
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
- DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
+ udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr);
/*
* Do early initialization using the flattened device
@@ -338,6 +333,8 @@ void __init early_setup(unsigned long dt_ptr)
apply_feature_fixups();
setup_feature_keys();
+ early_ioremap_setup();
+
/* Initialize the hash table or TLB handling */
early_init_mmu();
@@ -362,11 +359,11 @@ void __init early_setup(unsigned long dt_ptr)
*/
this_cpu_enable_ftrace();
- DBG(" <- early_setup()\n");
+ udbg_printf(" <- %s()\n", __func__);
#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
/*
- * This needs to be done *last* (after the above DBG() even)
+ * This needs to be done *last* (after the above udbg_printf() even)
*
* Right after we return from this function, we turn on the MMU
* which means the real-mode access trick that btext does will
@@ -436,8 +433,6 @@ void smp_release_cpus(void)
if (!use_spinloop())
return;
- DBG(" -> smp_release_cpus()\n");
-
/* All secondary cpus are spinning on a common spinloop, release them
* all now so they can start to spin on their individual paca
* spinloops. For non SMP kernels, the secondary cpus never get out
@@ -456,9 +451,7 @@ void smp_release_cpus(void)
break;
udelay(1);
}
- DBG("spinning_secondaries = %d\n", spinning_secondaries);
-
- DBG(" <- smp_release_cpus()\n");
+ pr_debug("spinning_secondaries = %d\n", spinning_secondaries);
}
#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
@@ -551,8 +544,6 @@ void __init initialize_cache_info(void)
struct device_node *cpu = NULL, *l2, *l3 = NULL;
u32 pvr;
- DBG(" -> initialize_cache_info()\n");
-
/*
* All shipping POWER8 machines have a firmware bug that
* puts incorrect information in the device-tree. This will
@@ -576,10 +567,10 @@ void __init initialize_cache_info(void)
*/
if (cpu) {
if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
- DBG("Argh, can't find dcache properties !\n");
+ pr_warn("Argh, can't find dcache properties !\n");
if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
- DBG("Argh, can't find icache properties !\n");
+ pr_warn("Argh, can't find icache properties !\n");
/*
* Try to find the L2 and L3 if any. Assume they are
@@ -604,8 +595,6 @@ void __init initialize_cache_info(void)
cur_cpu_spec->dcache_bsize = dcache_bsize;
cur_cpu_spec->icache_bsize = icache_bsize;
-
- DBG(" <- initialize_cache_info()\n");
}
/*
@@ -644,7 +633,7 @@ static void *__init alloc_stack(unsigned long limit, int cpu)
BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
- ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_SIZE,
+ ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_ALIGN,
MEMBLOCK_LOW_LIMIT, limit,
early_cpu_to_node(cpu));
if (!ptr)
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 1e2276963f6d..e2a46cfed5fd 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -182,7 +182,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
* FIXME: IMHO these tests do not belong in
* arch-dependent code, they are generic.
*/
- ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL);
+ ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
#ifdef CONFIG_KPROBES
/*
* Mark stacktraces with kretprobed functions on them
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 3bfb3888e897..078608ec2e92 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -79,7 +79,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
* sys_select() with the appropriate args. -- Cort
*/
int
-ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp)
+ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
{
if ( (unsigned long)n >= 4096 )
{
@@ -89,7 +89,7 @@ ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
|| __get_user(inp, ((fd_set __user * __user *)(buffer+1)))
|| __get_user(outp, ((fd_set __user * __user *)(buffer+2)))
|| __get_user(exp, ((fd_set __user * __user *)(buffer+3)))
- || __get_user(tvp, ((struct timeval __user * __user *)(buffer+4))))
+ || __get_user(tvp, ((struct __kernel_old_timeval __user * __user *)(buffer+4))))
return -EFAULT;
}
return sys_select(n, inp, outp, exp, tvp);
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 43f736ed47f2..35b61bfc1b1a 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -517,3 +517,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 nospu clone3 ppc_clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
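
Entries 437 and 438 wire the new generic system calls into the powerpc table; until libc grows wrappers they are only reachable through syscall(2). A user-space sketch of openat2 using the number from the table above (these two numbers are expected to be shared across architectures, since post-unification syscalls are allocated uniformly, but treat that as an assumption):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <linux/openat2.h>	/* struct open_how, RESOLVE_* flags */

int main(void)
{
	struct open_how how;
	int fd;

	memset(&how, 0, sizeof(how));
	how.flags = O_RDONLY;
	how.resolve = RESOLVE_NO_SYMLINKS; /* fail on any symlink in the path */

	fd = syscall(437 /* openat2 */, AT_FDCWD, "/etc/hostname",
		     &how, sizeof(how));
	if (fd < 0)
		perror("openat2");
	else
		close(fd);
	return 0;
}

pidfd_getfd (438) is analogous: syscall(438, pidfd, targetfd, 0) duplicates a file descriptor out of another process identified by a pidfd.
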
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index e2147d7c9e72..80a676da11cb 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -19,6 +19,7 @@
#include <asm/smp.h>
#include <asm/pmc.h>
#include <asm/firmware.h>
+#include <asm/svm.h>
#include "cacheinfo.h"
#include "setup.h"
@@ -715,6 +716,23 @@ static struct device_attribute pa6t_attrs[] = {
#endif /* HAS_PPC_PMC_PA6T */
#endif /* HAS_PPC_PMC_CLASSIC */
+#ifdef CONFIG_PPC_SVM
+static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", is_secure_guest());
+}
+static DEVICE_ATTR(svm, 0444, show_svm, NULL);
+
+static void create_svm_file(void)
+{
+ device_create_file(cpu_subsys.dev_root, &dev_attr_svm);
+}
+#else
+static void create_svm_file(void)
+{
+}
+#endif /* CONFIG_PPC_SVM */
+
static int register_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -1058,6 +1076,8 @@ static int __init topology_init(void)
sysfs_create_dscr_default();
#endif /* CONFIG_PPC64 */
+ create_svm_file();
+
return 0;
}
subsys_initcall(topology_init);
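
The attribute lands at /sys/devices/system/cpu/svm with mode 0444, so any process can check whether it is running inside a secure VM:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/devices/system/cpu/svm", "r");
	int svm;

	if (!f)
		return 1;	/* attribute absent on !CONFIG_PPC_SVM kernels */
	if (fscanf(f, "%d", &svm) != 1)
		svm = 0;
	fclose(f);
	printf("running as a %s guest\n", svm ? "secure" : "normal");
	return 0;
}
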
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 694522308cd5..1168e8b37e30 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -232,7 +232,7 @@ static u64 scan_dispatch_log(u64 stop_tb)
* Accumulate stolen time by scanning the dispatch trace log.
* Called on entry from user mode.
*/
-void accumulate_stolen_time(void)
+void notrace accumulate_stolen_time(void)
{
u64 sst, ust;
unsigned long save_irq_soft_mask = irq_soft_mask_return();
@@ -338,7 +338,7 @@ static unsigned long vtime_delta(struct task_struct *tsk,
return stime;
}
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
{
unsigned long stime, stime_scaled, steal_time;
struct cpu_accounting_data *acct = get_accounting(tsk);
@@ -366,7 +366,7 @@ void vtime_account_system(struct task_struct *tsk)
#endif
}
}
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
void vtime_account_idle(struct task_struct *tsk)
{
@@ -395,7 +395,7 @@ static void vtime_flush_scaled(struct task_struct *tsk,
/*
* Account the whole cputime accumulated in the paca
* Must be called with interrupts disabled.
- * Assumes that vtime_account_system/idle() has been called
+ * Assumes that vtime_account_kernel/idle() has been called
* recently (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
@@ -885,7 +885,7 @@ static notrace u64 timebase_read(struct clocksource *cs)
void update_vsyscall(struct timekeeper *tk)
{
- struct timespec xt;
+ struct timespec64 xt;
struct clocksource *clock = tk->tkr_mono.clock;
u32 mult = tk->tkr_mono.mult;
u32 shift = tk->tkr_mono.shift;
@@ -957,8 +957,10 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->tb_to_xs = new_tb_to_xs;
vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec;
vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
- vdso_data->stamp_xtime = xt;
+ vdso_data->stamp_xtime_sec = xt.tv_sec;
+ vdso_data->stamp_xtime_nsec = xt.tv_nsec;
vdso_data->stamp_sec_fraction = frac_sec;
+ vdso_data->hrtimer_res = hrtimer_resolution;
smp_wmb();
++(vdso_data->tb_update_count);
}
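
update_vsyscall() publishes the split stamp_xtime_sec/stamp_xtime_nsec pair (and the new hrtimer_res) under tb_update_count: the counter is odd while an update is in flight, and readers retry if it changed across their reads, which is the loop the 32-bit coarse-clock assembly below implements. The same protocol in C, with hypothetical field names and the memory barriers (smp_wmb() and friends) omitted for brevity:

#include <stdint.h>

/* Hypothetical mirror of the vdso datapage fields used here. */
struct demo_vdso_data {
	volatile uint32_t update_count;	/* odd => update in progress */
	uint64_t stamp_sec;
	uint64_t stamp_nsec;
};

/* Seqcount-style read: retry while a writer is active or has run. */
static void demo_read_stamp(const struct demo_vdso_data *d,
			    uint64_t *sec, uint64_t *nsec)
{
	uint32_t seq;

	do {
		while ((seq = d->update_count) & 1)
			;	/* writer mid-update, spin */
		*sec = d->stamp_sec;
		*nsec = d->stamp_nsec;
	} while (d->update_count != seq);
}
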
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index be1ca98fce5c..7ea0ca044b65 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -944,7 +944,8 @@ int ftrace_disable_ftrace_graph_caller(void)
* Hook the return address and push it in the stack of return addrs
* in current thread info. Return the address we want to divert to.
*/
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+ unsigned long sp)
{
unsigned long return_hooker;
@@ -956,7 +957,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
return_hooker = ppc_function_entry(return_to_handler);
- if (!function_graph_enter(parent, ip, 0, NULL))
+ if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
parent = return_hooker;
out:
return parent;
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S
index 183f608efb81..e023ae59c429 100644
--- a/arch/powerpc/kernel/trace/ftrace_32.S
+++ b/arch/powerpc/kernel/trace/ftrace_32.S
@@ -50,6 +50,7 @@ _GLOBAL(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
+ addi r5, r1, 48
/* load r4 with local address */
lwz r4, 44(r1)
subi r4, r4, MCOUNT_INSN_SIZE
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 74acbf16a666..f9fd5f743eba 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -294,6 +294,7 @@ _GLOBAL(ftrace_graph_caller)
std r2, 24(r1)
ld r2, PACATOC(r13) /* get kernel TOC in r2 */
+ addi r5, r1, 112
mfctr r4 /* ftrace_caller has moved local addr here */
std r4, 40(r1)
mflr r3 /* ftrace_caller has restored LR from stack */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S
index e41a7d13c99c..6708e24db0ab 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S
@@ -41,6 +41,7 @@ _GLOBAL(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
+ addi r5, r1, 112
/* load r4 with local address */
ld r4, 128(r1)
subi r4, r4, MCOUNT_INSN_SIZE
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 11caa0291254..82a3438300fd 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -250,15 +250,22 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
}
NOKPROBE_SYMBOL(oops_end);
+static char *get_mmu_str(void)
+{
+ if (early_radix_enabled())
+ return " MMU=Radix";
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return " MMU=Hash";
+ return "";
+}
+
static int __die(const char *str, struct pt_regs *regs, long err)
{
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
- printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s%s %s\n",
+ printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
- PAGE_SIZE / 1024,
- early_radix_enabled() ? " MMU=Radix" : "",
- early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? " MMU=Hash" : "",
+ PAGE_SIZE / 1024, get_mmu_str(),
IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
@@ -472,6 +479,7 @@ void system_reset_exception(struct pt_regs *regs)
if (debugger(regs))
goto out;
+ kmsg_dump(KMSG_DUMP_OOPS);
/*
* A system reset is a request to dump, so we always send
* it through the crashdump code (if fadump or kdump are
@@ -1629,6 +1637,15 @@ void StackOverflow(struct pt_regs *regs)
panic("kernel stack overflow");
}
+void stack_overflow_exception(struct pt_regs *regs)
+{
+ enum ctx_state prev_state = exception_enter();
+
+ die("Kernel stack overflow", regs, SIGSEGV);
+
+ exception_exit(prev_state);
+}
+
void kernel_fp_unavailable_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S
new file mode 100644
index 000000000000..07296bc39166
--- /dev/null
+++ b/arch/powerpc/kernel/ucall.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Generic code to perform an ultravisor call.
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#include <asm/ppc_asm.h>
+#include <asm/export.h>
+
+_GLOBAL(ucall_norets)
+EXPORT_SYMBOL_GPL(ucall_norets)
+ sc 2 /* Invoke the ultravisor */
+ blr /* Return r3 = status */
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index a384e7c8b01c..01595e8cafe7 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -120,13 +120,15 @@ int udbg_write(const char *s, int n)
#define UDBG_BUFSIZE 256
void udbg_printf(const char *fmt, ...)
{
- char buf[UDBG_BUFSIZE];
- va_list args;
+ if (udbg_putc) {
+ char buf[UDBG_BUFSIZE];
+ va_list args;
- va_start(args, fmt);
- vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
- udbg_puts(buf);
- va_end(args);
+ va_start(args, fmt);
+ vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
+ udbg_puts(buf);
+ va_end(args);
+ }
}
void __init udbg_progress(char *s, unsigned short hex)
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index d60598113a9f..b9a108411c0d 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -94,28 +94,6 @@ static struct vdso_patch_def vdso_patches[] = {
CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
},
-#ifdef CONFIG_PPC32
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_gettimeofday", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_clock_gettime", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_clock_getres", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_get_tbfreq", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_time", NULL
- },
-#endif
};
/*
@@ -750,11 +728,6 @@ static int __init vdso_init(void)
*/
vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
-#else
- vdso_data->dcache_block_size = L1_CACHE_BYTES;
- vdso_data->dcache_log_block_size = L1_CACHE_SHIFT;
- vdso_data->icache_block_size = L1_CACHE_BYTES;
- vdso_data->icache_log_block_size = L1_CACHE_SHIFT;
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 06f54d947057..e147bbdc12cd 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -2,9 +2,7 @@
# List of files in the vdso, has to be asm only for now
-obj-vdso32-$(CONFIG_PPC64) = getcpu.o
-obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \
- $(obj-vdso32-y)
+obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
# Build rules
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
index 7f882e7b9f43..3440ddf21c8b 100644
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -8,7 +8,9 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
#include <asm/asm-offsets.h>
+#include <asm/cache.h>
.text
@@ -22,42 +24,62 @@
*/
V_FUNCTION_BEGIN(__kernel_sync_dicache)
.cfi_startproc
+#ifdef CONFIG_PPC64
mflr r12
.cfi_register lr,r12
- mr r11,r3
- bl __get_datapage@local
+ get_datapage r10, r0
mtlr r12
- mr r10,r3
+#endif
+#ifdef CONFIG_PPC64
lwz r7,CFG_DCACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+#else
+ li r5, L1_CACHE_BYTES - 1
+#endif
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
+#ifdef CONFIG_PPC64
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
srw. r8,r8,r9 /* compute line count */
+#else
+ srwi. r8, r8, L1_CACHE_SHIFT
+ mr r7, r6
+#endif
crclr cr0*4+so
beqlr /* nothing to do? */
mtctr r8
1: dcbst 0,r6
+#ifdef CONFIG_PPC64
add r6,r6,r7
+#else
+ addi r6, r6, L1_CACHE_BYTES
+#endif
bdnz 1b
sync
/* Now invalidate the instruction cache */
+#ifdef CONFIG_PPC64
lwz r7,CFG_ICACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5
lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
srw. r8,r8,r9 /* compute line count */
crclr cr0*4+so
beqlr /* nothing to do? */
+#endif
mtctr r8
+#ifdef CONFIG_PPC64
2: icbi 0,r6
add r6,r6,r7
+#else
+2: icbi 0, r7
+ addi r7, r7, L1_CACHE_BYTES
+#endif
bdnz 2b
isync
li r3,0
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 6984125b9fc0..217bb630f8f9 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -10,35 +10,13 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
.text
.global __kernel_datapage_offset;
__kernel_datapage_offset:
.long 0
-V_FUNCTION_BEGIN(__get_datapage)
- .cfi_startproc
- /* We don't want that exposed or overridable as we want other objects
- * to be able to bl directly to here
- */
- .protected __get_datapage
- .hidden __get_datapage
-
- mflr r0
- .cfi_register lr,r0
-
- bcl 20,31,data_page_branch
-data_page_branch:
- mflr r3
- mtlr r0
- addi r3, r3, __kernel_datapage_offset-data_page_branch
- lwz r0,0(r3)
- .cfi_restore lr
- add r3,r0,r3
- blr
- .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
/*
* void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
*
@@ -52,11 +30,10 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- mr r4,r3
- bl __get_datapage@local
+ mr. r4,r3
+ get_datapage r3, r0
mtlr r12
addi r3,r3,CFG_SYSCALL_MAP32
- cmpli cr0,r4,0
beqlr
li r0,NR_syscalls
stw r0,0(r4)
@@ -70,11 +47,12 @@ V_FUNCTION_END(__kernel_get_syscall_map)
*
* returns the timebase frequency in HZ
*/
+#ifndef CONFIG_PPC_BOOK3S_601
V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- bl __get_datapage@local
+ get_datapage r3, r0
lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
lwz r3,CFG_TB_TICKS_PER_SEC(r3)
mtlr r12
@@ -82,3 +60,4 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_tbfreq)
+#endif
diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S
index 63e914539e1a..ff5e214fec41 100644
--- a/arch/powerpc/kernel/vdso32/getcpu.S
+++ b/arch/powerpc/kernel/vdso32/getcpu.S
@@ -15,6 +15,7 @@
* int __kernel_getcpu(unsigned *cpu, unsigned *node);
*
*/
+#if defined(CONFIG_PPC64)
V_FUNCTION_BEGIN(__kernel_getcpu)
.cfi_startproc
mfspr r5,SPRN_SPRG_VDSO_READ
@@ -24,10 +25,26 @@ V_FUNCTION_BEGIN(__kernel_getcpu)
rlwinm r7,r5,16,31-15,31-0
beq cr0,1f
stw r6,0(r3)
-1: beq cr1,2f
- stw r7,0(r4)
-2: crclr cr0*4+so
+1: crclr cr0*4+so
li r3,0 /* always success */
+ beqlr cr1
+ stw r7,0(r4)
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_getcpu)
+#elif !defined(CONFIG_SMP)
+V_FUNCTION_BEGIN(__kernel_getcpu)
+ .cfi_startproc
+ cmpwi cr0, r3, 0
+ cmpwi cr1, r4, 0
+ li r5, 0
+ beq cr0, 1f
+ stw r5, 0(r3)
+1: li r3, 0 /* always success */
+ crclr cr0*4+so
+ beqlr cr1
+ stw r5, 0(r4)
blr
.cfi_endproc
V_FUNCTION_END(__kernel_getcpu)
+#endif
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index becd9f8767ed..a3951567118a 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -9,16 +9,15 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
/* Offset for the low 32-bit part of a field of long type */
#ifdef CONFIG_PPC64
#define LOPART 4
-#define TSPEC_TV_SEC TSPC64_TV_SEC+LOPART
#else
#define LOPART 0
-#define TSPEC_TV_SEC TSPC32_TV_SEC
#endif
.text
@@ -33,28 +32,26 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
mflr r12
.cfi_register lr,r12
- mr r10,r3 /* r10 saves tv */
+ mr. r10,r3 /* r10 saves tv */
mr r11,r4 /* r11 saves tz */
- bl __get_datapage@local /* get data page */
- mr r9, r3 /* datapage ptr in r9 */
- cmplwi r10,0 /* check if tv is NULL */
+ get_datapage r9, r0
beq 3f
- lis r7,1000000@ha /* load up USEC_PER_SEC */
- addi r7,r7,1000000@l /* so we get microseconds in r4 */
+ LOAD_REG_IMMEDIATE(r7, 1000000) /* load up USEC_PER_SEC */
bl __do_get_tspec@local /* get sec/usec from tb & kernel */
stw r3,TVAL32_TV_SEC(r10)
stw r4,TVAL32_TV_USEC(r10)
3: cmplwi r11,0 /* check if tz is NULL */
- beq 1f
+ mtlr r12
+ crclr cr0*4+so
+ li r3,0
+ beqlr
+
lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */
lwz r5,CFG_TZ_DSTTIME(r9)
stw r4,TZONE_TZ_MINWEST(r11)
stw r5,TZONE_TZ_DSTTIME(r11)
-1: mtlr r12
- crclr cr0*4+so
- li r3,0
blr
.cfi_endproc
V_FUNCTION_END(__kernel_gettimeofday)
@@ -71,17 +68,23 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
cmpli cr0,r3,CLOCK_REALTIME
cmpli cr1,r3,CLOCK_MONOTONIC
cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
+
+ cmpli cr5,r3,CLOCK_REALTIME_COARSE
+ cmpli cr6,r3,CLOCK_MONOTONIC_COARSE
+ cror cr5*4+eq,cr5*4+eq,cr6*4+eq
+
+ cror cr0*4+eq,cr0*4+eq,cr5*4+eq
+ bne cr0, .Lgettime_fallback
mflr r12 /* r12 saves lr */
.cfi_register lr,r12
mr r11,r4 /* r11 saves tp */
- bl __get_datapage@local /* get data page */
- mr r9,r3 /* datapage ptr in r9 */
- lis r7,NSEC_PER_SEC@h /* want nanoseconds */
- ori r7,r7,NSEC_PER_SEC@l
-50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
- bne cr1,80f /* not monotonic -> all done */
+ get_datapage r9, r0
+ LOAD_REG_IMMEDIATE(r7, NSEC_PER_SEC) /* load up NSEC_PER_SEC */
+ beq cr5, .Lcoarse_clocks
+.Lprecise_clocks:
+ bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
+ bne cr1, .Lfinish /* not monotonic -> all done */
/*
* CLOCK_MONOTONIC
@@ -105,12 +108,53 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
add r9,r9,r0
lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
cmpl cr0,r8,r0 /* check if updated */
- bne- 50b
+ bne- .Lprecise_clocks
+ b .Lfinish_monotonic
+
+ /*
+ * For coarse clocks we get data directly from the vdso data page, so
+ * we don't need to call __do_get_tspec, but we still need to do the
+ * counter trick.
+ */
+.Lcoarse_clocks:
+ lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- .Lcoarse_clocks
+ add r9,r9,r0 /* r0 is already 0 */
+
+ /*
+ * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
+ * too
+ */
+ lwz r3,STAMP_XTIME_SEC+LOPART(r9)
+ lwz r4,STAMP_XTIME_NSEC+LOPART(r9)
+ bne cr6,1f
+
+ /* CLOCK_MONOTONIC_COARSE */
+ lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9)
+ lwz r6,WTOM_CLOCK_NSEC(r9)
+
+ /* check if counter has updated */
+ or r0,r6,r5
+1: or r0,r0,r3
+ or r0,r0,r4
+ xor r0,r0,r0
+ add r3,r3,r0
+ lwz r0,CFG_TB_UPDATE_COUNT+LOPART(r9)
+ cmpl cr0,r0,r8 /* check if updated */
+ bne- .Lcoarse_clocks
+
+ /* Counter has not updated, so continue calculating proper values for
+ * sec and nsec if monotonic coarse, or just return with the proper
+ * values for realtime.
+ */
+ bne cr6, .Lfinish
/* Calculate and store result. Note that this mimics the C code,
* which may cause funny results if nsec goes negative... is that
* possible at all ?
*/
+.Lfinish_monotonic:
add r3,r3,r5
add r4,r4,r6
cmpw cr0,r4,r7
@@ -118,11 +162,12 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
blt 1f
subf r4,r7,r4
addi r3,r3,1
-1: bge cr1,80f
+1: bge cr1, .Lfinish
addi r3,r3,-1
add r4,r4,r7
-80: stw r3,TSPC32_TV_SEC(r11)
+.Lfinish:
+ stw r3,TSPC32_TV_SEC(r11)
stw r4,TSPC32_TV_NSEC(r11)
mtlr r12
@@ -133,7 +178,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
/*
* syscall fallback
*/
-99:
+.Lgettime_fallback:
li r0,__NR_clock_gettime
.cfi_restore lr
sc
@@ -151,27 +196,33 @@ V_FUNCTION_END(__kernel_clock_gettime)
V_FUNCTION_BEGIN(__kernel_clock_getres)
.cfi_startproc
/* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
+ cmplwi cr0, r3, CLOCK_MAX
+ cmpwi cr1, r3, CLOCK_REALTIME_COARSE
+ cmpwi cr7, r3, CLOCK_MONOTONIC_COARSE
+ bgt cr0, 99f
+ LOAD_REG_IMMEDIATE(r5, KTIME_LOW_RES)
+ beq cr1, 1f
+ beq cr7, 1f
- li r3,0
+ mflr r12
+ .cfi_register lr,r12
+ get_datapage r3, r0
+ lwz r5, CLOCK_HRTIMER_RES(r3)
+ mtlr r12
+1: li r3,0
cmpli cr0,r4,0
crclr cr0*4+so
beqlr
- lis r5,CLOCK_REALTIME_RES@h
- ori r5,r5,CLOCK_REALTIME_RES@l
stw r3,TSPC32_TV_SEC(r4)
stw r5,TSPC32_TV_NSEC(r4)
blr
/*
- * syscall fallback
+ * invalid clock
*/
99:
- li r0,__NR_clock_getres
- sc
+ li r3, EINVAL
+ crset so
blr
.cfi_endproc
V_FUNCTION_END(__kernel_clock_getres)
@@ -189,16 +240,15 @@ V_FUNCTION_BEGIN(__kernel_time)
.cfi_register lr,r12
mr r11,r3 /* r11 holds t */
- bl __get_datapage@local
- mr r9, r3 /* datapage ptr in r9 */
+ get_datapage r9, r0
- lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
+ lwz r3,STAMP_XTIME_SEC+LOPART(r9)
cmplwi r11,0 /* check if t is NULL */
- beq 2f
- stw r3,0(r11) /* store result at *t */
-2: mtlr r12
+ mtlr r12
crclr cr0*4+so
+ beqlr
+ stw r3,0(r11) /* store result at *t */
blr
.cfi_endproc
V_FUNCTION_END(__kernel_time)
@@ -268,7 +318,7 @@ __do_get_tspec:
* as a 32.32 fixed-point number in r3 and r4.
* Load & add the xtime stamp.
*/
- lwz r5,STAMP_XTIME+TSPEC_TV_SEC(r9)
+ lwz r5,STAMP_XTIME_SEC+LOPART(r9)
lwz r6,STAMP_SEC_FRAC(r9)
addc r4,r4,r6
adde r3,r3,r5
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 099a6db14e67..5206c2eb2a1d 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -144,18 +144,20 @@ VERSION
__kernel_datapage_offset;
__kernel_get_syscall_map;
+#ifndef CONFIG_PPC_BOOK3S_601
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
+ __kernel_time;
__kernel_get_tbfreq;
+#endif
__kernel_sync_dicache;
__kernel_sync_dicache_p5;
__kernel_sigtramp32;
__kernel_sigtramp_rt32;
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) || !defined(CONFIG_SMP)
__kernel_getcpu;
#endif
- __kernel_time;
local: *;
};
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
index 3f92561a64c4..526f5ba2593e 100644
--- a/arch/powerpc/kernel/vdso64/cacheflush.S
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -35,7 +35,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
- srw. r8,r8,r9 /* compute line count */
+ srd. r8,r8,r9 /* compute line count */
crclr cr0*4+so
beqlr /* nothing to do? */
mtctr r8
@@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
subf r8,r6,r4 /* compute length */
add r8,r8,r5
lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
- srw. r8,r8,r9 /* compute line count */
+ srd. r8,r8,r9 /* compute line count */
crclr cr0*4+so
beqlr /* nothing to do? */
mtctr r8
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 07bfe33fe874..1c9a04703250 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -116,8 +116,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
* CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
* too
*/
- ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
- ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3)
+ ld r4,STAMP_XTIME_SEC(r3)
+ ld r5,STAMP_XTIME_NSEC(r3)
bne cr6,75f
/* CLOCK_MONOTONIC_COARSE */
@@ -186,12 +186,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
cror cr0*4+eq,cr0*4+eq,cr1*4+eq
bne cr0,99f
+ mflr r12
+ .cfi_register lr,r12
+ bl V_LOCAL_FUNC(__get_datapage)
+ lwz r5, CLOCK_HRTIMER_RES(r3)
+ mtlr r12
li r3,0
cmpldi cr0,r4,0
crclr cr0*4+so
beqlr
- lis r5,CLOCK_REALTIME_RES@h
- ori r5,r5,CLOCK_REALTIME_RES@l
std r3,TSPC64_TV_SEC(r4)
std r5,TSPC64_TV_NSEC(r4)
blr
@@ -220,7 +223,7 @@ V_FUNCTION_BEGIN(__kernel_time)
mr r11,r3 /* r11 holds t */
bl V_LOCAL_FUNC(__get_datapage)
- ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+ ld r4,STAMP_XTIME_SEC(r3)
cmpldi r11,0 /* check if t is NULL */
beq 2f
@@ -265,7 +268,7 @@ V_FUNCTION_BEGIN(__do_get_tspec)
mulhdu r6,r6,r5 /* in units of 2^-32 seconds */
/* Add stamp since epoch */
- ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+ ld r4,STAMP_XTIME_SEC(r3)
lwz r5,STAMP_SEC_FRAC(r3)
or r0,r4,r5
or r0,r0,r6
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 8eb867dbad5f..25c14a0981bf 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -67,6 +67,9 @@ _GLOBAL(load_up_altivec)
#ifdef CONFIG_PPC32
mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
oris r9,r9,MSR_VEC@h
+#ifdef CONFIG_VMAP_STACK
+ tovirt(r5, r5)
+#endif
#else
ld r4,PACACURRENT(r13)
addi r5,r4,THREAD /* Get THREAD */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 060a1acd7c6d..b4c89a1acebb 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -6,6 +6,8 @@
#endif
#define BSS_FIRST_SECTIONS *(.bss.prominit)
+#define EMITS_PT_NOTE
+#define RO_EXCEPTION_TABLE_ALIGN 0
#include <asm/page.h>
#include <asm-generic/vmlinux.lds.h>
@@ -18,22 +20,8 @@
ENTRY(_stext)
PHDRS {
- kernel PT_LOAD FLAGS(7); /* RWX */
- notes PT_NOTE FLAGS(0);
- dummy PT_NOTE FLAGS(0);
-
- /* binutils < 2.18 has a bug that makes it misbehave when taking an
- ELF file with all segments at load address 0 as input. This
- happens when running "strip" on vmlinux, because of the AT() magic
- in this linker script. People using GCC >= 4.2 won't run into
- this problem, because the "build-id" support will put some data
- into the "notes" segment (at a non-zero load address).
-
- To work around this, we force some data into both the "dummy"
- segment and the kernel segment, so the dummy segment will get a
- non-zero load address. It's not enough to always create the
- "notes" segment, since if nothing gets assigned to it, its load
- address will be zero. */
+ text PT_LOAD FLAGS(7); /* RWX */
+ note PT_NOTE FLAGS(0);
}
#ifdef CONFIG_PPC64
@@ -77,7 +65,7 @@ SECTIONS
#else /* !CONFIG_PPC64 */
HEAD_TEXT
#endif
- } :kernel
+ } :text
__head_end = .;
@@ -126,7 +114,7 @@ SECTIONS
__got2_end = .;
#endif /* CONFIG_PPC32 */
- } :kernel
+ } :text
. = ALIGN(ETEXT_ALIGN_SIZE);
_etext = .;
@@ -175,17 +163,6 @@ SECTIONS
__stop__btb_flush_fixup = .;
}
#endif
- EXCEPTION_TABLE(0)
-
- NOTES :kernel :notes
-
- /* The dummy segment contents for the bug workaround mentioned above
- near PHDRS. */
- .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
- LONG(0)
- LONG(0)
- LONG(0)
- } :kernel :dummy
/*
* Init sections discarded at runtime
@@ -200,7 +177,7 @@ SECTIONS
#ifdef CONFIG_PPC64
*(.tramp.ftrace.init);
#endif
- } :kernel
+ } :text
/* .exit.text is discarded at runtime, not link time,
* to deal with references from __bug_table
@@ -346,7 +323,7 @@ SECTIONS
#endif
/* The initial task and kernel stack */
- INIT_TASK_DATA_SECTION(THREAD_SIZE)
+ INIT_TASK_DATA_SECTION(THREAD_ALIGN)
.data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) {
PAGE_ALIGNED_DATA(PAGE_SIZE)